forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/drivers/irqchip/irq-gic-v3-its.c
@@ -1,31 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved.
  * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
 #include <linux/bitmap.h>
 #include <linux/cpu.h>
+#include <linux/crash_dump.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
+#include <linux/efi.h>
 #include <linux/interrupt.h>
+#include <linux/iopoll.h>
 #include <linux/irqdomain.h>
 #include <linux/list.h>
-#include <linux/list_sort.h>
 #include <linux/log2.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
 #include <linux/of.h>
@@ -51,6 +44,7 @@
 #define ITS_FLAGS_WORKAROUND_CAVIUM_23144	(1ULL << 2)

 #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)
+#define RDIST_FLAGS_RD_TABLES_PREALLOCATED	(1 << 1)

 static u32 lpi_id_bits;

@@ -63,7 +57,7 @@
 #define LPI_PROPBASE_SZ		ALIGN(BIT(LPI_NRBITS), SZ_64K)
 #define LPI_PENDBASE_SZ		ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)

-#define LPI_PROP_DEFAULT_PRIO	0xa0
+#define LPI_PROP_DEFAULT_PRIO	GICD_INT_DEF_PRI

 /*
  * Collection structure - just an ID, and a redistributor address to
@@ -102,6 +96,7 @@
	struct mutex		dev_alloc_lock;
	struct list_head	entry;
	void __iomem		*base;
+	void __iomem		*sgir_base;
	phys_addr_t		phys_base;
	struct its_cmd_block	*cmd_base;
	struct its_cmd_block	*cmd_write;
@@ -109,24 +104,36 @@
	struct its_collection	*collections;
	struct fwnode_handle	*fwnode_handle;
	u64			(*get_msi_base)(struct its_device *its_dev);
+	u64			typer;
	u64			cbaser_save;
	u32			ctlr_save;
+	u32			mpidr;
	struct list_head	its_device_list;
	u64			flags;
	unsigned long		list_nr;
-	u32			ite_size;
-	u32			device_ids;
	int			numa_node;
	unsigned int		msi_domain_flags;
	u32			pre_its_base;	/* for Socionext Synquacer */
-	bool			is_v4;
	int			vlpi_redist_offset;
 };
+
+#define is_v4(its)		(!!((its)->typer & GITS_TYPER_VLPIS))
+#define is_v4_1(its)		(!!((its)->typer & GITS_TYPER_VMAPP))
+#define device_ids(its)		(FIELD_GET(GITS_TYPER_DEVBITS, (its)->typer) + 1)

 #define ITS_ITT_ALIGN		SZ_256

 /* The maximum number of VPEID bits supported by VLPI commands */
-#define ITS_MAX_VPEID_BITS	(16)
+#define ITS_MAX_VPEID_BITS						\
+	({								\
+		int nvpeid = 16;					\
+		if (gic_rdists->has_rvpeid &&				\
+		    gic_rdists->gicd_typer2 & GICD_TYPER2_VIL)		\
+			nvpeid = 1 + (gic_rdists->gicd_typer2 &		\
+				      GICD_TYPER2_VID);			\
+									\
+		nvpeid;							\
+	})
 #define ITS_MAX_VPEID		(1 << (ITS_MAX_VPEID_BITS))

 /* Convert page order to size in bytes */
@@ -137,7 +144,7 @@
	u16			*col_map;
	irq_hw_number_t		lpi_base;
	int			nr_lpis;
-	struct mutex		vlpi_lock;
+	raw_spinlock_t		vlpi_lock;
	struct its_vm		*vm;
	struct its_vlpi_map	*vlpi_maps;
	int			nr_vlpis;
@@ -167,6 +174,13 @@
	int			next_victim;
 } vpe_proxy;

+struct cpu_lpi_count {
+	atomic_t	managed;
+	atomic_t	unmanaged;
+};
+
+static DEFINE_PER_CPU(struct cpu_lpi_count, cpu_lpi_count);
+
 static LIST_HEAD(its_nodes);
 static DEFINE_RAW_SPINLOCK(its_lock);
 static struct rdists *gic_rdists;
@@ -179,8 +193,18 @@
 static DEFINE_IDA(its_vpeid_ida);

 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
+#define gic_data_rdist_cpu(cpu)		(per_cpu_ptr(gic_rdists->rdist, cpu))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
 #define gic_data_rdist_vlpi_base()	(gic_data_rdist_rd_base() + SZ_128K)
+
+/*
+ * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we
+ * always have vSGIs mapped.
+ */
+static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its)
+{
+	return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]);
+}

 static u16 get_its_list(struct its_vm *vm)
 {
@@ -188,14 +212,20 @@
	unsigned long its_list = 0;

	list_for_each_entry(its, &its_nodes, entry) {
-		if (!its->is_v4)
+		if (!is_v4(its))
			continue;

-		if (vm->vlpi_count[its->list_nr])
+		if (require_its_list_vmovp(vm, its))
			__set_bit(its->list_nr, &its_list);
	}

	return (u16)its_list;
+}
+
+static inline u32 its_get_event_id(struct irq_data *d)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	return d->hwirq - its_dev->event_map.lpi_base;
 }

 static struct its_collection *dev_event_to_col(struct its_device *its_dev,
@@ -204,6 +234,64 @@
	struct its_node *its = its_dev->its;

	return its->collections + its_dev->event_map.col_map[event];
+}
+
+static struct its_vlpi_map *dev_event_to_vlpi_map(struct its_device *its_dev,
+						  u32 event)
+{
+	if (WARN_ON_ONCE(event >= its_dev->event_map.nr_lpis))
+		return NULL;
+
+	return &its_dev->event_map.vlpi_maps[event];
+}
+
+static struct its_vlpi_map *get_vlpi_map(struct irq_data *d)
+{
+	if (irqd_is_forwarded_to_vcpu(d)) {
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+		u32 event = its_get_event_id(d);
+
+		return dev_event_to_vlpi_map(its_dev, event);
+	}
+
+	return NULL;
+}
+
+static int vpe_to_cpuid_lock(struct its_vpe *vpe, unsigned long *flags)
+{
+	raw_spin_lock_irqsave(&vpe->vpe_lock, *flags);
+	return vpe->col_idx;
+}
+
+static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
+}
+
+static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+	int cpu;
+
+	if (map) {
+		cpu = vpe_to_cpuid_lock(map->vpe, flags);
+	} else {
+		/* Physical LPIs are already locked via the irq_desc lock */
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+		cpu = its_dev->event_map.col_map[its_get_event_id(d)];
+		/* Keep GCC quiet... */
+		*flags = 0;
+	}
+
+	return cpu;
+}
+
+static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+
+	if (map)
+		vpe_to_cpuid_unlock(map->vpe, flags);
 }

 static struct its_collection *valid_col(struct its_collection *col)
@@ -305,6 +393,19 @@
			u16 seq_num;
			u16 its_list;
		} its_vmovp_cmd;
+
+		struct {
+			struct its_vpe *vpe;
+		} its_invdb_cmd;
+
+		struct {
+			struct its_vpe *vpe;
+			u8 sgi;
+			u8 priority;
+			bool enable;
+			bool group;
+			bool clear;
+		} its_vsgi_cmd;
	};
 };

@@ -312,7 +413,10 @@
  * The ITS command block, which is what the ITS actually parses.
  */
 struct its_cmd_block {
-	u64	raw_cmd[4];
+	union {
+		u64	raw_cmd[4];
+		__le64	raw_cmd_le[4];
+	};
 };

 #define ITS_CMD_QUEUE_SZ		SZ_64K
@@ -418,13 +522,70 @@
	its_mask_encode(&cmd->raw_cmd[3], vpt_size, 4, 0);
 }

+static void its_encode_vconf_addr(struct its_cmd_block *cmd, u64 vconf_pa)
+{
+	its_mask_encode(&cmd->raw_cmd[0], vconf_pa >> 16, 51, 16);
+}
+
+static void its_encode_alloc(struct its_cmd_block *cmd, bool alloc)
+{
+	its_mask_encode(&cmd->raw_cmd[0], alloc, 8, 8);
+}
+
+static void its_encode_ptz(struct its_cmd_block *cmd, bool ptz)
+{
+	its_mask_encode(&cmd->raw_cmd[0], ptz, 9, 9);
+}
+
+static void its_encode_vmapp_default_db(struct its_cmd_block *cmd,
+					u32 vpe_db_lpi)
+{
+	its_mask_encode(&cmd->raw_cmd[1], vpe_db_lpi, 31, 0);
+}
+
+static void its_encode_vmovp_default_db(struct its_cmd_block *cmd,
+					u32 vpe_db_lpi)
+{
+	its_mask_encode(&cmd->raw_cmd[3], vpe_db_lpi, 31, 0);
+}
+
+static void its_encode_db(struct its_cmd_block *cmd, bool db)
+{
+	its_mask_encode(&cmd->raw_cmd[2], db, 63, 63);
+}
+
+static void its_encode_sgi_intid(struct its_cmd_block *cmd, u8 sgi)
+{
+	its_mask_encode(&cmd->raw_cmd[0], sgi, 35, 32);
+}
+
+static void its_encode_sgi_priority(struct its_cmd_block *cmd, u8 prio)
+{
+	its_mask_encode(&cmd->raw_cmd[0], prio >> 4, 23, 20);
+}
+
+static void its_encode_sgi_group(struct its_cmd_block *cmd, bool grp)
+{
+	its_mask_encode(&cmd->raw_cmd[0], grp, 10, 10);
+}
+
+static void its_encode_sgi_clear(struct its_cmd_block *cmd, bool clr)
+{
+	its_mask_encode(&cmd->raw_cmd[0], clr, 9, 9);
+}
+
+static void its_encode_sgi_enable(struct its_cmd_block *cmd, bool en)
+{
+	its_mask_encode(&cmd->raw_cmd[0], en, 8, 8);
+}
+
 static inline void its_fixup_cmd(struct its_cmd_block *cmd)
 {
	/* Let's fixup BE commands */
-	cmd->raw_cmd[0] = cpu_to_le64(cmd->raw_cmd[0]);
-	cmd->raw_cmd[1] = cpu_to_le64(cmd->raw_cmd[1]);
-	cmd->raw_cmd[2] = cpu_to_le64(cmd->raw_cmd[2]);
-	cmd->raw_cmd[3] = cpu_to_le64(cmd->raw_cmd[3]);
+	cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+	cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+	cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+	cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
 }

 static struct its_collection *its_build_mapd_cmd(struct its_node *its,
@@ -601,19 +762,45 @@
					   struct its_cmd_block *cmd,
					   struct its_cmd_desc *desc)
 {
-	unsigned long vpt_addr;
+	unsigned long vpt_addr, vconf_addr;
	u64 target;
-
-	vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page));
-	target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset;
+	bool alloc;

	its_encode_cmd(cmd, GITS_CMD_VMAPP);
	its_encode_vpeid(cmd, desc->its_vmapp_cmd.vpe->vpe_id);
	its_encode_valid(cmd, desc->its_vmapp_cmd.valid);
+
+	if (!desc->its_vmapp_cmd.valid) {
+		if (is_v4_1(its)) {
+			alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
+			its_encode_alloc(cmd, alloc);
+		}
+
+		goto out;
+	}
+
+	vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page));
+	target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset;
+
	its_encode_target(cmd, target);
	its_encode_vpt_addr(cmd, vpt_addr);
	its_encode_vpt_size(cmd, LPI_NRBITS - 1);

+	if (!is_v4_1(its))
+		goto out;
+
+	vconf_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->its_vm->vprop_page));
+
+	alloc = !atomic_fetch_inc(&desc->its_vmapp_cmd.vpe->vmapp_count);
+
+	its_encode_alloc(cmd, alloc);
+
+	/* We can only signal PTZ when alloc==1. Why do we have two bits? */
+	its_encode_ptz(cmd, alloc);
+	its_encode_vconf_addr(cmd, vconf_addr);
+	its_encode_vmapp_default_db(cmd, desc->its_vmapp_cmd.vpe->vpe_db_lpi);
+
+out:
	its_fixup_cmd(cmd);

	return valid_vpe(its, desc->its_vmapp_cmd.vpe);
@@ -625,7 +812,7 @@
 {
	u32 db;

-	if (desc->its_vmapti_cmd.db_enabled)
+	if (!is_v4_1(its) && desc->its_vmapti_cmd.db_enabled)
		db = desc->its_vmapti_cmd.vpe->vpe_db_lpi;
	else
		db = 1023;
@@ -648,7 +835,7 @@
 {
	u32 db;

-	if (desc->its_vmovi_cmd.db_enabled)
+	if (!is_v4_1(its) && desc->its_vmovi_cmd.db_enabled)
		db = desc->its_vmovi_cmd.vpe->vpe_db_lpi;
	else
		db = 1023;
@@ -678,9 +865,103 @@
	its_encode_vpeid(cmd, desc->its_vmovp_cmd.vpe->vpe_id);
	its_encode_target(cmd, target);

+	if (is_v4_1(its)) {
+		its_encode_db(cmd, true);
+		its_encode_vmovp_default_db(cmd, desc->its_vmovp_cmd.vpe->vpe_db_lpi);
+	}
+
	its_fixup_cmd(cmd);

	return valid_vpe(its, desc->its_vmovp_cmd.vpe);
+}
+
+static struct its_vpe *its_build_vinv_cmd(struct its_node *its,
+					  struct its_cmd_block *cmd,
+					  struct its_cmd_desc *desc)
+{
+	struct its_vlpi_map *map;
+
+	map = dev_event_to_vlpi_map(desc->its_inv_cmd.dev,
+				    desc->its_inv_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_INV);
+	its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_inv_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, map->vpe);
+}
+
+static struct its_vpe *its_build_vint_cmd(struct its_node *its,
+					  struct its_cmd_block *cmd,
+					  struct its_cmd_desc *desc)
+{
+	struct its_vlpi_map *map;
+
+	map = dev_event_to_vlpi_map(desc->its_int_cmd.dev,
+				    desc->its_int_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_INT);
+	its_encode_devid(cmd, desc->its_int_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_int_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, map->vpe);
+}
+
+static struct its_vpe *its_build_vclear_cmd(struct its_node *its,
+					    struct its_cmd_block *cmd,
+					    struct its_cmd_desc *desc)
+{
+	struct its_vlpi_map *map;
+
+	map = dev_event_to_vlpi_map(desc->its_clear_cmd.dev,
+				    desc->its_clear_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_CLEAR);
+	its_encode_devid(cmd, desc->its_clear_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_clear_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, map->vpe);
+}
+
+static struct its_vpe *its_build_invdb_cmd(struct its_node *its,
+					   struct its_cmd_block *cmd,
+					   struct its_cmd_desc *desc)
+{
+	if (WARN_ON(!is_v4_1(its)))
+		return NULL;
+
+	its_encode_cmd(cmd, GITS_CMD_INVDB);
+	its_encode_vpeid(cmd, desc->its_invdb_cmd.vpe->vpe_id);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, desc->its_invdb_cmd.vpe);
+}
+
+static struct its_vpe *its_build_vsgi_cmd(struct its_node *its,
+					  struct its_cmd_block *cmd,
+					  struct its_cmd_desc *desc)
+{
+	if (WARN_ON(!is_v4_1(its)))
+		return NULL;
+
+	its_encode_cmd(cmd, GITS_CMD_VSGI);
+	its_encode_vpeid(cmd, desc->its_vsgi_cmd.vpe->vpe_id);
+	its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi);
+	its_encode_sgi_priority(cmd, desc->its_vsgi_cmd.priority);
+	its_encode_sgi_group(cmd, desc->its_vsgi_cmd.group);
+	its_encode_sgi_clear(cmd, desc->its_vsgi_cmd.clear);
+	its_encode_sgi_enable(cmd, desc->its_vsgi_cmd.enable);
+
+	its_fixup_cmd(cmd);
+
+	return valid_vpe(its, desc->its_vsgi_cmd.vpe);
 }

 static u64 its_cmd_ptr_to_offset(struct its_node *its,
@@ -960,7 +1241,7 @@

 static void its_send_vmapti(struct its_device *dev, u32 id)
 {
-	struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
+	struct its_vlpi_map *map = dev_event_to_vlpi_map(dev, id);
	struct its_cmd_desc desc;

	desc.its_vmapti_cmd.vpe = map->vpe;
@@ -974,7 +1255,7 @@

 static void its_send_vmovi(struct its_device *dev, u32 id)
 {
-	struct its_vlpi_map *map = &dev->event_map.vlpi_maps[id];
+	struct its_vlpi_map *map = dev_event_to_vlpi_map(dev, id);
	struct its_cmd_desc desc;

	desc.its_vmovi_cmd.vpe = map->vpe;
@@ -1028,10 +1309,10 @@

	/* Emit VMOVPs */
	list_for_each_entry(its, &its_nodes, entry) {
-		if (!its->is_v4)
+		if (!is_v4(its))
			continue;

-		if (!vpe->its_vm->vlpi_count[its->list_nr])
+		if (!require_its_list_vmovp(vpe->its_vm, its))
			continue;

		desc.its_vmovp_cmd.col = &its->collections[col_id];
@@ -1049,40 +1330,79 @@
	its_send_single_vcommand(its, its_build_vinvall_cmd, &desc);
 }

+static void its_send_vinv(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	/*
+	 * There is no real VINV command. This is just a normal INV,
+	 * with a VSYNC instead of a SYNC.
+	 */
+	desc.its_inv_cmd.dev = dev;
+	desc.its_inv_cmd.event_id = event_id;
+
+	its_send_single_vcommand(dev->its, its_build_vinv_cmd, &desc);
+}
+
+static void its_send_vint(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	/*
+	 * There is no real VINT command. This is just a normal INT,
+	 * with a VSYNC instead of a SYNC.
+	 */
+	desc.its_int_cmd.dev = dev;
+	desc.its_int_cmd.event_id = event_id;
+
+	its_send_single_vcommand(dev->its, its_build_vint_cmd, &desc);
+}
+
+static void its_send_vclear(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	/*
+	 * There is no real VCLEAR command. This is just a normal CLEAR,
+	 * with a VSYNC instead of a SYNC.
+	 */
+	desc.its_clear_cmd.dev = dev;
+	desc.its_clear_cmd.event_id = event_id;
+
+	its_send_single_vcommand(dev->its, its_build_vclear_cmd, &desc);
+}
+
+static void its_send_invdb(struct its_node *its, struct its_vpe *vpe)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_invdb_cmd.vpe = vpe;
+	its_send_single_vcommand(its, its_build_invdb_cmd, &desc);
+}
+
 /*
  * irqchip functions - assumes MSI, mostly.
  */
-
-static inline u32 its_get_event_id(struct irq_data *d)
-{
-	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
-	return d->hwirq - its_dev->event_map.lpi_base;
-}
-
 static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 {
+	struct its_vlpi_map *map = get_vlpi_map(d);
	irq_hw_number_t hwirq;
-	struct page *prop_page;
+	void *va;
	u8 *cfg;

-	if (irqd_is_forwarded_to_vcpu(d)) {
-		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
-		u32 event = its_get_event_id(d);
-		struct its_vlpi_map *map;
-
-		prop_page = its_dev->event_map.vm->vprop_page;
-		map = &its_dev->event_map.vlpi_maps[event];
+	if (map) {
+		va = page_address(map->vm->vprop_page);
		hwirq = map->vintid;

		/* Remember the updated property */
		map->properties &= ~clr;
		map->properties |= set | LPI_PROP_GROUP1;
	} else {
-		prop_page = gic_rdists->prop_page;
+		va = gic_rdists->prop_table_va;
		hwirq = d->hwirq;
	}

-	cfg = page_address(prop_page) + hwirq - 8192;
+	cfg = va + hwirq - 8192;
	*cfg &= ~clr;
	*cfg |= set | LPI_PROP_GROUP1;

@@ -1097,23 +1417,76 @@
	dsb(ishst);
 }

+static void wait_for_syncr(void __iomem *rdbase)
+{
+	while (readl_relaxed(rdbase + GICR_SYNCR) & 1)
+		cpu_relax();
+}
+
+static void direct_lpi_inv(struct irq_data *d)
+{
+	struct its_vlpi_map *map = get_vlpi_map(d);
+	void __iomem *rdbase;
+	unsigned long flags;
+	u64 val;
+	int cpu;
+
+	if (map) {
+		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+
+		WARN_ON(!is_v4_1(its_dev->its));
+
+		val = GICR_INVLPIR_V;
+		val |= FIELD_PREP(GICR_INVLPIR_VPEID, map->vpe->vpe_id);
+		val |= FIELD_PREP(GICR_INVLPIR_INTID, map->vintid);
+	} else {
+		val = d->hwirq;
+	}
+
+	/* Target the redistributor this LPI is currently routed to */
+	cpu = irq_to_cpuid_lock(d, &flags);
+	raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+	rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+	gic_write_lpir(val, rdbase + GICR_INVLPIR);
+
+	wait_for_syncr(rdbase);
+	raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+	irq_to_cpuid_unlock(d, flags);
+}
+
 static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
 {
	struct its_device *its_dev = irq_data_get_irq_chip_data(d);

	lpi_write_config(d, clr, set);
-	its_send_inv(its_dev, its_get_event_id(d));
+	if (gic_rdists->has_direct_lpi &&
+	    (is_v4_1(its_dev->its) || !irqd_is_forwarded_to_vcpu(d)))
+		direct_lpi_inv(d);
+	else if (!irqd_is_forwarded_to_vcpu(d))
+		its_send_inv(its_dev, its_get_event_id(d));
+	else
+		its_send_vinv(its_dev, its_get_event_id(d));
 }

 static void its_vlpi_set_doorbell(struct irq_data *d, bool enable)
 {
	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
	u32 event = its_get_event_id(d);
+	struct its_vlpi_map *map;

-	if (its_dev->event_map.vlpi_maps[event].db_enabled == enable)
+	/*
+	 * GICv4.1 does away with the per-LPI nonsense, nothing to do
+	 * here.
+	 */
+	if (is_v4_1(its_dev->its))
		return;

-	its_dev->event_map.vlpi_maps[event].db_enabled = enable;
+	map = dev_event_to_vlpi_map(its_dev, event);
+
+	if (map->db_enabled == enable)
+		return;
+
+	map->db_enabled = enable;

	/*
	 * More fun with the architecture:
....@@ -1144,42 +1517,159 @@
11441517 lpi_update_config(d, 0, LPI_PROP_ENABLED);
11451518 }
11461519
1520
+static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu)
1521
+{
1522
+ if (irqd_affinity_is_managed(d))
1523
+ return atomic_read(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
1524
+
1525
+ return atomic_read(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
1526
+}
1527
+
1528
+static void its_inc_lpi_count(struct irq_data *d, int cpu)
1529
+{
1530
+ if (irqd_affinity_is_managed(d))
1531
+ atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
1532
+ else
1533
+ atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
1534
+}
1535
+
1536
+static void its_dec_lpi_count(struct irq_data *d, int cpu)
1537
+{
1538
+ if (irqd_affinity_is_managed(d))
1539
+ atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
1540
+ else
1541
+ atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
1542
+}
1543
+
1544
+static unsigned int cpumask_pick_least_loaded(struct irq_data *d,
1545
+ const struct cpumask *cpu_mask)
1546
+{
1547
+ unsigned int cpu = nr_cpu_ids, tmp;
1548
+ int count = S32_MAX;
1549
+
1550
+ for_each_cpu(tmp, cpu_mask) {
1551
+ int this_count = its_read_lpi_count(d, tmp);
1552
+ if (this_count < count) {
1553
+ cpu = tmp;
1554
+ count = this_count;
1555
+ }
1556
+ }
1557
+
1558
+ return cpu;
1559
+}
1560
+
1561
+/*
1562
+ * As suggested by Thomas Gleixner in:
1563
+ * https://lore.kernel.org/r/87h80q2aoc.fsf@nanos.tec.linutronix.de
1564
+ */
1565
+static int its_select_cpu(struct irq_data *d,
1566
+ const struct cpumask *aff_mask)
1567
+{
1568
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
1569
+ cpumask_var_t tmpmask;
1570
+ int cpu, node;
1571
+
1572
+ if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
1573
+ return -ENOMEM;
1574
+
1575
+ node = its_dev->its->numa_node;
1576
+
1577
+ if (!irqd_affinity_is_managed(d)) {
1578
+ /* First try the NUMA node */
1579
+ if (node != NUMA_NO_NODE) {
1580
+ /*
1581
+ * Try the intersection of the affinity mask and the
1582
+ * node mask (and the online mask, just to be safe).
1583
+ */
1584
+ cpumask_and(tmpmask, cpumask_of_node(node), aff_mask);
1585
+ cpumask_and(tmpmask, tmpmask, cpu_online_mask);
1586
+
1587
+ /*
1588
+ * Ideally, we would check if the mask is empty, and
1589
+ * try again on the full node here.
1590
+ *
1591
+ * But it turns out that the way ACPI describes the
1592
+ * affinity for ITSs only deals about memory, and
1593
+ * not target CPUs, so it cannot describe a single
1594
+ * ITS placed next to two NUMA nodes.
1595
+ *
1596
+ * Instead, just fallback on the online mask. This
1597
+ * diverges from Thomas' suggestion above.
1598
+ */
1599
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
1600
+ if (cpu < nr_cpu_ids)
1601
+ goto out;
1602
+
1603
+ /* If we can't cross sockets, give up */
1604
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144))
1605
+ goto out;
1606
+
1607
+ /* If the above failed, expand the search */
1608
+ }
1609
+
1610
+ /* Try the intersection of the affinity and online masks */
1611
+ cpumask_and(tmpmask, aff_mask, cpu_online_mask);
1612
+
1613
+ /* If that doesn't fly, the online mask is the last resort */
1614
+ if (cpumask_empty(tmpmask))
1615
+ cpumask_copy(tmpmask, cpu_online_mask);
1616
+
1617
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
1618
+ } else {
1619
+ cpumask_copy(tmpmask, aff_mask);
1620
+
1621
+ /* If we cannot cross sockets, limit the search to that node */
1622
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
1623
+ node != NUMA_NO_NODE)
1624
+ cpumask_and(tmpmask, tmpmask, cpumask_of_node(node));
1625
+
1626
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
1627
+ }
1628
+out:
1629
+ free_cpumask_var(tmpmask);
1630
+
1631
+ pr_debug("IRQ%d -> %*pbl CPU%d\n", d->irq, cpumask_pr_args(aff_mask), cpu);
1632
+ return cpu;
1633
+}
1634
+
11471635 static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
11481636 bool force)
11491637 {
1150
- unsigned int cpu;
1151
- const struct cpumask *cpu_mask = cpu_online_mask;
11521638 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
11531639 struct its_collection *target_col;
11541640 u32 id = its_get_event_id(d);
1641
+ int cpu, prev_cpu;
11551642
11561643 /* A forwarded interrupt should use irq_set_vcpu_affinity */
11571644 if (irqd_is_forwarded_to_vcpu(d))
11581645 return -EINVAL;
11591646
1160
- /* lpi cannot be routed to a redistributor that is on a foreign node */
1161
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
1162
- if (its_dev->its->numa_node >= 0) {
1163
- cpu_mask = cpumask_of_node(its_dev->its->numa_node);
1164
- if (!cpumask_intersects(mask_val, cpu_mask))
1165
- return -EINVAL;
1166
- }
1167
- }
1647
+ prev_cpu = its_dev->event_map.col_map[id];
1648
+ its_dec_lpi_count(d, prev_cpu);
11681649
1169
- cpu = cpumask_any_and(mask_val, cpu_mask);
1650
+ if (!force)
1651
+ cpu = its_select_cpu(d, mask_val);
1652
+ else
1653
+ cpu = cpumask_pick_least_loaded(d, mask_val);
11701654
1171
- if (cpu >= nr_cpu_ids)
1172
- return -EINVAL;
1655
+ if (cpu < 0 || cpu >= nr_cpu_ids)
1656
+ goto err;
11731657
11741658 /* don't set the affinity when the target cpu is same as current one */
1175
- if (cpu != its_dev->event_map.col_map[id]) {
1659
+ if (cpu != prev_cpu) {
11761660 target_col = &its_dev->its->collections[cpu];
11771661 its_send_movi(its_dev, target_col, id);
11781662 its_dev->event_map.col_map[id] = cpu;
11791663 irq_data_update_effective_affinity(d, cpumask_of(cpu));
11801664 }
11811665
1666
+ its_inc_lpi_count(d, cpu);
1667
+
11821668 return IRQ_SET_MASK_OK_DONE;
1669
+
1670
+err:
1671
+ its_inc_lpi_count(d, prev_cpu);
1672
+ return -EINVAL;
11831673 }
11841674
11851675 static u64 its_irq_get_msi_base(struct its_device *its_dev)
....@@ -1202,7 +1692,7 @@
12021692 msg->address_hi = upper_32_bits(addr);
12031693 msg->data = its_get_event_id(d);
12041694
1205
- iommu_dma_map_msi_msg(d->irq, msg);
1695
+ iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg);
12061696 }
12071697
12081698 static int its_irq_set_irqchip_state(struct irq_data *d,
....@@ -1215,20 +1705,51 @@
12151705 if (which != IRQCHIP_STATE_PENDING)
12161706 return -EINVAL;
12171707
1218
- if (state)
1219
- its_send_int(its_dev, event);
1220
- else
1221
- its_send_clear(its_dev, event);
1708
+ if (irqd_is_forwarded_to_vcpu(d)) {
1709
+ if (state)
1710
+ its_send_vint(its_dev, event);
1711
+ else
1712
+ its_send_vclear(its_dev, event);
1713
+ } else {
1714
+ if (state)
1715
+ its_send_int(its_dev, event);
1716
+ else
1717
+ its_send_clear(its_dev, event);
1718
+ }
12221719
12231720 return 0;
1721
+}
1722
+
1723
+static int its_irq_retrigger(struct irq_data *d)
1724
+{
1725
+ return !its_irq_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true);
1726
+}
1727
+
1728
+/*
1729
+ * Two favourable cases:
1730
+ *
1731
+ * (a) Either we have a GICv4.1, and all vPEs have to be mapped at all times
1732
+ * for vSGI delivery
1733
+ *
1734
+ * (b) Or the ITSs do not use a list map, meaning that VMOVP is cheap enough
1735
+ * and we're better off mapping all VPEs always
1736
+ *
1737
+ * If neither (a) nor (b) is true, then we map vPEs on demand.
1738
+ *
1739
+ */
1740
+static bool gic_requires_eager_mapping(void)
1741
+{
1742
+ if (!its_list_map || gic_rdists->has_rvpeid)
1743
+ return true;
1744
+
1745
+ return false;
12241746 }
12251747
12261748 static void its_map_vm(struct its_node *its, struct its_vm *vm)
12271749 {
12281750 unsigned long flags;
12291751
1230
- /* Not using the ITS list? Everything is always mapped. */
1231
- if (!its_list_map)
1752
+ if (gic_requires_eager_mapping())
12321753 return;
12331754
12341755 raw_spin_lock_irqsave(&vmovp_lock, flags);
....@@ -1262,7 +1783,7 @@
12621783 unsigned long flags;
12631784
12641785 /* Not using the ITS list? Everything is always mapped. */
1265
- if (!its_list_map)
1786
+ if (gic_requires_eager_mapping())
12661787 return;
12671788
12681789 raw_spin_lock_irqsave(&vmovp_lock, flags);
....@@ -1286,13 +1807,13 @@
12861807 if (!info->map)
12871808 return -EINVAL;
12881809
1289
- mutex_lock(&its_dev->event_map.vlpi_lock);
1810
+ raw_spin_lock(&its_dev->event_map.vlpi_lock);
12901811
12911812 if (!its_dev->event_map.vm) {
12921813 struct its_vlpi_map *maps;
12931814
12941815 maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps),
1295
- GFP_KERNEL);
1816
+ GFP_ATOMIC);
12961817 if (!maps) {
12971818 ret = -ENOMEM;
12981819 goto out;
....@@ -1335,29 +1856,30 @@
13351856 }
13361857
13371858 out:
1338
- mutex_unlock(&its_dev->event_map.vlpi_lock);
1859
+ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
13391860 return ret;
13401861 }
13411862
13421863 static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info)
13431864 {
13441865 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
1345
- u32 event = its_get_event_id(d);
1866
+ struct its_vlpi_map *map;
13461867 int ret = 0;
13471868
1348
- mutex_lock(&its_dev->event_map.vlpi_lock);
1869
+ raw_spin_lock(&its_dev->event_map.vlpi_lock);
13491870
1350
- if (!its_dev->event_map.vm ||
1351
- !its_dev->event_map.vlpi_maps[event].vm) {
1871
+ map = get_vlpi_map(d);
1872
+
1873
+ if (!its_dev->event_map.vm || !map) {
13521874 ret = -EINVAL;
13531875 goto out;
13541876 }
13551877
13561878 /* Copy our mapping information to the incoming request */
1357
- *info->map = its_dev->event_map.vlpi_maps[event];
1879
+ *info->map = *map;
13581880
13591881 out:
1360
- mutex_unlock(&its_dev->event_map.vlpi_lock);
1882
+ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
13611883 return ret;
13621884 }
13631885
....@@ -1367,7 +1889,7 @@
13671889 u32 event = its_get_event_id(d);
13681890 int ret = 0;
13691891
1370
- mutex_lock(&its_dev->event_map.vlpi_lock);
1892
+ raw_spin_lock(&its_dev->event_map.vlpi_lock);
13711893
13721894 if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
13731895 ret = -EINVAL;
....@@ -1397,7 +1919,7 @@
13971919 }
13981920
13991921 out:
1400
- mutex_unlock(&its_dev->event_map.vlpi_lock);
1922
+ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
14011923 return ret;
14021924 }
14031925
....@@ -1423,7 +1945,7 @@
14231945 struct its_cmd_info *info = vcpu_info;
14241946
14251947 /* Need a v4 ITS */
1426
- if (!its_dev->its->is_v4)
1948
+ if (!is_v4(its_dev->its))
14271949 return -EINVAL;
14281950
14291951 /* Unmap request? */
....@@ -1454,6 +1976,7 @@
14541976 .irq_set_affinity = its_set_affinity,
14551977 .irq_compose_msi_msg = its_irq_compose_msi_msg,
14561978 .irq_set_irqchip_state = its_irq_set_irqchip_state,
1979
+ .irq_retrigger = its_irq_retrigger,
14571980 .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
14581981 };
14591982
....@@ -1488,39 +2011,13 @@
14882011 {
14892012 struct lpi_range *range;
14902013
1491
- range = kzalloc(sizeof(*range), GFP_KERNEL);
2014
+ range = kmalloc(sizeof(*range), GFP_KERNEL);
14922015 if (range) {
1493
- INIT_LIST_HEAD(&range->entry);
14942016 range->base_id = base;
14952017 range->span = span;
14962018 }
14972019
14982020 return range;
1499
-}
1500
-
1501
-static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
1502
-{
1503
- struct lpi_range *ra, *rb;
1504
-
1505
- ra = container_of(a, struct lpi_range, entry);
1506
- rb = container_of(b, struct lpi_range, entry);
1507
-
1508
- return ra->base_id - rb->base_id;
1509
-}
1510
-
1511
-static void merge_lpi_ranges(void)
1512
-{
1513
- struct lpi_range *range, *tmp;
1514
-
1515
- list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
1516
- if (!list_is_last(&range->entry, &lpi_range_list) &&
1517
- (tmp->base_id == (range->base_id + range->span))) {
1518
- tmp->base_id = range->base_id;
1519
- tmp->span += range->span;
1520
- list_del(&range->entry);
1521
- kfree(range);
1522
- }
1523
- }
15242021 }
15252022
15262023 static int alloc_lpi_range(u32 nr_lpis, u32 *base)
....@@ -1552,25 +2049,49 @@
15522049 return err;
15532050 }
15542051
2052
+static void merge_lpi_ranges(struct lpi_range *a, struct lpi_range *b)
2053
+{
2054
+ if (&a->entry == &lpi_range_list || &b->entry == &lpi_range_list)
2055
+ return;
2056
+ if (a->base_id + a->span != b->base_id)
2057
+ return;
2058
+ b->base_id = a->base_id;
2059
+ b->span += a->span;
2060
+ list_del(&a->entry);
2061
+ kfree(a);
2062
+}
2063
+
15552064 static int free_lpi_range(u32 base, u32 nr_lpis)
15562065 {
1557
- struct lpi_range *new;
1558
- int err = 0;
2066
+ struct lpi_range *new, *old;
2067
+
2068
+ new = mk_lpi_range(base, nr_lpis);
2069
+ if (!new)
2070
+ return -ENOMEM;
15592071
15602072 mutex_lock(&lpi_range_lock);
15612073
1562
- new = mk_lpi_range(base, nr_lpis);
1563
- if (!new) {
1564
- err = -ENOMEM;
1565
- goto out;
2074
+ list_for_each_entry_reverse(old, &lpi_range_list, entry) {
2075
+ if (old->base_id < base)
2076
+ break;
15662077 }
2078
+ /*
2079
+ * old is the last element with ->base_id smaller than base,
2080
+ * so new goes right after it. If there are no elements with
2081
+ * ->base_id smaller than base, &old->entry ends up pointing
2082
+ * at the head of the list, and inserting new it the start of
2083
+ * the list is the right thing to do in that case as well.
2084
+ */
2085
+ list_add(&new->entry, &old->entry);
2086
+ /*
2087
+ * Now check if we can merge with the preceding and/or
2088
+ * following ranges.
2089
+ */
2090
+ merge_lpi_ranges(old, new);
2091
+ merge_lpi_ranges(new, list_next_entry(new, entry));
15672092
1568
- list_add(&new->entry, &lpi_range_list);
1569
- list_sort(NULL, &lpi_range_list, lpi_range_cmp);
1570
- merge_lpi_ranges();
1571
-out:
15722093 mutex_unlock(&lpi_range_lock);
1573
- return err;
2094
+ return 0;
15742095 }
15752096
15762097 static int __init its_lpi_init(u32 id_bits)
....@@ -1634,6 +2155,15 @@
16342155 kfree(bitmap);
16352156 }
16362157
2158
+static void gic_reset_prop_table(void *va)
2159
+{
2160
+ /* Priority 0xa0, Group-1, disabled */
2161
+ memset(va, LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, LPI_PROPBASE_SZ);
2162
+
2163
+ /* Make sure the GIC will observe the written configuration */
2164
+ gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ);
2165
+}
2166
+
16372167 static struct page *its_allocate_prop_table(gfp_t gfp_flags)
16382168 {
16392169 struct page *prop_page;
....@@ -1644,13 +2174,7 @@
16442174 if (!prop_page)
16452175 return NULL;
16462176
1647
- /* Priority 0xa0, Group-1, disabled */
1648
- memset(page_address(prop_page),
1649
- LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
1650
- LPI_PROPBASE_SZ);
1651
-
1652
- /* Make sure the GIC will observe the written configuration */
1653
- gic_flush_dcache_to_poc(page_address(prop_page), LPI_PROPBASE_SZ);
2177
+ gic_reset_prop_table(page_address(prop_page));
16542178
16552179 return prop_page;
16562180 }
....@@ -1661,20 +2185,74 @@
16612185 get_order(LPI_PROPBASE_SZ));
16622186 }
16632187
1664
-static int __init its_alloc_lpi_tables(void)
2188
+static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
16652189 {
1666
- phys_addr_t paddr;
2190
+ phys_addr_t start, end, addr_end;
2191
+ u64 i;
16672192
1668
- lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
1669
- ITS_MAX_LPI_NRBITS);
1670
- gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
1671
- if (!gic_rdists->prop_page) {
1672
- pr_err("Failed to allocate PROPBASE\n");
1673
- return -ENOMEM;
2193
+ /*
2194
+ * We don't bother checking for a kdump kernel as by
2195
+ * construction, the LPI tables are out of this kernel's
2196
+ * memory map.
2197
+ */
2198
+ if (is_kdump_kernel())
2199
+ return true;
2200
+
2201
+ addr_end = addr + size - 1;
2202
+
2203
+ for_each_reserved_mem_range(i, &start, &end) {
2204
+ if (addr >= start && addr_end <= end)
2205
+ return true;
16742206 }
16752207
1676
- paddr = page_to_phys(gic_rdists->prop_page);
1677
- pr_info("GIC: using LPI property table @%pa\n", &paddr);
2208
+ /* Not found, not a good sign... */
2209
+ pr_warn("GICv3: Expected reserved range [%pa:%pa], not found\n",
2210
+ &addr, &addr_end);
2211
+ add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
2212
+ return false;
2213
+}
2214
+
2215
+static int gic_reserve_range(phys_addr_t addr, unsigned long size)
2216
+{
2217
+ if (efi_enabled(EFI_CONFIG_TABLES))
2218
+ return efi_mem_reserve_persistent(addr, size);
2219
+
2220
+ return 0;
2221
+}
2222
+
2223
+static int __init its_setup_lpi_prop_table(void)
2224
+{
2225
+ if (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) {
2226
+ u64 val;
2227
+
2228
+ val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER);
2229
+ lpi_id_bits = (val & GICR_PROPBASER_IDBITS_MASK) + 1;
2230
+
2231
+ gic_rdists->prop_table_pa = val & GENMASK_ULL(51, 12);
2232
+ gic_rdists->prop_table_va = memremap(gic_rdists->prop_table_pa,
2233
+ LPI_PROPBASE_SZ,
2234
+ MEMREMAP_WB);
2235
+ gic_reset_prop_table(gic_rdists->prop_table_va);
2236
+ } else {
2237
+ struct page *page;
2238
+
2239
+ lpi_id_bits = min_t(u32,
2240
+ GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
2241
+ ITS_MAX_LPI_NRBITS);
2242
+ page = its_allocate_prop_table(GFP_NOWAIT);
2243
+ if (!page) {
2244
+ pr_err("Failed to allocate PROPBASE\n");
2245
+ return -ENOMEM;
2246
+ }
2247
+
2248
+ gic_rdists->prop_table_pa = page_to_phys(page);
2249
+ gic_rdists->prop_table_va = page_address(page);
2250
+ WARN_ON(gic_reserve_range(gic_rdists->prop_table_pa,
2251
+ LPI_PROPBASE_SZ));
2252
+ }
2253
+
2254
+ pr_info("GICv3: using LPI property table @%pa\n",
2255
+ &gic_rdists->prop_table_pa);
16782256
16792257 return its_lpi_init(lpi_id_bits);
16802258 }
....@@ -1706,18 +2284,18 @@
17062284 }
17072285
17082286 static int its_setup_baser(struct its_node *its, struct its_baser *baser,
1709
- u64 cache, u64 shr, u32 psz, u32 order,
1710
- bool indirect)
2287
+ u64 cache, u64 shr, u32 order, bool indirect)
17112288 {
17122289 u64 val = its_read_baser(its, baser);
17132290 u64 esz = GITS_BASER_ENTRY_SIZE(val);
17142291 u64 type = GITS_BASER_TYPE(val);
17152292 u64 baser_phys, tmp;
1716
- u32 alloc_pages;
2293
+ u32 alloc_pages, psz;
2294
+ struct page *page;
17172295 void *base;
17182296 gfp_t gfp_flags;
17192297
1720
-retry_alloc_baser:
2298
+ psz = baser->psz;
17212299 alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
17222300 if (alloc_pages > GITS_BASER_PAGES_MAX) {
17232301 pr_warn("ITS@%pa: %s too large, reduce ITS pages %u->%u\n",
....@@ -1730,10 +2308,11 @@
17302308 gfp_flags = GFP_KERNEL | __GFP_ZERO;
17312309 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
17322310 gfp_flags |= GFP_DMA32;
1733
- base = (void *)__get_free_pages(gfp_flags, order);
1734
- if (!base)
2311
+ page = alloc_pages_node(its->numa_node, gfp_flags, order);
2312
+ if (!page)
17352313 return -ENOMEM;
17362314
2315
+ base = (void *)page_address(page);
17372316 baser_phys = virt_to_phys(base);
17382317
17392318 /* Check if the physical address of the memory is above 48bits */
....@@ -1776,8 +2355,10 @@
17762355 its_write_baser(its, baser, val);
17772356 tmp = baser->val;
17782357
1779
- if (of_machine_is_compatible("rockchip,rk3568") ||
1780
- of_machine_is_compatible("rockchip,rk3566")) {
2358
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
2359
+ (of_machine_is_compatible("rockchip,rk3568") ||
2360
+ of_machine_is_compatible("rockchip,rk3566") ||
2361
+ of_machine_is_compatible("rockchip,rk3588"))) {
17812362 if (tmp & GITS_BASER_SHAREABILITY_MASK)
17822363 tmp &= ~GITS_BASER_SHAREABILITY_MASK;
17832364 else
....@@ -1798,25 +2379,6 @@
17982379 gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
17992380 }
18002381 goto retry_baser;
1801
- }
1802
-
1803
- if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
1804
- /*
1805
- * Page size didn't stick. Let's try a smaller
1806
- * size and retry. If we reach 4K, then
1807
- * something is horribly wrong...
1808
- */
1809
- free_pages((unsigned long)base, order);
1810
- baser->base = NULL;
1811
-
1812
- switch (psz) {
1813
- case SZ_16K:
1814
- psz = SZ_4K;
1815
- goto retry_alloc_baser;
1816
- case SZ_64K:
1817
- psz = SZ_16K;
1818
- goto retry_alloc_baser;
1819
- }
18202382 }
18212383
18222384 if (val != tmp) {
....@@ -1844,13 +2406,14 @@
18442406
18452407 static bool its_parse_indirect_baser(struct its_node *its,
18462408 struct its_baser *baser,
1847
- u32 psz, u32 *order, u32 ids)
2409
+ u32 *order, u32 ids)
18482410 {
18492411 u64 tmp = its_read_baser(its, baser);
18502412 u64 type = GITS_BASER_TYPE(tmp);
18512413 u64 esz = GITS_BASER_ENTRY_SIZE(tmp);
18522414 u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb;
18532415 u32 new_order = *order;
2416
+ u32 psz = baser->psz;
18542417 bool indirect = false;
18552418
18562419 /* No need to enable Indirection if memory requirement < (psz*2)bytes */
....@@ -1886,14 +2449,73 @@
18862449 if (new_order >= MAX_ORDER) {
18872450 new_order = MAX_ORDER - 1;
18882451 ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz);
1889
- pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n",
2452
+ pr_warn("ITS@%pa: %s Table too large, reduce ids %llu->%u\n",
18902453 &its->phys_base, its_base_type_string[type],
1891
- its->device_ids, ids);
2454
+ device_ids(its), ids);
18922455 }
18932456
18942457 *order = new_order;
18952458
18962459 return indirect;
2460
+}
2461
+
2462
+static u32 compute_common_aff(u64 val)
2463
+{
2464
+ u32 aff, clpiaff;
2465
+
2466
+ aff = FIELD_GET(GICR_TYPER_AFFINITY, val);
2467
+ clpiaff = FIELD_GET(GICR_TYPER_COMMON_LPI_AFF, val);
2468
+
2469
+ return aff & ~(GENMASK(31, 0) >> (clpiaff * 8));
2470
+}
2471
+
2472
+static u32 compute_its_aff(struct its_node *its)
2473
+{
2474
+ u64 val;
2475
+ u32 svpet;
2476
+
2477
+ /*
2478
+ * Reencode the ITS SVPET and MPIDR as a GICR_TYPER, and compute
2479
+ * the resulting affinity. We then use that to see if this match
2480
+ * our own affinity.
2481
+ */
2482
+ svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer);
2483
+ val = FIELD_PREP(GICR_TYPER_COMMON_LPI_AFF, svpet);
2484
+ val |= FIELD_PREP(GICR_TYPER_AFFINITY, its->mpidr);
2485
+ return compute_common_aff(val);
2486
+}
2487
+
2488
+static struct its_node *find_sibling_its(struct its_node *cur_its)
2489
+{
2490
+ struct its_node *its;
2491
+ u32 aff;
2492
+
2493
+ if (!FIELD_GET(GITS_TYPER_SVPET, cur_its->typer))
2494
+ return NULL;
2495
+
2496
+ aff = compute_its_aff(cur_its);
2497
+
2498
+ list_for_each_entry(its, &its_nodes, entry) {
2499
+ u64 baser;
2500
+
2501
+ if (!is_v4_1(its) || its == cur_its)
2502
+ continue;
2503
+
2504
+ if (!FIELD_GET(GITS_TYPER_SVPET, its->typer))
2505
+ continue;
2506
+
2507
+ if (aff != compute_its_aff(its))
2508
+ continue;
2509
+
2510
+ /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */
2511
+ baser = its->tables[2].val;
2512
+ if (!(baser & GITS_BASER_VALID))
2513
+ continue;
2514
+
2515
+ return its;
2516
+ }
2517
+
2518
+ return NULL;
18972519 }
18982520
18992521 static void its_free_tables(struct its_node *its)
....@@ -1909,11 +2531,58 @@
19092531 }
19102532 }
19112533
2534
+static int its_probe_baser_psz(struct its_node *its, struct its_baser *baser)
2535
+{
2536
+ u64 psz = SZ_64K;
2537
+
2538
+ while (psz) {
2539
+ u64 val, gpsz;
2540
+
2541
+ val = its_read_baser(its, baser);
2542
+ val &= ~GITS_BASER_PAGE_SIZE_MASK;
2543
+
2544
+ switch (psz) {
2545
+ case SZ_64K:
2546
+ gpsz = GITS_BASER_PAGE_SIZE_64K;
2547
+ break;
2548
+ case SZ_16K:
2549
+ gpsz = GITS_BASER_PAGE_SIZE_16K;
2550
+ break;
2551
+ case SZ_4K:
2552
+ default:
2553
+ gpsz = GITS_BASER_PAGE_SIZE_4K;
2554
+ break;
2555
+ }
2556
+
2557
+ gpsz >>= GITS_BASER_PAGE_SIZE_SHIFT;
2558
+
2559
+ val |= FIELD_PREP(GITS_BASER_PAGE_SIZE_MASK, gpsz);
2560
+ its_write_baser(its, baser, val);
2561
+
2562
+ if (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser->val) == gpsz)
2563
+ break;
2564
+
2565
+ switch (psz) {
2566
+ case SZ_64K:
2567
+ psz = SZ_16K;
2568
+ break;
2569
+ case SZ_16K:
2570
+ psz = SZ_4K;
2571
+ break;
2572
+ case SZ_4K:
2573
+ default:
2574
+ return -1;
2575
+ }
2576
+ }
2577
+
2578
+ baser->psz = psz;
2579
+ return 0;
2580
+}
2581
+
19122582 static int its_alloc_tables(struct its_node *its)
19132583 {
19142584 u64 shr = GITS_BASER_InnerShareable;
19152585 u64 cache = GITS_BASER_RaWaWb;
1916
- u32 psz = SZ_64K;
19172586 int err, i;
19182587
19192588 if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375)
....@@ -1924,37 +2593,337 @@
19242593 struct its_baser *baser = its->tables + i;
19252594 u64 val = its_read_baser(its, baser);
19262595 u64 type = GITS_BASER_TYPE(val);
1927
- u32 order = get_order(psz);
19282596 bool indirect = false;
2597
+ u32 order;
19292598
1930
- switch (type) {
1931
- case GITS_BASER_TYPE_NONE:
2599
+ if (type == GITS_BASER_TYPE_NONE)
19322600 continue;
19332601
2602
+ if (its_probe_baser_psz(its, baser)) {
2603
+ its_free_tables(its);
2604
+ return -ENXIO;
2605
+ }
2606
+
2607
+ order = get_order(baser->psz);
2608
+
2609
+ switch (type) {
19342610 case GITS_BASER_TYPE_DEVICE:
1935
- indirect = its_parse_indirect_baser(its, baser,
1936
- psz, &order,
1937
- its->device_ids);
2611
+ indirect = its_parse_indirect_baser(its, baser, &order,
2612
+ device_ids(its));
19382613 break;
19392614
19402615 case GITS_BASER_TYPE_VCPU:
1941
- indirect = its_parse_indirect_baser(its, baser,
1942
- psz, &order,
2616
+ if (is_v4_1(its)) {
2617
+ struct its_node *sibling;
2618
+
2619
+ WARN_ON(i != 2);
2620
+ if ((sibling = find_sibling_its(its))) {
2621
+ *baser = sibling->tables[2];
2622
+ its_write_baser(its, baser, baser->val);
2623
+ continue;
2624
+ }
2625
+ }
2626
+
2627
+ indirect = its_parse_indirect_baser(its, baser, &order,
19432628 ITS_MAX_VPEID_BITS);
19442629 break;
19452630 }
19462631
1947
- err = its_setup_baser(its, baser, cache, shr, psz, order, indirect);
2632
+ err = its_setup_baser(its, baser, cache, shr, order, indirect);
19482633 if (err < 0) {
19492634 its_free_tables(its);
19502635 return err;
19512636 }
19522637
19532638 /* Update settings which will be used for next BASERn */
1954
- psz = baser->psz;
19552639 cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
19562640 shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
19572641 }
2642
+
2643
+ return 0;
2644
+}
2645
+
2646
+static u64 inherit_vpe_l1_table_from_its(void)
2647
+{
2648
+ struct its_node *its;
2649
+ u64 val;
2650
+ u32 aff;
2651
+
2652
+ val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
2653
+ aff = compute_common_aff(val);
2654
+
2655
+ list_for_each_entry(its, &its_nodes, entry) {
2656
+ u64 baser, addr;
2657
+
2658
+ if (!is_v4_1(its))
2659
+ continue;
2660
+
2661
+ if (!FIELD_GET(GITS_TYPER_SVPET, its->typer))
2662
+ continue;
2663
+
2664
+ if (aff != compute_its_aff(its))
2665
+ continue;
2666
+
2667
+ /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */
2668
+ baser = its->tables[2].val;
2669
+ if (!(baser & GITS_BASER_VALID))
2670
+ continue;
2671
+
2672
+ /* We have a winner! */
2673
+ gic_data_rdist()->vpe_l1_base = its->tables[2].base;
2674
+
2675
+ val = GICR_VPROPBASER_4_1_VALID;
2676
+ if (baser & GITS_BASER_INDIRECT)
2677
+ val |= GICR_VPROPBASER_4_1_INDIRECT;
2678
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE,
2679
+ FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser));
2680
+ switch (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser)) {
2681
+ case GIC_PAGE_SIZE_64K:
2682
+ addr = GITS_BASER_ADDR_48_to_52(baser);
2683
+ break;
2684
+ default:
2685
+ addr = baser & GENMASK_ULL(47, 12);
2686
+ break;
2687
+ }
2688
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12);
2689
+ val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
2690
+ FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
2691
+ val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
2692
+ FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
2693
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);
2694
+
2695
+ return val;
2696
+ }
2697
+
2698
+ return 0;
2699
+}
2700
+
2701
+static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask)
2702
+{
2703
+ u32 aff;
2704
+ u64 val;
2705
+ int cpu;
2706
+
2707
+ val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
2708
+ aff = compute_common_aff(val);
2709
+
2710
+ for_each_possible_cpu(cpu) {
2711
+ void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base;
2712
+
2713
+ if (!base || cpu == smp_processor_id())
2714
+ continue;
2715
+
2716
+ val = gic_read_typer(base + GICR_TYPER);
2717
+ if (aff != compute_common_aff(val))
2718
+ continue;
2719
+
2720
+ /*
2721
+ * At this point, we have a victim. This particular CPU
2722
+ * has already booted, and has an affinity that matches
2723
+ * ours wrt CommonLPIAff. Let's use its own VPROPBASER.
2724
+ * Make sure we don't write the Z bit in that case.
2725
+ */
2726
+ val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER);
2727
+ val &= ~GICR_VPROPBASER_4_1_Z;
2728
+
2729
+ gic_data_rdist()->vpe_l1_base = gic_data_rdist_cpu(cpu)->vpe_l1_base;
2730
+ *mask = gic_data_rdist_cpu(cpu)->vpe_table_mask;
2731
+
2732
+ return val;
2733
+ }
2734
+
2735
+ return 0;
2736
+}
2737
+
2738
+static bool allocate_vpe_l2_table(int cpu, u32 id)
2739
+{
2740
+ void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base;
2741
+ unsigned int psz, esz, idx, npg, gpsz;
2742
+ u64 val;
2743
+ struct page *page;
2744
+ __le64 *table;
2745
+
2746
+ if (!gic_rdists->has_rvpeid)
2747
+ return true;
2748
+
2749
+ /* Skip non-present CPUs */
2750
+ if (!base)
2751
+ return true;
2752
+
2753
+ val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER);
2754
+
2755
+ esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val) + 1;
2756
+ gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val);
2757
+ npg = FIELD_GET(GICR_VPROPBASER_4_1_SIZE, val) + 1;
2758
+
2759
+ switch (gpsz) {
2760
+ default:
2761
+ WARN_ON(1);
2762
+ fallthrough;
2763
+ case GIC_PAGE_SIZE_4K:
2764
+ psz = SZ_4K;
2765
+ break;
2766
+ case GIC_PAGE_SIZE_16K:
2767
+ psz = SZ_16K;
2768
+ break;
2769
+ case GIC_PAGE_SIZE_64K:
2770
+ psz = SZ_64K;
2771
+ break;
2772
+ }
2773
+
2774
+ /* Don't allow vpe_id that exceeds single, flat table limit */
2775
+ if (!(val & GICR_VPROPBASER_4_1_INDIRECT))
2776
+ return (id < (npg * psz / (esz * SZ_8)));
2777
+
2778
+ /* Compute 1st level table index & check if that exceeds table limit */
2779
+ idx = id >> ilog2(psz / (esz * SZ_8));
2780
+ if (idx >= (npg * psz / GITS_LVL1_ENTRY_SIZE))
2781
+ return false;
2782
+
2783
+ table = gic_data_rdist_cpu(cpu)->vpe_l1_base;
2784
+
2785
+ /* Allocate memory for 2nd level table */
2786
+ if (!table[idx]) {
2787
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz));
2788
+ if (!page)
2789
+ return false;
2790
+
2791
+ /* Flush Lvl2 table to PoC if hw doesn't support coherency */
2792
+ if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK))
2793
+ gic_flush_dcache_to_poc(page_address(page), psz);
2794
+
2795
+ table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID);
2796
+
2797
+ /* Flush Lvl1 entry to PoC if hw doesn't support coherency */
2798
+ if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK))
2799
+ gic_flush_dcache_to_poc(table + idx, GITS_LVL1_ENTRY_SIZE);
2800
+
2801
+ /* Ensure updated table contents are visible to RD hardware */
2802
+ dsb(sy);
2803
+ }
2804
+
2805
+ return true;
2806
+}
2807
+
2808
+static int allocate_vpe_l1_table(void)
2809
+{
2810
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
2811
+ u64 val, gpsz, npg, pa;
2812
+ unsigned int psz = SZ_64K;
2813
+ unsigned int np, epp, esz;
2814
+ struct page *page;
2815
+
2816
+ if (!gic_rdists->has_rvpeid)
2817
+ return 0;
2818
+
2819
+ /*
2820
+ * if VPENDBASER.Valid is set, disable any previously programmed
2821
+ * VPE by setting PendingLast while clearing Valid. This has the
2822
+ * effect of making sure no doorbell will be generated and we can
2823
+ * then safely clear VPROPBASER.Valid.
2824
+ */
2825
+ if (gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid)
2826
+ gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast,
2827
+ vlpi_base + GICR_VPENDBASER);
2828
+
2829
+ /*
2830
+ * If we can inherit the configuration from another RD, let's do
2831
+ * so. Otherwise, we have to go through the allocation process. We
2832
+ * assume that all RDs have the exact same requirements, as
2833
+ * nothing will work otherwise.
2834
+ */
2835
+ val = inherit_vpe_l1_table_from_rd(&gic_data_rdist()->vpe_table_mask);
2836
+ if (val & GICR_VPROPBASER_4_1_VALID)
2837
+ goto out;
2838
+
2839
+ gic_data_rdist()->vpe_table_mask = kzalloc(sizeof(cpumask_t), GFP_ATOMIC);
2840
+ if (!gic_data_rdist()->vpe_table_mask)
2841
+ return -ENOMEM;
2842
+
2843
+ val = inherit_vpe_l1_table_from_its();
2844
+ if (val & GICR_VPROPBASER_4_1_VALID)
2845
+ goto out;
2846
+
2847
+ /* First probe the page size */
2848
+ val = FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, GIC_PAGE_SIZE_64K);
2849
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
2850
+ val = gicr_read_vpropbaser(vlpi_base + GICR_VPROPBASER);
2851
+ gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val);
2852
+ esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val);
2853
+
2854
+ switch (gpsz) {
2855
+ default:
2856
+ gpsz = GIC_PAGE_SIZE_4K;
2857
+ fallthrough;
2858
+ case GIC_PAGE_SIZE_4K:
2859
+ psz = SZ_4K;
2860
+ break;
2861
+ case GIC_PAGE_SIZE_16K:
2862
+ psz = SZ_16K;
2863
+ break;
2864
+ case GIC_PAGE_SIZE_64K:
2865
+ psz = SZ_64K;
2866
+ break;
2867
+ }
2868
+
2869
+ /*
2870
+ * Start populating the register from scratch, including RO fields
2871
+ * (which we want to print in debug cases...)
2872
+ */
2873
+ val = 0;
2874
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, gpsz);
2875
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_ENTRY_SIZE, esz);
2876
+
2877
+ /* How many entries per GIC page? */
2878
+ esz++;
2879
+ epp = psz / (esz * SZ_8);
2880
+
2881
+ /*
2882
+ * If we need more than just a single L1 page, flag the table
2883
+ * as indirect and compute the number of required L1 pages.
2884
+ */
2885
+ if (epp < ITS_MAX_VPEID) {
2886
+ int nl2;
2887
+
2888
+ val |= GICR_VPROPBASER_4_1_INDIRECT;
2889
+
2890
+ /* Number of L2 pages required to cover the VPEID space */
2891
+ nl2 = DIV_ROUND_UP(ITS_MAX_VPEID, epp);
2892
+
2893
+ /* Number of L1 pages to point to the L2 pages */
2894
+ npg = DIV_ROUND_UP(nl2 * SZ_8, psz);
2895
+ } else {
2896
+ npg = 1;
2897
+ }
2898
+
2899
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg - 1);
2900
+
2901
+ /* Right, that's the number of CPU pages we need for L1 */
2902
+ np = DIV_ROUND_UP(npg * psz, PAGE_SIZE);
2903
+
2904
+ pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n",
2905
+ np, npg, psz, epp, esz);
2906
+ page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE));
2907
+ if (!page)
2908
+ return -ENOMEM;
2909
+
2910
+ gic_data_rdist()->vpe_l1_base = page_address(page);
2911
+ pa = virt_to_phys(page_address(page));
2912
+ WARN_ON(!IS_ALIGNED(pa, psz));
2913
+
2914
+ val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12);
2915
+ val |= GICR_VPROPBASER_RaWb;
2916
+ val |= GICR_VPROPBASER_InnerShareable;
2917
+ val |= GICR_VPROPBASER_4_1_Z;
2918
+ val |= GICR_VPROPBASER_4_1_VALID;
2919
+
2920
+out:
2921
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
2922
+ cpumask_set_cpu(smp_processor_id(), gic_data_rdist()->vpe_table_mask);
2923
+
2924
+ pr_debug("CPU%d: VPROPBASER = %llx %*pbl\n",
2925
+ smp_processor_id(), val,
2926
+ cpumask_pr_args(gic_data_rdist()->vpe_table_mask));
19582927
19592928 return 0;
19602929 }
....@@ -1977,14 +2946,11 @@
19772946 static struct page *its_allocate_pending_table(gfp_t gfp_flags)
19782947 {
19792948 struct page *pend_page;
1980
- /*
1981
- * The pending pages have to be at least 64kB aligned,
1982
- * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
1983
- */
2949
+
19842950 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
19852951 gfp_flags |= GFP_DMA32;
19862952 pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
1987
- get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
2953
+ get_order(LPI_PENDBASE_SZ));
19882954 if (!pend_page)
19892955 return NULL;
19902956
....@@ -1996,22 +2962,73 @@
19962962
19972963 static void its_free_pending_table(struct page *pt)
19982964 {
1999
- free_pages((unsigned long)page_address(pt),
2000
- get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
2965
+ free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
20012966 }
20022967
2003
-static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
2968
+/*
2969
+ * Booting with kdump and LPIs enabled is generally fine. Any other
2970
+ * case is wrong in the absence of firmware/EFI support.
2971
+ */
2972
+static bool enabled_lpis_allowed(void)
2973
+{
2974
+ phys_addr_t addr;
2975
+ u64 val;
2976
+
2977
+ /* Check whether the property table is in a reserved region */
2978
+ val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER);
2979
+ addr = val & GENMASK_ULL(51, 12);
2980
+
2981
+ return gic_check_reserved_range(addr, LPI_PROPBASE_SZ);
2982
+}
2983
+
2984
+static int __init allocate_lpi_tables(void)
2985
+{
2986
+ u64 val;
2987
+ int err, cpu;
2988
+
2989
+ /*
2990
+ * If LPIs are enabled while we run this from the boot CPU,
2991
+ * flag the RD tables as pre-allocated if the stars do align.
2992
+ */
2993
+ val = readl_relaxed(gic_data_rdist_rd_base() + GICR_CTLR);
2994
+ if ((val & GICR_CTLR_ENABLE_LPIS) && enabled_lpis_allowed()) {
2995
+ gic_rdists->flags |= (RDIST_FLAGS_RD_TABLES_PREALLOCATED |
2996
+ RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING);
2997
+ pr_info("GICv3: Using preallocated redistributor tables\n");
2998
+ }
2999
+
3000
+ err = its_setup_lpi_prop_table();
3001
+ if (err)
3002
+ return err;
3003
+
3004
+ /*
3005
+ * We allocate all the pending tables anyway, as we may have a
3006
+ * mix of RDs that have had LPIs enabled, and some that
3007
+ * don't. We'll free the unused ones as each CPU comes online.
3008
+ */
3009
+ for_each_possible_cpu(cpu) {
3010
+ struct page *pend_page;
3011
+
3012
+ pend_page = its_allocate_pending_table(GFP_NOWAIT);
3013
+ if (!pend_page) {
3014
+ pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu);
3015
+ return -ENOMEM;
3016
+ }
3017
+
3018
+ gic_data_rdist_cpu(cpu)->pend_page = pend_page;
3019
+ }
3020
+
3021
+ return 0;
3022
+}
3023
+
3024
+static u64 read_vpend_dirty_clear(void __iomem *vlpi_base)
20043025 {
20053026 u32 count = 1000000; /* 1s! */
20063027 bool clean;
20073028 u64 val;
20083029
2009
- val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
2010
- val &= ~GICR_VPENDBASER_Valid;
2011
- gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
2012
-
20133030 do {
2014
- val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
3031
+ val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
20153032 clean = !(val & GICR_VPENDBASER_Dirty);
20163033 if (!clean) {
20173034 count--;
....@@ -2020,6 +3037,27 @@
20203037 }
20213038 } while (!clean && count);
20223039
3040
+ if (unlikely(!clean))
3041
+ pr_err_ratelimited("ITS virtual pending table not cleaning\n");
3042
+
3043
+ return val;
3044
+}
3045
+
3046
+static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
3047
+{
3048
+ u64 val;
3049
+
3050
+ /* Make sure we wait until the RD is done with the initial scan */
3051
+ val = read_vpend_dirty_clear(vlpi_base);
3052
+ val &= ~GICR_VPENDBASER_Valid;
3053
+ val &= ~clr;
3054
+ val |= set;
3055
+ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
3056
+
3057
+ val = read_vpend_dirty_clear(vlpi_base);
3058
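+ /* Timed out with Dirty still set: report PendingLast so callers assume interrupts may be pending */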
+ if (unlikely(val & GICR_VPENDBASER_Dirty))
3059
+ val |= GICR_VPENDBASER_PendingLast;
3060
+
20233061 return val;
20243062 }
20253063
....@@ -2027,28 +3065,40 @@
20273065 {
20283066 void __iomem *rbase = gic_data_rdist_rd_base();
20293067 struct page *pend_page;
3068
+ phys_addr_t paddr;
20303069 u64 val, tmp;
20313070
2032
- /* If we didn't allocate the pending table yet, do it now */
2033
- pend_page = gic_data_rdist()->pend_page;
2034
- if (!pend_page) {
2035
- phys_addr_t paddr;
3071
+ if (gic_data_rdist()->lpi_enabled)
3072
+ return;
20363073
2037
- pend_page = its_allocate_pending_table(GFP_NOWAIT);
2038
- if (!pend_page) {
2039
- pr_err("Failed to allocate PENDBASE for CPU%d\n",
2040
- smp_processor_id());
2041
- return;
2042
- }
3074
+ val = readl_relaxed(rbase + GICR_CTLR);
3075
+ if ((gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) &&
3076
+ (val & GICR_CTLR_ENABLE_LPIS)) {
3077
+ /*
3078
+ * Check that we get the same property table on all
3079
+ * RDs. If we don't, this is hopeless.
3080
+ */
3081
+ paddr = gicr_read_propbaser(rbase + GICR_PROPBASER);
3082
+ paddr &= GENMASK_ULL(51, 12);
3083
+ if (WARN_ON(gic_rdists->prop_table_pa != paddr))
3084
+ add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
20433085
2044
- paddr = page_to_phys(pend_page);
2045
- pr_info("CPU%d: using LPI pending table @%pa\n",
2046
- smp_processor_id(), &paddr);
2047
- gic_data_rdist()->pend_page = pend_page;
3086
+ paddr = gicr_read_pendbaser(rbase + GICR_PENDBASER);
3087
+ paddr &= GENMASK_ULL(51, 16);
3088
+
3089
+ WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ));
3090
+ its_free_pending_table(gic_data_rdist()->pend_page);
3091
+ gic_data_rdist()->pend_page = NULL;
3092
+
3093
+ goto out;
20483094 }
20493095
3096
+ pend_page = gic_data_rdist()->pend_page;
3097
+ paddr = page_to_phys(pend_page);
3098
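+ /* Record the pending table as a reserved range so a later (e.g. kexec'd) kernel can detect and keep it */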
+ WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ));
3099
+
20503100 /* set PROPBASE */
2051
- val = (page_to_phys(gic_rdists->prop_page) |
3101
+ val = (gic_rdists->prop_table_pa |
20523102 GICR_PROPBASER_InnerShareable |
20533103 GICR_PROPBASER_RaWaWb |
20543104 ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
....@@ -2056,7 +3106,10 @@
20563106 gicr_write_propbaser(val, rbase + GICR_PROPBASER);
20573107 tmp = gicr_read_propbaser(rbase + GICR_PROPBASER);
20583108
2059
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
3109
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
3110
+ (of_machine_is_compatible("rockchip,rk3568") ||
3111
+ of_machine_is_compatible("rockchip,rk3566") ||
3112
+ of_machine_is_compatible("rockchip,rk3588")))
20603113 tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK;
20613114
20623115 if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
....@@ -2083,7 +3136,10 @@
20833136 gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
20843137 tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER);
20853138
2086
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
3139
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
3140
+ (of_machine_is_compatible("rockchip,rk3568") ||
3141
+ of_machine_is_compatible("rockchip,rk3566") ||
3142
+ of_machine_is_compatible("rockchip,rk3588")))
20873143 tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK;
20883144
20893145 if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
....@@ -2102,7 +3158,7 @@
21023158 val |= GICR_CTLR_ENABLE_LPIS;
21033159 writel_relaxed(val, rbase + GICR_CTLR);
21043160
2105
- if (gic_rdists->has_vlpis) {
3161
+ if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) {
21063162 void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
21073163
21083164 /*
....@@ -2115,19 +3171,34 @@
21153171 val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
21163172 pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n",
21173173 smp_processor_id(), val);
2118
- gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
3174
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
21193175
21203176 /*
21213177 * Also clear Valid bit of GICR_VPENDBASER, in case some
21223178 * ancient programming gets left in and could end up
21233179 * corrupting memory.
21243180 */
2125
- val = its_clear_vpend_valid(vlpi_base);
2126
- WARN_ON(val & GICR_VPENDBASER_Dirty);
3181
+ val = its_clear_vpend_valid(vlpi_base, 0, 0);
3182
+ }
3183
+
3184
+ if (allocate_vpe_l1_table()) {
3185
+ /*
3186
+ * If the allocation has failed, we're in massive trouble.
3187
+ * Disable direct injection, and pray that no VM was
3188
+ * already running...
3189
+ */
3190
+ gic_rdists->has_rvpeid = false;
3191
+ gic_rdists->has_vlpis = false;
21273192 }
21283193
21293194 /* Make sure the GIC has seen the above */
21303195 dsb(sy);
3196
+out:
3197
+ gic_data_rdist()->lpi_enabled = true;
3198
+ pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
3199
+ smp_processor_id(),
3200
+ gic_data_rdist()->pend_page ? "allocated" : "reserved",
3201
+ &paddr);
21313202 }
21323203
21333204 static void its_cpu_init_collection(struct its_node *its)
....@@ -2212,7 +3283,8 @@
22123283 return NULL;
22133284 }
22143285
2215
-static bool its_alloc_table_entry(struct its_baser *baser, u32 id)
3286
+static bool its_alloc_table_entry(struct its_node *its,
3287
+ struct its_baser *baser, u32 id)
22163288 {
22173289 struct page *page;
22183290 u32 esz, idx;
....@@ -2236,7 +3308,8 @@
22363308
22373309 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
22383310 gfp_flags |= GFP_DMA32;
2239
- page = alloc_pages(gfp_flags, get_order(baser->psz));
3311
+ page = alloc_pages_node(its->numa_node, gfp_flags,
3312
+ get_order(baser->psz));
22403313 if (!page)
22413314 return false;
22423315
....@@ -2265,14 +3338,15 @@
22653338
22663339 /* Don't allow device id that exceeds ITS hardware limit */
22673340 if (!baser)
2268
- return (ilog2(dev_id) < its->device_ids);
3341
+ return (ilog2(dev_id) < device_ids(its));
22693342
2270
- return its_alloc_table_entry(baser, dev_id);
3343
+ return its_alloc_table_entry(its, baser, dev_id);
22713344 }
22723345
22733346 static bool its_alloc_vpe_table(u32 vpe_id)
22743347 {
22753348 struct its_node *its;
3349
+ int cpu;
22763350
22773351 /*
22783352 * Make sure the L2 tables are allocated on *all* v4 ITSs. We
....@@ -2284,14 +3358,27 @@
22843358 list_for_each_entry(its, &its_nodes, entry) {
22853359 struct its_baser *baser;
22863360
2287
- if (!its->is_v4)
3361
+ if (!is_v4(its))
22883362 continue;
22893363
22903364 baser = its_get_baser(its, GITS_BASER_TYPE_VCPU);
22913365 if (!baser)
22923366 return false;
22933367
2294
- if (!its_alloc_table_entry(baser, vpe_id))
3368
+ if (!its_alloc_table_entry(its, baser, vpe_id))
3369
+ return false;
3370
+ }
3371
+
3372
+ /* Not v4.1? No need to iterate over the RDs; return early. */
3373
+ if (!gic_rdists->has_rvpeid)
3374
+ return true;
3375
+
3376
+ /*
3377
+ * Make sure the L2 tables are allocated for all copies of
3378
+ * the L1 table on *all* v4.1 RDs.
3379
+ */
3380
+ for_each_possible_cpu(cpu) {
3381
+ if (!allocate_vpe_l2_table(cpu, vpe_id))
22953382 return false;
22963383 }
22973384
....@@ -2324,12 +3411,16 @@
23243411 * sized as a power of two (and you need at least one bit...).
23253412 */
23263413 nr_ites = max(2, nvecs);
2327
- sz = nr_ites * its->ite_size;
3414
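+ /* GITS_TYPER.ITT_entry_size encodes the per-entry size in bytes, minus one */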
+ sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1);
23283415 sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
23293416 gfp_flags = GFP_KERNEL;
2330
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
3417
+ if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566")) {
23313418 gfp_flags |= GFP_DMA32;
2332
- itt = (void *)__get_free_pages(gfp_flags, get_order(sz));
3419
+ itt = (void *)__get_free_pages(gfp_flags, get_order(sz));
3420
+ } else {
3421
+ itt = kzalloc_node(sz, gfp_flags, its->numa_node);
3422
+ }
3423
+
23333424 if (alloc_lpis) {
23343425 lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis);
23353426 if (lpi_map)
....@@ -2343,7 +3434,13 @@
23433434
23443435 if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) {
23453436 kfree(dev);
2346
- free_pages((unsigned long)itt, get_order(sz));
3437
+
3438
+ if (of_machine_is_compatible("rockchip,rk3568") ||
3439
+ of_machine_is_compatible("rockchip,rk3566"))
3440
+ free_pages((unsigned long)itt, get_order(sz));
3441
+ else
3442
+ kfree(itt);
3443
+
23473444 kfree(lpi_map);
23483445 kfree(col_map);
23493446 return NULL;
....@@ -2359,7 +3456,7 @@
23593456 dev->event_map.col_map = col_map;
23603457 dev->event_map.lpi_base = lpi_base;
23613458 dev->event_map.nr_lpis = nr_lpis;
2362
- mutex_init(&dev->event_map.vlpi_lock);
3459
+ raw_spin_lock_init(&dev->event_map.vlpi_lock);
23633460 dev->device_id = dev_id;
23643461 INIT_LIST_HEAD(&dev->entry);
23653462
....@@ -2380,7 +3477,14 @@
23803477 raw_spin_lock_irqsave(&its_dev->its->lock, flags);
23813478 list_del(&its_dev->entry);
23823479 raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
2383
- free_pages((unsigned long)its_dev->itt, get_order(its_dev->itt_sz));
3480
+ kfree(its_dev->event_map.col_map);
3481
+
3482
+ if (of_machine_is_compatible("rockchip,rk3568") ||
3483
+ of_machine_is_compatible("rockchip,rk3566"))
3484
+ free_pages((unsigned long)its_dev->itt, get_order(its_dev->itt_sz));
3485
+ else
3486
+ kfree(its_dev->itt);
3487
+
23843488 kfree(its_dev);
23853489 }
23863490
....@@ -2388,6 +3492,7 @@
23883492 {
23893493 int idx;
23903494
3495
+ /* Find a free LPI region in lpi_map and allocate them. */
23913496 idx = bitmap_find_free_region(dev->event_map.lpi_map,
23923497 dev->event_map.nr_lpis,
23933498 get_count_order(nvecs));
....@@ -2395,7 +3500,6 @@
23953500 return -ENOSPC;
23963501
23973502 *hwirq = dev->event_map.lpi_base + idx;
2398
- set_bit(idx, dev->event_map.lpi_map);
23993503
24003504 return 0;
24013505 }
....@@ -2410,7 +3514,7 @@
24103514 int err = 0;
24113515
24123516 /*
2413
- * We ignore "dev" entierely, and rely on the dev_id that has
3517
+ * We ignore "dev" entirely, and rely on the dev_id that has
24143518 * been passed via the scratchpad. This limits this domain's
24153519 * usefulness to upper layers that definitely know that they
24163520 * are built on top of the ITS.
....@@ -2489,12 +3593,17 @@
24893593 {
24903594 msi_alloc_info_t *info = args;
24913595 struct its_device *its_dev = info->scratchpad[0].ptr;
3596
+ struct its_node *its = its_dev->its;
24923597 struct irq_data *irqd;
24933598 irq_hw_number_t hwirq;
24943599 int err;
24953600 int i;
24963601
24973602 err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq);
3603
+ if (err)
3604
+ return err;
3605
+
3606
+ err = iommu_dma_prepare_msi(info->desc, its->get_msi_base(its_dev));
24983607 if (err)
24993608 return err;
25003609
....@@ -2521,22 +3630,13 @@
25213630 {
25223631 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
25233632 u32 event = its_get_event_id(d);
2524
- const struct cpumask *cpu_mask = cpu_online_mask;
25253633 int cpu;
25263634
2527
- /* get the cpu_mask of local node */
2528
- if (its_dev->its->numa_node >= 0)
2529
- cpu_mask = cpumask_of_node(its_dev->its->numa_node);
3635
+ cpu = its_select_cpu(d, cpu_online_mask);
3636
+ if (cpu < 0 || cpu >= nr_cpu_ids)
3637
+ return -EINVAL;
25303638
2531
- /* Bind the LPI to the first possible CPU */
2532
- cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
2533
- if (cpu >= nr_cpu_ids) {
2534
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
2535
- return -EINVAL;
2536
-
2537
- cpu = cpumask_first(cpu_online_mask);
2538
- }
2539
-
3639
+ its_inc_lpi_count(d, cpu);
25403640 its_dev->event_map.col_map[event] = cpu;
25413641 irq_data_update_effective_affinity(d, cpumask_of(cpu));
25423642
....@@ -2551,6 +3651,7 @@
25513651 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
25523652 u32 event = its_get_event_id(d);
25533653
3654
+ its_dec_lpi_count(d, its_dev->event_map.col_map[event]);
25543655 /* Stop the delivery of interrupts */
25553656 its_send_discard(its_dev, event);
25563657 }
....@@ -2586,7 +3687,6 @@
25863687 its_lpi_free(its_dev->event_map.lpi_map,
25873688 its_dev->event_map.lpi_base,
25883689 its_dev->event_map.nr_lpis);
2589
- kfree(its_dev->event_map.col_map);
25903690
25913691 /* Unmap device/itt */
25923692 its_send_mapd(its_dev, 0);
....@@ -2608,7 +3708,7 @@
26083708 /*
26093709 * This is insane.
26103710 *
2611
- * If a GICv4 doesn't implement Direct LPIs (which is extremely
3711
+ * If a GICv4.0 doesn't implement Direct LPIs (which is extremely
26123712 * likely), the only way to perform an invalidate is to use a fake
26133713 * device to issue an INV command, implying that the LPI has first
26143714 * been mapped to some event on that device. Since this is not exactly
....@@ -2616,9 +3716,20 @@
26163716 * only issue an UNMAP if we're short on available slots.
26173717 *
26183718 * Broken by design(tm).
3719
+ *
3720
+ * GICv4.1, on the other hand, mandates that we're able to invalidate
3721
+ * by writing to an MMIO register. It doesn't implement the whole of
3722
+ * DirectLPI, but that's good enough. And most of the time, we don't
3723
+ * even have to invalidate anything, as the redistributor can be told
3724
+ * whether to generate a doorbell or not (we thus leave it enabled,
3725
+ * always).
26193726 */
26203727 static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
26213728 {
3729
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3730
+ if (gic_rdists->has_rvpeid)
3731
+ return;
3732
+
26223733 /* Already unmapped? */
26233734 if (vpe->vpe_proxy_event == -1)
26243735 return;
....@@ -2641,6 +3752,10 @@
26413752
26423753 static void its_vpe_db_proxy_unmap(struct its_vpe *vpe)
26433754 {
3755
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3756
+ if (gic_rdists->has_rvpeid)
3757
+ return;
3758
+
26443759 if (!gic_rdists->has_direct_lpi) {
26453760 unsigned long flags;
26463761
....@@ -2652,6 +3767,10 @@
26523767
26533768 static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe)
26543769 {
3770
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3771
+ if (gic_rdists->has_rvpeid)
3772
+ return;
3773
+
26553774 /* Already mapped? */
26563775 if (vpe->vpe_proxy_event != -1)
26573776 return;
....@@ -2674,13 +3793,16 @@
26743793 unsigned long flags;
26753794 struct its_collection *target_col;
26763795
3796
+ /* GICv4.1 doesn't use a proxy, so nothing to do here */
3797
+ if (gic_rdists->has_rvpeid)
3798
+ return;
3799
+
26773800 if (gic_rdists->has_direct_lpi) {
26783801 void __iomem *rdbase;
26793802
26803803 rdbase = per_cpu_ptr(gic_rdists->rdist, from)->rd_base;
26813804 gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
2682
- while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
2683
- cpu_relax();
3805
+ wait_for_syncr(rdbase);
26843806
26853807 return;
26863808 }
....@@ -2701,25 +3823,58 @@
27013823 bool force)
27023824 {
27033825 struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
2704
- int cpu = cpumask_first(mask_val);
3826
+ int from, cpu = cpumask_first(mask_val);
3827
+ unsigned long flags;
27053828
27063829 /*
27073830 * Changing affinity is mega expensive, so let's be as lazy as
27083831 * we can and only do it if we really have to. Also, if mapped
27093832 * into the proxy device, we need to move the doorbell
27103833 * interrupt to its new location.
3834
+ *
3835
+ * Another thing is that changing the affinity of a vPE affects
3836
+ * *other interrupts* such as all the vLPIs that are routed to
3837
+ * this vPE. This means that the irq_desc lock is not enough to
3838
+ * protect us, and that we must ensure nobody samples vpe->col_idx
3839
+ * during the update, hence the lock below which must also be
3840
+ * taken on any vLPI handling path that evaluates vpe->col_idx.
27113841 */
2712
- if (vpe->col_idx != cpu) {
2713
- int from = vpe->col_idx;
3842
+ from = vpe_to_cpuid_lock(vpe, &flags);
3843
+ if (from == cpu)
3844
+ goto out;
27143845
2715
- vpe->col_idx = cpu;
2716
- its_send_vmovp(vpe);
2717
- its_vpe_db_proxy_move(vpe, from, cpu);
2718
- }
3846
+ vpe->col_idx = cpu;
27193847
3848
+ /*
3849
+ * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD
3850
+ * is sharing its VPE table with the current one.
3851
+ */
3852
+ if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
3853
+ cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
3854
+ goto out;
3855
+
3856
+ its_send_vmovp(vpe);
3857
+ its_vpe_db_proxy_move(vpe, from, cpu);
3858
+
3859
+out:
27203860 irq_data_update_effective_affinity(d, cpumask_of(cpu));
3861
+ vpe_to_cpuid_unlock(vpe, flags);
27213862
27223863 return IRQ_SET_MASK_OK_DONE;
3864
+}
3865
+
3866
+static void its_wait_vpt_parse_complete(void)
3867
+{
3868
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
3869
+ u64 val;
3870
+
3871
+ if (!gic_rdists->has_vpend_valid_dirty)
3872
+ return;
3873
+
3874
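+ /* Poll VPENDBASER.Dirty (every 10us, for up to 500us) until the RD has parsed the virtual pending table */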
+ WARN_ON_ONCE(readq_relaxed_poll_timeout_atomic(vlpi_base + GICR_VPENDBASER,
3875
+ val,
3876
+ !(val & GICR_VPENDBASER_Dirty),
3877
+ 10, 500));
27233878 }
27243879
27253880 static void its_vpe_schedule(struct its_vpe *vpe)
....@@ -2733,12 +3888,12 @@
27333888 val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
27343889 val |= GICR_VPROPBASER_RaWb;
27353890 val |= GICR_VPROPBASER_InnerShareable;
2736
- gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
3891
+ gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
27373892
27383893 val = virt_to_phys(page_address(vpe->vpt_page)) &
27393894 GENMASK_ULL(51, 16);
27403895 val |= GICR_VPENDBASER_RaWaWb;
2741
- val |= GICR_VPENDBASER_NonShareable;
3896
+ val |= GICR_VPENDBASER_InnerShareable;
27423897 /*
27433898 * There is no good way of finding out if the pending table is
27443899 * empty as we can race against the doorbell interrupt very
....@@ -2751,7 +3906,7 @@
27513906 val |= GICR_VPENDBASER_PendingLast;
27523907 val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
27533908 val |= GICR_VPENDBASER_Valid;
2754
- gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
3909
+ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
27553910 }
27563911
27573912 static void its_vpe_deschedule(struct its_vpe *vpe)
....@@ -2759,16 +3914,10 @@
27593914 void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
27603915 u64 val;
27613916
2762
- val = its_clear_vpend_valid(vlpi_base);
3917
+ val = its_clear_vpend_valid(vlpi_base, 0, 0);
27633918
2764
- if (unlikely(val & GICR_VPENDBASER_Dirty)) {
2765
- pr_err_ratelimited("ITS virtual pending table not cleaning\n");
2766
- vpe->idai = false;
2767
- vpe->pending_last = true;
2768
- } else {
2769
- vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
2770
- vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
2771
- }
3919
+ vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
3920
+ vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
27723921 }
27733922
27743923 static void its_vpe_invall(struct its_vpe *vpe)
....@@ -2776,7 +3925,7 @@
27763925 struct its_node *its;
27773926
27783927 list_for_each_entry(its, &its_nodes, entry) {
2779
- if (!its->is_v4)
3928
+ if (!is_v4(its))
27803929 continue;
27813930
27823931 if (its_list_map && !vpe->its_vm->vlpi_count[its->list_nr])
....@@ -2803,6 +3952,10 @@
28033952
28043953 case DESCHEDULE_VPE:
28053954 its_vpe_deschedule(vpe);
3955
+ return 0;
3956
+
3957
+ case COMMIT_VPE:
3958
+ its_wait_vpt_parse_complete();
28063959 return 0;
28073960
28083961 case INVALL_VPE:
....@@ -2834,10 +3987,12 @@
28343987 if (gic_rdists->has_direct_lpi) {
28353988 void __iomem *rdbase;
28363989
3990
+ /* Target the redistributor this VPE is currently known on */
3991
+ raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
28373992 rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
2838
- gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
2839
- while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
2840
- cpu_relax();
3993
+ gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
3994
+ wait_for_syncr(rdbase);
3995
+ raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
28413996 } else {
28423997 its_vpe_send_cmd(vpe, its_send_inv);
28433998 }
....@@ -2879,8 +4034,7 @@
28794034 gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR);
28804035 } else {
28814036 gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
2882
- while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
2883
- cpu_relax();
4037
+ wait_for_syncr(rdbase);
28844038 }
28854039 } else {
28864040 if (state)
....@@ -2906,6 +4060,375 @@
29064060 .irq_retrigger = its_vpe_retrigger,
29074061 .irq_set_irqchip_state = its_vpe_set_irqchip_state,
29084062 .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity,
4063
+};
4064
+
4065
+static struct its_node *find_4_1_its(void)
4066
+{
4067
+ static struct its_node *its = NULL;
4068
+
4069
+ if (!its) {
4070
+ list_for_each_entry(its, &its_nodes, entry) {
4071
+ if (is_v4_1(its))
4072
+ return its;
4073
+ }
4074
+
4075
+ /* No v4.1 ITS found: don't cache the invalid loop cursor */
4076
+ its = NULL;
4077
+ }
4078
+
4079
+ return its;
4080
+}
4081
+
4082
+static void its_vpe_4_1_send_inv(struct irq_data *d)
4083
+{
4084
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4085
+ struct its_node *its;
4086
+
4087
+ /*
4088
+ * GICv4.1 wants doorbells to be invalidated using the
4089
+ * INVDB command in order to be broadcast to all RDs. Send
4090
+ * it to the first valid ITS, and let the HW do its magic.
4091
+ */
4092
+ its = find_4_1_its();
4093
+ if (its)
4094
+ its_send_invdb(its, vpe);
4095
+}
4096
+
4097
+static void its_vpe_4_1_mask_irq(struct irq_data *d)
4098
+{
4099
+ lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0);
4100
+ its_vpe_4_1_send_inv(d);
4101
+}
4102
+
4103
+static void its_vpe_4_1_unmask_irq(struct irq_data *d)
4104
+{
4105
+ lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED);
4106
+ its_vpe_4_1_send_inv(d);
4107
+}
4108
+
4109
+static void its_vpe_4_1_schedule(struct its_vpe *vpe,
4110
+ struct its_cmd_info *info)
4111
+{
4112
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
4113
+ u64 val = 0;
4114
+
4115
+ /* Schedule the VPE */
4116
+ val |= GICR_VPENDBASER_Valid;
4117
+ val |= info->g0en ? GICR_VPENDBASER_4_1_VGRP0EN : 0;
4118
+ val |= info->g1en ? GICR_VPENDBASER_4_1_VGRP1EN : 0;
4119
+ val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
4120
+
4121
+ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
4122
+}
4123
+
4124
+static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
4125
+ struct its_cmd_info *info)
4126
+{
4127
+ void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
4128
+ u64 val;
4129
+
4130
+ if (info->req_db) {
4131
+ unsigned long flags;
4132
+
4133
+ /*
4134
+ * vPE is going to block: make the vPE non-resident with
4135
+ * PendingLast clear and DB set. The GIC guarantees that if
4136
+ * we read-back PendingLast clear, then a doorbell will be
4137
+ * delivered when an interrupt comes.
4138
+ *
4139
+ * Note the locking to deal with the update of pending_last
4140
+ * from the doorbell interrupt handler, which can run
4141
+ * concurrently.
4142
+ */
4143
+ raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
4144
+ val = its_clear_vpend_valid(vlpi_base,
4145
+ GICR_VPENDBASER_PendingLast,
4146
+ GICR_VPENDBASER_4_1_DB);
4147
+ vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
4148
+ raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
4149
+ } else {
4150
+ /*
4151
+ * We're not blocking, so just make the vPE non-resident
4152
+ * with PendingLast set, indicating that we'll be back.
4153
+ */
4154
+ val = its_clear_vpend_valid(vlpi_base,
4155
+ 0,
4156
+ GICR_VPENDBASER_PendingLast);
4157
+ vpe->pending_last = true;
4158
+ }
4159
+}
4160
+
4161
+static void its_vpe_4_1_invall(struct its_vpe *vpe)
4162
+{
4163
+ void __iomem *rdbase;
4164
+ unsigned long flags;
4165
+ u64 val;
4166
+ int cpu;
4167
+
4168
+ val = GICR_INVALLR_V;
4169
+ val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id);
4170
+
4171
+ /* Target the redistributor this vPE is currently known on */
4172
+ cpu = vpe_to_cpuid_lock(vpe, &flags);
4173
+ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
4174
+ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
4175
+ gic_write_lpir(val, rdbase + GICR_INVALLR);
4176
+
4177
+ wait_for_syncr(rdbase);
4178
+ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
4179
+ vpe_to_cpuid_unlock(vpe, flags);
4180
+}
4181
+
4182
+static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
4183
+{
4184
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4185
+ struct its_cmd_info *info = vcpu_info;
4186
+
4187
+ switch (info->cmd_type) {
4188
+ case SCHEDULE_VPE:
4189
+ its_vpe_4_1_schedule(vpe, info);
4190
+ return 0;
4191
+
4192
+ case DESCHEDULE_VPE:
4193
+ its_vpe_4_1_deschedule(vpe, info);
4194
+ return 0;
4195
+
4196
+ case COMMIT_VPE:
4197
+ its_wait_vpt_parse_complete();
4198
+ return 0;
4199
+
4200
+ case INVALL_VPE:
4201
+ its_vpe_4_1_invall(vpe);
4202
+ return 0;
4203
+
4204
+ default:
4205
+ return -EINVAL;
4206
+ }
4207
+}
4208
+
4209
+static struct irq_chip its_vpe_4_1_irq_chip = {
4210
+ .name = "GICv4.1-vpe",
4211
+ .irq_mask = its_vpe_4_1_mask_irq,
4212
+ .irq_unmask = its_vpe_4_1_unmask_irq,
4213
+ .irq_eoi = irq_chip_eoi_parent,
4214
+ .irq_set_affinity = its_vpe_set_affinity,
4215
+ .irq_set_vcpu_affinity = its_vpe_4_1_set_vcpu_affinity,
4216
+};
4217
+
4218
+static void its_configure_sgi(struct irq_data *d, bool clear)
4219
+{
4220
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4221
+ struct its_cmd_desc desc;
4222
+
4223
+ desc.its_vsgi_cmd.vpe = vpe;
4224
+ desc.its_vsgi_cmd.sgi = d->hwirq;
4225
+ desc.its_vsgi_cmd.priority = vpe->sgi_config[d->hwirq].priority;
4226
+ desc.its_vsgi_cmd.enable = vpe->sgi_config[d->hwirq].enabled;
4227
+ desc.its_vsgi_cmd.group = vpe->sgi_config[d->hwirq].group;
4228
+ desc.its_vsgi_cmd.clear = clear;
4229
+
4230
+ /*
4231
+ * GICv4.1 allows us to send VSGI commands to any ITS as long as the
4232
+ * destination VPE is mapped there. Since we map them eagerly at
4233
+ * activation time, we're pretty sure the first GICv4.1 ITS will do.
4234
+ */
4235
+ its_send_single_vcommand(find_4_1_its(), its_build_vsgi_cmd, &desc);
4236
+}
4237
+
4238
+static void its_sgi_mask_irq(struct irq_data *d)
4239
+{
4240
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4241
+
4242
+ vpe->sgi_config[d->hwirq].enabled = false;
4243
+ its_configure_sgi(d, false);
4244
+}
4245
+
4246
+static void its_sgi_unmask_irq(struct irq_data *d)
4247
+{
4248
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4249
+
4250
+ vpe->sgi_config[d->hwirq].enabled = true;
4251
+ its_configure_sgi(d, false);
4252
+}
4253
+
4254
+static int its_sgi_set_affinity(struct irq_data *d,
4255
+ const struct cpumask *mask_val,
4256
+ bool force)
4257
+{
4258
+ /*
4259
+ * There is no notion of affinity for virtual SGIs, at least
4260
+ * not on the host (since they can only be targeting a vPE).
4261
+ * Tell the kernel we've done whatever it asked for.
4262
+ */
4263
+ irq_data_update_effective_affinity(d, mask_val);
4264
+ return IRQ_SET_MASK_OK;
4265
+}
4266
+
4267
+static int its_sgi_set_irqchip_state(struct irq_data *d,
4268
+ enum irqchip_irq_state which,
4269
+ bool state)
4270
+{
4271
+ if (which != IRQCHIP_STATE_PENDING)
4272
+ return -EINVAL;
4273
+
4274
+ if (state) {
4275
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4276
+ struct its_node *its = find_4_1_its();
4277
+ u64 val;
4278
+
4279
+ val = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id);
4280
+ val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq);
4281
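+ /* GITS_SGIR is an offset from the ITS base; sgir_base maps the frame at base + 128K, hence the adjustment */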
+ writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K);
4282
+ } else {
4283
+ its_configure_sgi(d, true);
4284
+ }
4285
+
4286
+ return 0;
4287
+}
4288
+
4289
+static int its_sgi_get_irqchip_state(struct irq_data *d,
4290
+ enum irqchip_irq_state which, bool *val)
4291
+{
4292
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4293
+ void __iomem *base;
4294
+ unsigned long flags;
4295
+ u32 count = 1000000; /* 1s! */
4296
+ u32 status;
4297
+ int cpu;
4298
+
4299
+ if (which != IRQCHIP_STATE_PENDING)
4300
+ return -EINVAL;
4301
+
4302
+ /*
4303
+ * Locking galore! We can race against two different events:
4304
+ *
4305
+ * - Concurrent vPE affinity change: we must make sure it cannot
4306
+ * happen, or we'll talk to the wrong redistributor. This is
4307
+ * identical to what happens with vLPIs.
4308
+ *
4309
+ * - Concurrent VSGIPENDR access: As it involves accessing two
4310
+ * MMIO registers, this must be made atomic one way or another.
4311
+ */
4312
+ cpu = vpe_to_cpuid_lock(vpe, &flags);
4313
+ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
4314
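+ /* The vSGI registers (GICR_VSGIR/GICR_VSGIPENDR) live in the VLPI frame, 128K above RD_base */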
+ base = gic_data_rdist_cpu(cpu)->rd_base + SZ_128K;
4315
+ writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
4316
+ do {
4317
+ status = readl_relaxed(base + GICR_VSGIPENDR);
4318
+ if (!(status & GICR_VSGIPENDR_BUSY))
4319
+ goto out;
4320
+
4321
+ count--;
4322
+ if (!count) {
4323
+ pr_err_ratelimited("Unable to get SGI status\n");
4324
+ goto out;
4325
+ }
4326
+ cpu_relax();
4327
+ udelay(1);
4328
+ } while (count);
4329
+
4330
+out:
4331
+ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
4332
+ vpe_to_cpuid_unlock(vpe, flags);
4333
+
4334
+ if (!count)
4335
+ return -ENXIO;
4336
+
4337
+ *val = !!(status & (1 << d->hwirq));
4338
+
4339
+ return 0;
4340
+}
4341
+
4342
+static int its_sgi_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
4343
+{
4344
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4345
+ struct its_cmd_info *info = vcpu_info;
4346
+
4347
+ switch (info->cmd_type) {
4348
+ case PROP_UPDATE_VSGI:
4349
+ vpe->sgi_config[d->hwirq].priority = info->priority;
4350
+ vpe->sgi_config[d->hwirq].group = info->group;
4351
+ its_configure_sgi(d, false);
4352
+ return 0;
4353
+
4354
+ default:
4355
+ return -EINVAL;
4356
+ }
4357
+}
4358
+
4359
+static struct irq_chip its_sgi_irq_chip = {
4360
+ .name = "GICv4.1-sgi",
4361
+ .irq_mask = its_sgi_mask_irq,
4362
+ .irq_unmask = its_sgi_unmask_irq,
4363
+ .irq_set_affinity = its_sgi_set_affinity,
4364
+ .irq_set_irqchip_state = its_sgi_set_irqchip_state,
4365
+ .irq_get_irqchip_state = its_sgi_get_irqchip_state,
4366
+ .irq_set_vcpu_affinity = its_sgi_set_vcpu_affinity,
4367
+};
4368
+
4369
+static int its_sgi_irq_domain_alloc(struct irq_domain *domain,
4370
+ unsigned int virq, unsigned int nr_irqs,
4371
+ void *args)
4372
+{
4373
+ struct its_vpe *vpe = args;
4374
+ int i;
4375
+
4376
+ /* Yes, we do want 16 SGIs */
4377
+ WARN_ON(nr_irqs != 16);
4378
+
4379
+ for (i = 0; i < 16; i++) {
4380
+ vpe->sgi_config[i].priority = 0;
4381
+ vpe->sgi_config[i].enabled = false;
4382
+ vpe->sgi_config[i].group = false;
4383
+
4384
+ irq_domain_set_hwirq_and_chip(domain, virq + i, i,
4385
+ &its_sgi_irq_chip, vpe);
4386
+ irq_set_status_flags(virq + i, IRQ_DISABLE_UNLAZY);
4387
+ }
4388
+
4389
+ return 0;
4390
+}
4391
+
4392
+static void its_sgi_irq_domain_free(struct irq_domain *domain,
4393
+ unsigned int virq,
4394
+ unsigned int nr_irqs)
4395
+{
4396
+ /* Nothing to do */
4397
+}
4398
+
4399
+static int its_sgi_irq_domain_activate(struct irq_domain *domain,
4400
+ struct irq_data *d, bool reserve)
4401
+{
4402
+ /* Write out the initial SGI configuration */
4403
+ its_configure_sgi(d, false);
4404
+ return 0;
4405
+}
4406
+
4407
+static void its_sgi_irq_domain_deactivate(struct irq_domain *domain,
4408
+ struct irq_data *d)
4409
+{
4410
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
4411
+
4412
+ /*
4413
+ * The VSGI command is awkward:
4414
+ *
4415
+ * - To change the configuration, CLEAR must be set to false,
4416
+ * leaving the pending bit unchanged.
4417
+ * - To clear the pending bit, CLEAR must be set to true, leaving
4418
+ * the configuration unchanged.
4419
+ *
4420
+ * You just can't do both at once, hence the two commands below.
4421
+ */
4422
+ vpe->sgi_config[d->hwirq].enabled = false;
4423
+ its_configure_sgi(d, false);
4424
+ its_configure_sgi(d, true);
4425
+}
4426
+
4427
+static const struct irq_domain_ops its_sgi_domain_ops = {
4428
+ .alloc = its_sgi_irq_domain_alloc,
4429
+ .free = its_sgi_irq_domain_free,
4430
+ .activate = its_sgi_irq_domain_activate,
4431
+ .deactivate = its_sgi_irq_domain_deactivate,
29094432 };
29104433
29114434 static int its_vpe_id_alloc(void)
....@@ -2941,9 +4464,13 @@
29414464 return -ENOMEM;
29424465 }
29434466
4467
+ raw_spin_lock_init(&vpe->vpe_lock);
29444468 vpe->vpe_id = vpe_id;
29454469 vpe->vpt_page = vpt_page;
2946
- vpe->vpe_proxy_event = -1;
4470
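+ /* GICv4.1 tracks VMAPPs per vPE; the GICv4.0 doorbell proxy isn't used */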
+ if (gic_rdists->has_rvpeid)
4471
+ atomic_set(&vpe->vmapp_count, 0);
4472
+ else
4473
+ vpe->vpe_proxy_event = -1;
29474474
29484475 return 0;
29494476 }
....@@ -2985,6 +4512,7 @@
29854512 static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
29864513 unsigned int nr_irqs, void *args)
29874514 {
4515
+ struct irq_chip *irqchip = &its_vpe_irq_chip;
29884516 struct its_vm *vm = args;
29894517 unsigned long *bitmap;
29904518 struct page *vprop_page;
....@@ -3012,6 +4540,9 @@
30124540 vm->nr_db_lpis = nr_ids;
30134541 vm->vprop_page = vprop_page;
30144542
4543
+ if (gic_rdists->has_rvpeid)
4544
+ irqchip = &its_vpe_4_1_irq_chip;
4545
+
30154546 for (i = 0; i < nr_irqs; i++) {
30164547 vm->vpes[i]->vpe_db_lpi = base + i;
30174548 err = its_vpe_init(vm->vpes[i]);
....@@ -3022,7 +4553,7 @@
30224553 if (err)
30234554 break;
30244555 irq_domain_set_hwirq_and_chip(domain, virq + i, i,
3025
- &its_vpe_irq_chip, vm->vpes[i]);
4556
+ irqchip, vm->vpes[i]);
30264557 set_bit(i, bitmap);
30274558 }
30284559
....@@ -3043,15 +4574,19 @@
30434574 struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
30444575 struct its_node *its;
30454576
3046
- /* If we use the list map, we issue VMAPP on demand... */
3047
- if (its_list_map)
4577
+ /*
4578
+ * If we use the list map, we issue VMAPP on demand... Unless
4579
+ * we're on a GICv4.1 and we eagerly map the VPE on all ITSs
4580
+ * so that VSGIs can work.
4581
+ */
4582
+ if (!gic_requires_eager_mapping())
30484583 return 0;
30494584
30504585 /* Map the VPE to the first possible CPU */
30514586 vpe->col_idx = cpumask_first(cpu_online_mask);
30524587
30534588 list_for_each_entry(its, &its_nodes, entry) {
3054
- if (!its->is_v4)
4589
+ if (!is_v4(its))
30554590 continue;
30564591
30574592 its_send_vmapp(its, vpe, true);
....@@ -3070,14 +4605,14 @@
30704605 struct its_node *its;
30714606
30724607 /*
3073
- * If we use the list map, we unmap the VPE once no VLPIs are
3074
- * associated with the VM.
4608
+ * If we use the list map on GICv4.0, we unmap the VPE once no
4609
+ * VLPIs are associated with the VM.
30754610 */
3076
- if (its_list_map)
4611
+ if (!gic_requires_eager_mapping())
30774612 return;
30784613
30794614 list_for_each_entry(its, &its_nodes, entry) {
3080
- if (!its->is_v4)
4615
+ if (!is_v4(its))
30814616 continue;
30824617
30834618 its_send_vmapp(its, vpe, false);
....@@ -3128,8 +4663,9 @@
31284663 {
31294664 struct its_node *its = data;
31304665
3131
- /* erratum 22375: only alloc 8MB table size */
3132
- its->device_ids = 0x14; /* 20 bits, 8MB */
4666
+ /* erratum 22375: only alloc 8MB table size (20 bits) */
4667
+ its->typer &= ~GITS_TYPER_DEVBITS;
4668
+ its->typer |= FIELD_PREP(GITS_TYPER_DEVBITS, 20 - 1);
31334669 its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_22375;
31344670
31354671 return true;
....@@ -3149,7 +4685,8 @@
31494685 struct its_node *its = data;
31504686
31514687 /* On QDF2400, the size of the ITE is 16Bytes */
3152
- its->ite_size = 16;
4688
+ its->typer &= ~GITS_TYPER_ITT_ENTRY_SIZE;
4689
+ its->typer |= FIELD_PREP(GITS_TYPER_ITT_ENTRY_SIZE, 16 - 1);
31534690
31544691 return true;
31554692 }
....@@ -3183,8 +4720,10 @@
31834720 its->get_msi_base = its_irq_get_msi_base_pre_its;
31844721
31854722 ids = ilog2(pre_its_window[1]) - 2;
3186
- if (its->device_ids > ids)
3187
- its->device_ids = ids;
4723
+ if (device_ids(its) > ids) {
4724
+ its->typer &= ~GITS_TYPER_DEVBITS;
4725
+ its->typer |= FIELD_PREP(GITS_TYPER_DEVBITS, ids - 1);
4726
+ }
31884727
31894728 /* the pre-ITS breaks isolation, so disable MSI remapping */
31904729 its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP;
....@@ -3411,7 +4950,7 @@
34114950 }
34124951
34134952 /* Use the last possible DevID */
3414
- devid = GENMASK(its->device_ids - 1, 0);
4953
+ devid = GENMASK(device_ids(its) - 1, 0);
34154954 vpe_proxy.dev = its_create_device(its, devid, entries, false);
34164955 if (!vpe_proxy.dev) {
34174956 kfree(vpe_proxy.vpes);
....@@ -3474,10 +5013,11 @@
34745013 void __iomem *its_base;
34755014 u32 val, ctlr;
34765015 u64 baser, tmp, typer;
5016
+ struct page *page;
34775017 int err;
34785018 gfp_t gfp_flags;
34795019
3480
- its_base = ioremap(res->start, resource_size(res));
5020
+ its_base = ioremap(res->start, SZ_64K);
34815021 if (!its_base) {
34825022 pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start);
34835023 return -ENOMEM;
....@@ -3509,12 +5049,10 @@
35095049 INIT_LIST_HEAD(&its->entry);
35105050 INIT_LIST_HEAD(&its->its_device_list);
35115051 typer = gic_read_typer(its_base + GITS_TYPER);
5052
+ its->typer = typer;
35125053 its->base = its_base;
35135054 its->phys_base = res->start;
3514
- its->ite_size = GITS_TYPER_ITT_ENTRY_SIZE(typer);
3515
- its->device_ids = GITS_TYPER_DEVBITS(typer);
3516
- its->is_v4 = !!(typer & GITS_TYPER_VLPIS);
3517
- if (its->is_v4) {
5055
+ if (is_v4(its)) {
35185056 if (!(typer & GITS_TYPER_VMOVP)) {
35195057 err = its_compute_its_list_map(res, its_base);
35205058 if (err < 0)
....@@ -3527,6 +5065,21 @@
35275065 } else {
35285066 pr_info("ITS@%pa: Single VMOVP capable\n", &res->start);
35295067 }
5068
+
5069
+ if (is_v4_1(its)) {
5070
+ u32 svpet = FIELD_GET(GITS_TYPER_SVPET, typer);
5071
+
5072
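+ /* The GITS_SGIR frame sits 128K above the ITS control frame */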
+ its->sgir_base = ioremap(res->start + SZ_128K, SZ_64K);
5073
+ if (!its->sgir_base) {
5074
+ err = -ENOMEM;
5075
+ goto out_free_its;
5076
+ }
5077
+
5078
+ its->mpidr = readl_relaxed(its_base + GITS_MPIDR);
5079
+
5080
+ pr_info("ITS@%pa: Using GICv4.1 mode %08x %08x\n",
5081
+ &res->start, its->mpidr, svpet);
5082
+ }
35305083 }
35315084
35325085 its->numa_node = numa_node;
....@@ -3534,12 +5087,13 @@
35345087 gfp_flags = GFP_KERNEL | __GFP_ZERO;
35355088 if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
35365089 gfp_flags |= GFP_DMA32;
3537
- its->cmd_base = (void *)__get_free_pages(gfp_flags,
3538
- get_order(ITS_CMD_QUEUE_SZ));
3539
- if (!its->cmd_base) {
5090
+ page = alloc_pages_node(its->numa_node, gfp_flags,
5091
+ get_order(ITS_CMD_QUEUE_SZ));
5092
+ if (!page) {
35405093 err = -ENOMEM;
3541
- goto out_free_its;
5094
+ goto out_unmap_sgir;
35425095 }
5096
+ its->cmd_base = (void *)page_address(page);
35435097 its->cmd_write = its->cmd_base;
35445098 its->fwnode_handle = handle;
35455099 its->get_msi_base = its_irq_get_msi_base;
....@@ -3564,7 +5118,10 @@
35645118 gits_write_cbaser(baser, its->base + GITS_CBASER);
35655119 tmp = gits_read_cbaser(its->base + GITS_CBASER);
35665120
3567
- if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566"))
5121
+ if (IS_ENABLED(CONFIG_NO_GKI) &&
5122
+ (of_machine_is_compatible("rockchip,rk3568") ||
5123
+ of_machine_is_compatible("rockchip,rk3566") ||
5124
+ of_machine_is_compatible("rockchip,rk3588")))
35685125 tmp &= ~GITS_CBASER_SHAREABILITY_MASK;
35695126
35705127 if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
....@@ -3586,7 +5143,7 @@
35865143 gits_write_cwriter(0, its->base + GITS_CWRITER);
35875144 ctlr = readl_relaxed(its->base + GITS_CTLR);
35885145 ctlr |= GITS_CTLR_ENABLE;
3589
- if (its->is_v4)
5146
+ if (is_v4(its))
35905147 ctlr |= GITS_CTLR_ImDe;
35915148 writel_relaxed(ctlr, its->base + GITS_CTLR);
35925149
....@@ -3604,6 +5161,9 @@
36045161 its_free_tables(its);
36055162 out_free_cmd:
36065163 free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ));
5164
+out_unmap_sgir:
5165
+ if (its->sgir_base)
5166
+ iounmap(its->sgir_base);
36075167 out_free_its:
36085168 kfree(its);
36095169 out_unmap:
....@@ -3623,16 +5183,6 @@
36235183 u64 timeout = USEC_PER_SEC;
36245184 u64 val;
36255185
3626
- /*
3627
- * If coming via a CPU hotplug event, we don't need to disable
3628
- * LPIs before trying to re-enable them. They are already
3629
- * configured and all is well in the world. Detect this case
3630
- * by checking the allocation of the pending table for the
3631
- * current CPU.
3632
- */
3633
- if (gic_data_rdist()->pend_page)
3634
- return 0;
3635
-
36365186 if (!gic_rdists_supports_plpis()) {
36375187 pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
36385188 return -ENXIO;
....@@ -3642,7 +5192,21 @@
36425192 if (!(val & GICR_CTLR_ENABLE_LPIS))
36435193 return 0;
36445194
3645
- pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
5195
+ /*
5196
+ * If coming via a CPU hotplug event, we don't need to disable
5197
+ * LPIs before trying to re-enable them. They are already
5198
+ * configured and all is well in the world.
5199
+ *
5200
+ * If running with preallocated tables, there is nothing to do.
5201
+ */
5202
+ if (gic_data_rdist()->lpi_enabled ||
5203
+ (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED))
5204
+ return 0;
5205
+
5206
+ /*
5207
+ * From that point on, we only try to do some damage control.
5208
+ */
5209
+ pr_warn("GICv3: CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
36465210 smp_processor_id());
36475211 add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
36485212
....@@ -3753,13 +5317,13 @@
37535317 return NUMA_NO_NODE;
37545318 }
37555319
3756
-static int __init gic_acpi_match_srat_its(struct acpi_subtable_header *header,
5320
+static int __init gic_acpi_match_srat_its(union acpi_subtable_headers *header,
37575321 const unsigned long end)
37585322 {
37595323 return 0;
37605324 }
37615325
3762
-static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header,
5326
+static int __init gic_acpi_parse_srat_its(union acpi_subtable_headers *header,
37635327 const unsigned long end)
37645328 {
37655329 int node;
....@@ -3775,7 +5339,12 @@
37755339 return -EINVAL;
37765340 }
37775341
3778
- node = acpi_map_pxm_to_node(its_affinity->proximity_domain);
5342
+ /*
5343
+ * Note that in theory a new proximity node could be created by this
5344
+ * entry as it is an SRAT resource allocation structure.
5345
+ * We do not currently support doing so.
5346
+ */
5347
+ node = pxm_to_node(its_affinity->proximity_domain);
37795348
37805349 if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
37815350 pr_err("SRAT: Invalid NUMA node %d in ITS affinity\n", node);
....@@ -3826,7 +5395,7 @@
38265395 static void __init acpi_its_srat_maps_free(void) { }
38275396 #endif
38285397
3829
-static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
5398
+static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header,
38305399 const unsigned long end)
38315400 {
38325401 struct acpi_madt_generic_translator *its_entry;
....@@ -3840,7 +5409,7 @@
38405409 res.end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1;
38415410 res.flags = IORESOURCE_MEM;
38425411
3843
- dom_handle = irq_domain_alloc_fwnode((void *)its_entry->base_address);
5412
+ dom_handle = irq_domain_alloc_fwnode(&res.start);
38445413 if (!dom_handle) {
38455414 pr_err("ITS@%pa: Unable to allocate GICv3 ITS domain token\n",
38465415 &res.start);
....@@ -3883,7 +5452,10 @@
38835452 struct device_node *of_node;
38845453 struct its_node *its;
38855454 bool has_v4 = false;
5455
+ bool has_v4_1 = false;
38865456 int err;
5457
+
5458
+ gic_rdists = rdists;
38875459
38885460 its_parent = parent_domain;
38895461 of_node = to_of_node(handle);
....@@ -3897,17 +5469,29 @@
38975469 return -ENXIO;
38985470 }
38995471
3900
- gic_rdists = rdists;
3901
- err = its_alloc_lpi_tables();
5472
+ err = allocate_lpi_tables();
39025473 if (err)
39035474 return err;
39045475
3905
- list_for_each_entry(its, &its_nodes, entry)
3906
- has_v4 |= its->is_v4;
5476
+ list_for_each_entry(its, &its_nodes, entry) {
5477
+ has_v4 |= is_v4(its);
5478
+ has_v4_1 |= is_v4_1(its);
5479
+ }
5480
+
5481
+ /* Don't bother with inconsistent systems */
5482
+ if (WARN_ON(!has_v4_1 && rdists->has_rvpeid))
5483
+ rdists->has_rvpeid = false;
39075484
39085485 if (has_v4 & rdists->has_vlpis) {
5486
+ const struct irq_domain_ops *sgi_ops;
5487
+
5488
+ if (has_v4_1)
5489
+ sgi_ops = &its_sgi_domain_ops;
5490
+ else
5491
+ sgi_ops = NULL;
5492
+
39095493 if (its_init_vpe_domain() ||
3910
- its_init_v4(parent_domain, &its_vpe_domain_ops)) {
5494
+ its_init_v4(parent_domain, &its_vpe_domain_ops, sgi_ops)) {
39115495 rdists->has_vlpis = false;
39125496 pr_err("ITS: Disabling GICv4 support\n");
39135497 }