2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/arch/powerpc/kernel/iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  *
@@ -6,20 +7,6 @@
  * and Ben. Herrenschmidt, IBM Corporation
  *
  * Dynamic DMA mapping support, bus-independent parts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 
@@ -47,6 +34,7 @@
 #include <asm/fadump.h>
 #include <asm/vio.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 #define DBG(...)
 
@@ -145,17 +133,28 @@
 	return 0;
 }
 
-static struct notifier_block fail_iommu_bus_notifier = {
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes. Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
 	.notifier_call = fail_iommu_bus_notify
 };
+
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
+	.notifier_call = fail_iommu_bus_notify
+};
+#endif
 
 static int __init fail_iommu_setup(void)
 {
 #ifdef CONFIG_PCI
-	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+	bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
 #endif
 #ifdef CONFIG_IBMVIO
-	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+	bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
 #endif
 
 	return 0;
@@ -184,7 +183,6 @@
 	int largealloc = npages > 15;
 	int pass = 0;
 	unsigned long align_mask;
-	unsigned long boundary_size;
 	unsigned long flags;
 	unsigned int pool_nr;
 	struct iommu_pool *pool;
@@ -197,11 +195,11 @@
 	if (unlikely(npages == 0)) {
 		if (printk_ratelimit())
 			WARN_ON(1);
-		return IOMMU_MAPPING_ERROR;
+		return DMA_MAPPING_ERROR;
 	}
 
 	if (should_fail_iommu(dev))
-		return IOMMU_MAPPING_ERROR;
+		return DMA_MAPPING_ERROR;
 
 	/*
 	 * We don't need to disable preemption here because any CPU can
@@ -248,15 +246,9 @@
 		}
 	}
 
-	if (dev)
-		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << tbl->it_page_shift);
-	else
-		boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
-	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
-
 	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
-			     boundary_size >> tbl->it_page_shift, align_mask);
+			     dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift),
+			     align_mask);
 	if (n == -1) {
 		if (likely(pass == 0)) {
 			/* First try the pool from the start */
@@ -277,7 +269,7 @@
 		} else {
 			/* Give up */
 			spin_unlock_irqrestore(&(pool->lock), flags);
-			return IOMMU_MAPPING_ERROR;
+			return DMA_MAPPING_ERROR;
 		}
 	}
 
@@ -309,13 +301,13 @@
 			      unsigned long attrs)
 {
 	unsigned long entry;
-	dma_addr_t ret = IOMMU_MAPPING_ERROR;
+	dma_addr_t ret = DMA_MAPPING_ERROR;
 	int build_fail;
 
 	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
 
-	if (unlikely(entry == IOMMU_MAPPING_ERROR))
-		return IOMMU_MAPPING_ERROR;
+	if (unlikely(entry == DMA_MAPPING_ERROR))
+		return DMA_MAPPING_ERROR;
 
 	entry += tbl->it_offset;	/* Offset into real TCE table */
 	ret = entry << tbl->it_page_shift;	/* Set the return dma address */
@@ -327,12 +319,12 @@
 
 	/* tbl->it_ops->set() only returns non-zero for transient errors.
 	 * Clean up the table bitmap in this case and return
-	 * IOMMU_MAPPING_ERROR. For all other errors the functionality is
+	 * DMA_MAPPING_ERROR. For all other errors the functionality is
 	 * not altered.
 	 */
 	if (unlikely(build_fail)) {
 		__iommu_free(tbl, ret, npages);
-		return IOMMU_MAPPING_ERROR;
+		return DMA_MAPPING_ERROR;
 	}
 
 	/* Flush/invalidate TLB caches if necessary */
@@ -477,7 +469,7 @@
 		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
 
 		/* Handle failure */
-		if (unlikely(entry == IOMMU_MAPPING_ERROR)) {
+		if (unlikely(entry == DMA_MAPPING_ERROR)) {
 			if (!(attrs & DMA_ATTR_NO_WARN) &&
 			    printk_ratelimit())
 				dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -544,7 +536,7 @@
 	 */
 	if (outcount < incount) {
 		outs = sg_next(outs);
-		outs->dma_address = IOMMU_MAPPING_ERROR;
+		outs->dma_address = DMA_MAPPING_ERROR;
 		outs->dma_length = 0;
 	}
 
@@ -562,7 +554,7 @@
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IOMMU_PAGE_SIZE(tbl));
 			__iommu_free(tbl, vaddr, npages);
-			s->dma_address = IOMMU_MAPPING_ERROR;
+			s->dma_address = DMA_MAPPING_ERROR;
 			s->dma_length = 0;
 		}
 		if (s == outs)
@@ -645,11 +637,54 @@
 #endif
 }
 
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+		unsigned long res_start, unsigned long res_end)
+{
+	int i;
+
+	WARN_ON_ONCE(res_end < res_start);
+	/*
+	 * Reserve page 0 so it will not be used for any mappings.
+	 * This avoids buggy drivers that consider page 0 to be invalid
+	 * to crash the machine or even lose data.
+	 */
+	if (tbl->it_offset == 0)
+		set_bit(0, tbl->it_map);
+
+	tbl->it_reserved_start = res_start;
+	tbl->it_reserved_end = res_end;
+
+	/* Check if res_start..res_end isn't empty and overlaps the table */
+	if (res_start && res_end &&
+			(tbl->it_offset + tbl->it_size < res_start ||
+			 res_end < tbl->it_offset))
+		return;
+
+	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+		set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+	int i;
+
+	/*
+	 * In case we have reserved the first bit, we should not emit
+	 * the warning below.
+	 */
+	if (tbl->it_offset == 0)
+		clear_bit(0, tbl->it_map);
+
+	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+		clear_bit(i - tbl->it_offset, tbl->it_map);
+}
+
 /*
  * Build a iommu_table structure. This contains a bit map which
  * is used to manage allocation of the tce space.
  */
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+		unsigned long res_start, unsigned long res_end)
 {
 	unsigned long sz;
 	static int welcomed = 0;
@@ -668,13 +703,7 @@
 	tbl->it_map = page_address(page);
 	memset(tbl->it_map, 0, sz);
 
-	/*
-	 * Reserve page 0 so it will not be used for any mappings.
-	 * This avoids buggy drivers that consider page 0 to be invalid
-	 * to crash the machine or even lose data.
-	 */
-	if (tbl->it_offset == 0)
-		set_bit(0, tbl->it_map);
+	iommu_table_reserve_pages(tbl, res_start, res_end);
 
 	/* We only split the IOMMU table if we have 1GB or more of space */
 	if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -726,12 +755,7 @@
 		return;
 	}
 
-	/*
-	 * In case we have reserved the first bit, we should not emit
-	 * the warning below.
-	 */
-	if (tbl->it_offset == 0)
-		clear_bit(0, tbl->it_map);
+	iommu_table_release_pages(tbl);
 
 	/* verify that table contains no entries */
 	if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -776,7 +800,7 @@
 			  unsigned long mask, enum dma_data_direction direction,
 			  unsigned long attrs)
 {
-	dma_addr_t dma_handle = IOMMU_MAPPING_ERROR;
+	dma_addr_t dma_handle = DMA_MAPPING_ERROR;
 	void *vaddr;
 	unsigned long uaddr;
 	unsigned int npages, align;
@@ -796,7 +820,7 @@
 		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
 					 mask >> tbl->it_page_shift, align,
 					 attrs);
-		if (dma_handle == IOMMU_MAPPING_ERROR) {
+		if (dma_handle == DMA_MAPPING_ERROR) {
 			if (!(attrs & DMA_ATTR_NO_WARN) &&
 			    printk_ratelimit()) {
 				dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -868,7 +892,7 @@
 	io_order = get_iommu_order(size, tbl);
 	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
 			      mask >> tbl->it_page_shift, io_order, 0);
-	if (mapping == IOMMU_MAPPING_ERROR) {
+	if (mapping == DMA_MAPPING_ERROR) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
 	}
@@ -993,25 +1017,32 @@
 }
 EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
 
-long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction)
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
 {
 	long ret;
+	unsigned long size = 0;
 
-	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
-
+	ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
 	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-			(*direction == DMA_BIDIRECTIONAL)))
+			(*direction == DMA_BIDIRECTIONAL)) &&
+			!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
+					&size))
 		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
-
-	/* if (unlikely(ret))
-		pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
-			__func__, hwaddr, entry << tbl->it_page_shift,
-				hwaddr, ret); */
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+void iommu_tce_kill(struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages)
+{
+	if (tbl->it_ops->tce_kill)
+		tbl->it_ops->tce_kill(tbl, entry, pages, false);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
 
 int iommu_take_ownership(struct iommu_table *tbl)
 {
@@ -1025,22 +1056,21 @@
 	 * requires exchange() callback defined so if it is not
 	 * implemented, we disallow taking ownership over the table.
 	 */
-	if (!tbl->it_ops->exchange)
+	if (!tbl->it_ops->xchg_no_kill)
 		return -EINVAL;
 
 	spin_lock_irqsave(&tbl->large_pool.lock, flags);
 	for (i = 0; i < tbl->nr_pools; i++)
 		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
 
-	if (tbl->it_offset == 0)
-		clear_bit(0, tbl->it_map);
+	iommu_table_release_pages(tbl);
 
 	if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
 		pr_err("iommu_tce: it_map is not empty");
 		ret = -EBUSY;
-		/* Restore bit#0 set by iommu_init_table() */
-		if (tbl->it_offset == 0)
-			set_bit(0, tbl->it_map);
+		/* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
+		iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+				tbl->it_reserved_end);
 	} else {
 		memset(tbl->it_map, 0xff, sz);
 	}
@@ -1063,9 +1093,8 @@
 
 	memset(tbl->it_map, 0, sz);
 
-	/* Restore bit#0 set by iommu_init_table() */
-	if (tbl->it_offset == 0)
-		set_bit(0, tbl->it_map);
+	iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+			tbl->it_reserved_end);
 
 	for (i = 0; i < tbl->nr_pools; i++)
 		spin_unlock(&tbl->pools[i].lock);
@@ -1073,11 +1102,8 @@
 }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
-int iommu_add_device(struct device *dev)
+int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
 {
-	struct iommu_table *tbl;
-	struct iommu_table_group_link *tgl;
-
 	/*
 	 * The sysfs entries should be populated before
 	 * binding IOMMU group. If sysfs entries isn't
@@ -1086,39 +1112,17 @@
 	if (!device_is_registered(dev))
 		return -ENOENT;
 
-	if (dev->iommu_group) {
+	if (device_iommu_mapped(dev)) {
 		pr_debug("%s: Skipping device %s with iommu group %d\n",
 			 __func__, dev_name(dev),
 			 iommu_group_id(dev->iommu_group));
 		return -EBUSY;
 	}
 
-	tbl = get_iommu_table_base(dev);
-	if (!tbl) {
-		pr_debug("%s: Skipping device %s with no tbl\n",
-			 __func__, dev_name(dev));
-		return 0;
-	}
-
-	tgl = list_first_entry_or_null(&tbl->it_group_list,
-			struct iommu_table_group_link, next);
-	if (!tgl) {
-		pr_debug("%s: Skipping device %s with no group\n",
-			 __func__, dev_name(dev));
-		return 0;
-	}
 	pr_debug("%s: Adding %s to iommu group %d\n",
-		 __func__, dev_name(dev),
-		 iommu_group_id(tgl->table_group->group));
+		 __func__, dev_name(dev), iommu_group_id(table_group->group));
 
-	if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
-		pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
-		       __func__, IOMMU_PAGE_SIZE(tbl),
-		       PAGE_SIZE, dev_name(dev));
-		return -EINVAL;
-	}
-
-	return iommu_group_add_device(tgl->table_group->group, dev);
+	return iommu_group_add_device(table_group->group, dev);
 }
 EXPORT_SYMBOL_GPL(iommu_add_device);
 
@@ -1129,7 +1133,7 @@
 	 * and we needn't detach them from the associated
 	 * IOMMU groups
 	 */
-	if (!dev->iommu_group) {
+	if (!device_iommu_mapped(dev)) {
 		pr_debug("iommu_tce: skipping device %s with no tbl\n",
 			 dev_name(dev));
 		return;
@@ -1138,31 +1142,4 @@
 	iommu_group_remove_device(dev);
 }
 EXPORT_SYMBOL_GPL(iommu_del_device);
-
-static int tce_iommu_bus_notifier(struct notifier_block *nb,
-		unsigned long action, void *data)
-{
-	struct device *dev = data;
-
-	switch (action) {
-	case BUS_NOTIFY_ADD_DEVICE:
-		return iommu_add_device(dev);
-	case BUS_NOTIFY_DEL_DEVICE:
-		if (dev->iommu_group)
-			iommu_del_device(dev);
-		return 0;
-	default:
-		return 0;
-	}
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
-	.notifier_call = tce_iommu_bus_notifier,
-};
-
-int __init tce_iommu_bus_notifier_init(void)
-{
-	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
-	return 0;
-}
 #endif /* CONFIG_IOMMU_API */
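
/*
 * Illustrative appendix, not part of the patch above: a minimal sketch of how
 * a platform setup path might call the reworked iommu_init_table(), which now
 * takes a reserved IOVA page range (res_start..res_end) in addition to the
 * NUMA node. Only the iommu_init_table() signature comes from the patch;
 * example_pci_setup_table() and the 64kB firmware window are hypothetical.
 */
#include <linux/sizes.h>
#include <asm/iommu.h>

static struct iommu_table *example_pci_setup_table(struct iommu_table *tbl,
						   int nid)
{
	/*
	 * Suppose the platform keeps a 64kB window at the start of the DMA
	 * space for firmware use: pass it as the reserved range so that
	 * iommu_table_reserve_pages() marks those TCEs as in use and the
	 * allocator never hands them out. Page 0 is still reserved
	 * unconditionally when it_offset == 0, as before the patch.
	 */
	unsigned long res_start = tbl->it_offset;
	unsigned long res_end = tbl->it_offset +
				(SZ_64K >> tbl->it_page_shift);

	return iommu_init_table(tbl, nid, res_start, res_end);
}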
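
/*
 * Illustrative appendix, not part of the patch above: a sketch of the new
 * two-step TCE update flow. iommu_tce_xchg_no_kill() updates an entry without
 * flushing the hardware TCE cache, and a single iommu_tce_kill() then
 * invalidates the whole range. Only those two signatures come from the patch;
 * example_clear_range() is a hypothetical caller.
 */
#include <linux/dma-direction.h>
#include <asm/iommu.h>

static long example_clear_range(struct mm_struct *mm, struct iommu_table *tbl,
				unsigned long entry, unsigned long npages)
{
	unsigned long i;
	long ret = 0;

	for (i = 0; i < npages; i++) {
		unsigned long hpa = 0;			/* new value: unmapped */
		enum dma_data_direction dir = DMA_NONE;	/* new direction */

		/* Exchange the TCE; hpa/dir return the previous mapping. */
		ret = iommu_tce_xchg_no_kill(mm, tbl, entry + i, &hpa, &dir);
		if (ret)
			break;
	}

	/* Invalidate everything that was touched with one kill. */
	iommu_tce_kill(tbl, entry, i);

	return ret;
}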