hc
2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/arch/powerpc/platforms/pseries/iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  *
@@ -7,21 +8,6 @@
  * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
  *
  * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

 #include <linux/init.h>
@@ -38,7 +24,7 @@
 #include <linux/of.h>
 #include <linux/iommu.h>
 #include <linux/rculist.h>
-#include <linux/locallock.h>
+#include <linux/local_lock.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -54,11 +40,24 @@

 #include "pseries.h"

+enum {
+	DDW_QUERY_PE_DMA_WIN  = 0,
+	DDW_CREATE_PE_DMA_WIN = 1,
+	DDW_REMOVE_PE_DMA_WIN = 2,
+
+	DDW_APPLICABLE_SIZE
+};
+
+enum {
+	DDW_EXT_SIZE = 0,
+	DDW_EXT_RESET_DMA_WIN = 1,
+	DDW_EXT_QUERY_OUT_SIZE = 2
+};
+
 static struct iommu_table_group *iommu_pseries_alloc_group(int node)
 {
 	struct iommu_table_group *table_group;
 	struct iommu_table *tbl;
-	struct iommu_table_group_link *tgl;

 	table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
 			   node);
@@ -69,22 +68,13 @@
 	if (!tbl)
 		goto free_group;

-	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
-			node);
-	if (!tgl)
-		goto free_table;
-
 	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
 	kref_init(&tbl->it_kref);
-	tgl->table_group = table_group;
-	list_add_rcu(&tgl->next, &tbl->it_group_list);

 	table_group->tables[0] = tbl;

 	return table_group;

-free_table:
-	kfree(tbl);
 free_group:
 	kfree(table_group);
 	return NULL;
@@ -94,23 +84,12 @@
 		const char *node_name)
 {
 	struct iommu_table *tbl;
-#ifdef CONFIG_IOMMU_API
-	struct iommu_table_group_link *tgl;
-#endif

 	if (!table_group)
 		return;

 	tbl = table_group->tables[0];
 #ifdef CONFIG_IOMMU_API
-	tgl = list_first_entry_or_null(&tbl->it_group_list,
-			struct iommu_table_group_link, next);
-
-	WARN_ON_ONCE(!tgl);
-	if (tgl) {
-		list_del_rcu(&tgl->next);
-		kfree(tgl);
-	}
 	if (table_group->group) {
 		iommu_group_put(table_group->group);
 		BUG_ON(table_group->group);
@@ -127,7 +106,7 @@
 		unsigned long attrs)
 {
 	u64 proto_tce;
-	__be64 *tcep, *tces;
+	__be64 *tcep;
 	u64 rpn;

 	proto_tce = TCE_PCI_READ; // Read allowed
@@ -135,7 +114,7 @@
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;

-	tces = tcep = ((__be64 *)tbl->it_base) + index;
+	tcep = ((__be64 *)tbl->it_base) + index;

 	while (npages--) {
 		/* can't move this out since we might cross MEMBLOCK boundary */
@@ -151,9 +130,9 @@

 static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
 {
-	__be64 *tcep, *tces;
+	__be64 *tcep;

-	tces = tcep = ((__be64 *)tbl->it_base) + index;
+	tcep = ((__be64 *)tbl->it_base) + index;

 	while (npages--)
 		*(tcep++) = 0;
@@ -212,8 +191,13 @@
 	return ret;
 }

-static DEFINE_PER_CPU(__be64 *, tce_page);
-static DEFINE_LOCAL_IRQ_LOCK(tcp_page_lock);
+struct tce_page {
+	__be64 * page;
+	local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};

 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				    long npages, unsigned long uaddr,
@@ -229,16 +213,16 @@
 	int ret = 0;
 	unsigned long flags;

-	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
+	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
 		return tce_build_pSeriesLP(tbl->it_index, tcenum,
 					   tbl->it_page_shift, npages, uaddr,
 					   direction, attrs);
 	}

 	/* to protect tcep and the page behind it */
-	local_lock_irqsave(tcp_page_lock, flags);
+	local_lock_irqsave(&tce_page.lock, flags);

-	tcep = __this_cpu_read(tce_page);
+	tcep = __this_cpu_read(tce_page.page);

 	/* This is safe to do since interrupts are off when we're called
 	 * from iommu_alloc{,_sg}()
@@ -247,12 +231,12 @@
 		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
 		if (!tcep) {
-			local_unlock_irqrestore(tcp_page_lock, flags);
+			local_unlock_irqrestore(&tce_page.lock, flags);
 			return tce_build_pSeriesLP(tbl->it_index, tcenum,
 					tbl->it_page_shift,
 					npages, uaddr, direction, attrs);
 		}
-		__this_cpu_write(tce_page, tcep);
+		__this_cpu_write(tce_page.page, tcep);
 	}

 	rpn = __pa(uaddr) >> TCE_SHIFT;
@@ -282,7 +266,7 @@
 		tcenum += limit;
 	} while (npages > 0 && !rc);

-	local_unlock_irqrestore(tcp_page_lock, flags);
+	local_unlock_irqrestore(&tce_page.lock, flags);

 	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
 		ret = (int)rc;
@@ -324,7 +308,7 @@
 {
 	u64 rc;

-	if (!firmware_has_feature(FW_FEATURE_MULTITCE))
+	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
 		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);

 	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
@@ -372,7 +356,7 @@
 /* Dynamic DMA Window support */
 struct ddw_query_response {
 	u32 windows_available;
-	u32 largest_available_block;
+	u64 largest_available_block;
 	u32 page_size;
 	u32 migration_capable;
 };
@@ -440,7 +424,7 @@
 	u64 rc = 0;
 	long l, limit;

-	if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
+	if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
 		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
 		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
 				be64_to_cpu(maprange->dma_base);
@@ -454,16 +438,16 @@
 	}

 	/* to protect tcep and the page behind it */
-	local_lock_irq(tcp_page_lock);
-	tcep = __this_cpu_read(tce_page);
+	local_lock_irq(&tce_page.lock);
+	tcep = __this_cpu_read(tce_page.page);

 	if (!tcep) {
 		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
 		if (!tcep) {
-			local_unlock_irq(tcp_page_lock);
+			local_unlock_irq(&tce_page.lock);
 			return -ENOMEM;
 		}
-		__this_cpu_write(tce_page, tcep);
+		__this_cpu_write(tce_page.page, tcep);
 	}

 	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -506,7 +490,7 @@

 	/* error cleanup: caller will clear whole range */

-	local_unlock_irq(tcp_page_lock);
+	local_unlock_irq(&tce_page.lock);
 	return rc;
 }

@@ -663,8 +647,7 @@

 		iommu_table_setparms(pci->phb, dn, tbl);
 		tbl->it_ops = &iommu_table_pseries_ops;
-		iommu_init_table(tbl, pci->phb->node);
-		iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
+		iommu_init_table(tbl, pci->phb->node, 0, 0);

 	/* Divide the rest (1.75GB) among the children */
 	pci->phb->dma_window_size = 0x80000000ul;
@@ -676,7 +659,8 @@

 #ifdef CONFIG_IOMMU_API
 static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
-				long *tce, enum dma_data_direction *direction)
+				long *tce, enum dma_data_direction *direction,
+				bool realmode)
 {
 	long rc;
 	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@ -704,7 +688,7 @@
 struct iommu_table_ops iommu_table_lpar_multi_ops = {
 	.set = tce_buildmulti_pSeriesLP,
 #ifdef CONFIG_IOMMU_API
-	.exchange = tce_exchange_pseries,
+	.xchg_no_kill = tce_exchange_pseries,
 #endif
 	.clear = tce_freemulti_pSeriesLP,
 	.get = tce_get_pSeriesLP
@@ -745,7 +729,7 @@
 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
 				ppci->table_group, dma_window);
 		tbl->it_ops = &iommu_table_lpar_multi_ops;
-		iommu_init_table(tbl, ppci->phb->node);
+		iommu_init_table(tbl, ppci->phb->node, 0, 0);
 		iommu_register_group(ppci->table_group,
 				pci_domain_nr(bus), 0);
 		pr_debug("  created table: %p\n", ppci->table_group);
@@ -774,11 +758,8 @@
 		tbl = PCI_DN(dn)->table_group->tables[0];
 		iommu_table_setparms(phb, dn, tbl);
 		tbl->it_ops = &iommu_table_pseries_ops;
-		iommu_init_table(tbl, phb->node);
-		iommu_register_group(PCI_DN(dn)->table_group,
-				pci_domain_nr(phb->bus), 0);
+		iommu_init_table(tbl, phb->node, 0, 0);
 		set_iommu_table_base(&dev->dev, tbl);
-		iommu_add_device(&dev->dev);
 		return;
 	}

@@ -789,11 +770,10 @@
 	while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
 		dn = dn->parent;

-	if (dn && PCI_DN(dn)) {
+	if (dn && PCI_DN(dn))
 		set_iommu_table_base(&dev->dev,
 				PCI_DN(dn)->table_group->tables[0]);
-		iommu_add_device(&dev->dev);
-	} else
+	else
 		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
 		       pci_name(dev));
 }
@@ -810,25 +790,14 @@

 early_param("disable_ddw", disable_ddw_setup);

-static void remove_ddw(struct device_node *np, bool remove_prop)
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+			      struct property *win)
 {
 	struct dynamic_dma_window_prop *dwp;
-	struct property *win64;
-	u32 ddw_avail[3];
 	u64 liobn;
-	int ret = 0;
+	int ret;

-	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
-					 &ddw_avail[0], 3);
-
-	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
-	if (!win64)
-		return;
-
-	if (ret || win64->length < sizeof(*dwp))
-		goto delprop;
-
-	dwp = win64->value;
+	dwp = win->value;
 	liobn = (u64)be32_to_cpu(dwp->liobn);

 	/* clear the whole window, note the arg is in kernel pages */
@@ -841,19 +810,39 @@
 	pr_debug("%pOF successfully cleared tces in window.\n",
 		 np);

-	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+	ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
 	if (ret)
 		pr_warn("%pOF: failed to remove direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np, ret, ddw_avail[2], liobn);
+			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
 	else
 		pr_debug("%pOF: successfully removed direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np, ret, ddw_avail[2], liobn);
+			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+}

-delprop:
-	if (remove_prop)
-		ret = of_remove_property(np, win64);
+static void remove_ddw(struct device_node *np, bool remove_prop)
+{
+	struct property *win;
+	u32 ddw_avail[DDW_APPLICABLE_SIZE];
+	int ret = 0;
+
+	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
+	if (ret)
+		return;
+
+	win = of_find_property(np, DIRECT64_PROPNAME, NULL);
+	if (!win)
+		return;
+
+	if (win->length >= sizeof(struct dynamic_dma_window_prop))
+		remove_dma_window(np, ddw_avail, win);
+
+	if (!remove_prop)
+		return;
+
+	ret = of_remove_property(np, win);
 	if (ret)
 		pr_warn("%pOF: failed to remove direct window property: %d\n",
 			np, ret);
@@ -912,14 +901,62 @@
 }
 machine_arch_initcall(pseries, find_existing_ddw_windows);

+/**
+ * ddw_read_ext - Get the value of a DDW extension
+ * @np:		device node from which the extension value is to be read.
+ * @extnum:	index number of the extension.
+ * @value:	pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and gets the value
+ * at index 'extnum'.
+ * It can be used only to check if a property exists, passing value == NULL.
+ *
+ * Returns:
+ *	0 if extension successfully read
+ *	-EINVAL if the "ibm,ddw-extensions" does not exist,
+ *	-ENODATA if "ibm,ddw-extensions" does not have a value, and
+ *	-EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+			       u32 *value)
+{
+	static const char propname[] = "ibm,ddw-extensions";
+	u32 count;
+	int ret;
+
+	ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+	if (ret)
+		return ret;
+
+	if (count < extnum)
+		return -EOVERFLOW;
+
+	if (!value)
+		value = &count;
+
+	return of_property_read_u32_index(np, propname, extnum, value);
+}
+
 static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
-			struct ddw_query_response *query)
+		     struct ddw_query_response *query,
+		     struct device_node *parent)
 {
 	struct device_node *dn;
 	struct pci_dn *pdn;
-	u32 cfg_addr;
+	u32 cfg_addr, ext_query, query_out[5];
 	u64 buid;
-	int ret;
+	int ret, out_sz;
+
+	/*
+	 * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
+	 * output parameters ibm,query-pe-dma-windows will have, ranging from
+	 * 5 to 6.
+	 */
+	ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+	if (!ret && ext_query == 1)
+		out_sz = 6;
+	else
+		out_sz = 5;

 	/*
 	 * Get the config address and phb buid of the PE window.
@@ -932,11 +969,28 @@
 	buid = pdn->phb->buid;
 	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

-	ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
-		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
-	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
-		" returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
-		BUID_LO(buid), ret);
+	ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
+			cfg_addr, BUID_HI(buid), BUID_LO(buid));
+	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
+		 ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+		 BUID_LO(buid), ret);
+
+	switch (out_sz) {
+	case 5:
+		query->windows_available = query_out[0];
+		query->largest_available_block = query_out[1];
+		query->page_size = query_out[2];
+		query->migration_capable = query_out[3];
+		break;
+	case 6:
+		query->windows_available = query_out[0];
+		query->largest_available_block = ((u64)query_out[1] << 32) |
+						 query_out[2];
+		query->page_size = query_out[3];
+		query->migration_capable = query_out[4];
+		break;
+	}
+
 	return ret;
 }

@@ -963,15 +1017,16 @@

 	do {
 		/* extra outputs are LIOBN and dma-addr (hi, lo) */
-		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
-				cfg_addr, BUID_HI(buid), BUID_LO(buid),
-				page_shift, window_shift);
+		ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+				(u32 *)create, cfg_addr, BUID_HI(buid),
+				BUID_LO(buid), page_shift, window_shift);
 	} while (rtas_busy_delay(ret));
 	dev_info(&dev->dev,
 		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
-		"(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
-		 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
-		 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
+		"(liobn = 0x%x starting addr = %x %x)\n",
+		 ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+		 BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+		 create->addr_hi, create->addr_lo);

 	return ret;
 }
@@ -983,6 +1038,66 @@

 static LIST_HEAD(failed_ddw_pdn_list);

+static phys_addr_t ddw_memory_hotplug_max(void)
+{
+	phys_addr_t max_addr = memory_hotplug_max();
+	struct device_node *memory;
+
+	for_each_node_by_type(memory, "memory") {
+		unsigned long start, size;
+		int n_mem_addr_cells, n_mem_size_cells, len;
+		const __be32 *memcell_buf;
+
+		memcell_buf = of_get_property(memory, "reg", &len);
+		if (!memcell_buf || len <= 0)
+			continue;
+
+		n_mem_addr_cells = of_n_addr_cells(memory);
+		n_mem_size_cells = of_n_size_cells(memory);
+
+		start = of_read_number(memcell_buf, n_mem_addr_cells);
+		memcell_buf += n_mem_addr_cells;
+		size = of_read_number(memcell_buf, n_mem_size_cells);
+		memcell_buf += n_mem_size_cells;
+
+		max_addr = max_t(phys_addr_t, max_addr, start + size);
+	}
+
+	return max_addr;
+}
+
+/*
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
+ * ibm,ddw-extensions, which carries the rtas token for
+ * ibm,reset-pe-dma-windows.
+ * That rtas-call can be used to restore the default DMA window for the device.
+ */
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+	int ret;
+	u32 cfg_addr, reset_dma_win;
+	u64 buid;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+	if (ret)
+		return;
+
+	dn = pci_device_to_OF_node(dev);
+	pdn = PCI_DN(dn);
+	buid = pdn->phb->buid;
+	cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
+			BUID_LO(buid));
+	if (ret)
+		dev_info(&dev->dev,
+			 "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
+			 reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+			 ret);
+}
+
 /*
  * If the PE supports dynamic dma windows, and there is space for a table
  * that can map all pages in a linear offset, then setup such a table,
@@ -992,7 +1107,7 @@
  * pdn: the parent pe node with the ibm,dma_window property
  * Future: also check if we can remap the base window for our base page size
  *
- * returns the dma offset for use by dma_set_mask
+ * returns the dma offset for use by the direct mapped DMA code.
  */
 static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 {
@@ -1002,11 +1117,12 @@
 	int page_shift;
 	u64 dma_addr, max_addr;
 	struct device_node *dn;
-	u32 ddw_avail[3];
+	u32 ddw_avail[DDW_APPLICABLE_SIZE];
 	struct direct_window *window;
 	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
 	struct failed_ddw_pdn *fpdn;
+	bool default_win_removed = false;

 	mutex_lock(&direct_window_init_mutex);

@@ -1035,7 +1151,7 @@
 	 * the property is actually in the parent, not the PE
 	 */
 	ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
-				&ddw_avail[0], 3);
+				&ddw_avail[0], DDW_APPLICABLE_SIZE);
 	if (ret)
 		goto out_failed;

@@ -1046,18 +1162,42 @@
 	 * of page sizes: supported and supported for migrate-dma.
 	 */
 	dn = pci_device_to_OF_node(dev);
-	ret = query_ddw(dev, ddw_avail, &query);
+	ret = query_ddw(dev, ddw_avail, &query, pdn);
 	if (ret != 0)
 		goto out_failed;

+	/*
+	 * If there is no window available, remove the default DMA window,
+	 * if it's present. This will make all the resources available to the
+	 * new DDW window.
+	 * If anything fails after this, we need to restore it, so also check
+	 * for extensions presence.
+	 */
 	if (query.windows_available == 0) {
-		/*
-		 * no additional windows are available for this device.
-		 * We might be able to reallocate the existing window,
-		 * trading in for a larger page size.
-		 */
-		dev_dbg(&dev->dev, "no free dynamic windows");
-		goto out_failed;
+		struct property *default_win;
+		int reset_win_ext;
+
+		default_win = of_find_property(pdn, "ibm,dma-window", NULL);
+		if (!default_win)
+			goto out_failed;
+
+		reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
+		if (reset_win_ext)
+			goto out_failed;
+
+		remove_dma_window(pdn, ddw_avail, default_win);
+		default_win_removed = true;
+
+		/* Query again, to check if the window is available */
+		ret = query_ddw(dev, ddw_avail, &query, pdn);
+		if (ret != 0)
+			goto out_failed;
+
+		if (query.windows_available == 0) {
+			/* no windows are available for this device. */
+			dev_dbg(&dev->dev, "no free dynamic windows");
+			goto out_failed;
+		}
 	}
 	if (query.page_size & 4) {
 		page_shift = 24;	/* 16MB */
@@ -1072,9 +1212,9 @@
 	}
 	/* verify the window * number of ptes will map the partition */
 	/* check largest block * page size > max memory hotplug addr */
-	max_addr = memory_hotplug_max();
+	max_addr = ddw_memory_hotplug_max();
 	if (query.largest_available_block < (max_addr >> page_shift)) {
-		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
+		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
 			  "%llu-sized pages\n", max_addr, query.largest_available_block,
 			  1ULL << page_shift);
 		goto out_failed;
11481288 kfree(win64);
11491289
11501290 out_failed:
1291
+ if (default_win_removed)
1292
+ reset_dma_window(dev, pdn);
11511293
11521294 fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
11531295 if (!fpdn)
@@ -1200,7 +1342,7 @@
 		iommu_table_setparms_lpar(pci->phb, pdn, tbl,
 				  pci->table_group, dma_window);
 		tbl->it_ops = &iommu_table_lpar_multi_ops;
-		iommu_init_table(tbl, pci->phb->node);
+		iommu_init_table(tbl, pci->phb->node, 0, 0);
 		iommu_register_group(pci->table_group,
 				pci_domain_nr(pci->phb->bus), 0);
 		pr_debug("  created table: %p\n", pci->table_group);
@@ -1209,90 +1351,40 @@
 	}

 	set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
-	iommu_add_device(&dev->dev);
+	iommu_add_device(pci->table_group, &dev->dev);
 }

-static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
 {
-	bool ddw_enabled = false;
-	struct device_node *pdn, *dn;
-	struct pci_dev *pdev;
+	struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
 	const __be32 *dma_window = NULL;
-	u64 dma_offset;
-
-	if (!dev->dma_mask)
-		return -EIO;
-
-	if (!dev_is_pci(dev))
-		goto check_mask;
-
-	pdev = to_pci_dev(dev);

 	/* only attempt to use a new window if 64-bit DMA is requested */
-	if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
-		dn = pci_device_to_OF_node(pdev);
-		dev_dbg(dev, "node is %pOF\n", dn);
+	if (dma_mask < DMA_BIT_MASK(64))
+		return false;

-		/*
-		 * the device tree might contain the dma-window properties
-		 * per-device and not necessarily for the bus. So we need to
-		 * search upwards in the tree until we either hit a dma-window
-		 * property, OR find a parent with a table already allocated.
-		 */
-		for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
-				pdn = pdn->parent) {
-			dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
-			if (dma_window)
-				break;
-		}
-		if (pdn && PCI_DN(pdn)) {
-			dma_offset = enable_ddw(pdev, pdn);
-			if (dma_offset != 0) {
-				dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
-				set_dma_offset(dev, dma_offset);
-				set_dma_ops(dev, &dma_nommu_ops);
-				ddw_enabled = true;
-			}
-		}
+	dev_dbg(&pdev->dev, "node is %pOF\n", dn);
+
+	/*
+	 * the device tree might contain the dma-window properties
+	 * per-device and not necessarily for the bus. So we need to
+	 * search upwards in the tree until we either hit a dma-window
+	 * property, OR find a parent with a table already allocated.
+	 */
+	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
+			pdn = pdn->parent) {
+		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
+		if (dma_window)
+			break;
 	}

-	/* fall back on iommu ops */
-	if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
-		dev_info(dev, "Restoring 32-bit DMA via iommu\n");
-		set_dma_ops(dev, &dma_iommu_ops);
+	if (pdn && PCI_DN(pdn)) {
+		pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
+		if (pdev->dev.archdata.dma_offset)
+			return true;
 	}

-check_mask:
-	if (!dma_supported(dev, dma_mask))
-		return -EIO;
-
-	*dev->dma_mask = dma_mask;
-	return 0;
-}
-
-static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
-{
-	if (!dev->dma_mask)
-		return 0;
-
-	if (!disable_ddw && dev_is_pci(dev)) {
-		struct pci_dev *pdev = to_pci_dev(dev);
-		struct device_node *dn;
-
-		dn = pci_device_to_OF_node(pdev);
-
-		/* search upwards for ibm,dma-window */
-		for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
-				dn = dn->parent)
-			if (of_get_property(dn, "ibm,dma-window", NULL))
-				break;
-		/* if there is a ibm,ddw-applicable property require 64 bits */
-		if (dn && PCI_DN(dn) &&
-			of_get_property(dn, "ibm,ddw-applicable", NULL))
-			return DMA_BIT_MASK(64);
-	}
-
-	return dma_iommu_ops.get_required_mask(dev);
+	return false;
 }

 static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
@@ -1387,8 +1479,9 @@
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
 		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
-		ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
-		ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
+		if (!disable_ddw)
+			pseries_pci_controller_ops.iommu_bypass_supported =
+				iommu_bypass_supported_pSeriesLP;
 	} else {
 		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
 		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
@@ -1405,13 +1498,38 @@
 {
 	if (strcmp(str, "off") == 0 &&
 	    firmware_has_feature(FW_FEATURE_LPAR) &&
-	    firmware_has_feature(FW_FEATURE_MULTITCE)) {
+	    (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
+	     firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
 		printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
-		powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
+		powerpc_firmware_features &=
+			~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
 	}
 	return 1;
 }

 __setup("multitce=", disable_multitce);

+static int tce_iommu_bus_notifier(struct notifier_block *nb,
+		unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	switch (action) {
+	case BUS_NOTIFY_DEL_DEVICE:
+		iommu_del_device(dev);
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+	.notifier_call = tce_iommu_bus_notifier,
+};
+
+static int __init tce_iommu_bus_notifier_init(void)
+{
+	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+	return 0;
+}
 machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);