hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/powerpc/platforms/pseries/iommu.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
34 *
....@@ -7,21 +8,6 @@
78 * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
89 *
910 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
10
- *
11
- *
12
- * This program is free software; you can redistribute it and/or modify
13
- * it under the terms of the GNU General Public License as published by
14
- * the Free Software Foundation; either version 2 of the License, or
15
- * (at your option) any later version.
16
- *
17
- * This program is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
- * GNU General Public License for more details.
21
- *
22
- * You should have received a copy of the GNU General Public License
23
- * along with this program; if not, write to the Free Software
24
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2511 */
2612
2713 #include <linux/init.h>
....@@ -53,11 +39,24 @@
5339
5440 #include "pseries.h"
5541
42
/*
 * Positions of the RTAS tokens inside the "ibm,ddw-applicable" property.
 * Callers read DDW_APPLICABLE_SIZE cells into a u32 array and index it
 * with the constants below.
 */
enum {
	DDW_QUERY_PE_DMA_WIN	= 0,	/* token for ibm,query-pe-dma-windows */
	DDW_CREATE_PE_DMA_WIN	= 1,	/* token for ibm,create-pe-dma-window */
	DDW_REMOVE_PE_DMA_WIN	= 2,	/* token for ibm,remove-pe-dma-window */

	DDW_APPLICABLE_SIZE		/* number of cells; keep last */
};
49
+
50
/* Cell indices inside the "ibm,ddw-extensions" property. */
enum {
	DDW_EXT_SIZE		= 0,	/* compared against the requested index before it is read */
	DDW_EXT_RESET_DMA_WIN	= 1,	/* token for ibm,reset-pe-dma-windows */
	DDW_EXT_QUERY_OUT_SIZE	= 2,	/* picks 5 or 6 outputs for the window query */
};
55
+
5656 static struct iommu_table_group *iommu_pseries_alloc_group(int node)
5757 {
5858 struct iommu_table_group *table_group;
5959 struct iommu_table *tbl;
60
- struct iommu_table_group_link *tgl;
6160
6261 table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
6362 node);
....@@ -68,22 +67,13 @@
6867 if (!tbl)
6968 goto free_group;
7069
71
- tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
72
- node);
73
- if (!tgl)
74
- goto free_table;
75
-
7670 INIT_LIST_HEAD_RCU(&tbl->it_group_list);
7771 kref_init(&tbl->it_kref);
78
- tgl->table_group = table_group;
79
- list_add_rcu(&tgl->next, &tbl->it_group_list);
8072
8173 table_group->tables[0] = tbl;
8274
8375 return table_group;
8476
85
-free_table:
86
- kfree(tbl);
8777 free_group:
8878 kfree(table_group);
8979 return NULL;
....@@ -93,23 +83,12 @@
9383 const char *node_name)
9484 {
9585 struct iommu_table *tbl;
96
-#ifdef CONFIG_IOMMU_API
97
- struct iommu_table_group_link *tgl;
98
-#endif
9986
10087 if (!table_group)
10188 return;
10289
10390 tbl = table_group->tables[0];
10491 #ifdef CONFIG_IOMMU_API
105
- tgl = list_first_entry_or_null(&tbl->it_group_list,
106
- struct iommu_table_group_link, next);
107
-
108
- WARN_ON_ONCE(!tgl);
109
- if (tgl) {
110
- list_del_rcu(&tgl->next);
111
- kfree(tgl);
112
- }
11392 if (table_group->group) {
11493 iommu_group_put(table_group->group);
11594 BUG_ON(table_group->group);
....@@ -126,7 +105,7 @@
126105 unsigned long attrs)
127106 {
128107 u64 proto_tce;
129
- __be64 *tcep, *tces;
108
+ __be64 *tcep;
130109 u64 rpn;
131110
132111 proto_tce = TCE_PCI_READ; // Read allowed
....@@ -134,7 +113,7 @@
134113 if (direction != DMA_TO_DEVICE)
135114 proto_tce |= TCE_PCI_WRITE;
136115
137
- tces = tcep = ((__be64 *)tbl->it_base) + index;
116
+ tcep = ((__be64 *)tbl->it_base) + index;
138117
139118 while (npages--) {
140119 /* can't move this out since we might cross MEMBLOCK boundary */
....@@ -150,9 +129,9 @@
150129
151130 static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
152131 {
153
- __be64 *tcep, *tces;
132
+ __be64 *tcep;
154133
155
- tces = tcep = ((__be64 *)tbl->it_base) + index;
134
+ tcep = ((__be64 *)tbl->it_base) + index;
156135
157136 while (npages--)
158137 *(tcep++) = 0;
....@@ -227,7 +206,7 @@
227206 int ret = 0;
228207 unsigned long flags;
229208
230
- if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
209
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
231210 return tce_build_pSeriesLP(tbl->it_index, tcenum,
232211 tbl->it_page_shift, npages, uaddr,
233212 direction, attrs);
....@@ -321,7 +300,7 @@
321300 {
322301 u64 rc;
323302
324
- if (!firmware_has_feature(FW_FEATURE_MULTITCE))
303
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
325304 return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
326305
327306 rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
....@@ -369,7 +348,7 @@
369348 /* Dynamic DMA Window support */
370349 struct ddw_query_response {
371350 u32 windows_available;
372
- u32 largest_available_block;
351
+ u64 largest_available_block;
373352 u32 page_size;
374353 u32 migration_capable;
375354 };
....@@ -437,7 +416,7 @@
437416 u64 rc = 0;
438417 long l, limit;
439418
440
- if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
419
+ if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
441420 unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
442421 unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
443422 be64_to_cpu(maprange->dma_base);
....@@ -659,8 +638,7 @@
659638
660639 iommu_table_setparms(pci->phb, dn, tbl);
661640 tbl->it_ops = &iommu_table_pseries_ops;
662
- iommu_init_table(tbl, pci->phb->node);
663
- iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
641
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
664642
665643 /* Divide the rest (1.75GB) among the children */
666644 pci->phb->dma_window_size = 0x80000000ul;
....@@ -672,7 +650,8 @@
672650
673651 #ifdef CONFIG_IOMMU_API
674652 static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
675
- long *tce, enum dma_data_direction *direction)
653
+ long *tce, enum dma_data_direction *direction,
654
+ bool realmode)
676655 {
677656 long rc;
678657 unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
....@@ -700,7 +679,7 @@
700679 struct iommu_table_ops iommu_table_lpar_multi_ops = {
701680 .set = tce_buildmulti_pSeriesLP,
702681 #ifdef CONFIG_IOMMU_API
703
- .exchange = tce_exchange_pseries,
682
+ .xchg_no_kill = tce_exchange_pseries,
704683 #endif
705684 .clear = tce_freemulti_pSeriesLP,
706685 .get = tce_get_pSeriesLP
....@@ -741,7 +720,7 @@
741720 iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
742721 ppci->table_group, dma_window);
743722 tbl->it_ops = &iommu_table_lpar_multi_ops;
744
- iommu_init_table(tbl, ppci->phb->node);
723
+ iommu_init_table(tbl, ppci->phb->node, 0, 0);
745724 iommu_register_group(ppci->table_group,
746725 pci_domain_nr(bus), 0);
747726 pr_debug(" created table: %p\n", ppci->table_group);
....@@ -770,11 +749,8 @@
770749 tbl = PCI_DN(dn)->table_group->tables[0];
771750 iommu_table_setparms(phb, dn, tbl);
772751 tbl->it_ops = &iommu_table_pseries_ops;
773
- iommu_init_table(tbl, phb->node);
774
- iommu_register_group(PCI_DN(dn)->table_group,
775
- pci_domain_nr(phb->bus), 0);
752
+ iommu_init_table(tbl, phb->node, 0, 0);
776753 set_iommu_table_base(&dev->dev, tbl);
777
- iommu_add_device(&dev->dev);
778754 return;
779755 }
780756
....@@ -785,11 +761,10 @@
785761 while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
786762 dn = dn->parent;
787763
788
- if (dn && PCI_DN(dn)) {
764
+ if (dn && PCI_DN(dn))
789765 set_iommu_table_base(&dev->dev,
790766 PCI_DN(dn)->table_group->tables[0]);
791
- iommu_add_device(&dev->dev);
792
- } else
767
+ else
793768 printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
794769 pci_name(dev));
795770 }
....@@ -806,25 +781,14 @@
806781
807782 early_param("disable_ddw", disable_ddw_setup);
808783
809
-static void remove_ddw(struct device_node *np, bool remove_prop)
784
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
785
+ struct property *win)
810786 {
811787 struct dynamic_dma_window_prop *dwp;
812
- struct property *win64;
813
- u32 ddw_avail[3];
814788 u64 liobn;
815
- int ret = 0;
789
+ int ret;
816790
817
- ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
818
- &ddw_avail[0], 3);
819
-
820
- win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
821
- if (!win64)
822
- return;
823
-
824
- if (ret || win64->length < sizeof(*dwp))
825
- goto delprop;
826
-
827
- dwp = win64->value;
791
+ dwp = win->value;
828792 liobn = (u64)be32_to_cpu(dwp->liobn);
829793
830794 /* clear the whole window, note the arg is in kernel pages */
....@@ -837,19 +801,39 @@
837801 pr_debug("%pOF successfully cleared tces in window.\n",
838802 np);
839803
840
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
804
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
841805 if (ret)
842806 pr_warn("%pOF: failed to remove direct window: rtas returned "
843807 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
844
- np, ret, ddw_avail[2], liobn);
808
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
845809 else
846810 pr_debug("%pOF: successfully removed direct window: rtas returned "
847811 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
848
- np, ret, ddw_avail[2], liobn);
812
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
813
+}
849814
850
-delprop:
851
- if (remove_prop)
852
- ret = of_remove_property(np, win64);
815
+static void remove_ddw(struct device_node *np, bool remove_prop)
816
+{
817
+ struct property *win;
818
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
819
+ int ret = 0;
820
+
821
+ ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
822
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
823
+ if (ret)
824
+ return;
825
+
826
+ win = of_find_property(np, DIRECT64_PROPNAME, NULL);
827
+ if (!win)
828
+ return;
829
+
830
+ if (win->length >= sizeof(struct dynamic_dma_window_prop))
831
+ remove_dma_window(np, ddw_avail, win);
832
+
833
+ if (!remove_prop)
834
+ return;
835
+
836
+ ret = of_remove_property(np, win);
853837 if (ret)
854838 pr_warn("%pOF: failed to remove direct window property: %d\n",
855839 np, ret);
....@@ -908,14 +892,62 @@
908892 }
909893 machine_arch_initcall(pseries, find_existing_ddw_windows);
910894
895
+/**
896
+ * ddw_read_ext - Get the value of an DDW extension
897
+ * @np: device node from which the extension value is to be read.
898
+ * @extnum: index number of the extension.
899
+ * @value: pointer to return value, modified when extension is available.
900
+ *
901
+ * Checks if "ibm,ddw-extensions" exists for this node, and get the value
902
+ * on index 'extnum'.
903
+ * It can be used only to check if a property exists, passing value == NULL.
904
+ *
905
+ * Returns:
906
+ * 0 if extension successfully read
907
+ * -EINVAL if the "ibm,ddw-extensions" does not exist,
908
+ * -ENODATA if "ibm,ddw-extensions" does not have a value, and
909
+ * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
910
+ */
911
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
912
+ u32 *value)
913
+{
914
+ static const char propname[] = "ibm,ddw-extensions";
915
+ u32 count;
916
+ int ret;
917
+
918
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
919
+ if (ret)
920
+ return ret;
921
+
922
+ if (count < extnum)
923
+ return -EOVERFLOW;
924
+
925
+ if (!value)
926
+ value = &count;
927
+
928
+ return of_property_read_u32_index(np, propname, extnum, value);
929
+}
930
+
911931 static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
912
- struct ddw_query_response *query)
932
+ struct ddw_query_response *query,
933
+ struct device_node *parent)
913934 {
914935 struct device_node *dn;
915936 struct pci_dn *pdn;
916
- u32 cfg_addr;
937
+ u32 cfg_addr, ext_query, query_out[5];
917938 u64 buid;
918
- int ret;
939
+ int ret, out_sz;
940
+
941
+ /*
942
+ * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
943
+ * output parameters ibm,query-pe-dma-windows will have, ranging from
944
+ * 5 to 6.
945
+ */
946
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
947
+ if (!ret && ext_query == 1)
948
+ out_sz = 6;
949
+ else
950
+ out_sz = 5;
919951
920952 /*
921953 * Get the config address and phb buid of the PE window.
....@@ -928,11 +960,28 @@
928960 buid = pdn->phb->buid;
929961 cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
930962
931
- ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
932
- cfg_addr, BUID_HI(buid), BUID_LO(buid));
933
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
934
- " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
935
- BUID_LO(buid), ret);
963
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
964
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
965
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
966
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
967
+ BUID_LO(buid), ret);
968
+
969
+ switch (out_sz) {
970
+ case 5:
971
+ query->windows_available = query_out[0];
972
+ query->largest_available_block = query_out[1];
973
+ query->page_size = query_out[2];
974
+ query->migration_capable = query_out[3];
975
+ break;
976
+ case 6:
977
+ query->windows_available = query_out[0];
978
+ query->largest_available_block = ((u64)query_out[1] << 32) |
979
+ query_out[2];
980
+ query->page_size = query_out[3];
981
+ query->migration_capable = query_out[4];
982
+ break;
983
+ }
984
+
936985 return ret;
937986 }
938987
....@@ -959,15 +1008,16 @@
9591008
9601009 do {
9611010 /* extra outputs are LIOBN and dma-addr (hi, lo) */
962
- ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
963
- cfg_addr, BUID_HI(buid), BUID_LO(buid),
964
- page_shift, window_shift);
1011
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
1012
+ (u32 *)create, cfg_addr, BUID_HI(buid),
1013
+ BUID_LO(buid), page_shift, window_shift);
9651014 } while (rtas_busy_delay(ret));
9661015 dev_info(&dev->dev,
9671016 "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
968
- "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
969
- cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
970
- window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
1017
+ "(liobn = 0x%x starting addr = %x %x)\n",
1018
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
1019
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
1020
+ create->addr_hi, create->addr_lo);
9711021
9721022 return ret;
9731023 }
....@@ -979,6 +1029,66 @@
9791029
9801030 static LIST_HEAD(failed_ddw_pdn_list);
9811031
1032
+static phys_addr_t ddw_memory_hotplug_max(void)
1033
+{
1034
+ phys_addr_t max_addr = memory_hotplug_max();
1035
+ struct device_node *memory;
1036
+
1037
+ for_each_node_by_type(memory, "memory") {
1038
+ unsigned long start, size;
1039
+ int n_mem_addr_cells, n_mem_size_cells, len;
1040
+ const __be32 *memcell_buf;
1041
+
1042
+ memcell_buf = of_get_property(memory, "reg", &len);
1043
+ if (!memcell_buf || len <= 0)
1044
+ continue;
1045
+
1046
+ n_mem_addr_cells = of_n_addr_cells(memory);
1047
+ n_mem_size_cells = of_n_size_cells(memory);
1048
+
1049
+ start = of_read_number(memcell_buf, n_mem_addr_cells);
1050
+ memcell_buf += n_mem_addr_cells;
1051
+ size = of_read_number(memcell_buf, n_mem_size_cells);
1052
+ memcell_buf += n_mem_size_cells;
1053
+
1054
+ max_addr = max_t(phys_addr_t, max_addr, start + size);
1055
+ }
1056
+
1057
+ return max_addr;
1058
+}
1059
+
1060
+/*
1061
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
1062
+ * ibm,ddw-extensions, which carries the rtas token for
1063
+ * ibm,reset-pe-dma-windows.
1064
+ * That rtas-call can be used to restore the default DMA window for the device.
1065
+ */
1066
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
1067
+{
1068
+ int ret;
1069
+ u32 cfg_addr, reset_dma_win;
1070
+ u64 buid;
1071
+ struct device_node *dn;
1072
+ struct pci_dn *pdn;
1073
+
1074
+ ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
1075
+ if (ret)
1076
+ return;
1077
+
1078
+ dn = pci_device_to_OF_node(dev);
1079
+ pdn = PCI_DN(dn);
1080
+ buid = pdn->phb->buid;
1081
+ cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
1082
+
1083
+ ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
1084
+ BUID_LO(buid));
1085
+ if (ret)
1086
+ dev_info(&dev->dev,
1087
+ "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
1088
+ reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
1089
+ ret);
1090
+}
1091
+
9821092 /*
9831093 * If the PE supports dynamic dma windows, and there is space for a table
9841094 * that can map all pages in a linear offset, then setup such a table,
....@@ -988,7 +1098,7 @@
9881098 * pdn: the parent pe node with the ibm,dma_window property
9891099 * Future: also check if we can remap the base window for our base page size
9901100 *
991
- * returns the dma offset for use by dma_set_mask
1101
+ * returns the dma offset for use by the direct mapped DMA code.
9921102 */
9931103 static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
9941104 {
....@@ -998,11 +1108,12 @@
9981108 int page_shift;
9991109 u64 dma_addr, max_addr;
10001110 struct device_node *dn;
1001
- u32 ddw_avail[3];
1111
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
10021112 struct direct_window *window;
10031113 struct property *win64;
10041114 struct dynamic_dma_window_prop *ddwprop;
10051115 struct failed_ddw_pdn *fpdn;
1116
+ bool default_win_removed = false;
10061117
10071118 mutex_lock(&direct_window_init_mutex);
10081119
....@@ -1031,7 +1142,7 @@
10311142 * the property is actually in the parent, not the PE
10321143 */
10331144 ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
1034
- &ddw_avail[0], 3);
1145
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
10351146 if (ret)
10361147 goto out_failed;
10371148
....@@ -1042,18 +1153,42 @@
10421153 * of page sizes: supported and supported for migrate-dma.
10431154 */
10441155 dn = pci_device_to_OF_node(dev);
1045
- ret = query_ddw(dev, ddw_avail, &query);
1156
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
10461157 if (ret != 0)
10471158 goto out_failed;
10481159
1160
+ /*
1161
+ * If there is no window available, remove the default DMA window,
1162
+ * if it's present. This will make all the resources available to the
1163
+ * new DDW window.
1164
+ * If anything fails after this, we need to restore it, so also check
1165
+ * for extensions presence.
1166
+ */
10491167 if (query.windows_available == 0) {
1050
- /*
1051
- * no additional windows are available for this device.
1052
- * We might be able to reallocate the existing window,
1053
- * trading in for a larger page size.
1054
- */
1055
- dev_dbg(&dev->dev, "no free dynamic windows");
1056
- goto out_failed;
1168
+ struct property *default_win;
1169
+ int reset_win_ext;
1170
+
1171
+ default_win = of_find_property(pdn, "ibm,dma-window", NULL);
1172
+ if (!default_win)
1173
+ goto out_failed;
1174
+
1175
+ reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
1176
+ if (reset_win_ext)
1177
+ goto out_failed;
1178
+
1179
+ remove_dma_window(pdn, ddw_avail, default_win);
1180
+ default_win_removed = true;
1181
+
1182
+ /* Query again, to check if the window is available */
1183
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
1184
+ if (ret != 0)
1185
+ goto out_failed;
1186
+
1187
+ if (query.windows_available == 0) {
1188
+ /* no windows are available for this device. */
1189
+ dev_dbg(&dev->dev, "no free dynamic windows");
1190
+ goto out_failed;
1191
+ }
10571192 }
10581193 if (query.page_size & 4) {
10591194 page_shift = 24; /* 16MB */
....@@ -1068,9 +1203,9 @@
10681203 }
10691204 /* verify the window * number of ptes will map the partition */
10701205 /* check largest block * page size > max memory hotplug addr */
1071
- max_addr = memory_hotplug_max();
1206
+ max_addr = ddw_memory_hotplug_max();
10721207 if (query.largest_available_block < (max_addr >> page_shift)) {
1073
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
1208
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
10741209 "%llu-sized pages\n", max_addr, query.largest_available_block,
10751210 1ULL << page_shift);
10761211 goto out_failed;
....@@ -1144,6 +1279,8 @@
11441279 kfree(win64);
11451280
11461281 out_failed:
1282
+ if (default_win_removed)
1283
+ reset_dma_window(dev, pdn);
11471284
11481285 fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
11491286 if (!fpdn)
....@@ -1196,7 +1333,7 @@
11961333 iommu_table_setparms_lpar(pci->phb, pdn, tbl,
11971334 pci->table_group, dma_window);
11981335 tbl->it_ops = &iommu_table_lpar_multi_ops;
1199
- iommu_init_table(tbl, pci->phb->node);
1336
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
12001337 iommu_register_group(pci->table_group,
12011338 pci_domain_nr(pci->phb->bus), 0);
12021339 pr_debug(" created table: %p\n", pci->table_group);
....@@ -1205,90 +1342,40 @@
12051342 }
12061343
12071344 set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
1208
- iommu_add_device(&dev->dev);
1345
+ iommu_add_device(pci->table_group, &dev->dev);
12091346 }
12101347
1211
-static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
1348
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
12121349 {
1213
- bool ddw_enabled = false;
1214
- struct device_node *pdn, *dn;
1215
- struct pci_dev *pdev;
1350
+ struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
12161351 const __be32 *dma_window = NULL;
1217
- u64 dma_offset;
1218
-
1219
- if (!dev->dma_mask)
1220
- return -EIO;
1221
-
1222
- if (!dev_is_pci(dev))
1223
- goto check_mask;
1224
-
1225
- pdev = to_pci_dev(dev);
12261352
12271353 /* only attempt to use a new window if 64-bit DMA is requested */
1228
- if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
1229
- dn = pci_device_to_OF_node(pdev);
1230
- dev_dbg(dev, "node is %pOF\n", dn);
1354
+ if (dma_mask < DMA_BIT_MASK(64))
1355
+ return false;
12311356
1232
- /*
1233
- * the device tree might contain the dma-window properties
1234
- * per-device and not necessarily for the bus. So we need to
1235
- * search upwards in the tree until we either hit a dma-window
1236
- * property, OR find a parent with a table already allocated.
1237
- */
1238
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1239
- pdn = pdn->parent) {
1240
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1241
- if (dma_window)
1242
- break;
1243
- }
1244
- if (pdn && PCI_DN(pdn)) {
1245
- dma_offset = enable_ddw(pdev, pdn);
1246
- if (dma_offset != 0) {
1247
- dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
1248
- set_dma_offset(dev, dma_offset);
1249
- set_dma_ops(dev, &dma_nommu_ops);
1250
- ddw_enabled = true;
1251
- }
1252
- }
1357
+ dev_dbg(&pdev->dev, "node is %pOF\n", dn);
1358
+
1359
+ /*
1360
+ * the device tree might contain the dma-window properties
1361
+ * per-device and not necessarily for the bus. So we need to
1362
+ * search upwards in the tree until we either hit a dma-window
1363
+ * property, OR find a parent with a table already allocated.
1364
+ */
1365
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1366
+ pdn = pdn->parent) {
1367
+ dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1368
+ if (dma_window)
1369
+ break;
12531370 }
12541371
1255
- /* fall back on iommu ops */
1256
- if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
1257
- dev_info(dev, "Restoring 32-bit DMA via iommu\n");
1258
- set_dma_ops(dev, &dma_iommu_ops);
1372
+ if (pdn && PCI_DN(pdn)) {
1373
+ pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
1374
+ if (pdev->dev.archdata.dma_offset)
1375
+ return true;
12591376 }
12601377
1261
-check_mask:
1262
- if (!dma_supported(dev, dma_mask))
1263
- return -EIO;
1264
-
1265
- *dev->dma_mask = dma_mask;
1266
- return 0;
1267
-}
1268
-
1269
-static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
1270
-{
1271
- if (!dev->dma_mask)
1272
- return 0;
1273
-
1274
- if (!disable_ddw && dev_is_pci(dev)) {
1275
- struct pci_dev *pdev = to_pci_dev(dev);
1276
- struct device_node *dn;
1277
-
1278
- dn = pci_device_to_OF_node(pdev);
1279
-
1280
- /* search upwards for ibm,dma-window */
1281
- for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
1282
- dn = dn->parent)
1283
- if (of_get_property(dn, "ibm,dma-window", NULL))
1284
- break;
1285
- /* if there is a ibm,ddw-applicable property require 64 bits */
1286
- if (dn && PCI_DN(dn) &&
1287
- of_get_property(dn, "ibm,ddw-applicable", NULL))
1288
- return DMA_BIT_MASK(64);
1289
- }
1290
-
1291
- return dma_iommu_ops.get_required_mask(dev);
1378
+ return false;
12921379 }
12931380
12941381 static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
....@@ -1383,8 +1470,9 @@
13831470 if (firmware_has_feature(FW_FEATURE_LPAR)) {
13841471 pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
13851472 pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
1386
- ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
1387
- ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
1473
+ if (!disable_ddw)
1474
+ pseries_pci_controller_ops.iommu_bypass_supported =
1475
+ iommu_bypass_supported_pSeriesLP;
13881476 } else {
13891477 pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
13901478 pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
....@@ -1401,13 +1489,38 @@
14011489 {
14021490 if (strcmp(str, "off") == 0 &&
14031491 firmware_has_feature(FW_FEATURE_LPAR) &&
1404
- firmware_has_feature(FW_FEATURE_MULTITCE)) {
1492
+ (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
1493
+ firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
14051494 printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
1406
- powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
1495
+ powerpc_firmware_features &=
1496
+ ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
14071497 }
14081498 return 1;
14091499 }
14101500
14111501 __setup("multitce=", disable_multitce);
14121502
1503
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
1504
+ unsigned long action, void *data)
1505
+{
1506
+ struct device *dev = data;
1507
+
1508
+ switch (action) {
1509
+ case BUS_NOTIFY_DEL_DEVICE:
1510
+ iommu_del_device(dev);
1511
+ return 0;
1512
+ default:
1513
+ return 0;
1514
+ }
1515
+}
1516
+
1517
+static struct notifier_block tce_iommu_bus_nb = {
1518
+ .notifier_call = tce_iommu_bus_notifier,
1519
+};
1520
+
1521
+static int __init tce_iommu_bus_notifier_init(void)
1522
+{
1523
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1524
+ return 0;
1525
+}
14131526 machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);