hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/powerpc/platforms/pseries/iommu.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
34 *
....@@ -7,21 +8,6 @@
78 * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
89 *
910 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
10
- *
11
- *
12
- * This program is free software; you can redistribute it and/or modify
13
- * it under the terms of the GNU General Public License as published by
14
- * the Free Software Foundation; either version 2 of the License, or
15
- * (at your option) any later version.
16
- *
17
- * This program is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
- * GNU General Public License for more details.
21
- *
22
- * You should have received a copy of the GNU General Public License
23
- * along with this program; if not, write to the Free Software
24
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2511 */
2612
2713 #include <linux/init.h>
....@@ -38,7 +24,6 @@
3824 #include <linux/of.h>
3925 #include <linux/iommu.h>
4026 #include <linux/rculist.h>
41
-#include <linux/locallock.h>
4227 #include <asm/io.h>
4328 #include <asm/prom.h>
4429 #include <asm/rtas.h>
....@@ -54,11 +39,24 @@
5439
5540 #include "pseries.h"
5641
42
+enum {
43
+ DDW_QUERY_PE_DMA_WIN = 0,
44
+ DDW_CREATE_PE_DMA_WIN = 1,
45
+ DDW_REMOVE_PE_DMA_WIN = 2,
46
+
47
+ DDW_APPLICABLE_SIZE
48
+};
49
+
50
+enum {
51
+ DDW_EXT_SIZE = 0,
52
+ DDW_EXT_RESET_DMA_WIN = 1,
53
+ DDW_EXT_QUERY_OUT_SIZE = 2
54
+};
55
+
5756 static struct iommu_table_group *iommu_pseries_alloc_group(int node)
5857 {
5958 struct iommu_table_group *table_group;
6059 struct iommu_table *tbl;
61
- struct iommu_table_group_link *tgl;
6260
6361 table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
6462 node);
....@@ -69,22 +67,13 @@
6967 if (!tbl)
7068 goto free_group;
7169
72
- tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
73
- node);
74
- if (!tgl)
75
- goto free_table;
76
-
7770 INIT_LIST_HEAD_RCU(&tbl->it_group_list);
7871 kref_init(&tbl->it_kref);
79
- tgl->table_group = table_group;
80
- list_add_rcu(&tgl->next, &tbl->it_group_list);
8172
8273 table_group->tables[0] = tbl;
8374
8475 return table_group;
8576
86
-free_table:
87
- kfree(tbl);
8877 free_group:
8978 kfree(table_group);
9079 return NULL;
....@@ -94,23 +83,12 @@
9483 const char *node_name)
9584 {
9685 struct iommu_table *tbl;
97
-#ifdef CONFIG_IOMMU_API
98
- struct iommu_table_group_link *tgl;
99
-#endif
10086
10187 if (!table_group)
10288 return;
10389
10490 tbl = table_group->tables[0];
10591 #ifdef CONFIG_IOMMU_API
106
- tgl = list_first_entry_or_null(&tbl->it_group_list,
107
- struct iommu_table_group_link, next);
108
-
109
- WARN_ON_ONCE(!tgl);
110
- if (tgl) {
111
- list_del_rcu(&tgl->next);
112
- kfree(tgl);
113
- }
11492 if (table_group->group) {
11593 iommu_group_put(table_group->group);
11694 BUG_ON(table_group->group);
....@@ -127,7 +105,7 @@
127105 unsigned long attrs)
128106 {
129107 u64 proto_tce;
130
- __be64 *tcep, *tces;
108
+ __be64 *tcep;
131109 u64 rpn;
132110
133111 proto_tce = TCE_PCI_READ; // Read allowed
....@@ -135,7 +113,7 @@
135113 if (direction != DMA_TO_DEVICE)
136114 proto_tce |= TCE_PCI_WRITE;
137115
138
- tces = tcep = ((__be64 *)tbl->it_base) + index;
116
+ tcep = ((__be64 *)tbl->it_base) + index;
139117
140118 while (npages--) {
141119 /* can't move this out since we might cross MEMBLOCK boundary */
....@@ -151,9 +129,9 @@
151129
152130 static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
153131 {
154
- __be64 *tcep, *tces;
132
+ __be64 *tcep;
155133
156
- tces = tcep = ((__be64 *)tbl->it_base) + index;
134
+ tcep = ((__be64 *)tbl->it_base) + index;
157135
158136 while (npages--)
159137 *(tcep++) = 0;
....@@ -213,7 +191,6 @@
213191 }
214192
215193 static DEFINE_PER_CPU(__be64 *, tce_page);
216
-static DEFINE_LOCAL_IRQ_LOCK(tcp_page_lock);
217194
218195 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
219196 long npages, unsigned long uaddr,
....@@ -229,14 +206,13 @@
229206 int ret = 0;
230207 unsigned long flags;
231208
232
- if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
209
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
233210 return tce_build_pSeriesLP(tbl->it_index, tcenum,
234211 tbl->it_page_shift, npages, uaddr,
235212 direction, attrs);
236213 }
237214
238
- /* to protect tcep and the page behind it */
239
- local_lock_irqsave(tcp_page_lock, flags);
215
+ local_irq_save(flags); /* to protect tcep and the page behind it */
240216
241217 tcep = __this_cpu_read(tce_page);
242218
....@@ -247,7 +223,7 @@
247223 tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
248224 /* If allocation fails, fall back to the loop implementation */
249225 if (!tcep) {
250
- local_unlock_irqrestore(tcp_page_lock, flags);
226
+ local_irq_restore(flags);
251227 return tce_build_pSeriesLP(tbl->it_index, tcenum,
252228 tbl->it_page_shift,
253229 npages, uaddr, direction, attrs);
....@@ -282,7 +258,7 @@
282258 tcenum += limit;
283259 } while (npages > 0 && !rc);
284260
285
- local_unlock_irqrestore(tcp_page_lock, flags);
261
+ local_irq_restore(flags);
286262
287263 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
288264 ret = (int)rc;
....@@ -324,7 +300,7 @@
324300 {
325301 u64 rc;
326302
327
- if (!firmware_has_feature(FW_FEATURE_MULTITCE))
303
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
328304 return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
329305
330306 rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
....@@ -372,7 +348,7 @@
372348 /* Dynamic DMA Window support */
373349 struct ddw_query_response {
374350 u32 windows_available;
375
- u32 largest_available_block;
351
+ u64 largest_available_block;
376352 u32 page_size;
377353 u32 migration_capable;
378354 };
....@@ -440,7 +416,7 @@
440416 u64 rc = 0;
441417 long l, limit;
442418
443
- if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
419
+ if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
444420 unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
445421 unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
446422 be64_to_cpu(maprange->dma_base);
....@@ -453,14 +429,13 @@
453429 DMA_BIDIRECTIONAL, 0);
454430 }
455431
456
- /* to protect tcep and the page behind it */
457
- local_lock_irq(tcp_page_lock);
432
+ local_irq_disable(); /* to protect tcep and the page behind it */
458433 tcep = __this_cpu_read(tce_page);
459434
460435 if (!tcep) {
461436 tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
462437 if (!tcep) {
463
- local_unlock_irq(tcp_page_lock);
438
+ local_irq_enable();
464439 return -ENOMEM;
465440 }
466441 __this_cpu_write(tce_page, tcep);
....@@ -506,7 +481,7 @@
506481
507482 /* error cleanup: caller will clear whole range */
508483
509
- local_unlock_irq(tcp_page_lock);
484
+ local_irq_enable();
510485 return rc;
511486 }
512487
....@@ -663,8 +638,7 @@
663638
664639 iommu_table_setparms(pci->phb, dn, tbl);
665640 tbl->it_ops = &iommu_table_pseries_ops;
666
- iommu_init_table(tbl, pci->phb->node);
667
- iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
641
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
668642
669643 /* Divide the rest (1.75GB) among the children */
670644 pci->phb->dma_window_size = 0x80000000ul;
....@@ -676,7 +650,8 @@
676650
677651 #ifdef CONFIG_IOMMU_API
678652 static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
679
- long *tce, enum dma_data_direction *direction)
653
+ long *tce, enum dma_data_direction *direction,
654
+ bool realmode)
680655 {
681656 long rc;
682657 unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
....@@ -704,7 +679,7 @@
704679 struct iommu_table_ops iommu_table_lpar_multi_ops = {
705680 .set = tce_buildmulti_pSeriesLP,
706681 #ifdef CONFIG_IOMMU_API
707
- .exchange = tce_exchange_pseries,
682
+ .xchg_no_kill = tce_exchange_pseries,
708683 #endif
709684 .clear = tce_freemulti_pSeriesLP,
710685 .get = tce_get_pSeriesLP
....@@ -745,7 +720,7 @@
745720 iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
746721 ppci->table_group, dma_window);
747722 tbl->it_ops = &iommu_table_lpar_multi_ops;
748
- iommu_init_table(tbl, ppci->phb->node);
723
+ iommu_init_table(tbl, ppci->phb->node, 0, 0);
749724 iommu_register_group(ppci->table_group,
750725 pci_domain_nr(bus), 0);
751726 pr_debug(" created table: %p\n", ppci->table_group);
....@@ -774,11 +749,8 @@
774749 tbl = PCI_DN(dn)->table_group->tables[0];
775750 iommu_table_setparms(phb, dn, tbl);
776751 tbl->it_ops = &iommu_table_pseries_ops;
777
- iommu_init_table(tbl, phb->node);
778
- iommu_register_group(PCI_DN(dn)->table_group,
779
- pci_domain_nr(phb->bus), 0);
752
+ iommu_init_table(tbl, phb->node, 0, 0);
780753 set_iommu_table_base(&dev->dev, tbl);
781
- iommu_add_device(&dev->dev);
782754 return;
783755 }
784756
....@@ -789,11 +761,10 @@
789761 while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
790762 dn = dn->parent;
791763
792
- if (dn && PCI_DN(dn)) {
764
+ if (dn && PCI_DN(dn))
793765 set_iommu_table_base(&dev->dev,
794766 PCI_DN(dn)->table_group->tables[0]);
795
- iommu_add_device(&dev->dev);
796
- } else
767
+ else
797768 printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
798769 pci_name(dev));
799770 }
....@@ -810,25 +781,14 @@
810781
811782 early_param("disable_ddw", disable_ddw_setup);
812783
813
-static void remove_ddw(struct device_node *np, bool remove_prop)
784
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
785
+ struct property *win)
814786 {
815787 struct dynamic_dma_window_prop *dwp;
816
- struct property *win64;
817
- u32 ddw_avail[3];
818788 u64 liobn;
819
- int ret = 0;
789
+ int ret;
820790
821
- ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
822
- &ddw_avail[0], 3);
823
-
824
- win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
825
- if (!win64)
826
- return;
827
-
828
- if (ret || win64->length < sizeof(*dwp))
829
- goto delprop;
830
-
831
- dwp = win64->value;
791
+ dwp = win->value;
832792 liobn = (u64)be32_to_cpu(dwp->liobn);
833793
834794 /* clear the whole window, note the arg is in kernel pages */
....@@ -841,19 +801,39 @@
841801 pr_debug("%pOF successfully cleared tces in window.\n",
842802 np);
843803
844
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
804
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
845805 if (ret)
846806 pr_warn("%pOF: failed to remove direct window: rtas returned "
847807 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
848
- np, ret, ddw_avail[2], liobn);
808
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
849809 else
850810 pr_debug("%pOF: successfully removed direct window: rtas returned "
851811 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
852
- np, ret, ddw_avail[2], liobn);
812
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
813
+}
853814
854
-delprop:
855
- if (remove_prop)
856
- ret = of_remove_property(np, win64);
815
+static void remove_ddw(struct device_node *np, bool remove_prop)
816
+{
817
+ struct property *win;
818
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
819
+ int ret = 0;
820
+
821
+ ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
822
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
823
+ if (ret)
824
+ return;
825
+
826
+ win = of_find_property(np, DIRECT64_PROPNAME, NULL);
827
+ if (!win)
828
+ return;
829
+
830
+ if (win->length >= sizeof(struct dynamic_dma_window_prop))
831
+ remove_dma_window(np, ddw_avail, win);
832
+
833
+ if (!remove_prop)
834
+ return;
835
+
836
+ ret = of_remove_property(np, win);
857837 if (ret)
858838 pr_warn("%pOF: failed to remove direct window property: %d\n",
859839 np, ret);
....@@ -912,14 +892,62 @@
912892 }
913893 machine_arch_initcall(pseries, find_existing_ddw_windows);
914894
895
+/**
896
+ * ddw_read_ext - Get the value of an DDW extension
897
+ * @np: device node from which the extension value is to be read.
898
+ * @extnum: index number of the extension.
899
+ * @value: pointer to return value, modified when extension is available.
900
+ *
901
+ * Checks if "ibm,ddw-extensions" exists for this node, and get the value
902
+ * on index 'extnum'.
903
+ * It can be used only to check if a property exists, passing value == NULL.
904
+ *
905
+ * Returns:
906
+ * 0 if extension successfully read
907
+ * -EINVAL if the "ibm,ddw-extensions" does not exist,
908
+ * -ENODATA if "ibm,ddw-extensions" does not have a value, and
909
+ * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
910
+ */
911
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
912
+ u32 *value)
913
+{
914
+ static const char propname[] = "ibm,ddw-extensions";
915
+ u32 count;
916
+ int ret;
917
+
918
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
919
+ if (ret)
920
+ return ret;
921
+
922
+ if (count < extnum)
923
+ return -EOVERFLOW;
924
+
925
+ if (!value)
926
+ value = &count;
927
+
928
+ return of_property_read_u32_index(np, propname, extnum, value);
929
+}
930
+
915931 static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
916
- struct ddw_query_response *query)
932
+ struct ddw_query_response *query,
933
+ struct device_node *parent)
917934 {
918935 struct device_node *dn;
919936 struct pci_dn *pdn;
920
- u32 cfg_addr;
937
+ u32 cfg_addr, ext_query, query_out[5];
921938 u64 buid;
922
- int ret;
939
+ int ret, out_sz;
940
+
941
+ /*
942
+ * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
943
+ * output parameters ibm,query-pe-dma-windows will have, ranging from
944
+ * 5 to 6.
945
+ */
946
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
947
+ if (!ret && ext_query == 1)
948
+ out_sz = 6;
949
+ else
950
+ out_sz = 5;
923951
924952 /*
925953 * Get the config address and phb buid of the PE window.
....@@ -932,11 +960,28 @@
932960 buid = pdn->phb->buid;
933961 cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
934962
935
- ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
936
- cfg_addr, BUID_HI(buid), BUID_LO(buid));
937
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
938
- " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
939
- BUID_LO(buid), ret);
963
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
964
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
965
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
966
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
967
+ BUID_LO(buid), ret);
968
+
969
+ switch (out_sz) {
970
+ case 5:
971
+ query->windows_available = query_out[0];
972
+ query->largest_available_block = query_out[1];
973
+ query->page_size = query_out[2];
974
+ query->migration_capable = query_out[3];
975
+ break;
976
+ case 6:
977
+ query->windows_available = query_out[0];
978
+ query->largest_available_block = ((u64)query_out[1] << 32) |
979
+ query_out[2];
980
+ query->page_size = query_out[3];
981
+ query->migration_capable = query_out[4];
982
+ break;
983
+ }
984
+
940985 return ret;
941986 }
942987
....@@ -963,15 +1008,16 @@
9631008
9641009 do {
9651010 /* extra outputs are LIOBN and dma-addr (hi, lo) */
966
- ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
967
- cfg_addr, BUID_HI(buid), BUID_LO(buid),
968
- page_shift, window_shift);
1011
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
1012
+ (u32 *)create, cfg_addr, BUID_HI(buid),
1013
+ BUID_LO(buid), page_shift, window_shift);
9691014 } while (rtas_busy_delay(ret));
9701015 dev_info(&dev->dev,
9711016 "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
972
- "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
973
- cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
974
- window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
1017
+ "(liobn = 0x%x starting addr = %x %x)\n",
1018
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
1019
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
1020
+ create->addr_hi, create->addr_lo);
9751021
9761022 return ret;
9771023 }
....@@ -983,6 +1029,66 @@
9831029
9841030 static LIST_HEAD(failed_ddw_pdn_list);
9851031
1032
+static phys_addr_t ddw_memory_hotplug_max(void)
1033
+{
1034
+ phys_addr_t max_addr = memory_hotplug_max();
1035
+ struct device_node *memory;
1036
+
1037
+ for_each_node_by_type(memory, "memory") {
1038
+ unsigned long start, size;
1039
+ int n_mem_addr_cells, n_mem_size_cells, len;
1040
+ const __be32 *memcell_buf;
1041
+
1042
+ memcell_buf = of_get_property(memory, "reg", &len);
1043
+ if (!memcell_buf || len <= 0)
1044
+ continue;
1045
+
1046
+ n_mem_addr_cells = of_n_addr_cells(memory);
1047
+ n_mem_size_cells = of_n_size_cells(memory);
1048
+
1049
+ start = of_read_number(memcell_buf, n_mem_addr_cells);
1050
+ memcell_buf += n_mem_addr_cells;
1051
+ size = of_read_number(memcell_buf, n_mem_size_cells);
1052
+ memcell_buf += n_mem_size_cells;
1053
+
1054
+ max_addr = max_t(phys_addr_t, max_addr, start + size);
1055
+ }
1056
+
1057
+ return max_addr;
1058
+}
1059
+
1060
+/*
1061
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
1062
+ * ibm,ddw-extensions, which carries the rtas token for
1063
+ * ibm,reset-pe-dma-windows.
1064
+ * That rtas-call can be used to restore the default DMA window for the device.
1065
+ */
1066
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
1067
+{
1068
+ int ret;
1069
+ u32 cfg_addr, reset_dma_win;
1070
+ u64 buid;
1071
+ struct device_node *dn;
1072
+ struct pci_dn *pdn;
1073
+
1074
+ ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
1075
+ if (ret)
1076
+ return;
1077
+
1078
+ dn = pci_device_to_OF_node(dev);
1079
+ pdn = PCI_DN(dn);
1080
+ buid = pdn->phb->buid;
1081
+ cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
1082
+
1083
+ ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
1084
+ BUID_LO(buid));
1085
+ if (ret)
1086
+ dev_info(&dev->dev,
1087
+ "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
1088
+ reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
1089
+ ret);
1090
+}
1091
+
9861092 /*
9871093 * If the PE supports dynamic dma windows, and there is space for a table
9881094 * that can map all pages in a linear offset, then setup such a table,
....@@ -992,7 +1098,7 @@
9921098 * pdn: the parent pe node with the ibm,dma_window property
9931099 * Future: also check if we can remap the base window for our base page size
9941100 *
995
- * returns the dma offset for use by dma_set_mask
1101
+ * returns the dma offset for use by the direct mapped DMA code.
9961102 */
9971103 static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
9981104 {
....@@ -1002,11 +1108,12 @@
10021108 int page_shift;
10031109 u64 dma_addr, max_addr;
10041110 struct device_node *dn;
1005
- u32 ddw_avail[3];
1111
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
10061112 struct direct_window *window;
10071113 struct property *win64;
10081114 struct dynamic_dma_window_prop *ddwprop;
10091115 struct failed_ddw_pdn *fpdn;
1116
+ bool default_win_removed = false;
10101117
10111118 mutex_lock(&direct_window_init_mutex);
10121119
....@@ -1035,7 +1142,7 @@
10351142 * the property is actually in the parent, not the PE
10361143 */
10371144 ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
1038
- &ddw_avail[0], 3);
1145
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
10391146 if (ret)
10401147 goto out_failed;
10411148
....@@ -1046,18 +1153,42 @@
10461153 * of page sizes: supported and supported for migrate-dma.
10471154 */
10481155 dn = pci_device_to_OF_node(dev);
1049
- ret = query_ddw(dev, ddw_avail, &query);
1156
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
10501157 if (ret != 0)
10511158 goto out_failed;
10521159
1160
+ /*
1161
+ * If there is no window available, remove the default DMA window,
1162
+ * if it's present. This will make all the resources available to the
1163
+ * new DDW window.
1164
+ * If anything fails after this, we need to restore it, so also check
1165
+ * for extensions presence.
1166
+ */
10531167 if (query.windows_available == 0) {
1054
- /*
1055
- * no additional windows are available for this device.
1056
- * We might be able to reallocate the existing window,
1057
- * trading in for a larger page size.
1058
- */
1059
- dev_dbg(&dev->dev, "no free dynamic windows");
1060
- goto out_failed;
1168
+ struct property *default_win;
1169
+ int reset_win_ext;
1170
+
1171
+ default_win = of_find_property(pdn, "ibm,dma-window", NULL);
1172
+ if (!default_win)
1173
+ goto out_failed;
1174
+
1175
+ reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
1176
+ if (reset_win_ext)
1177
+ goto out_failed;
1178
+
1179
+ remove_dma_window(pdn, ddw_avail, default_win);
1180
+ default_win_removed = true;
1181
+
1182
+ /* Query again, to check if the window is available */
1183
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
1184
+ if (ret != 0)
1185
+ goto out_failed;
1186
+
1187
+ if (query.windows_available == 0) {
1188
+ /* no windows are available for this device. */
1189
+ dev_dbg(&dev->dev, "no free dynamic windows");
1190
+ goto out_failed;
1191
+ }
10611192 }
10621193 if (query.page_size & 4) {
10631194 page_shift = 24; /* 16MB */
....@@ -1072,9 +1203,9 @@
10721203 }
10731204 /* verify the window * number of ptes will map the partition */
10741205 /* check largest block * page size > max memory hotplug addr */
1075
- max_addr = memory_hotplug_max();
1206
+ max_addr = ddw_memory_hotplug_max();
10761207 if (query.largest_available_block < (max_addr >> page_shift)) {
1077
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
1208
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
10781209 "%llu-sized pages\n", max_addr, query.largest_available_block,
10791210 1ULL << page_shift);
10801211 goto out_failed;
....@@ -1148,6 +1279,8 @@
11481279 kfree(win64);
11491280
11501281 out_failed:
1282
+ if (default_win_removed)
1283
+ reset_dma_window(dev, pdn);
11511284
11521285 fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
11531286 if (!fpdn)
....@@ -1200,7 +1333,7 @@
12001333 iommu_table_setparms_lpar(pci->phb, pdn, tbl,
12011334 pci->table_group, dma_window);
12021335 tbl->it_ops = &iommu_table_lpar_multi_ops;
1203
- iommu_init_table(tbl, pci->phb->node);
1336
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
12041337 iommu_register_group(pci->table_group,
12051338 pci_domain_nr(pci->phb->bus), 0);
12061339 pr_debug(" created table: %p\n", pci->table_group);
....@@ -1209,90 +1342,40 @@
12091342 }
12101343
12111344 set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
1212
- iommu_add_device(&dev->dev);
1345
+ iommu_add_device(pci->table_group, &dev->dev);
12131346 }
12141347
1215
-static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
1348
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
12161349 {
1217
- bool ddw_enabled = false;
1218
- struct device_node *pdn, *dn;
1219
- struct pci_dev *pdev;
1350
+ struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
12201351 const __be32 *dma_window = NULL;
1221
- u64 dma_offset;
1222
-
1223
- if (!dev->dma_mask)
1224
- return -EIO;
1225
-
1226
- if (!dev_is_pci(dev))
1227
- goto check_mask;
1228
-
1229
- pdev = to_pci_dev(dev);
12301352
12311353 /* only attempt to use a new window if 64-bit DMA is requested */
1232
- if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
1233
- dn = pci_device_to_OF_node(pdev);
1234
- dev_dbg(dev, "node is %pOF\n", dn);
1354
+ if (dma_mask < DMA_BIT_MASK(64))
1355
+ return false;
12351356
1236
- /*
1237
- * the device tree might contain the dma-window properties
1238
- * per-device and not necessarily for the bus. So we need to
1239
- * search upwards in the tree until we either hit a dma-window
1240
- * property, OR find a parent with a table already allocated.
1241
- */
1242
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1243
- pdn = pdn->parent) {
1244
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1245
- if (dma_window)
1246
- break;
1247
- }
1248
- if (pdn && PCI_DN(pdn)) {
1249
- dma_offset = enable_ddw(pdev, pdn);
1250
- if (dma_offset != 0) {
1251
- dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
1252
- set_dma_offset(dev, dma_offset);
1253
- set_dma_ops(dev, &dma_nommu_ops);
1254
- ddw_enabled = true;
1255
- }
1256
- }
1357
+ dev_dbg(&pdev->dev, "node is %pOF\n", dn);
1358
+
1359
+ /*
1360
+ * the device tree might contain the dma-window properties
1361
+ * per-device and not necessarily for the bus. So we need to
1362
+ * search upwards in the tree until we either hit a dma-window
1363
+ * property, OR find a parent with a table already allocated.
1364
+ */
1365
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1366
+ pdn = pdn->parent) {
1367
+ dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1368
+ if (dma_window)
1369
+ break;
12571370 }
12581371
1259
- /* fall back on iommu ops */
1260
- if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
1261
- dev_info(dev, "Restoring 32-bit DMA via iommu\n");
1262
- set_dma_ops(dev, &dma_iommu_ops);
1372
+ if (pdn && PCI_DN(pdn)) {
1373
+ pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
1374
+ if (pdev->dev.archdata.dma_offset)
1375
+ return true;
12631376 }
12641377
1265
-check_mask:
1266
- if (!dma_supported(dev, dma_mask))
1267
- return -EIO;
1268
-
1269
- *dev->dma_mask = dma_mask;
1270
- return 0;
1271
-}
1272
-
1273
-static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
1274
-{
1275
- if (!dev->dma_mask)
1276
- return 0;
1277
-
1278
- if (!disable_ddw && dev_is_pci(dev)) {
1279
- struct pci_dev *pdev = to_pci_dev(dev);
1280
- struct device_node *dn;
1281
-
1282
- dn = pci_device_to_OF_node(pdev);
1283
-
1284
- /* search upwards for ibm,dma-window */
1285
- for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
1286
- dn = dn->parent)
1287
- if (of_get_property(dn, "ibm,dma-window", NULL))
1288
- break;
1289
- /* if there is a ibm,ddw-applicable property require 64 bits */
1290
- if (dn && PCI_DN(dn) &&
1291
- of_get_property(dn, "ibm,ddw-applicable", NULL))
1292
- return DMA_BIT_MASK(64);
1293
- }
1294
-
1295
- return dma_iommu_ops.get_required_mask(dev);
1378
+ return false;
12961379 }
12971380
12981381 static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
....@@ -1387,8 +1470,9 @@
13871470 if (firmware_has_feature(FW_FEATURE_LPAR)) {
13881471 pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
13891472 pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
1390
- ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
1391
- ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
1473
+ if (!disable_ddw)
1474
+ pseries_pci_controller_ops.iommu_bypass_supported =
1475
+ iommu_bypass_supported_pSeriesLP;
13921476 } else {
13931477 pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
13941478 pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
....@@ -1405,13 +1489,38 @@
14051489 {
14061490 if (strcmp(str, "off") == 0 &&
14071491 firmware_has_feature(FW_FEATURE_LPAR) &&
1408
- firmware_has_feature(FW_FEATURE_MULTITCE)) {
1492
+ (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
1493
+ firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
14091494 printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
1410
- powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
1495
+ powerpc_firmware_features &=
1496
+ ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
14111497 }
14121498 return 1;
14131499 }
14141500
14151501 __setup("multitce=", disable_multitce);
14161502
1503
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
1504
+ unsigned long action, void *data)
1505
+{
1506
+ struct device *dev = data;
1507
+
1508
+ switch (action) {
1509
+ case BUS_NOTIFY_DEL_DEVICE:
1510
+ iommu_del_device(dev);
1511
+ return 0;
1512
+ default:
1513
+ return 0;
1514
+ }
1515
+}
1516
+
1517
+static struct notifier_block tce_iommu_bus_nb = {
1518
+ .notifier_call = tce_iommu_bus_notifier,
1519
+};
1520
+
1521
+static int __init tce_iommu_bus_notifier_init(void)
1522
+{
1523
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1524
+ return 0;
1525
+}
14171526 machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);