From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/arch/powerpc/platforms/pseries/iommu.c | 508 +++++++++++++++++++++++++++++++++++---------------------
1 files changed, 315 insertions(+), 193 deletions(-)
diff --git a/kernel/arch/powerpc/platforms/pseries/iommu.c b/kernel/arch/powerpc/platforms/pseries/iommu.c
index b1a08cb..f05555d 100644
--- a/kernel/arch/powerpc/platforms/pseries/iommu.c
+++ b/kernel/arch/powerpc/platforms/pseries/iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
@@ -7,21 +8,6 @@
* Copyright (C) 2006 Olof Johansson <olof@lixom.net>
*
* Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
@@ -38,6 +24,7 @@
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
+#include <linux/local_lock.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -53,11 +40,24 @@
#include "pseries.h"
+enum {
+ DDW_QUERY_PE_DMA_WIN = 0,
+ DDW_CREATE_PE_DMA_WIN = 1,
+ DDW_REMOVE_PE_DMA_WIN = 2,
+
+ DDW_APPLICABLE_SIZE
+};
+
+enum {
+ DDW_EXT_SIZE = 0,
+ DDW_EXT_RESET_DMA_WIN = 1,
+ DDW_EXT_QUERY_OUT_SIZE = 2
+};
+
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
struct iommu_table *tbl;
- struct iommu_table_group_link *tgl;
table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
node);
@@ -68,22 +68,13 @@
if (!tbl)
goto free_group;
- tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
- node);
- if (!tgl)
- goto free_table;
-
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
kref_init(&tbl->it_kref);
- tgl->table_group = table_group;
- list_add_rcu(&tgl->next, &tbl->it_group_list);
table_group->tables[0] = tbl;
return table_group;
-free_table:
- kfree(tbl);
free_group:
kfree(table_group);
return NULL;
@@ -93,23 +84,12 @@
const char *node_name)
{
struct iommu_table *tbl;
-#ifdef CONFIG_IOMMU_API
- struct iommu_table_group_link *tgl;
-#endif
if (!table_group)
return;
tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
- tgl = list_first_entry_or_null(&tbl->it_group_list,
- struct iommu_table_group_link, next);
-
- WARN_ON_ONCE(!tgl);
- if (tgl) {
- list_del_rcu(&tgl->next);
- kfree(tgl);
- }
if (table_group->group) {
iommu_group_put(table_group->group);
BUG_ON(table_group->group);
@@ -126,7 +106,7 @@
unsigned long attrs)
{
u64 proto_tce;
- __be64 *tcep, *tces;
+ __be64 *tcep;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -134,7 +114,7 @@
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
@@ -150,9 +130,9 @@
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+ __be64 *tcep;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--)
*(tcep++) = 0;
@@ -211,7 +191,13 @@
return ret;
}
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+ __be64 * page;
+ local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -227,15 +213,16 @@
int ret = 0;
unsigned long flags;
- if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
return tce_build_pSeriesLP(tbl->it_index, tcenum,
tbl->it_page_shift, npages, uaddr,
direction, attrs);
}
- local_irq_save(flags); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irqsave(&tce_page.lock, flags);
- tcep = __this_cpu_read(tce_page);
+ tcep = __this_cpu_read(tce_page.page);
/* This is safe to do since interrupts are off when we're called
* from iommu_alloc{,_sg}()
@@ -244,12 +231,12 @@
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
return tce_build_pSeriesLP(tbl->it_index, tcenum,
tbl->it_page_shift,
npages, uaddr, direction, attrs);
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
rpn = __pa(uaddr) >> TCE_SHIFT;
@@ -279,7 +266,7 @@
tcenum += limit;
} while (npages > 0 && !rc);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
@@ -321,7 +308,7 @@
{
u64 rc;
- if (!firmware_has_feature(FW_FEATURE_MULTITCE))
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
@@ -369,7 +356,7 @@
/* Dynamic DMA Window support */
struct ddw_query_response {
u32 windows_available;
- u32 largest_available_block;
+ u64 largest_available_block;
u32 page_size;
u32 migration_capable;
};
@@ -437,7 +424,7 @@
u64 rc = 0;
long l, limit;
- if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
be64_to_cpu(maprange->dma_base);
@@ -450,16 +437,17 @@
DMA_BIDIRECTIONAL, 0);
}
- local_irq_disable(); /* to protect tcep and the page behind it */
- tcep = __this_cpu_read(tce_page);
+ /* to protect tcep and the page behind it */
+ local_lock_irq(&tce_page.lock);
+ tcep = __this_cpu_read(tce_page.page);
if (!tcep) {
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
if (!tcep) {
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return -ENOMEM;
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -502,7 +490,7 @@
/* error cleanup: caller will clear whole range */
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return rc;
}
@@ -659,8 +647,7 @@
iommu_table_setparms(pci->phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, pci->phb->node);
- iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -672,7 +659,8 @@
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
- long *tce, enum dma_data_direction *direction)
+ long *tce, enum dma_data_direction *direction,
+ bool realmode)
{
long rc;
unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@ -700,7 +688,7 @@
struct iommu_table_ops iommu_table_lpar_multi_ops = {
.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
- .exchange = tce_exchange_pseries,
+ .xchg_no_kill = tce_exchange_pseries,
#endif
.clear = tce_freemulti_pSeriesLP,
.get = tce_get_pSeriesLP
@@ -741,7 +729,7 @@
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
ppci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, ppci->phb->node);
+ iommu_init_table(tbl, ppci->phb->node, 0, 0);
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -770,11 +758,8 @@
tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, phb->node);
- iommu_register_group(PCI_DN(dn)->table_group,
- pci_domain_nr(phb->bus), 0);
+ iommu_init_table(tbl, phb->node, 0, 0);
set_iommu_table_base(&dev->dev, tbl);
- iommu_add_device(&dev->dev);
return;
}
@@ -785,11 +770,10 @@
while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
dn = dn->parent;
- if (dn && PCI_DN(dn)) {
+ if (dn && PCI_DN(dn))
set_iommu_table_base(&dev->dev,
PCI_DN(dn)->table_group->tables[0]);
- iommu_add_device(&dev->dev);
- } else
+ else
printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
pci_name(dev));
}
@@ -806,25 +790,14 @@
early_param("disable_ddw", disable_ddw_setup);
-static void remove_ddw(struct device_node *np, bool remove_prop)
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+ struct property *win)
{
struct dynamic_dma_window_prop *dwp;
- struct property *win64;
- u32 ddw_avail[3];
u64 liobn;
- int ret = 0;
+ int ret;
- ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
-
- win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
- if (!win64)
- return;
-
- if (ret || win64->length < sizeof(*dwp))
- goto delprop;
-
- dwp = win64->value;
+ dwp = win->value;
liobn = (u64)be32_to_cpu(dwp->liobn);
/* clear the whole window, note the arg is in kernel pages */
@@ -837,19 +810,39 @@
pr_debug("%pOF successfully cleared tces in window.\n",
np);
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
if (ret)
pr_warn("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
else
pr_debug("%pOF: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+}
-delprop:
- if (remove_prop)
- ret = of_remove_property(np, win64);
+static void remove_ddw(struct device_node *np, bool remove_prop)
+{
+ struct property *win;
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
+ int ret = 0;
+
+ ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
+ if (ret)
+ return;
+
+ win = of_find_property(np, DIRECT64_PROPNAME, NULL);
+ if (!win)
+ return;
+
+ if (win->length >= sizeof(struct dynamic_dma_window_prop))
+ remove_dma_window(np, ddw_avail, win);
+
+ if (!remove_prop)
+ return;
+
+ ret = of_remove_property(np, win);
if (ret)
pr_warn("%pOF: failed to remove direct window property: %d\n",
np, ret);
@@ -908,14 +901,62 @@
}
machine_arch_initcall(pseries, find_existing_ddw_windows);
+/**
+ * ddw_read_ext - Get the value of an DDW extension
+ * @np: device node from which the extension value is to be read.
+ * @extnum: index number of the extension.
+ * @value: pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and get the value
+ * on index 'extnum'.
+ * It can be used only to check if a property exists, passing value == NULL.
+ *
+ * Returns:
+ * 0 if extension successfully read
+ * -EINVAL if the "ibm,ddw-extensions" does not exist,
+ * -ENODATA if "ibm,ddw-extensions" does not have a value, and
+ * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+ u32 *value)
+{
+ static const char propname[] = "ibm,ddw-extensions";
+ u32 count;
+ int ret;
+
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+ if (ret)
+ return ret;
+
+ if (count < extnum)
+ return -EOVERFLOW;
+
+ if (!value)
+ value = &count;
+
+ return of_property_read_u32_index(np, propname, extnum, value);
+}
+
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
- struct ddw_query_response *query)
+ struct ddw_query_response *query,
+ struct device_node *parent)
{
struct device_node *dn;
struct pci_dn *pdn;
- u32 cfg_addr;
+ u32 cfg_addr, ext_query, query_out[5];
u64 buid;
- int ret;
+ int ret, out_sz;
+
+ /*
+ * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
+ * output parameters ibm,query-pe-dma-windows will have, ranging from
+ * 5 to 6.
+ */
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+ if (!ret && ext_query == 1)
+ out_sz = 6;
+ else
+ out_sz = 5;
/*
* Get the config address and phb buid of the PE window.
@@ -928,11 +969,28 @@
buid = pdn->phb->buid;
cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
- ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
- cfg_addr, BUID_HI(buid), BUID_LO(buid));
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
- BUID_LO(buid), ret);
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret);
+
+ switch (out_sz) {
+ case 5:
+ query->windows_available = query_out[0];
+ query->largest_available_block = query_out[1];
+ query->page_size = query_out[2];
+ query->migration_capable = query_out[3];
+ break;
+ case 6:
+ query->windows_available = query_out[0];
+ query->largest_available_block = ((u64)query_out[1] << 32) |
+ query_out[2];
+ query->page_size = query_out[3];
+ query->migration_capable = query_out[4];
+ break;
+ }
+
return ret;
}
@@ -959,15 +1017,16 @@
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
- ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
- cfg_addr, BUID_HI(buid), BUID_LO(buid),
- page_shift, window_shift);
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+ (u32 *)create, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift);
} while (rtas_busy_delay(ret));
dev_info(&dev->dev,
"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
- "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
- cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
- window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
+ "(liobn = 0x%x starting addr = %x %x)\n",
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+ create->addr_hi, create->addr_lo);
return ret;
}
@@ -979,6 +1038,66 @@
static LIST_HEAD(failed_ddw_pdn_list);
+static phys_addr_t ddw_memory_hotplug_max(void)
+{
+ phys_addr_t max_addr = memory_hotplug_max();
+ struct device_node *memory;
+
+ for_each_node_by_type(memory, "memory") {
+ unsigned long start, size;
+ int n_mem_addr_cells, n_mem_size_cells, len;
+ const __be32 *memcell_buf;
+
+ memcell_buf = of_get_property(memory, "reg", &len);
+ if (!memcell_buf || len <= 0)
+ continue;
+
+ n_mem_addr_cells = of_n_addr_cells(memory);
+ n_mem_size_cells = of_n_size_cells(memory);
+
+ start = of_read_number(memcell_buf, n_mem_addr_cells);
+ memcell_buf += n_mem_addr_cells;
+ size = of_read_number(memcell_buf, n_mem_size_cells);
+ memcell_buf += n_mem_size_cells;
+
+ max_addr = max_t(phys_addr_t, max_addr, start + size);
+ }
+
+ return max_addr;
+}
+
+/*
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
+ * ibm,ddw-extensions, which carries the rtas token for
+ * ibm,reset-pe-dma-windows.
+ * That rtas-call can be used to restore the default DMA window for the device.
+ */
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+ int ret;
+ u32 cfg_addr, reset_dma_win;
+ u64 buid;
+ struct device_node *dn;
+ struct pci_dn *pdn;
+
+ ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+ if (ret)
+ return;
+
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+ ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid));
+ if (ret)
+ dev_info(&dev->dev,
+ "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
+ reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+ ret);
+}
+
/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
@@ -988,7 +1107,7 @@
* pdn: the parent pe node with the ibm,dma_window property
* Future: also check if we can remap the base window for our base page size
*
- * returns the dma offset for use by dma_set_mask
+ * returns the dma offset for use by the direct mapped DMA code.
*/
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
@@ -998,11 +1117,12 @@
int page_shift;
u64 dma_addr, max_addr;
struct device_node *dn;
- u32 ddw_avail[3];
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct direct_window *window;
struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
struct failed_ddw_pdn *fpdn;
+ bool default_win_removed = false;
mutex_lock(&direct_window_init_mutex);
@@ -1031,7 +1151,7 @@
* the property is actually in the parent, not the PE
*/
ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
if (ret)
goto out_failed;
@@ -1042,18 +1162,42 @@
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
- ret = query_ddw(dev, ddw_avail, &query);
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
if (ret != 0)
goto out_failed;
+ /*
+ * If there is no window available, remove the default DMA window,
+ * if it's present. This will make all the resources available to the
+ * new DDW window.
+ * If anything fails after this, we need to restore it, so also check
+ * for extensions presence.
+ */
if (query.windows_available == 0) {
- /*
- * no additional windows are available for this device.
- * We might be able to reallocate the existing window,
- * trading in for a larger page size.
- */
- dev_dbg(&dev->dev, "no free dynamic windows");
- goto out_failed;
+ struct property *default_win;
+ int reset_win_ext;
+
+ default_win = of_find_property(pdn, "ibm,dma-window", NULL);
+ if (!default_win)
+ goto out_failed;
+
+ reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
+ if (reset_win_ext)
+ goto out_failed;
+
+ remove_dma_window(pdn, ddw_avail, default_win);
+ default_win_removed = true;
+
+ /* Query again, to check if the window is available */
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
+ if (ret != 0)
+ goto out_failed;
+
+ if (query.windows_available == 0) {
+ /* no windows are available for this device. */
+ dev_dbg(&dev->dev, "no free dynamic windows");
+ goto out_failed;
+ }
}
if (query.page_size & 4) {
page_shift = 24; /* 16MB */
@@ -1068,9 +1212,9 @@
}
/* verify the window * number of ptes will map the partition */
/* check largest block * page size > max memory hotplug addr */
- max_addr = memory_hotplug_max();
+ max_addr = ddw_memory_hotplug_max();
if (query.largest_available_block < (max_addr >> page_shift)) {
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
"%llu-sized pages\n", max_addr, query.largest_available_block,
1ULL << page_shift);
goto out_failed;
@@ -1144,6 +1288,8 @@
kfree(win64);
out_failed:
+ if (default_win_removed)
+ reset_dma_window(dev, pdn);
fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
if (!fpdn)
@@ -1196,7 +1342,7 @@
iommu_table_setparms_lpar(pci->phb, pdn, tbl,
pci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
pci_domain_nr(pci->phb->bus), 0);
pr_debug(" created table: %p\n", pci->table_group);
@@ -1205,90 +1351,40 @@
}
set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
- iommu_add_device(&dev->dev);
+ iommu_add_device(pci->table_group, &dev->dev);
}
-static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
- bool ddw_enabled = false;
- struct device_node *pdn, *dn;
- struct pci_dev *pdev;
+ struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
const __be32 *dma_window = NULL;
- u64 dma_offset;
-
- if (!dev->dma_mask)
- return -EIO;
-
- if (!dev_is_pci(dev))
- goto check_mask;
-
- pdev = to_pci_dev(dev);
/* only attempt to use a new window if 64-bit DMA is requested */
- if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
- dn = pci_device_to_OF_node(pdev);
- dev_dbg(dev, "node is %pOF\n", dn);
+ if (dma_mask < DMA_BIT_MASK(64))
+ return false;
- /*
- * the device tree might contain the dma-window properties
- * per-device and not necessarily for the bus. So we need to
- * search upwards in the tree until we either hit a dma-window
- * property, OR find a parent with a table already allocated.
- */
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
- pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window)
- break;
- }
- if (pdn && PCI_DN(pdn)) {
- dma_offset = enable_ddw(pdev, pdn);
- if (dma_offset != 0) {
- dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
- set_dma_offset(dev, dma_offset);
- set_dma_ops(dev, &dma_nommu_ops);
- ddw_enabled = true;
- }
- }
+ dev_dbg(&pdev->dev, "node is %pOF\n", dn);
+
+ /*
+ * the device tree might contain the dma-window properties
+ * per-device and not necessarily for the bus. So we need to
+ * search upwards in the tree until we either hit a dma-window
+ * property, OR find a parent with a table already allocated.
+ */
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
+ pdn = pdn->parent) {
+ dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
+ if (dma_window)
+ break;
}
- /* fall back on iommu ops */
- if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
- dev_info(dev, "Restoring 32-bit DMA via iommu\n");
- set_dma_ops(dev, &dma_iommu_ops);
+ if (pdn && PCI_DN(pdn)) {
+ pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
+ if (pdev->dev.archdata.dma_offset)
+ return true;
}
-check_mask:
- if (!dma_supported(dev, dma_mask))
- return -EIO;
-
- *dev->dma_mask = dma_mask;
- return 0;
-}
-
-static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
-{
- if (!dev->dma_mask)
- return 0;
-
- if (!disable_ddw && dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct device_node *dn;
-
- dn = pci_device_to_OF_node(pdev);
-
- /* search upwards for ibm,dma-window */
- for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
- dn = dn->parent)
- if (of_get_property(dn, "ibm,dma-window", NULL))
- break;
- /* if there is a ibm,ddw-applicable property require 64 bits */
- if (dn && PCI_DN(dn) &&
- of_get_property(dn, "ibm,ddw-applicable", NULL))
- return DMA_BIT_MASK(64);
- }
-
- return dma_iommu_ops.get_required_mask(dev);
+ return false;
}
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
@@ -1383,8 +1479,9 @@
if (firmware_has_feature(FW_FEATURE_LPAR)) {
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
- ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
- ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
+ if (!disable_ddw)
+ pseries_pci_controller_ops.iommu_bypass_supported =
+ iommu_bypass_supported_pSeriesLP;
} else {
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
@@ -1401,13 +1498,38 @@
{
if (strcmp(str, "off") == 0 &&
firmware_has_feature(FW_FEATURE_LPAR) &&
- firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
+ firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
- powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
+ powerpc_firmware_features &=
+ ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
}
return 1;
}
__setup("multitce=", disable_multitce);
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ case BUS_NOTIFY_DEL_DEVICE:
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+ .notifier_call = tce_iommu_bus_notifier,
+};
+
+static int __init tce_iommu_bus_notifier_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+ return 0;
+}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
--
Gitblit v1.6.2