From b22da3d8526a935aa31e086e63f60ff3246cb61c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 09 Dec 2023 07:24:11 +0000 Subject: [PATCH] add stmac read mac from eeprom --- kernel/drivers/iommu/iommu.c | 1829 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 1438 insertions(+), 391 deletions(-) diff --git a/kernel/drivers/iommu/iommu.c b/kernel/drivers/iommu/iommu.c index 8f3c832..793c528 100644 --- a/kernel/drivers/iommu/iommu.c +++ b/kernel/drivers/iommu/iommu.c @@ -1,28 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. * Author: Joerg Roedel <jroedel@suse.de> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define pr_fmt(fmt) "iommu: " fmt #include <linux/device.h> #include <linux/kernel.h> +#include <linux/bits.h> #include <linux/bug.h> #include <linux/types.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/iommu.h> @@ -32,19 +22,16 @@ #include <linux/pci.h> #include <linux/bitops.h> #include <linux/property.h> +#include <linux/fsl/mc.h> +#include <linux/module.h> #include <trace/events/iommu.h> static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); -#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH -static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; -#else -static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; -#endif -struct iommu_callback_data { - const struct iommu_ops *ops; -}; +static unsigned int iommu_def_domain_type __read_mostly; +static bool iommu_dma_strict __read_mostly = true; +static u32 iommu_cmd_line __read_mostly; struct iommu_group { struct kobject kobj; @@ -58,6 +45,7 @@ int id; struct iommu_domain *default_domain; struct iommu_domain *domain; + struct list_head entry; }; struct group_device { @@ -74,11 +62,38 @@ }; static const char * const iommu_group_resv_type_string[] = { - [IOMMU_RESV_DIRECT] = "direct", - [IOMMU_RESV_RESERVED] = "reserved", - [IOMMU_RESV_MSI] = "msi", - [IOMMU_RESV_SW_MSI] = "msi", + [IOMMU_RESV_DIRECT] = "direct", + [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", + [IOMMU_RESV_RESERVED] = "reserved", + [IOMMU_RESV_MSI] = "msi", + [IOMMU_RESV_SW_MSI] = "msi", }; + +#define IOMMU_CMD_LINE_DMA_API BIT(0) + +static void iommu_set_cmd_line_dma_api(void) +{ + iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; +} + +static bool iommu_cmd_line_dma_api(void) +{ + return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API); +} + +static int iommu_alloc_default_domain(struct iommu_group *group, + struct device *dev); +static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, + unsigned type); +static int __iommu_attach_device(struct iommu_domain *domain, + struct device *dev); +static int __iommu_attach_group(struct iommu_domain *domain, + struct iommu_group *group); +static void __iommu_detach_group(struct iommu_domain
*domain, + struct iommu_group *group); +static int iommu_create_device_direct_mappings(struct iommu_group *group, + struct device *dev); +static struct iommu_group *iommu_group_get_for_dev(struct device *dev); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ @@ -92,12 +107,55 @@ static LIST_HEAD(iommu_device_list); static DEFINE_SPINLOCK(iommu_device_lock); +/* + * Use a function instead of an array here because the domain-type is a + * bit-field, so an array would waste memory. + */ +static const char *iommu_domain_type_str(unsigned int t) +{ + switch (t) { + case IOMMU_DOMAIN_BLOCKED: + return "Blocked"; + case IOMMU_DOMAIN_IDENTITY: + return "Passthrough"; + case IOMMU_DOMAIN_UNMANAGED: + return "Unmanaged"; + case IOMMU_DOMAIN_DMA: + return "Translated"; + default: + return "Unknown"; + } +} + +static int __init iommu_subsys_init(void) +{ + bool cmd_line = iommu_cmd_line_dma_api(); + + if (!cmd_line) { + if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) + iommu_set_default_passthrough(false); + else + iommu_set_default_translated(false); + + if (iommu_default_passthrough() && mem_encrypt_active()) { + pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); + iommu_set_default_translated(false); + } + } + + pr_info("Default domain type: %s %s\n", + iommu_domain_type_str(iommu_def_domain_type), + cmd_line ? "(set via kernel command line)" : ""); + + return 0; +} +subsys_initcall(iommu_subsys_init); + int iommu_device_register(struct iommu_device *iommu) { spin_lock(&iommu_device_lock); list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); - return 0; } EXPORT_SYMBOL_GPL(iommu_device_register); @@ -110,14 +168,151 @@ } EXPORT_SYMBOL_GPL(iommu_device_unregister); -static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, - unsigned type); -static int __iommu_attach_device(struct iommu_domain *domain, - struct device *dev); -static int __iommu_attach_group(struct iommu_domain *domain, - struct iommu_group *group); -static void __iommu_detach_group(struct iommu_domain *domain, - struct iommu_group *group); +static struct dev_iommu *dev_iommu_get(struct device *dev) +{ + struct dev_iommu *param = dev->iommu; + + if (param) + return param; + + param = kzalloc(sizeof(*param), GFP_KERNEL); + if (!param) + return NULL; + + mutex_init(&param->lock); + dev->iommu = param; + return param; +} + +static void dev_iommu_free(struct device *dev) +{ + struct dev_iommu *param = dev->iommu; + + dev->iommu = NULL; + if (param->fwspec) { + fwnode_handle_put(param->fwspec->iommu_fwnode); + kfree(param->fwspec); + } + kfree(param); +} + +static int __iommu_probe_device(struct device *dev, struct list_head *group_list) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_device *iommu_dev; + struct iommu_group *group; + int ret; + + if (!ops) + return -ENODEV; + + if (!dev_iommu_get(dev)) + return -ENOMEM; + + if (!try_module_get(ops->owner)) { + ret = -EINVAL; + goto err_free; + } + + iommu_dev = ops->probe_device(dev); + if (IS_ERR(iommu_dev)) { + ret = PTR_ERR(iommu_dev); + goto out_module_put; + } + + dev->iommu->iommu_dev = iommu_dev; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto out_release; + } + iommu_group_put(group); + + if (group_list && !group->default_domain && list_empty(&group->entry)) + list_add_tail(&group->entry, group_list); + + iommu_device_link(iommu_dev, dev); + + return 0; + +out_release: +
ops->release_device(dev); + +out_module_put: + module_put(ops->owner); + +err_free: + dev_iommu_free(dev); + + return ret; +} + +int iommu_probe_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_group *group; + int ret; + + ret = __iommu_probe_device(dev, NULL); + if (ret) + goto err_out; + + group = iommu_group_get(dev); + if (!group) + goto err_release; + + /* + * Try to allocate a default domain - needs support from the + * IOMMU driver. There are still some drivers which don't + * support default domains, so the return value is not yet + * checked. + */ + mutex_lock(&group->mutex); + iommu_alloc_default_domain(group, dev); + + if (group->default_domain) { + ret = __iommu_attach_device(group->default_domain, dev); + if (ret) { + mutex_unlock(&group->mutex); + iommu_group_put(group); + goto err_release; + } + } + + iommu_create_device_direct_mappings(group, dev); + + mutex_unlock(&group->mutex); + iommu_group_put(group); + + if (ops->probe_finalize) + ops->probe_finalize(dev); + + return 0; + +err_release: + iommu_release_device(dev); + +err_out: + return ret; + +} + +void iommu_release_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (!dev->iommu) + return; + + iommu_device_unlink(dev->iommu->iommu_dev, dev); + + ops->release_device(dev); + + iommu_group_remove_device(dev); + module_put(ops->owner); + dev_iommu_free(dev); +} static int __init iommu_set_def_domain_type(char *str) { @@ -128,10 +323,20 @@ if (ret) return ret; - iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA; + if (pt) + iommu_set_default_passthrough(true); + else + iommu_set_default_translated(true); + return 0; } early_param("iommu.passthrough", iommu_set_def_domain_type); + +static int __init iommu_dma_setup(char *str) +{ + return kstrtobool(str, &iommu_dma_strict); +} +early_param("iommu.strict", iommu_dma_setup); static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) @@ -186,60 +391,58 @@ * @new: new region to insert * @regions: list of regions * - * The new element is sorted by address with respect to the other - * regions of the same type. In case it overlaps with another - * region of the same type, regions are merged. In case it - * overlaps with another region of different type, regions are - * not merged. + * Elements are sorted by start address and overlapping segments + * of the same type are merged. 
*/ static int iommu_insert_resv_region(struct iommu_resv_region *new, struct list_head *regions) { - struct iommu_resv_region *region; - phys_addr_t start = new->start; - phys_addr_t end = new->start + new->length - 1; - struct list_head *pos = regions->next; + struct iommu_resv_region *iter, *tmp, *nr, *top; + LIST_HEAD(stack); - while (pos != regions) { - struct iommu_resv_region *entry = - list_entry(pos, struct iommu_resv_region, list); - phys_addr_t a = entry->start; - phys_addr_t b = entry->start + entry->length - 1; - int type = entry->type; - - if (end < a) { - goto insert; - } else if (start > b) { - pos = pos->next; - } else if ((start >= a) && (end <= b)) { - if (new->type == type) - return 0; - else - pos = pos->next; - } else { - if (new->type == type) { - phys_addr_t new_start = min(a, start); - phys_addr_t new_end = max(b, end); - int ret; - - list_del(&entry->list); - entry->start = new_start; - entry->length = new_end - new_start + 1; - ret = iommu_insert_resv_region(entry, regions); - kfree(entry); - return ret; - } else { - pos = pos->next; - } - } - } -insert: - region = iommu_alloc_resv_region(new->start, new->length, - new->prot, new->type); - if (!region) + nr = iommu_alloc_resv_region(new->start, new->length, + new->prot, new->type); + if (!nr) return -ENOMEM; - list_add_tail(&region->list, pos); + /* First add the new element based on start address sorting */ + list_for_each_entry(iter, regions, list) { + if (nr->start < iter->start || + (nr->start == iter->start && nr->type <= iter->type)) + break; + } + list_add_tail(&nr->list, &iter->list); + + /* Merge overlapping segments of type nr->type in @regions, if any */ + list_for_each_entry_safe(iter, tmp, regions, list) { + phys_addr_t top_end, iter_end = iter->start + iter->length - 1; + + /* no merge needed on elements of different types than @new */ + if (iter->type != new->type) { + list_move_tail(&iter->list, &stack); + continue; + } + + /* look for the last stack element of same type as @iter */ + list_for_each_entry_reverse(top, &stack, list) + if (top->type == iter->type) + goto check_overlap; + + list_move_tail(&iter->list, &stack); + continue; + +check_overlap: + top_end = top->start + top->length - 1; + + if (iter->start > top_end + 1) { + list_move_tail(&iter->list, &stack); + } else { + top->length = max(top_end, iter_end) - top->start + 1; + list_del(&iter->list); + kfree(iter); + } + } + list_splice(&stack, regions); return 0; } @@ -381,6 +584,7 @@ group->kobj.kset = iommu_group_kset; mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); + INIT_LIST_HEAD(&group->entry); BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL); @@ -522,8 +726,8 @@ } EXPORT_SYMBOL_GPL(iommu_group_set_name); -static int iommu_group_create_direct_mappings(struct iommu_group *group, - struct device *dev) +static int iommu_create_device_direct_mappings(struct iommu_group *group, + struct device *dev) { struct iommu_domain *domain = group->default_domain; struct iommu_resv_region *entry; @@ -551,7 +755,8 @@ start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); - if (entry->type != IOMMU_RESV_DIRECT) + if (entry->type != IOMMU_RESV_DIRECT && + entry->type != IOMMU_RESV_DIRECT_RELAXABLE) continue; for (addr = start; addr < end; addr += pg_size) { @@ -568,12 +773,21 @@ } - iommu_flush_tlb_all(domain); + iommu_flush_iotlb_all(domain); out: iommu_put_resv_regions(dev, &mappings); return ret; +} + +static bool iommu_is_attach_deferred(struct
iommu_domain *domain, + struct device *dev) +{ + if (domain->ops->is_attach_deferred) + return domain->ops->is_attach_deferred(domain, dev); + + return false; } /** @@ -626,11 +840,9 @@ dev->iommu_group = group; - iommu_group_create_direct_mappings(group, dev); - mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain) + if (group->domain && !iommu_is_attach_deferred(group->domain, dev)) ret = __iommu_attach_device(group->domain, dev); mutex_unlock(&group->mutex); if (ret) @@ -642,7 +854,7 @@ trace_add_device_to_group(group->id, dev); - pr_info("Adding device %s to group %d\n", dev_name(dev), group->id); + dev_info(dev, "Adding to iommu group %d\n", group->id); return 0; @@ -659,7 +871,7 @@ sysfs_remove_link(&dev->kobj, "iommu_group"); err_free_device: kfree(device); - pr_err("Failed to add device %s to group %d: %d\n", dev_name(dev), group->id, ret); + dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); return ret; } EXPORT_SYMBOL_GPL(iommu_group_add_device); @@ -676,7 +888,10 @@ struct iommu_group *group = dev->iommu_group; struct group_device *tmp_device, *device = NULL; - pr_info("Removing device %s from group %d\n", dev_name(dev), group->id); + if (!group) + return; + + dev_info(dev, "Removing from iommu group %d\n", group->id); /* Pre-notify listeners that a device is being removed. */ blocking_notifier_call_chain(&group->notifier, @@ -833,6 +1048,217 @@ return blocking_notifier_chain_unregister(&group->notifier, nb); } EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); + +/** + * iommu_register_device_fault_handler() - Register a device fault handler + * @dev: the device + * @handler: the fault handler + * @data: private data passed as argument to the handler + * + * When an IOMMU fault event is received, this handler gets called with the + * fault event and data as argument. The handler should return 0 on success. If + * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also + * complete the fault by calling iommu_page_response() with one of the following + * response codes: + * - IOMMU_PAGE_RESP_SUCCESS: retry the translation + * - IOMMU_PAGE_RESP_INVALID: terminate the fault + * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting + * page faults if possible. + * + * Return 0 if the fault handler was installed successfully, or an error. + */ +int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data) +{ + struct dev_iommu *param = dev->iommu; + int ret = 0; + + if (!param) + return -EINVAL; + + mutex_lock(&param->lock); + /* Only allow one fault handler registered for each device */ + if (param->fault_param) { + ret = -EBUSY; + goto done_unlock; + } + + get_device(dev); + param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); + if (!param->fault_param) { + put_device(dev); + ret = -ENOMEM; + goto done_unlock; + } + param->fault_param->handler = handler; + param->fault_param->data = data; + mutex_init(&param->fault_param->lock); + INIT_LIST_HEAD(&param->fault_param->faults); + +done_unlock: + mutex_unlock(&param->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); + +/** + * iommu_unregister_device_fault_handler() - Unregister the device fault handler + * @dev: the device + * + * Remove the device fault handler installed with + * iommu_register_device_fault_handler(). + * + * Return 0 on success, or an error.
+ */ +int iommu_unregister_device_fault_handler(struct device *dev) +{ + struct dev_iommu *param = dev->iommu; + int ret = 0; + + if (!param) + return -EINVAL; + + mutex_lock(&param->lock); + + if (!param->fault_param) + goto unlock; + + /* we cannot unregister handler if there are pending faults */ + if (!list_empty(&param->fault_param->faults)) { + ret = -EBUSY; + goto unlock; + } + + kfree(param->fault_param); + param->fault_param = NULL; + put_device(dev); +unlock: + mutex_unlock(&param->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); + +/** + * iommu_report_device_fault() - Report fault event to device driver + * @dev: the device + * @evt: fault event data + * + * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ + * handler. When this function fails and the fault is recoverable, it is the + * caller's responsibility to complete the fault. + * + * Return 0 on success, or an error. + */ +int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +{ + struct dev_iommu *param = dev->iommu; + struct iommu_fault_event *evt_pending = NULL; + struct iommu_fault_param *fparam; + int ret = 0; + + if (!param || !evt) + return -EINVAL; + + /* we only report device fault if there is a handler registered */ + mutex_lock(&param->lock); + fparam = param->fault_param; + if (!fparam || !fparam->handler) { + ret = -EINVAL; + goto done_unlock; + } + + if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && + (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), + GFP_KERNEL); + if (!evt_pending) { + ret = -ENOMEM; + goto done_unlock; + } + mutex_lock(&fparam->lock); + list_add_tail(&evt_pending->list, &fparam->faults); + mutex_unlock(&fparam->lock); + } + + ret = fparam->handler(&evt->fault, fparam->data); + if (ret && evt_pending) { + mutex_lock(&fparam->lock); + list_del(&evt_pending->list); + mutex_unlock(&fparam->lock); + kfree(evt_pending); + } +done_unlock: + mutex_unlock(&param->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_report_device_fault); + +int iommu_page_response(struct device *dev, + struct iommu_page_response *msg) +{ + bool needs_pasid; + int ret = -EINVAL; + struct iommu_fault_event *evt; + struct iommu_fault_page_request *prm; + struct dev_iommu *param = dev->iommu; + bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (!domain || !domain->ops->page_response) + return -ENODEV; + + if (!param || !param->fault_param) + return -EINVAL; + + if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || + msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) + return -EINVAL; + + /* Only send response if there is a fault report pending */ + mutex_lock(&param->fault_param->lock); + if (list_empty(&param->fault_param->faults)) { + dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); + goto done_unlock; + } + /* + * Check if we have a matching page request pending to respond, + * otherwise return -EINVAL + */ + list_for_each_entry(evt, &param->fault_param->faults, list) { + prm = &evt->fault.prm; + if (prm->grpid != msg->grpid) + continue; + + /* + * If the PASID is required, the corresponding request is + * matched using the group ID, the PASID valid bit and the PASID + * value. Otherwise only the group ID matches request and + * response.
+ */ + needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; + if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) + continue; + + if (!needs_pasid && has_pasid) { + /* No big deal, just clear it. */ + msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; + msg->pasid = 0; + } + + ret = domain->ops->page_response(dev, evt, msg); + list_del(&evt->list); + kfree(evt); + break; + } + +done_unlock: + mutex_unlock(&param->fault_param->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_page_response); /** * iommu_group_id - Return ID for a group @@ -1032,6 +1458,74 @@ } EXPORT_SYMBOL_GPL(pci_device_group); +/* Get the IOMMU group for device on fsl-mc bus */ +struct iommu_group *fsl_mc_device_group(struct device *dev) +{ + struct device *cont_dev = fsl_mc_cont_dev(dev); + struct iommu_group *group; + + group = iommu_group_get(cont_dev); + if (!group) + group = iommu_group_alloc(); + return group; +} +EXPORT_SYMBOL_GPL(fsl_mc_device_group); + +static int iommu_get_def_domain_type(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + unsigned int type = 0; + + if (ops->def_domain_type) + type = ops->def_domain_type(dev); + + return (type == 0) ? iommu_def_domain_type : type; +} + +static int iommu_group_alloc_default_domain(struct bus_type *bus, + struct iommu_group *group, + unsigned int type) +{ + struct iommu_domain *dom; + + dom = __iommu_domain_alloc(bus, type); + if (!dom && type != IOMMU_DOMAIN_DMA) { + dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); + if (dom) + pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", + type, group->name); + } + + if (!dom) + return -ENOMEM; + + group->default_domain = dom; + if (!group->domain) + group->domain = dom; + + if (!iommu_dma_strict) { + int attr = 1; + iommu_domain_set_attr(dom, + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + &attr); + } + + return 0; +} + +static int iommu_alloc_default_domain(struct iommu_group *group, + struct device *dev) +{ + unsigned int type; + + if (group->default_domain) + return 0; + + type = iommu_get_def_domain_type(dev); + + return iommu_group_alloc_default_domain(dev->bus, group, type); +} + /** * iommu_group_get_for_dev - Find or create the IOMMU group for a device * @dev: target device @@ -1042,7 +1536,7 @@ * to the returned IOMMU group, which will already include the provided * device. The reference should be released with iommu_group_put(). */ -struct iommu_group *iommu_group_get_for_dev(struct device *dev) +static struct iommu_group *iommu_group_get_for_dev(struct device *dev) { const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; @@ -1062,60 +1556,37 @@ if (IS_ERR(group)) return group; - /* - * Try to allocate a default domain - needs support from the - * IOMMU driver.
- */ - if (!group->default_domain) { - struct iommu_domain *dom; - - dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type); - if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) { - dev_warn(dev, - "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", - iommu_def_domain_type); - dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); - } - - group->default_domain = dom; - if (!group->domain) - group->domain = dom; - } - ret = iommu_group_add_device(group, dev); - if (ret) { - iommu_group_put(group); - return ERR_PTR(ret); - } + if (ret) + goto out_put_group; return group; + +out_put_group: + iommu_group_put(group); + + return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(iommu_group_get_for_dev); struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { return group->default_domain; } -EXPORT_SYMBOL_GPL(iommu_group_default_domain); -static int add_iommu_group(struct device *dev, void *data) +static int probe_iommu_group(struct device *dev, void *data) { - struct iommu_callback_data *cb = data; - const struct iommu_ops *ops = cb->ops; + struct list_head *group_list = data; + struct iommu_group *group; int ret; - if (!ops->add_device) + /* Device is probed already if in a group */ + group = iommu_group_get(dev); + if (group) { + iommu_group_put(group); return 0; + } - WARN_ON(dev->iommu_group); - - ret = ops->add_device(dev); - - /* - * We ignore -ENODEV errors for now, as they just mean that the - * device is not translated by an IOMMU. We still care about - * other errors and fail to initialize when they happen. - */ + ret = __iommu_probe_device(dev, group_list); if (ret == -ENODEV) ret = 0; @@ -1124,11 +1595,7 @@ static int remove_iommu_group(struct device *dev, void *data) { - struct iommu_callback_data *cb = data; - const struct iommu_ops *ops = cb->ops; - - if (ops->remove_device && dev->iommu_group) - ops->remove_device(dev); + iommu_release_device(dev); return 0; } @@ -1136,27 +1603,22 @@ static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct device *dev = data; - const struct iommu_ops *ops = dev->bus->iommu_ops; - struct iommu_group *group; unsigned long group_action = 0; + struct device *dev = data; + struct iommu_group *group; /* * ADD/DEL call into iommu driver ops if provided, which may * result in ADD/DEL notifiers to group->notifier */ if (action == BUS_NOTIFY_ADD_DEVICE) { - if (ops->add_device) { - int ret; + int ret; - ret = ops->add_device(dev); - return (ret) ? NOTIFY_DONE : NOTIFY_OK; - } + ret = iommu_probe_device(dev); + return (ret) ? 
NOTIFY_DONE : NOTIFY_OK; } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { - if (ops->remove_device && dev->iommu_group) { - ops->remove_device(dev); - return 0; - } + iommu_release_device(dev); + return NOTIFY_OK; } /* @@ -1190,13 +1652,152 @@ return 0; } +struct __group_domain_type { + struct device *dev; + unsigned int type; +}; + +static int probe_get_default_domain_type(struct device *dev, void *data) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + struct __group_domain_type *gtype = data; + unsigned int type = 0; + + if (ops->def_domain_type) + type = ops->def_domain_type(dev); + + if (type) { + if (gtype->type && gtype->type != type) { + dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", + iommu_domain_type_str(type), + dev_name(gtype->dev), + iommu_domain_type_str(gtype->type)); + gtype->type = 0; + } + + if (!gtype->dev) { + gtype->dev = dev; + gtype->type = type; + } + } + + return 0; +} + +static void probe_alloc_default_domain(struct bus_type *bus, + struct iommu_group *group) +{ + struct __group_domain_type gtype; + + memset(&gtype, 0, sizeof(gtype)); + + /* Ask for default domain requirements of all devices in the group */ + __iommu_group_for_each_dev(group, &gtype, + probe_get_default_domain_type); + + if (!gtype.type) + gtype.type = iommu_def_domain_type; + + iommu_group_alloc_default_domain(bus, group, gtype.type); + +} + +static int iommu_group_do_dma_attach(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + int ret = 0; + + if (!iommu_is_attach_deferred(domain, dev)) + ret = __iommu_attach_device(domain, dev); + + return ret; +} + +static int __iommu_group_dma_attach(struct iommu_group *group) +{ + return __iommu_group_for_each_dev(group, group->default_domain, + iommu_group_do_dma_attach); +} + +static int iommu_group_do_probe_finalize(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + if (domain->ops->probe_finalize) + domain->ops->probe_finalize(dev); + + return 0; +} + +static void __iommu_group_dma_finalize(struct iommu_group *group) +{ + __iommu_group_for_each_dev(group, group->default_domain, + iommu_group_do_probe_finalize); +} + +static int iommu_do_create_direct_mappings(struct device *dev, void *data) +{ + struct iommu_group *group = data; + + iommu_create_device_direct_mappings(group, dev); + + return 0; +} + +static int iommu_group_create_direct_mappings(struct iommu_group *group) +{ + return __iommu_group_for_each_dev(group, group, + iommu_do_create_direct_mappings); +} + +int bus_iommu_probe(struct bus_type *bus) +{ + struct iommu_group *group, *next; + LIST_HEAD(group_list); + int ret; + + /* + * This code-path does not allocate the default domain when + * creating the iommu group, so do it after the groups are + * created.
+ */ + ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); + if (ret) + return ret; + + list_for_each_entry_safe(group, next, &group_list, entry) { + /* Remove item from the list */ + list_del_init(&group->entry); + + mutex_lock(&group->mutex); + + /* Try to allocate default domain */ + probe_alloc_default_domain(bus, group); + + if (!group->default_domain) { + mutex_unlock(&group->mutex); + continue; + } + + iommu_group_create_direct_mappings(group); + + ret = __iommu_group_dma_attach(group); + + mutex_unlock(&group->mutex); + + if (ret) + break; + + __iommu_group_dma_finalize(group); + } + + return ret; +} + static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { - int err; struct notifier_block *nb; - struct iommu_callback_data cb = { - .ops = ops, - }; + int err; nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); if (!nb) @@ -1208,15 +1809,16 @@ if (err) goto out_free; - err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); + err = bus_iommu_probe(bus); if (err) goto out_err; + return 0; out_err: /* Clean up */ - bus_for_each_dev(bus, NULL, &cb, remove_iommu_group); + bus_for_each_dev(bus, NULL, NULL, remove_iommu_group); bus_unregister_notifier(bus, nb); out_free: @@ -1241,6 +1843,11 @@ int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) { int err; + + if (ops == NULL) { + bus->iommu_ops = NULL; + return 0; + } if (bus->iommu_ops != NULL) return -EBUSY; @@ -1310,8 +1917,6 @@ domain->type = type; /* Assume all sizes by default; the driver may override this later */ domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; - domain->is_debug_domain = false; - memset(domain->name, 0, IOMMU_DOMAIN_NAME_LEN); return domain; } @@ -1333,28 +1938,12 @@ { int ret; - /* Hack for disable iommu */ - if (!domain) { - ret = dev->bus->iommu_ops->attach_dev(domain, dev); - return ret; - } - - if ((domain->ops->is_attach_deferred != NULL) && - domain->ops->is_attach_deferred(domain, dev)) - return 0; - if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; ret = domain->ops->attach_dev(domain, dev); - if (!ret) { + if (!ret) trace_attach_device_to_domain(dev); - - if (!strnlen(domain->name, IOMMU_DOMAIN_NAME_LEN)) { - strlcpy(domain->name, dev_name(dev), - IOMMU_DOMAIN_NAME_LEN); - } - } return ret; } @@ -1388,11 +1977,220 @@ } EXPORT_SYMBOL_GPL(iommu_attach_device); +/* + * Check flags and other user provided data for valid combinations. We also + * make sure no reserved fields or unused flags are set. This is to ensure + * not breaking userspace in the future when these fields or flags are used. 
+ */ +static int iommu_check_cache_invl_data(struct iommu_cache_invalidate_info *info) +{ + u32 mask; + int i; + + if (info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) + return -EINVAL; + + mask = (1 << IOMMU_CACHE_INV_TYPE_NR) - 1; + if (info->cache & ~mask) + return -EINVAL; + + if (info->granularity >= IOMMU_INV_GRANU_NR) + return -EINVAL; + + switch (info->granularity) { + case IOMMU_INV_GRANU_ADDR: + if (info->cache & IOMMU_CACHE_INV_TYPE_PASID) + return -EINVAL; + + mask = IOMMU_INV_ADDR_FLAGS_PASID | + IOMMU_INV_ADDR_FLAGS_ARCHID | + IOMMU_INV_ADDR_FLAGS_LEAF; + + if (info->granu.addr_info.flags & ~mask) + return -EINVAL; + break; + case IOMMU_INV_GRANU_PASID: + mask = IOMMU_INV_PASID_FLAGS_PASID | + IOMMU_INV_PASID_FLAGS_ARCHID; + if (info->granu.pasid_info.flags & ~mask) + return -EINVAL; + + break; + case IOMMU_INV_GRANU_DOMAIN: + if (info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* Check reserved padding fields */ + for (i = 0; i < sizeof(info->padding); i++) { + if (info->padding[i]) + return -EINVAL; + } + + return 0; +} + +int iommu_uapi_cache_invalidate(struct iommu_domain *domain, struct device *dev, + void __user *uinfo) +{ + struct iommu_cache_invalidate_info inv_info = { 0 }; + u32 minsz; + int ret; + + if (unlikely(!domain->ops->cache_invalidate)) + return -ENODEV; + + /* + * No new spaces can be added before the variable sized union, the + * minimum size is the offset to the union. + */ + minsz = offsetof(struct iommu_cache_invalidate_info, granu); + + /* Copy minsz from user to get flags and argsz */ + if (copy_from_user(&inv_info, uinfo, minsz)) + return -EFAULT; + + /* Fields before the variable size union are mandatory */ + if (inv_info.argsz < minsz) + return -EINVAL; + + /* PASID and address granu require additional info beyond minsz */ + if (inv_info.granularity == IOMMU_INV_GRANU_PASID && + inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.pasid_info)) + return -EINVAL; + + if (inv_info.granularity == IOMMU_INV_GRANU_ADDR && + inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.addr_info)) + return -EINVAL; + + /* + * User might be using a newer UAPI header which has a larger data + * size, we shall support the existing flags within the current + * size. Copy the remaining user data _after_ minsz but not more + * than the current kernel supported size. 
+ */ + if (copy_from_user((void *)&inv_info + minsz, uinfo + minsz, + min_t(u32, inv_info.argsz, sizeof(inv_info)) - minsz)) + return -EFAULT; + + /* Now the argsz is validated, check the content */ + ret = iommu_check_cache_invl_data(&inv_info); + if (ret) + return ret; + + return domain->ops->cache_invalidate(domain, dev, &inv_info); +} +EXPORT_SYMBOL_GPL(iommu_uapi_cache_invalidate); + +static int iommu_check_bind_data(struct iommu_gpasid_bind_data *data) +{ + u64 mask; + int i; + + if (data->version != IOMMU_GPASID_BIND_VERSION_1) + return -EINVAL; + + /* Check the range of supported formats */ + if (data->format >= IOMMU_PASID_FORMAT_LAST) + return -EINVAL; + + /* Check all flags */ + mask = IOMMU_SVA_GPASID_VAL; + if (data->flags & ~mask) + return -EINVAL; + + /* Check reserved padding fields */ + for (i = 0; i < sizeof(data->padding); i++) { + if (data->padding[i]) + return -EINVAL; + } + + return 0; +} + +static int iommu_sva_prepare_bind_data(void __user *udata, + struct iommu_gpasid_bind_data *data) +{ + u32 minsz; + + /* + * No new spaces can be added before the variable sized union, the + * minimum size is the offset to the union. + */ + minsz = offsetof(struct iommu_gpasid_bind_data, vendor); + + /* Copy minsz from user to get flags and argsz */ + if (copy_from_user(data, udata, minsz)) + return -EFAULT; + + /* Fields before the variable size union are mandatory */ + if (data->argsz < minsz) + return -EINVAL; + /* + * User might be using a newer UAPI header, we shall let IOMMU vendor + * driver decide on what size it needs. Since the guest PASID bind data + * can be vendor specific, larger argsz could be the result of extension + * for one vendor but it should not affect another vendor. + * Copy the remaining user data _after_ minsz + */ + if (copy_from_user((void *)data + minsz, udata + minsz, + min_t(u32, data->argsz, sizeof(*data)) - minsz)) + return -EFAULT; + + return iommu_check_bind_data(data); +} + +int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, + void __user *udata) +{ + struct iommu_gpasid_bind_data data = { 0 }; + int ret; + + if (unlikely(!domain->ops->sva_bind_gpasid)) + return -ENODEV; + + ret = iommu_sva_prepare_bind_data(udata, &data); + if (ret) + return ret; + + return domain->ops->sva_bind_gpasid(domain, dev, &data); +} +EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid); + +int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid) +{ + if (unlikely(!domain->ops->sva_unbind_gpasid)) + return -ENODEV; + + return domain->ops->sva_unbind_gpasid(dev, pasid); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); + +int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, + void __user *udata) +{ + struct iommu_gpasid_bind_data data = { 0 }; + int ret; + + if (unlikely(!domain->ops->sva_bind_gpasid)) + return -ENODEV; + + ret = iommu_sva_prepare_bind_data(udata, &data); + if (ret) + return ret; + + return iommu_sva_unbind_gpasid(domain, dev, data.hpasid); +} +EXPORT_SYMBOL_GPL(iommu_uapi_sva_unbind_gpasid); + static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - if ((domain->ops->is_attach_deferred != NULL) && - domain->ops->is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(domain, dev)) return; if (unlikely(domain->ops->detach_dev == NULL)) @@ -1443,7 +2241,16 @@ EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); /* - * IOMMU groups are really the natrual working unit of the IOMMU, but + * For IOMMU_DOMAIN_DMA implementations which already 
provide their own + * guarantees that the group and its default domain are valid and correct. + */ +struct iommu_domain *iommu_get_dma_domain(struct device *dev) +{ + return dev->iommu_group->default_domain; +} + +/* + * IOMMU groups are really the natural working unit of the IOMMU, but * the IOMMU API works on domains and devices. Bridge that gap by * iterating over the devices in a group. Ideally we'd have a single * device which represents the requestor ID of the group, but we also @@ -1463,6 +2270,9 @@ struct iommu_group *group) { int ret; + + if (group->default_domain && group->domain != group->default_domain) + return -EBUSY; ret = __iommu_group_for_each_dev(group, domain, iommu_group_do_attach_device); @@ -1493,18 +2303,28 @@ return 0; } -/* - * Although upstream implements detaching the default_domain as a noop, - * the "SID switch" secure usecase require complete removal of SIDS/SMRS - * from HLOS iommu registers. - */ static void __iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) { - __iommu_group_for_each_dev(group, domain, + int ret; + + if (!group->default_domain) { + __iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); - group->domain = NULL; - return; + group->domain = NULL; + return; + } + + if (group->domain == group->default_domain) + return; + + /* Detach by re-attaching to the default domain */ + ret = __iommu_group_for_each_dev(group, group->default_domain, + iommu_group_do_attach_device); + if (ret != 0) + WARN_ON(1); + else + group->domain = group->default_domain; } void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) @@ -1524,78 +2344,96 @@ } EXPORT_SYMBOL_GPL(iommu_iova_to_phys); -phys_addr_t iommu_iova_to_phys_hard(struct iommu_domain *domain, - dma_addr_t iova) +static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, size_t *count) { - if (unlikely(domain->ops->iova_to_phys_hard == NULL)) - return 0; + unsigned int pgsize_idx, pgsize_idx_next; + unsigned long pgsizes; + size_t offset, pgsize, pgsize_next; + unsigned long addr_merge = paddr | iova; - return domain->ops->iova_to_phys_hard(domain, iova); -} + /* Page sizes supported by the hardware and small enough for @size */ + pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); -uint64_t iommu_iova_to_pte(struct iommu_domain *domain, - dma_addr_t iova) -{ - if (unlikely(domain->ops->iova_to_pte == NULL)) - return 0; + /* Constrain the page sizes further based on the maximum alignment */ + if (likely(addr_merge)) + pgsizes &= GENMASK(__ffs(addr_merge), 0); - return domain->ops->iova_to_pte(domain, iova); -} + /* Make sure we have at least one suitable page size */ + BUG_ON(!pgsizes); -bool iommu_is_iova_coherent(struct iommu_domain *domain, dma_addr_t iova) -{ - if (unlikely(domain->ops->is_iova_coherent == NULL)) - return 0; + /* Pick the biggest page size remaining */ + pgsize_idx = __fls(pgsizes); + pgsize = BIT(pgsize_idx); + if (!count) + return pgsize; - return domain->ops->is_iova_coherent(domain, iova); -} -size_t iommu_pgsize(unsigned long pgsize_bitmap, - unsigned long addr_merge, size_t size) -{ - unsigned int pgsize_idx; - size_t pgsize; + /* Find the next biggest support page size, if it exists */ + pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); + if (!pgsizes) + goto out_set_count; - /* Max page size that still fits into 'size' */ - pgsize_idx = __fls(size); + pgsize_idx_next = __ffs(pgsizes); + pgsize_next = BIT(pgsize_idx_next); - /* need to consider 
alignment requirements ? */ - if (likely(addr_merge)) { - /* Max page size allowed by address */ - unsigned int align_pgsize_idx = __ffs(addr_merge); - pgsize_idx = min(pgsize_idx, align_pgsize_idx); - } + /* + * There's no point trying a bigger page size unless the virtual + * and physical addresses are similarly offset within the larger page. + */ + if ((iova ^ paddr) & (pgsize_next - 1)) + goto out_set_count; - /* build a mask of acceptable page sizes */ - pgsize = (1UL << (pgsize_idx + 1)) - 1; + /* Calculate the offset to the next page size alignment boundary */ + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); - /* throw away page sizes not supported by the hardware */ - pgsize &= pgsize_bitmap; + /* + * If size is big enough to accommodate the larger page, reduce + * the number of smaller pages. + */ + if (offset + pgsize_next <= size) + size = offset; - /* make sure we're still sane */ - if (!pgsize) { - pr_err("invalid pgsize/addr/size! 0x%lx 0x%lx 0x%zx\n", - pgsize_bitmap, addr_merge, size); - BUG(); - } - - /* pick the biggest page */ - pgsize_idx = __fls(pgsize); - pgsize = 1UL << pgsize_idx; - +out_set_count: + *count = size >> pgsize_idx; return pgsize; } -int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) +static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, + gfp_t gfp, size_t *mapped) { + const struct iommu_ops *ops = domain->ops; + size_t pgsize, count; + int ret; + + pgsize = iommu_pgsize(domain, iova, paddr, size, &count); + + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", + iova, &paddr, pgsize, count); + + if (ops->map_pages) { + ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, + gfp, mapped); + } else { + ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); + *mapped = ret ? 0 : pgsize; + } + + return ret; +} + +static int __iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(domain->ops->map == NULL || + if (unlikely(!(ops->map || ops->map_pages) || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1619,41 +2457,83 @@ pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t pgsize = iommu_pgsize(domain->pgsize_bitmap, - iova | paddr, size); + size_t mapped = 0; - pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", - iova, &paddr, pgsize); + ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, + &mapped); + /* + * Some pages may have been mapped, even if an error occurred, + * so we should account for those so they can be unmapped. 
+ */ + size -= mapped; - ret = domain->ops->map(domain, iova, paddr, pgsize, prot); if (ret) break; - iova += pgsize; - paddr += pgsize; - size -= pgsize; + iova += mapped; + paddr += mapped; } /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); else - trace_map(domain, orig_iova, orig_paddr, orig_size, prot); + trace_map(orig_iova, orig_paddr, orig_size); return ret; } + +static int _iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_ops *ops = domain->ops; + int ret; + + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); + if (ret == 0 && ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, size); + + return ret; +} + +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + might_sleep(); + return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); +} EXPORT_SYMBOL_GPL(iommu_map); + +int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); +} +EXPORT_SYMBOL_GPL(iommu_map_atomic); + +static size_t __iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, size_t size, + struct iommu_iotlb_gather *iotlb_gather) +{ + const struct iommu_ops *ops = domain->ops; + size_t pgsize, count; + + pgsize = iommu_pgsize(domain, iova, iova, size, &count); + return ops->unmap_pages ? + ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : + ops->unmap(domain, iova, pgsize, iotlb_gather); +} static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, - bool sync) + struct iommu_iotlb_gather *iotlb_gather) { const struct iommu_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; unsigned long orig_iova = iova; unsigned int min_pagesz; - if (unlikely(ops->unmap == NULL || + if (unlikely(!(ops->unmap || ops->unmap_pages) || domain->pgsize_bitmap == 0UL)) return 0; @@ -1681,14 +2561,11 @@ * or we hit an area that isn't mapped. 
*/ while (unmapped < size) { - size_t pgsize = iommu_pgsize(domain->pgsize_bitmap, iova, size - unmapped); - - unmapped_page = ops->unmap(domain, iova, pgsize); + unmapped_page = __iommu_unmap_pages(domain, iova, + size - unmapped, + iotlb_gather); if (!unmapped_page) break; - - if (sync && ops->iotlb_range_add) - ops->iotlb_range_add(domain, iova, pgsize); pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); @@ -1697,74 +2574,81 @@ unmapped += unmapped_page; } - if (sync && ops->iotlb_sync) - ops->iotlb_sync(domain); - - trace_unmap(domain, orig_iova, size, unmapped); + trace_unmap(orig_iova, size, unmapped); return unmapped; } size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - return __iommu_unmap(domain, iova, size, true); + struct iommu_iotlb_gather iotlb_gather; + size_t ret; + + iommu_iotlb_gather_init(&iotlb_gather); + ret = __iommu_unmap(domain, iova, size, &iotlb_gather); + iommu_iotlb_sync(domain, &iotlb_gather); + + return ret; } EXPORT_SYMBOL_GPL(iommu_unmap); size_t iommu_unmap_fast(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size, + struct iommu_iotlb_gather *iotlb_gather) { - return __iommu_unmap(domain, iova, size, false); + return __iommu_unmap(domain, iova, size, iotlb_gather); } EXPORT_SYMBOL_GPL(iommu_unmap_fast); -size_t iommu_map_sg(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) +static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot, + gfp_t gfp) { - size_t mapped; - - mapped = domain->ops->map_sg(domain, iova, sg, nents, prot); - trace_map_sg(domain, iova, mapped, prot); - return mapped; -} -EXPORT_SYMBOL(iommu_map_sg); - -size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - struct scatterlist *s; - size_t mapped = 0; - unsigned int i, min_pagesz; + const struct iommu_ops *ops = domain->ops; + size_t len = 0, mapped = 0; + phys_addr_t start; + unsigned int i = 0; int ret; - if (unlikely(domain->pgsize_bitmap == 0UL)) - return 0; + if (ops->map_sg) { + ret = ops->map_sg(domain, iova, sg, nents, prot, gfp, &mapped); - min_pagesz = 1 << __ffs(domain->pgsize_bitmap); + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, mapped); - for_each_sg(sg, s, nents, i) { - phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; - - /* - * We are mapping on IOMMU page boundaries, so offset within - * the page must be 0. However, the IOMMU may support pages - * smaller than PAGE_SIZE, so s->offset may still represent - * an offset of that boundary within the CPU page. 
- */ - if (!IS_ALIGNED(s->offset, min_pagesz)) - goto out_err; - - ret = iommu_map(domain, iova + mapped, phys, s->length, prot); if (ret) goto out_err; - mapped += s->length; + return mapped; } - if (domain->ops->flush_iotlb_all && (prot & IOMMU_TLB_SHOT_ENTIRE)) - domain->ops->flush_iotlb_all(domain); + while (i <= nents) { + phys_addr_t s_phys = sg_phys(sg); + if (len && s_phys != start + len) { + ret = __iommu_map(domain, iova + mapped, start, + len, prot, gfp); + + if (ret) + goto out_err; + + mapped += len; + len = 0; + } + + if (len) { + len += sg->length; + } else { + len = sg->length; + start = s_phys; + } + + if (++i < nents) + sg = sg_next(sg); + } + + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, mapped); return mapped; out_err: @@ -1774,7 +2658,21 @@ return 0; } -EXPORT_SYMBOL_GPL(default_iommu_map_sg); + +size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot) +{ + might_sleep(); + return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(iommu_map_sg); + +size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot) +{ + return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); +} +EXPORT_SYMBOL_GPL(iommu_map_sg_atomic); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) @@ -1838,9 +2736,6 @@ } EXPORT_SYMBOL_GPL(report_iommu_fault); -struct dentry *iommu_debugfs_top; -EXPORT_SYMBOL_GPL(iommu_debugfs_top); - static int __init iommu_init(void) { iommu_group_kset = kset_create_and_add("iommu_groups", @@ -1859,7 +2754,6 @@ struct iommu_domain_geometry *geometry; bool *paging; int ret = 0; - u32 *count; switch (attr) { case DOMAIN_ATTR_GEOMETRY: @@ -1870,15 +2764,6 @@ case DOMAIN_ATTR_PAGING: paging = data; *paging = (domain->pgsize_bitmap != 0UL); - break; - case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_get_windows != NULL) - *count = domain->ops->domain_get_windows(domain); - else - ret = -ENODEV; - break; default: if (!domain->ops->domain_get_attr) @@ -1895,18 +2780,8 @@ enum iommu_attr attr, void *data) { int ret = 0; - u32 *count; switch (attr) { - case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_set_windows != NULL) - ret = domain->ops->domain_set_windows(domain, *count); - else - ret = -ENODEV; - - break; default: if (domain->ops->domain_set_attr == NULL) return -EINVAL; @@ -1935,19 +2810,23 @@ } /** - * iommu_trigger_fault() - trigger an IOMMU fault - * @domain: iommu domain + * generic_iommu_put_resv_regions - Reserved region driver helper + * @dev: device for which to free reserved regions + * @list: reserved region list for device * - * Triggers a fault on the device to which this domain is attached. - * - * This function should only be used for debugging purposes, for obvious - * reasons. + * IOMMU drivers can use this to implement their .put_resv_regions() callback + * for simple reservations. Memory allocated for each reserved region will be + * freed. If an IOMMU driver allocates additional resources per region, it is + * going to have to implement a custom callback. 
*/ -void iommu_trigger_fault(struct iommu_domain *domain, unsigned long flags) +void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list) { - if (domain->ops->trigger_fault) - domain->ops->trigger_fault(domain, flags); + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, list, list) + kfree(entry); } +EXPORT_SYMBOL(generic_iommu_put_resv_regions); struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, @@ -1968,58 +2847,27 @@ } EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); -/* Request that a device is direct mapped by the IOMMU */ -int iommu_request_dm_for_dev(struct device *dev) +void iommu_set_default_passthrough(bool cmd_line) { - struct iommu_domain *dm_domain; - struct iommu_group *group; - int ret; + if (cmd_line) + iommu_set_cmd_line_dma_api(); - /* Device must already be in a group before calling this function */ - group = iommu_group_get(dev); - if (!group) - return -EINVAL; - - mutex_lock(&group->mutex); - - /* Check if the default domain is already direct mapped */ - ret = 0; - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY) - goto out; - - /* Don't change mappings of existing devices */ - ret = -EBUSY; - if (iommu_group_device_count(group) != 1) - goto out; - - /* Allocate a direct mapped domain */ - ret = -ENOMEM; - dm_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_IDENTITY); - if (!dm_domain) - goto out; - - /* Attach the device to the domain */ - ret = __iommu_attach_group(dm_domain, group); - if (ret) { - iommu_domain_free(dm_domain); - goto out; - } - - /* Make the direct mapped domain the default for this group */ - if (group->default_domain) - iommu_domain_free(group->default_domain); - group->default_domain = dm_domain; - - pr_info("Using direct mapping for device %s\n", dev_name(dev)); - - ret = 0; -out: - mutex_unlock(&group->mutex); - iommu_group_put(group); - - return ret; + iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; } + +void iommu_set_default_translated(bool cmd_line) +{ + if (cmd_line) + iommu_set_cmd_line_dma_api(); + + iommu_def_domain_type = IOMMU_DOMAIN_DMA; +} + +bool iommu_default_passthrough(void) +{ + return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; +} +EXPORT_SYMBOL_GPL(iommu_default_passthrough); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { @@ -2039,57 +2887,256 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) return ops == fwspec->ops ? 
0 : -EINVAL; - fwspec = kzalloc(sizeof(*fwspec), GFP_KERNEL); + if (!dev_iommu_get(dev)) + return -ENOMEM; + + /* Preallocate for the overwhelmingly common case of 1 ID */ + fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); if (!fwspec) return -ENOMEM; of_node_get(to_of_node(iommu_fwnode)); fwspec->iommu_fwnode = iommu_fwnode; fwspec->ops = ops; - dev->iommu_fwspec = fwspec; + dev_iommu_fwspec_set(dev, fwspec); return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_init); void iommu_fwspec_free(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) { fwnode_handle_put(fwspec->iommu_fwnode); kfree(fwspec); - dev->iommu_fwspec = NULL; + dev_iommu_fwspec_set(dev, NULL); } } EXPORT_SYMBOL_GPL(iommu_fwspec_free); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; - size_t size; - int i; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + int i, new_num; if (!fwspec) return -EINVAL; - size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]); - if (size > sizeof(*fwspec)) { - fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL); + new_num = fwspec->num_ids + num_ids; + if (new_num > 1) { + fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), + GFP_KERNEL); if (!fwspec) return -ENOMEM; - dev->iommu_fwspec = fwspec; + dev_iommu_fwspec_set(dev, fwspec); } for (i = 0; i < num_ids; i++) fwspec->ids[fwspec->num_ids + i] = ids[i]; - fwspec->num_ids += num_ids; + fwspec->num_ids = new_num; return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); + +/* + * Per device IOMMU features. + */ +bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_has_feat) + return ops->dev_has_feat(dev, feat); + + return false; +} +EXPORT_SYMBOL_GPL(iommu_dev_has_feature); + +int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) +{ + if (dev->iommu && dev->iommu->iommu_dev) { + const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; + + if (ops->dev_enable_feat) + return ops->dev_enable_feat(dev, feat); + } + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); + +/* + * The device drivers should do the necessary cleanups before calling this. + * For example, before disabling the aux-domain feature, the device driver + * should detach all aux-domains. Otherwise, this will return -EBUSY. + */ +int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) +{ + if (dev->iommu && dev->iommu->iommu_dev) { + const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; + + if (ops->dev_disable_feat) + return ops->dev_disable_feat(dev, feat); + } + + return -EBUSY; +} +EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); + +bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) +{ + if (dev->iommu && dev->iommu->iommu_dev) { + const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; + + if (ops->dev_feat_enabled) + return ops->dev_feat_enabled(dev, feat); + } + + return false; +} +EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); + +/* + * Aux-domain specific attach/detach. + * + * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns + * true. Also, as long as domains are attached to a device through this + * interface, any tries to call iommu_attach_device() should fail + * (iommu_detach_device() can't fail, so we fail when trying to re-attach). 
+ * This should make us safe against a device being attached to a guest as a + * whole while there are still pasid users on it (aux and sva). + */ +int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) +{ + int ret = -ENODEV; + + if (domain->ops->aux_attach_dev) + ret = domain->ops->aux_attach_dev(domain, dev); + + if (!ret) + trace_attach_device_to_domain(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_aux_attach_device); + +void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) +{ + if (domain->ops->aux_detach_dev) { + domain->ops->aux_detach_dev(domain, dev); + trace_detach_device_from_domain(dev); + } +} +EXPORT_SYMBOL_GPL(iommu_aux_detach_device); + +int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + int ret = -ENODEV; + + if (domain->ops->aux_get_pasid) + ret = domain->ops->aux_get_pasid(domain, dev); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); + +/** + * iommu_sva_bind_device() - Bind a process address space to a device + * @dev: the device + * @mm: the mm to bind, caller must hold a reference to it + * + * Create a bond between device and address space, allowing the device to access + * the mm using the returned PASID. If a bond already exists between @device and + * @mm, it is returned and an additional reference is taken. Caller must call + * iommu_sva_unbind_device() to release each reference. + * + * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to + * initialize the required SVA features. + * + * On error, returns an ERR_PTR value. + */ +struct iommu_sva * +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) +{ + struct iommu_group *group; + struct iommu_sva *handle = ERR_PTR(-EINVAL); + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (!ops || !ops->sva_bind) + return ERR_PTR(-ENODEV); + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + /* Ensure device count and domain don't change while we're binding */ + mutex_lock(&group->mutex); + + /* + * To keep things simple, SVA currently doesn't support IOMMU groups + * with more than one device. Existing SVA-capable systems are not + * affected by the problems that required IOMMU groups (lack of ACS + * isolation, device ID aliasing and other hardware issues). + */ + if (iommu_group_device_count(group) != 1) + goto out_unlock; + + handle = ops->sva_bind(dev, mm, drvdata); + +out_unlock: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return handle; +} +EXPORT_SYMBOL_GPL(iommu_sva_bind_device); + +/** + * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device + * @handle: the handle returned by iommu_sva_bind_device() + * + * Put reference to a bond between device and address space. The device should + * not be issuing any more transaction for this PASID. All outstanding page + * requests for this PASID must have been flushed to the IOMMU. 
+ */ +void iommu_sva_unbind_device(struct iommu_sva *handle) +{ + struct iommu_group *group; + struct device *dev = handle->dev; + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (!ops || !ops->sva_unbind) + return; + + group = iommu_group_get(dev); + if (!group) + return; + + mutex_lock(&group->mutex); + ops->sva_unbind(handle); + mutex_unlock(&group->mutex); + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); + +u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + const struct iommu_ops *ops = handle->dev->bus->iommu_ops; + + if (!ops || !ops->sva_get_pasid) + return IOMMU_PASID_INVALID; + + return ops->sva_get_pasid(handle); +} +EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); -- Gitblit v1.6.2
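
For reference, a minimal sketch of how a consumer driver might use the recoverable-fault API introduced above (iommu_register_device_fault_handler() and iommu_page_response()). This is illustrative only, not part of the patch; all "my_*" names are hypothetical.

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/iommu.h>

/* Hypothetical helper: make the faulting page resident; 0 on success. */
static int my_fix_fault(struct device *dev,
			struct iommu_fault_page_request *prm)
{
	return -EINVAL;	/* stub */
}

static int my_fault_handler(struct iommu_fault *fault, void *data)
{
	struct device *dev = data;
	struct iommu_page_response resp = {
		.version = IOMMU_PAGE_RESP_VERSION_1,
	};

	/* Only page requests are recoverable; reject anything else. */
	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		return -EOPNOTSUPP;

	resp.grpid = fault->prm.grpid;
	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
		resp.flags |= IOMMU_PAGE_RESP_PASID_VALID;
		resp.pasid = fault->prm.pasid;
	}

	resp.code = my_fix_fault(dev, &fault->prm) ?
			IOMMU_PAGE_RESP_INVALID : IOMMU_PAGE_RESP_SUCCESS;

	/* Completes the pending request queued by iommu_report_device_fault() */
	return iommu_page_response(dev, &resp);
}

/* Registration, e.g. from the consumer driver's probe path: */
static int my_enable_faults(struct device *dev)
{
	return iommu_register_device_fault_handler(dev, my_fault_handler, dev);
}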
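
Likewise, a sketch of the reworked unmap path: iommu_unmap_fast() now takes an iommu_iotlb_gather, so a caller tearing down a large range can batch TLB invalidation and flush once, exactly as iommu_unmap() does in the patch. Illustrative only; my_unmap_range() is hypothetical.

#include <linux/iommu.h>

static size_t my_unmap_range(struct iommu_domain *domain,
			     unsigned long iova, size_t size)
{
	struct iommu_iotlb_gather gather;
	size_t unmapped;

	iommu_iotlb_gather_init(&gather);
	/* Tears down page tables; TLB invalidation is deferred... */
	unmapped = iommu_unmap_fast(domain, iova, size, &gather);
	/* ...and issued once for the whole range here. */
	iommu_iotlb_sync(domain, &gather);

	return unmapped;
}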