From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] vfio/spapr_tce: update locked_vm accounting, page pinning and TCE invalidation
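
Update drivers/vfio/vfio_iommu_spapr_tce.c; the changes appear to track
later mainline revisions of this driver:

- add an SPDX license identifier and drop the GPL boilerplate text
- replace the open-coded try_increment_locked_vm()/decrement_locked_vm()
  helpers with account_locked_vm() and take the mm reference via mmgrab()
- reference preregistered memory through mm_iommu_get()/mm_iommu_new()/
  mm_iommu_put() and fix the register/unregister error paths
- teach tce_page_is_contained() about devmem-backed mappings
- pin user pages with pin_user_pages_fast() and release them with
  unpin_user_page()
- switch to iommu_tce_xchg_no_kill() and issue a single iommu_tce_kill()
  per map/unmap/clear operation, skipping unallocated indirect levels
  when clearing
- skip unset_window() for tables that were never created, split the
  attach_group error labels and make tce_iommu_driver_ops static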
---
kernel/drivers/vfio/vfio_iommu_spapr_tce.c | 177 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 89 insertions(+), 88 deletions(-)
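
Note (not part of the commit, ignored by git am): a minimal sketch of how
the removed open-coded helpers map onto the account_locked_vm() calls used
in the hunks below. It assumes the mainline <linux/mm.h> helper
account_locked_vm(mm, pages, inc), which takes the mmap lock itself and
enforces RLIMIT_MEMLOCK unless CAP_IPC_LOCK is held; the function
tce_locked_vm_example() is made up purely for illustration.

#include <linux/mm.h>

static int tce_locked_vm_example(struct mm_struct *mm, unsigned long npages)
{
	int ret;

	/* was: try_increment_locked_vm(mm, npages) */
	ret = account_locked_vm(mm, npages, true);
	if (ret)	/* -ENOMEM when RLIMIT_MEMLOCK would be exceeded */
		return ret;

	/* ... pin pages / create the window while the accounting is held ... */

	/* was: decrement_locked_vm(mm, npages) */
	account_locked_vm(mm, npages, false);
	return 0;
}
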
diff --git a/kernel/drivers/vfio/vfio_iommu_spapr_tce.c b/kernel/drivers/vfio/vfio_iommu_spapr_tce.c
index ec53310..fe888b5 100644
--- a/kernel/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/kernel/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* VFIO: IOMMU DMA mapping support for TCE on POWER
*
* Copyright (C) 2013 IBM Corp. All rights reserved.
* Author: Alexey Kardashevskiy <aik@ozlabs.ru>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*
* Derived from original vfio_iommu_type1.c:
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
@@ -22,6 +19,7 @@
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
+#include <linux/mm.h>
#include <asm/iommu.h>
#include <asm/tce.h>
@@ -33,51 +31,6 @@
static void tce_iommu_detach_group(void *iommu_data,
struct iommu_group *iommu_group);
-
-static long try_increment_locked_vm(struct mm_struct *mm, long npages)
-{
- long ret = 0, locked, lock_limit;
-
- if (WARN_ON_ONCE(!mm))
- return -EPERM;
-
- if (!npages)
- return 0;
-
- down_write(&mm->mmap_sem);
- locked = mm->locked_vm + npages;
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- ret = -ENOMEM;
- else
- mm->locked_vm += npages;
-
- pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
- npages << PAGE_SHIFT,
- mm->locked_vm << PAGE_SHIFT,
- rlimit(RLIMIT_MEMLOCK),
- ret ? " - exceeded" : "");
-
- up_write(&mm->mmap_sem);
-
- return ret;
-}
-
-static void decrement_locked_vm(struct mm_struct *mm, long npages)
-{
- if (!mm || !npages)
- return;
-
- down_write(&mm->mmap_sem);
- if (WARN_ON_ONCE(npages > mm->locked_vm))
- npages = mm->locked_vm;
- mm->locked_vm -= npages;
- pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
- npages << PAGE_SHIFT,
- mm->locked_vm << PAGE_SHIFT,
- rlimit(RLIMIT_MEMLOCK));
- up_write(&mm->mmap_sem);
-}
/*
* VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
@@ -126,7 +79,7 @@
}
BUG_ON(!current->mm);
container->mm = current->mm;
- atomic_inc(&container->mm->mm_count);
+ mmgrab(container->mm);
return 0;
}
@@ -152,11 +105,12 @@
struct mm_iommu_table_group_mem_t *mem;
struct tce_iommu_prereg *tcemem;
bool found = false;
+ long ret;
if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
return -EINVAL;
- mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
+ mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
if (!mem)
return -ENOENT;
@@ -168,9 +122,13 @@
}
if (!found)
- return -ENOENT;
+ ret = -ENOENT;
+ else
+ ret = tce_iommu_prereg_free(container, tcemem);
- return tce_iommu_prereg_free(container, tcemem);
+ mm_iommu_put(container->mm, mem);
+
+ return ret;
}
static long tce_iommu_register_pages(struct tce_container *container,
@@ -185,22 +143,24 @@
((vaddr + size) < vaddr))
return -EINVAL;
- mem = mm_iommu_find(container->mm, vaddr, entries);
+ mem = mm_iommu_get(container->mm, vaddr, entries);
if (mem) {
list_for_each_entry(tcemem, &container->prereg_list, next) {
- if (tcemem->mem == mem)
- return -EBUSY;
+ if (tcemem->mem == mem) {
+ ret = -EBUSY;
+ goto put_exit;
+ }
}
+ } else {
+ ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
+ if (ret)
+ return ret;
}
-
- ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
- if (ret)
- return ret;
tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
if (!tcemem) {
- mm_iommu_put(container->mm, mem);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto put_exit;
}
tcemem->mem = mem;
@@ -209,16 +169,28 @@
container->enabled = true;
return 0;
+
+put_exit:
+ mm_iommu_put(container->mm, mem);
+ return ret;
}
-static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
+ unsigned int it_page_shift)
{
+ struct page *page;
+ unsigned long size = 0;
+
+ if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+ return size == (1UL << it_page_shift);
+
+ page = pfn_to_page(hpa >> PAGE_SHIFT);
/*
* Check that the TCE table granularity is not bigger than the size of
* a page we just found. Otherwise the hardware can get access to
* a bigger memory chunk that it should.
*/
- return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+ return page_shift(compound_head(page)) >= it_page_shift;
}
static inline bool tce_groups_attached(struct tce_container *container)
@@ -317,7 +289,7 @@
return ret;
locked = table_group->tce32_size >> PAGE_SHIFT;
- ret = try_increment_locked_vm(container->mm, locked);
+ ret = account_locked_vm(container->mm, locked, true);
if (ret)
return ret;
@@ -336,7 +308,7 @@
container->enabled = false;
BUG_ON(!container->mm);
- decrement_locked_vm(container->mm, container->locked_pages);
+ account_locked_vm(container->mm, container->locked_pages, false);
}
static void *tce_iommu_open(unsigned long arg)
@@ -411,7 +383,7 @@
struct page *page;
page = pfn_to_page(hpa >> PAGE_SHIFT);
- put_page(page);
+ unpin_user_page(page);
}
static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
@@ -440,7 +412,7 @@
struct mm_iommu_table_group_mem_t *mem = NULL;
int ret;
unsigned long hpa = 0;
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
if (!pua)
return;
@@ -463,13 +435,33 @@
unsigned long oldhpa;
long ret;
enum dma_data_direction direction;
+ unsigned long lastentry = entry + pages, firstentry = entry;
- for ( ; pages; --pages, ++entry) {
+ for ( ; entry < lastentry; ++entry) {
+ if (tbl->it_indirect_levels && tbl->it_userspace) {
+ /*
+ * For multilevel tables, we can take a shortcut here
+ * and skip some TCEs as we know that the userspace
+ * addresses cache is a mirror of the real TCE table
+ * and if it is missing some indirect levels, then
+ * the hardware table does not have them allocated
+ * either and therefore does not require updating.
+ */
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
+ entry);
+ if (!pua) {
+ /* align to level_size which is power of two */
+ entry |= tbl->it_level_size - 1;
+ continue;
+ }
+ }
+
cond_resched();
direction = DMA_NONE;
oldhpa = 0;
- ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+ ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
+ &direction);
if (ret)
continue;
@@ -484,6 +476,8 @@
tce_iommu_unuse_page(container, oldhpa);
}
+ iommu_tce_kill(tbl, firstentry, pages);
+
return 0;
}
@@ -492,8 +486,9 @@
struct page *page = NULL;
enum dma_data_direction direction = iommu_tce_direction(tce);
- if (get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page) != 1)
+ if (pin_user_pages_fast(tce & PAGE_MASK, 1,
+ direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
+ &page) != 1)
return -EFAULT;
*hpa = __pa((unsigned long) page_address(page));
@@ -507,7 +502,6 @@
enum dma_data_direction direction)
{
long i, ret = 0;
- struct page *page;
unsigned long hpa;
enum dma_data_direction dirtmp;
@@ -518,15 +512,16 @@
if (ret)
break;
- page = pfn_to_page(hpa >> PAGE_SHIFT);
- if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+ if (!tce_page_is_contained(container->mm, hpa,
+ tbl->it_page_shift)) {
ret = -EPERM;
break;
}
hpa |= offset;
dirtmp = direction;
- ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+ ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
+ &hpa, &dirtmp);
if (ret) {
tce_iommu_unuse_page(container, hpa);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
@@ -543,6 +538,8 @@
if (ret)
tce_iommu_clear(container, tbl, entry, i);
+ else
+ iommu_tce_kill(tbl, entry, pages);
return ret;
}
@@ -553,7 +550,6 @@
enum dma_data_direction direction)
{
long i, ret = 0;
- struct page *page;
unsigned long hpa;
enum dma_data_direction dirtmp;
@@ -566,8 +562,8 @@
if (ret)
break;
- page = pfn_to_page(hpa >> PAGE_SHIFT);
- if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+ if (!tce_page_is_contained(container->mm, hpa,
+ tbl->it_page_shift)) {
ret = -EPERM;
break;
}
@@ -580,7 +576,8 @@
if (mm_iommu_mapped_inc(mem))
break;
- ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+ ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
+ &hpa, &dirtmp);
if (ret) {
/* dirtmp cannot be DMA_NONE here */
tce_iommu_unuse_page_v2(container, tbl, entry + i);
@@ -600,6 +597,8 @@
if (ret)
tce_iommu_clear(container, tbl, entry, i);
+ else
+ iommu_tce_kill(tbl, entry, pages);
return ret;
}
@@ -619,7 +618,7 @@
if (!table_size)
return -EINVAL;
- ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
+ ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
if (ret)
return ret;
@@ -638,7 +637,7 @@
unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
iommu_tce_table_put(tbl);
- decrement_locked_vm(container->mm, pages);
+ account_locked_vm(container->mm, pages, false);
}
static long tce_iommu_create_window(struct tce_container *container,
@@ -1196,7 +1195,8 @@
}
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
- table_group->ops->unset_window(table_group, i);
+ if (container->tables[i])
+ table_group->ops->unset_window(table_group, i);
table_group->ops->release_ownership(table_group);
}
@@ -1240,7 +1240,7 @@
static int tce_iommu_attach_group(void *iommu_data,
struct iommu_group *iommu_group)
{
- int ret;
+ int ret = 0;
struct tce_container *container = iommu_data;
struct iommu_table_group *table_group;
struct tce_iommu_group *tcegrp = NULL;
@@ -1293,13 +1293,13 @@
!table_group->ops->release_ownership) {
if (container->v2) {
ret = -EPERM;
- goto unlock_exit;
+ goto free_exit;
}
ret = tce_iommu_take_ownership(container, table_group);
} else {
if (!container->v2) {
ret = -EPERM;
- goto unlock_exit;
+ goto free_exit;
}
ret = tce_iommu_take_ownership_ddw(container, table_group);
if (!tce_groups_attached(container) && !container->tables[0])
@@ -1311,10 +1311,11 @@
list_add(&tcegrp->next, &container->group_list);
}
-unlock_exit:
+free_exit:
if (ret && tcegrp)
kfree(tcegrp);
+unlock_exit:
mutex_unlock(&container->lock);
return ret;
@@ -1358,7 +1359,7 @@
mutex_unlock(&container->lock);
}
-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
.name = "iommu-vfio-powerpc",
.owner = THIS_MODULE,
.open = tce_iommu_open,
--
Gitblit v1.6.2