// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 */
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *         this vma
 *
 * Each time we map IO memory into user space this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma,
			 struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	if (entry) {
		kref_get(&entry->ref);
		priv->entry = entry;
	}
	vma->vm_private_data = priv;
	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map, or 0 on
 * success.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
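
/*
 * Illustrative sketch (not part of this file): a driver's ops->mmap handler
 * forwarding a single doorbell BAR page to userspace with
 * rdma_user_mmap_io(). The names my_dev, to_my_dev(), db_bar_addr and
 * MY_DB_PGOFF are hypothetical.
 *
 *	static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 *	{
 *		struct my_dev *dev = to_my_dev(uctx->device);
 *
 *		if (vma->vm_pgoff != MY_DB_PGOFF)
 *			return -EINVAL;
 *		return rdma_user_mmap_io(uctx, vma,
 *					 dev->db_bar_addr >> PAGE_SHIFT,
 *					 PAGE_SIZE,
 *					 pgprot_noncached(vma->vm_page_prot),
 *					 NULL);
 *	}
 */
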
/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If refcount is zero, the entry is already being deleted;
	 * driver_removed indicates that no further mmaps are possible and
	 * we are waiting for the active VMAs to be closed.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
		  pgoff, entry->npages);

	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}
	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);
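
/*
 * Illustrative sketch: a fragment of a driver's ops->mmap handler driven by
 * the mmap_entry database. The entry is looked up by the pgoff userspace
 * passed to mmap(), the IO memory is mapped, and the lookup reference is
 * dropped; rdma_user_mmap_io() takes its own reference on the entry (via
 * rdma_umap_priv_init()) for the lifetime of the VMA. to_my_entry() and
 * io_addr are hypothetical.
 *
 *	entry = rdma_user_mmap_entry_get(uctx, vma);
 *	if (!entry)
 *		return -EINVAL;
 *	ret = rdma_user_mmap_io(uctx, vma,
 *				to_my_entry(entry)->io_addr >> PAGE_SHIFT,
 *				vma->vm_end - vma->vm_start,
 *				pgprot_noncached(vma->vm_page_prot), entry);
 *	rdma_user_mmap_entry_put(entry);
 *	return ret;
 */
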
static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all xarray slots occupied by this single entry. This is
	 * deferred until all VMAs are closed so that the mmap offsets remain
	 * unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
		  entry->start_pgoff, entry->npages);

	if (ucontext->device->ops.mmap_free)
		ucontext->device->ops.mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping or when the driver is done with the entry for
 * some other reason.
 * It should be called after rdma_user_mmap_entry_get() was called
 * and the entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *                                 mark it as unmappable
 *
 * @entry: the entry to remove from the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
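
/*
 * Illustrative sketch: a driver destroy path. After this call userspace can
 * no longer create new mappings for the object, but live VMAs keep the
 * entry referenced, so the memory behind it should only be released from
 * ops->mmap_free(). my_obj and my_destroy_obj() are hypothetical.
 *
 *	static void my_destroy_obj(struct my_obj *obj)
 *	{
 *		rdma_user_mmap_entry_remove(&obj->entry);
 *		// Do not free obj here if VMAs may still reference the
 *		// entry; defer final cleanup to ops->mmap_free().
 *	}
 */
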
/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *                                       in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in the given
 * range that should be provided to userspace; userspace will use the offset
 * to retrieve information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	/* We want to find an empty range */
	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;
	while (true) {
		/* First find an empty index */
		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
		if (xas.xa_node == XAS_RESTART)
			goto err_unlock;

		xa_first = xas.xa_index;

		/* Is there enough room to have the range? */
		if (check_add_overflow(xa_first, npages, &xa_last))
			goto err_unlock;

		/*
		 * Now look for the next present entry. If an entry doesn't
		 * exist, we found an empty range and can proceed.
		 */
		xas_next_entry(&xas, xa_last - 1);
		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
			break;
	}

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset; in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
		  entry->start_pgoff, npages);

	return 0;

err_undo:
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
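
/*
 * Illustrative sketch: constraining the allocated pgoff so a "command" is
 * encoded in its high bits, in the spirit of drivers that multiplex several
 * mapping types over one offset space. MY_MMAP_DB, uctx and obj are
 * hypothetical; both range bounds are inclusive.
 *
 *	u32 first = MY_MMAP_DB << 16;
 *
 *	err = rdma_user_mmap_entry_insert_range(uctx, &obj->entry, PAGE_SIZE,
 *						first, first + 0xffff);
 *	if (err)
 *		return err;
 */
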
/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given
 * offset. The function allocates a unique page offset that should be
 * provided to userspace; userspace will use the offset to retrieve
 * information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
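
/*
 * Illustrative sketch: a driver embeds the entry in its own object, inserts
 * it, and hands the resulting byte offset back to userspace (typically in a
 * uverbs response), which then passes it to mmap(). struct my_obj and resp
 * are hypothetical; rdma_user_mmap_get_offset() converts start_pgoff into
 * the byte offset userspace needs.
 *
 *	struct my_obj {
 *		struct rdma_user_mmap_entry entry;
 *		phys_addr_t io_addr;
 *	};
 *
 *	err = rdma_user_mmap_entry_insert(uctx, &obj->entry, PAGE_SIZE);
 *	if (err)
 *		return err;
 *	resp.mmap_offset = rdma_user_mmap_get_offset(&obj->entry);
 */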