From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c | 167 +++++++++++++++++++++----------------------------------
1 files changed, 65 insertions(+), 102 deletions(-)
diff --git a/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4e417ed..b94fc7f 100644
--- a/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -59,11 +60,11 @@
struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
struct tid_group *grp,
unsigned int start, u16 count,
@@ -73,10 +74,8 @@
struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
-static struct mmu_rb_ops tid_rb_ops = {
- .insert = tid_rb_insert,
- .remove = tid_rb_remove,
- .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
};
/*
@@ -87,7 +86,6 @@
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
fd->entry_to_rb = kcalloc(uctxt->expected_count,
@@ -106,20 +104,7 @@
fd->entry_to_rb = NULL;
return -ENOMEM;
}
-
- /*
- * Register MMU notifier callbacks. If the registration
- * fails, continue without TID caching for this context.
- */
- ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
- dd->pport->hfi1_wq,
- &fd->handler);
- if (ret) {
- dd_dev_info(dd,
- "Failed MMU notifier registration %d\n",
- ret);
- ret = 0;
- }
+ fd->use_mn = true;
}
/*
@@ -136,7 +121,7 @@
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && fd->handler) {
+ if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -155,20 +140,12 @@
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- /*
- * The notifier would have been removed when the process'es mm
- * was freed.
- */
- if (fd->handler) {
- hfi1_mmu_rb_unregister(fd->handler);
- } else {
- mutex_lock(&uctxt->exp_mutex);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- mutex_unlock(&uctxt->exp_mutex);
- }
+ mutex_lock(&uctxt->exp_mutex);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
@@ -197,15 +174,18 @@
{
struct page **pages;
struct hfi1_devdata *dd = fd->uctxt->dd;
+ struct mm_struct *mm;
if (mapped) {
pci_unmap_single(dd->pcidev, node->dma_addr,
- node->mmu.len, PCI_DMA_FROMDEVICE);
+ node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
pages = &node->pages[idx];
+ mm = mm_from_tid_node(node);
} else {
pages = &tidbuf->pages[idx];
+ mm = current->mm;
}
- hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+ hfi1_release_user_pages(mm, pages, npages, mapped);
fd->tid_n_pinned -= npages;
}
@@ -230,13 +210,6 @@
return -EINVAL;
}
- /* Verify that access is OK for the user buffer */
- if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
/* Allocate the array of struct page pointers needed for pinning */
pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
if (!pages)
@@ -247,12 +220,12 @@
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
kfree(pages);
return -ENOMEM;
}
- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
if (pinned <= 0) {
kfree(pages);
return pinned;
@@ -776,8 +749,7 @@
return -EFAULT;
}
- node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
- node->mmu.len = npages * PAGE_SIZE;
+ node->fdata = fd;
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
@@ -786,23 +758,35 @@
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- if (!fd->handler)
- ret = tid_rb_insert(fd, &node->mmu);
- else
- ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
- if (ret) {
- hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->mmu.addr, node->phys, ret);
- pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
- kfree(node);
- return -EFAULT;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &node->notifier, current->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
+ /*
+ * FIXME: This is in the wrong order, the notifier should be
+ * established before the pages are pinned by pin_rcv_pages.
+ */
+ mmu_interval_read_begin(&node->notifier);
}
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
- node->mmu.addr, node->phys, phys);
+ node->notifier.interval_tree.start, node->phys,
+ phys);
return 0;
+
+out_unmap:
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->notifier.interval_tree.start,
+ node->phys, ret);
+ pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ kfree(node);
+ return -EFAULT;
}
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
@@ -832,10 +816,9 @@
if (grp)
*grp = node->grp;
- if (!fd->handler)
- cacheless_tid_rb_remove(fd, node);
- else
- hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
return 0;
}
@@ -846,7 +829,8 @@
struct hfi1_devdata *dd = uctxt->dd;
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->mmu.addr, node->phys,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
node->dma_addr);
/*
@@ -893,30 +877,29 @@
if (!node || node->rcventry != rcventry)
continue;
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(
+ &node->notifier);
cacheless_tid_rb_remove(fd, node);
}
}
}
}
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
- container_of(mnode, struct tid_rb_node, mmu);
+ container_of(mni, struct tid_rb_node, notifier);
+ struct hfi1_filedata *fdata = node->fdata;
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
if (node->freed)
- return 0;
+ return true;
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
node->freed = true;
@@ -945,18 +928,7 @@
fdata->invalid_tid_idx++;
}
spin_unlock(&fdata->invalid_lock);
- return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
- u32 base = fdata->uctxt->expected_base;
-
- fdata->entry_to_rb[tnode->rcventry - base] = tnode;
- return 0;
+ return true;
}
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -966,13 +938,4 @@
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
clear_tid_node(fdata, tnode);
-}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
-
- cacheless_tid_rb_remove(fdata, tnode);
}
--
Gitblit v1.6.2