From 9370bb92b2d16684ee45cf24e879c93c509162da Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Thu, 19 Dec 2024 01:47:39 +0000 Subject: [PATCH] add wifi6 8852be driver --- kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c | 372 +++++++++++++++++++++++++++++----------------------- 1 files changed, 205 insertions(+), 167 deletions(-) diff --git a/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4e417ed..0e0be6c 100644 --- a/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,4 +1,5 @@ /* + * Copyright(c) 2020 Cornelis Networks, Inc. * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or @@ -59,24 +60,29 @@ struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, u16 pageidx, unsigned int npages); -static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); -static void tid_rb_remove(void *arg, struct mmu_rb_node *node); -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, struct tid_group *grp, unsigned int start, u16 count, u32 *tidlist, unsigned int *tididx, unsigned int *pmapped); -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, - struct tid_group **grp); +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo); +static void __clear_tid_node(struct hfi1_filedata *fd, + struct tid_rb_node *node); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); -static struct mmu_rb_ops tid_rb_ops = { - .insert = tid_rb_insert, - .remove = tid_rb_remove, - .invalidate = tid_rb_invalidate +static const struct mmu_interval_notifier_ops tid_mn_ops = { + .invalidate = tid_rb_invalidate, +}; +static const struct mmu_interval_notifier_ops tid_cover_ops = { + .invalidate = tid_cover_invalidate, }; /* @@ -87,7 +93,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt) { - struct hfi1_devdata *dd = uctxt->dd; int ret = 0; fd->entry_to_rb = kcalloc(uctxt->expected_count, @@ -106,20 +111,7 @@ fd->entry_to_rb = NULL; return -ENOMEM; } - - /* - * Register MMU notifier callbacks. If the registration - * fails, continue without TID caching for this context. - */ - ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, - dd->pport->hfi1_wq, - &fd->handler); - if (ret) { - dd_dev_info(dd, - "Failed MMU notifier registration %d\n", - ret); - ret = 0; - } + fd->use_mn = true; } /* @@ -136,7 +128,7 @@ * init. */ spin_lock(&fd->tid_lock); - if (uctxt->subctxt_cnt && fd->handler) { + if (uctxt->subctxt_cnt && fd->use_mn) { u16 remainder; fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; @@ -155,20 +147,12 @@ { struct hfi1_ctxtdata *uctxt = fd->uctxt; - /* - * The notifier would have been removed when the process'es mm - * was freed. - */ - if (fd->handler) { - hfi1_mmu_rb_unregister(fd->handler); - } else { - mutex_lock(&uctxt->exp_mutex); - if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) - unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); - if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) - unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - mutex_unlock(&uctxt->exp_mutex); - } + mutex_lock(&uctxt->exp_mutex); + if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); + if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); + mutex_unlock(&uctxt->exp_mutex); kfree(fd->invalid_tids); fd->invalid_tids = NULL; @@ -197,15 +181,18 @@ { struct page **pages; struct hfi1_devdata *dd = fd->uctxt->dd; + struct mm_struct *mm; if (mapped) { pci_unmap_single(dd->pcidev, node->dma_addr, - node->mmu.len, PCI_DMA_FROMDEVICE); + node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); pages = &node->pages[idx]; + mm = mm_from_tid_node(node); } else { pages = &tidbuf->pages[idx]; + mm = current->mm; } - hfi1_release_user_pages(fd->mm, pages, npages, mapped); + hfi1_release_user_pages(mm, pages, npages, mapped); fd->tid_n_pinned -= npages; } @@ -215,28 +202,16 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) { int pinned; - unsigned int npages; + unsigned int npages = tidbuf->npages; unsigned long vaddr = tidbuf->vaddr; struct page **pages = NULL; struct hfi1_devdata *dd = fd->uctxt->dd; - - /* Get the number of pages the user buffer spans */ - npages = num_user_pages(vaddr, tidbuf->length); - if (!npages) - return -EINVAL; if (npages > fd->uctxt->expected_count) { dd_dev_err(dd, "Expected buffer too big\n"); return -EINVAL; } - /* Verify that access is OK for the user buffer */ - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, - npages * PAGE_SIZE)) { - dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", - (void *)vaddr, npages); - return -EFAULT; - } /* Allocate the array of struct page pointers needed for pinning */ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); if (!pages) @@ -247,18 +222,17 @@ * pages, accept the amount pinned so far and program only that. * User space knows how to deal with partially programmed buffers. */ - if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { + if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) { kfree(pages); return -ENOMEM; } - pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); + pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages); if (pinned <= 0) { kfree(pages); return pinned; } tidbuf->pages = pages; - tidbuf->npages = npages; fd->tid_n_pinned += pinned; return pinned; } @@ -322,53 +296,66 @@ tididx = 0, mapped, mapped_pages = 0; u32 *tidlist = NULL; struct tid_user_buf *tidbuf; + unsigned long mmu_seq = 0; if (!PAGE_ALIGNED(tinfo->vaddr)) + return -EINVAL; + if (tinfo->length == 0) return -EINVAL; tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) return -ENOMEM; + mutex_init(&tidbuf->cover_mutex); tidbuf->vaddr = tinfo->vaddr; tidbuf->length = tinfo->length; + tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length); tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), GFP_KERNEL); if (!tidbuf->psets) { - kfree(tidbuf); - return -ENOMEM; + ret = -ENOMEM; + goto fail_release_mem; + } + + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( + &tidbuf->notifier, current->mm, + tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, + &tid_cover_ops); + if (ret) + goto fail_release_mem; + mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); } pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { - kfree(tidbuf->psets); - kfree(tidbuf); - return pinned; + ret = (pinned < 0) ? pinned : -ENOSPC; + goto fail_unpin; } /* Find sets of physically contiguous pages */ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); - /* - * We don't need to access this under a lock since tid_used is per - * process and the same process cannot be in hfi1_user_exp_rcv_clear() - * and hfi1_user_exp_rcv_setup() at the same time. - */ + /* Reserve the number of expected tids to be used. */ spin_lock(&fd->tid_lock); if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) pageset_count = fd->tid_limit - fd->tid_used; else pageset_count = tidbuf->n_psets; + fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); - if (!pageset_count) - goto bail; + if (!pageset_count) { + ret = -ENOSPC; + goto fail_unreserve; + } ngroups = pageset_count / dd->rcv_entries.group_size; tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); if (!tidlist) { ret = -ENOMEM; - goto nomem; + goto fail_unreserve; } tididx = 0; @@ -464,43 +451,78 @@ } unlock: mutex_unlock(&uctxt->exp_mutex); -nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); - if (tididx) { - spin_lock(&fd->tid_lock); - fd->tid_used += tididx; - spin_unlock(&fd->tid_lock); - tinfo->tidcnt = tididx; - tinfo->length = mapped_pages * PAGE_SIZE; - if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), - tidlist, sizeof(tidlist[0]) * tididx)) { - /* - * On failure to copy to the user level, we need to undo - * everything done so far so we don't leak resources. - */ - tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fd, tinfo); - tinfo->tidlist = 0; - ret = -EFAULT; - goto bail; + /* fail if nothing was programmed, set error if none provided */ + if (tididx == 0) { + if (ret >= 0) + ret = -ENOSPC; + goto fail_unreserve; + } + + /* adjust reserved tid_used to actual count */ + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count - tididx; + spin_unlock(&fd->tid_lock); + + /* unpin all pages not covered by a TID */ + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, + false); + + if (fd->use_mn) { + /* check for an invalidate during setup */ + bool fail = false; + + mutex_lock(&tidbuf->cover_mutex); + fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); + mutex_unlock(&tidbuf->cover_mutex); + + if (fail) { + ret = -EBUSY; + goto fail_unprogram; } } - /* - * If not everything was mapped (due to insufficient RcvArray entries, - * for example), unpin all unmapped pages so we can pin them nex time. - */ - if (mapped_pages != pinned) - unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, - (pinned - mapped_pages), false); -bail: - kfree(tidbuf->psets); - kfree(tidlist); + tinfo->tidcnt = tididx; + tinfo->length = mapped_pages * PAGE_SIZE; + + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), + tidlist, sizeof(tidlist[0]) * tididx)) { + ret = -EFAULT; + goto fail_unprogram; + } + + if (fd->use_mn) + mmu_interval_notifier_remove(&tidbuf->notifier); kfree(tidbuf->pages); + kfree(tidbuf->psets); kfree(tidbuf); - return ret > 0 ? 0 : ret; + kfree(tidlist); + return 0; + +fail_unprogram: + /* unprogram, unmap, and unpin all allocated TIDs */ + tinfo->tidlist = (unsigned long)tidlist; + hfi1_user_exp_rcv_clear(fd, tinfo); + tinfo->tidlist = 0; + pinned = 0; /* nothing left to unpin */ + pageset_count = 0; /* nothing left reserved */ +fail_unreserve: + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count; + spin_unlock(&fd->tid_lock); +fail_unpin: + if (fd->use_mn) + mmu_interval_notifier_remove(&tidbuf->notifier); + if (pinned > 0) + unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); +fail_release_mem: + kfree(tidbuf->pages); + kfree(tidbuf->psets); + kfree(tidbuf); + kfree(tidlist); + return ret; } int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, @@ -521,7 +543,7 @@ mutex_lock(&uctxt->exp_mutex); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { - ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); + ret = unprogram_rcvarray(fd, tidinfo[tididx]); if (ret) { hfi1_cdbg(TID, "Failed to unprogram rcv array %d", ret); @@ -776,8 +798,8 @@ return -EFAULT; } - node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE); - node->mmu.len = npages * PAGE_SIZE; + node->fdata = fd; + mutex_init(&node->invalidate_mutex); node->phys = page_to_phys(pages[0]); node->npages = npages; node->rcventry = rcventry; @@ -786,27 +808,33 @@ node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - if (!fd->handler) - ret = tid_rb_insert(fd, &node->mmu); - else - ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); - - if (ret) { - hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", - node->rcventry, node->mmu.addr, node->phys, ret); - pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, - PCI_DMA_FROMDEVICE); - kfree(node); - return -EFAULT; + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( + &node->notifier, current->mm, + tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, + &tid_mn_ops); + if (ret) + goto out_unmap; } + fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; + hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, - node->mmu.addr, node->phys, phys); + node->notifier.interval_tree.start, node->phys, + phys); return 0; + +out_unmap: + hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", + node->rcventry, node->notifier.interval_tree.start, + node->phys, ret); + pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, + PCI_DMA_FROMDEVICE); + kfree(node); + return -EFAULT; } -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, - struct tid_group **grp) +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -829,33 +857,41 @@ if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - if (grp) - *grp = node->grp; - - if (!fd->handler) - cacheless_tid_rb_remove(fd, node); - else - hfi1_mmu_rb_remove(fd->handler, &node->mmu); + if (fd->use_mn) + mmu_interval_notifier_remove(&node->notifier); + cacheless_tid_rb_remove(fd, node); return 0; +} + +static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + + mutex_lock(&node->invalidate_mutex); + if (node->freed) + goto done; + node->freed = true; + + trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, + node->npages, + node->notifier.interval_tree.start, node->phys, + node->dma_addr); + + /* Make sure device has seen the write before pages are unpinned */ + hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); + + unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); +done: + mutex_unlock(&node->invalidate_mutex); } static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) { struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = uctxt->dd; - trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, - node->npages, node->mmu.addr, node->phys, - node->dma_addr); - - /* - * Make sure device has seen the write before we unpin the - * pages. - */ - hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); - - unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); + __clear_tid_node(fd, node); node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); @@ -893,32 +929,37 @@ if (!node || node->rcventry != rcventry) continue; + if (fd->use_mn) + mmu_interval_notifier_remove( + &node->notifier); cacheless_tid_rb_remove(fd, node); } } } } -/* - * Always return 0 from this function. A non-zero return indicates that the - * remove operation will be called and that memory should be unpinned. - * However, the driver cannot unpin out from under PSM. Instead, retain the - * memory (by returning 0) and inform PSM that the memory is going away. PSM - * will call back later when it has removed the memory from its list. - */ -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct hfi1_filedata *fdata = arg; - struct hfi1_ctxtdata *uctxt = fdata->uctxt; struct tid_rb_node *node = - container_of(mnode, struct tid_rb_node, mmu); + container_of(mni, struct tid_rb_node, notifier); + struct hfi1_filedata *fdata = node->fdata; + struct hfi1_ctxtdata *uctxt = fdata->uctxt; if (node->freed) - return 0; + return true; - trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr, + /* take action only if unmapping */ + if (range->event != MMU_NOTIFY_UNMAP) + return true; + + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, + node->notifier.interval_tree.start, node->rcventry, node->npages, node->dma_addr); - node->freed = true; + + /* clear the hardware rcvarray entry */ + __clear_tid_node(fdata, node); spin_lock(&fdata->invalid_lock); if (fdata->invalid_tid_idx < uctxt->expected_count) { @@ -945,18 +986,24 @@ fdata->invalid_tid_idx++; } spin_unlock(&fdata->invalid_lock); - return 0; + return true; } -static int tid_rb_insert(void *arg, struct mmu_rb_node *node) +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct hfi1_filedata *fdata = arg; - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); - u32 base = fdata->uctxt->expected_base; + struct tid_user_buf *tidbuf = + container_of(mni, struct tid_user_buf, notifier); - fdata->entry_to_rb[tnode->rcventry - base] = tnode; - return 0; + /* take action only if unmapping */ + if (range->event == MMU_NOTIFY_UNMAP) { + mutex_lock(&tidbuf->cover_mutex); + mmu_interval_set_seq(mni, cur_seq); + mutex_unlock(&tidbuf->cover_mutex); + } + + return true; } static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, @@ -966,13 +1013,4 @@ fdata->entry_to_rb[tnode->rcventry - base] = NULL; clear_tid_node(fdata, tnode); -} - -static void tid_rb_remove(void *arg, struct mmu_rb_node *node) -{ - struct hfi1_filedata *fdata = arg; - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); - - cacheless_tid_rb_remove(fdata, tnode); } -- Gitblit v1.6.2