```diff
@@ ... @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
```
```diff
@@ ... @@
                               struct tid_user_buf *tbuf,
                               u32 rcventry, struct tid_group *grp,
                               u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
                                     struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+                              const struct mmu_notifier_range *range,
+                              unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
                             struct tid_group *grp,
                             unsigned int start, u16 count,
```
```diff
@@ ... @@
                             struct tid_group **grp);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-        .insert = tid_rb_insert,
-        .remove = tid_rb_remove,
-        .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+        .invalidate = tid_rb_invalidate,
 };
 
 /*
```
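The two hunks above carry the core of the conversion: the driver-private `mmu_rb_ops` table, with its `.insert` and `.remove` hooks, is replaced by a `const struct mmu_interval_notifier_ops` that only supplies an invalidate callback, since the MM core now owns the interval tree and its bookkeeping. A minimal sketch of that shape, assuming hypothetical names (`struct my_buf`, `my_invalidate`, `my_mn_ops`) rather than the hfi1 types:

```c
#include <linux/kernel.h>
#include <linux/mmu_notifier.h>
#include <linux/spinlock.h>

/* Hypothetical per-buffer object: one interval notifier per registered VA range. */
struct my_buf {
        struct mmu_interval_notifier notifier;
        spinlock_t lock;        /* also taken around mmu_interval_read_retry() */
        struct page **pages;
        unsigned long npages;
        bool stale;
};

static bool my_invalidate(struct mmu_interval_notifier *mni,
                          const struct mmu_notifier_range *range,
                          unsigned long cur_seq)
{
        struct my_buf *buf = container_of(mni, struct my_buf, notifier);

        /*
         * Nothing here sleeps, so the callback can always proceed. A driver
         * that had to sleep would first check
         * mmu_notifier_range_blockable(range) and return false only when
         * blocking is not allowed.
         */
        spin_lock(&buf->lock);
        mmu_interval_set_seq(mni, cur_seq);     /* makes a later read_retry() fail */
        buf->stale = true;                      /* mark the mapping for teardown */
        spin_unlock(&buf->lock);
        return true;
}

static const struct mmu_interval_notifier_ops my_mn_ops = {
        .invalidate = my_invalidate,
};
```

hfi1's own `tid_rb_invalidate()`, shown further down, has the same signature but does not unpin in the callback; it marks the node freed and queues the TID in `invalid_tids` so userspace (PSM) can release it later, which is why it simply returns true.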
```diff
@@ ... @@
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
                            struct hfi1_ctxtdata *uctxt)
 {
-        struct hfi1_devdata *dd = uctxt->dd;
         int ret = 0;
 
         fd->entry_to_rb = kcalloc(uctxt->expected_count,
```
```diff
@@ ... @@
                         fd->entry_to_rb = NULL;
                         return -ENOMEM;
                 }
-
-                /*
-                 * Register MMU notifier callbacks. If the registration
-                 * fails, continue without TID caching for this context.
-                 */
-                ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
-                                           dd->pport->hfi1_wq,
-                                           &fd->handler);
-                if (ret) {
-                        dd_dev_info(dd,
-                                    "Failed MMU notifier registration %d\n",
-                                    ret);
-                        ret = 0;
-                }
+                fd->use_mn = true;
         }
 
         /*
```
```diff
@@ ... @@
          * init.
          */
         spin_lock(&fd->tid_lock);
-        if (uctxt->subctxt_cnt && fd->handler) {
+        if (uctxt->subctxt_cnt && fd->use_mn) {
                 u16 remainder;
 
                 fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
```
```diff
@@ ... @@
 {
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
-        /*
-         * The notifier would have been removed when the process'es mm
-         * was freed.
-         */
-        if (fd->handler) {
-                hfi1_mmu_rb_unregister(fd->handler);
-        } else {
-                mutex_lock(&uctxt->exp_mutex);
-                if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
-                        unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
-                if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
-                        unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-                mutex_unlock(&uctxt->exp_mutex);
-        }
+        mutex_lock(&uctxt->exp_mutex);
+        if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+                unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+        if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+                unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+        mutex_unlock(&uctxt->exp_mutex);
 
         kfree(fd->invalid_tids);
         fd->invalid_tids = NULL;
```
```diff
@@ ... @@
 {
         struct page **pages;
         struct hfi1_devdata *dd = fd->uctxt->dd;
+        struct mm_struct *mm;
 
         if (mapped) {
                 pci_unmap_single(dd->pcidev, node->dma_addr,
-                                 node->mmu.len, PCI_DMA_FROMDEVICE);
+                                 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
                 pages = &node->pages[idx];
+                mm = mm_from_tid_node(node);
         } else {
                 pages = &tidbuf->pages[idx];
+                mm = current->mm;
         }
-        hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+        hfi1_release_user_pages(mm, pages, npages, mapped);
         fd->tid_n_pinned -= npages;
 }
 
```
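With the per-context `fd->mm` usage gone, the unpin path recovers the owning mm from the node itself via `mm_from_tid_node()`, whose definition sits outside this hunk. Because `mmu_interval_notifier_insert()` records both the mm and the VA range in the notifier, a helper of roughly this shape is all that is needed; this is an illustrative guess, not necessarily the driver's exact code:

```c
#include <linux/mmu_notifier.h>

/*
 * Illustrative only: mm_from_tid_node() is defined outside this hunk. The
 * mmu_interval_notifier embedded in the node remembers the mm_struct it was
 * registered against, so the old node->mmu.addr/len bookkeeping is not needed
 * to find it.
 */
static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
{
        return node->notifier.mm;
}
```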
```diff
@@ ... @@
                 return -EINVAL;
         }
 
-        /* Verify that access is OK for the user buffer */
-        if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-                       npages * PAGE_SIZE)) {
-                dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-                           (void *)vaddr, npages);
-                return -EFAULT;
-        }
         /* Allocate the array of struct page pointers needed for pinning */
         pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
         if (!pages)
```
```diff
@@ ... @@
          * pages, accept the amount pinned so far and program only that.
          * User space knows how to deal with partially programmed buffers.
          */
-        if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+        if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
                 kfree(pages);
                 return -ENOMEM;
         }
 
-        pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+        pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
         if (pinned <= 0) {
                 kfree(pages);
                 return pinned;
```
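The two pin_rcv_pages() hunks above make two related changes: the explicit `access_ok()` pre-check is dropped, and pinning switches from `fd->mm` to `current->mm`. Pinning only ever runs in the ioctl caller's own process context, and the get/pin_user_pages family validates the user address range itself (returning `-EFAULT` or a short count for a bad range), so the pre-check added nothing. `hfi1_acquire_user_pages()` is a driver wrapper whose body is not shown here; a minimal stand-in on top of the core API might look like this (`example_pin_user_buf` is a hypothetical name):

```c
#include <linux/mm.h>

/*
 * Hypothetical stand-in for a pinning helper. pin_user_pages_fast() works on
 * current->mm and validates the range itself, so no access_ok() pre-check is
 * required. It returns the number of pages pinned, which may be fewer than
 * requested, or a negative errno.
 */
static int example_pin_user_buf(unsigned long vaddr, unsigned int npages,
                                struct page **pages)
{
        return pin_user_pages_fast(vaddr, npages,
                                   FOLL_WRITE | FOLL_LONGTERM, pages);
}
```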
```diff
@@ ... @@
                 return -EFAULT;
         }
 
-        node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
-        node->mmu.len = npages * PAGE_SIZE;
+        node->fdata = fd;
         node->phys = page_to_phys(pages[0]);
         node->npages = npages;
         node->rcventry = rcventry;
```
```diff
@@ ... @@
         node->freed = false;
         memcpy(node->pages, pages, sizeof(struct page *) * npages);
 
-        if (!fd->handler)
-                ret = tid_rb_insert(fd, &node->mmu);
-        else
-                ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
-        if (ret) {
-                hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
-                          node->rcventry, node->mmu.addr, node->phys, ret);
-                pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-                                 PCI_DMA_FROMDEVICE);
-                kfree(node);
-                return -EFAULT;
+        if (fd->use_mn) {
+                ret = mmu_interval_notifier_insert(
+                        &node->notifier, current->mm,
+                        tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+                        &tid_mn_ops);
+                if (ret)
+                        goto out_unmap;
+                /*
+                 * FIXME: This is in the wrong order, the notifier should be
+                 * established before the pages are pinned by pin_rcv_pages.
+                 */
+                mmu_interval_read_begin(&node->notifier);
         }
+        fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
         hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
         trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
-                               node->mmu.addr, node->phys, phys);
+                               node->notifier.interval_tree.start, node->phys,
+                               phys);
         return 0;
+
+out_unmap:
+        hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+                  node->rcventry, node->notifier.interval_tree.start,
+                  node->phys, ret);
+        pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+                         PCI_DMA_FROMDEVICE);
+        kfree(node);
+        return -EFAULT;
 }
 
 static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
```
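The FIXME in the hunk above names the canonical ordering for interval notifiers: register the notifier first, then pin or fault the pages, and use the begin/retry sequence count to catch an invalidation that raced with the setup. A sketch of that ordering, reusing the hypothetical `struct my_buf` and `my_mn_ops` from earlier (`my_pin_pages`, `my_unpin_pages` and `my_program_hw` are placeholders, and the lock is the one the invalidate callback takes):

```c
/*
 * Ordering sketch for the FIXME above, assuming the hypothetical my_buf and
 * my_mn_ops defined earlier. my_pin_pages(), my_unpin_pages() and
 * my_program_hw() are placeholders, not hfi1 functions.
 */
static int my_setup_mapping(struct my_buf *buf, struct mm_struct *mm,
                            unsigned long vaddr, unsigned long len)
{
        unsigned long seq;
        int ret;

        /* 1. Make the range visible to the notifier before touching pages. */
        ret = mmu_interval_notifier_insert(&buf->notifier, mm, vaddr, len,
                                           &my_mn_ops);
        if (ret)
                return ret;

again:
        /* 2. Sample the sequence count, then pin (may sleep and fault). */
        seq = mmu_interval_read_begin(&buf->notifier);
        ret = my_pin_pages(buf);
        if (ret)
                goto out_remove;

        /* 3. Publish under the same lock the invalidate callback takes. */
        spin_lock(&buf->lock);
        if (mmu_interval_read_retry(&buf->notifier, seq)) {
                /* Invalidated while pinning: drop the pages and start over. */
                spin_unlock(&buf->lock);
                my_unpin_pages(buf);
                goto again;
        }
        my_program_hw(buf);
        spin_unlock(&buf->lock);
        return 0;

out_remove:
        mmu_interval_notifier_remove(&buf->notifier);
        return ret;
}
```

With this ordering, an invalidation that lands between `mmu_interval_read_begin()` and `mmu_interval_read_retry()` only forces a re-pin, so the window the FIXME describes (pages pinned before the notifier exists) goes away.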
```diff
@@ ... @@
         if (grp)
                 *grp = node->grp;
 
-        if (!fd->handler)
-                cacheless_tid_rb_remove(fd, node);
-        else
-                hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+        if (fd->use_mn)
+                mmu_interval_notifier_remove(&node->notifier);
+        cacheless_tid_rb_remove(fd, node);
 
         return 0;
 }
```
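unprogram_rcvarray() now removes the interval notifier (when one was registered) before `cacheless_tid_rb_remove()` unpins and frees the node. Continuing the hypothetical sketch, the teardown side looks like this; the important property is that `mmu_interval_notifier_remove()` waits out any invalidate callback still running on the range:

```c
/*
 * Teardown sketch for the hypothetical my_buf above. my_teardown_mapping()
 * and my_unpin_pages() are illustrative names, not hfi1 functions.
 */
static void my_teardown_mapping(struct my_buf *buf)
{
        /*
         * Sleeps until any invalidate callback still running against this
         * range has returned, so it must never be called from the
         * invalidate path itself.
         */
        mmu_interval_notifier_remove(&buf->notifier);

        /* Only now is it safe to unpin and free the backing pages. */
        my_unpin_pages(buf);
}
```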
```diff
@@ ... @@
         struct hfi1_devdata *dd = uctxt->dd;
 
         trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
-                                 node->npages, node->mmu.addr, node->phys,
+                                 node->npages,
+                                 node->notifier.interval_tree.start, node->phys,
                                  node->dma_addr);
 
         /*
```
```diff
@@ ... @@
                                 if (!node || node->rcventry != rcventry)
                                         continue;
 
+                                if (fd->use_mn)
+                                        mmu_interval_notifier_remove(
+                                                &node->notifier);
                                 cacheless_tid_rb_remove(fd, node);
                         }
                 }
         }
 }
 
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+                              const struct mmu_notifier_range *range,
+                              unsigned long cur_seq)
 {
-        struct hfi1_filedata *fdata = arg;
-        struct hfi1_ctxtdata *uctxt = fdata->uctxt;
         struct tid_rb_node *node =
-                container_of(mnode, struct tid_rb_node, mmu);
+                container_of(mni, struct tid_rb_node, notifier);
+        struct hfi1_filedata *fdata = node->fdata;
+        struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 
         if (node->freed)
-                return 0;
+                return true;
 
-        trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+        trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+                                 node->notifier.interval_tree.start,
                                  node->rcventry, node->npages, node->dma_addr);
         node->freed = true;
 
```
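Throughout these hunks the registered start address is read back from `node->notifier.interval_tree.start` (in the trace calls and the error path), because the interval notifier now stores the range that `node->mmu.addr`/`len` used to carry. The callback is also handed the span being invalidated in its `range` argument; hfi1 treats any overlap as invalidating the whole TID, but a driver that supported partial teardown could clamp to the overlap, roughly like this (`my_partial_invalidate` and `my_unmap_range` are hypothetical, not hfi1 code):

```c
#include <linux/kernel.h>
#include <linux/mmu_notifier.h>

/*
 * Hypothetical partial-invalidation callback: clamp the invalidated span to
 * the registered interval. interval_tree.last is inclusive, range->end is
 * exclusive. my_unmap_range() is a placeholder for the device-side teardown.
 */
static bool my_partial_invalidate(struct mmu_interval_notifier *mni,
                                  const struct mmu_notifier_range *range,
                                  unsigned long cur_seq)
{
        unsigned long first = max(mni->interval_tree.start, range->start);
        unsigned long last = min(mni->interval_tree.last, range->end - 1);

        my_unmap_range(first, last);    /* tear down just the affected pages */
        return true;
}
```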
```diff
@@ ... @@
                 fdata->invalid_tid_idx++;
         }
         spin_unlock(&fdata->invalid_lock);
-        return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
-        struct hfi1_filedata *fdata = arg;
-        struct tid_rb_node *tnode =
-                container_of(node, struct tid_rb_node, mmu);
-        u32 base = fdata->uctxt->expected_base;
-
-        fdata->entry_to_rb[tnode->rcventry - base] = tnode;
-        return 0;
+        return true;
 }
 
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
```
```diff
@@ ... @@
 
         fdata->entry_to_rb[tnode->rcventry - base] = NULL;
         clear_tid_node(fdata, tnode);
-}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
-        struct hfi1_filedata *fdata = arg;
-        struct tid_rb_node *tnode =
-                container_of(node, struct tid_rb_node, mmu);
-
-        cacheless_tid_rb_remove(fdata, tnode);
 }
```