.. | .. |
---|
1 | 1 | /* |
---|
| 2 | + * Copyright(c) 2020 Cornelis Networks, Inc. |
---|
2 | 3 | * Copyright(c) 2015-2018 Intel Corporation. |
---|
3 | 4 | * |
---|
4 | 5 | * This file is provided under a dual BSD/GPLv2 license. When using or |
---|
.. | .. |
---|
59 | 60 | struct tid_user_buf *tbuf, |
---|
60 | 61 | u32 rcventry, struct tid_group *grp, |
---|
61 | 62 | u16 pageidx, unsigned int npages); |
---|
62 | | -static int tid_rb_insert(void *arg, struct mmu_rb_node *node); |
---|
63 | 63 | static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, |
---|
64 | 64 | struct tid_rb_node *tnode); |
---|
65 | | -static void tid_rb_remove(void *arg, struct mmu_rb_node *node); |
---|
66 | | -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); |
---|
| 65 | +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, |
---|
| 66 | + const struct mmu_notifier_range *range, |
---|
| 67 | + unsigned long cur_seq); |
---|
67 | 68 | static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, |
---|
68 | 69 | struct tid_group *grp, |
---|
69 | 70 | unsigned int start, u16 count, |
---|
.. | .. |
---|
73 | 74 | struct tid_group **grp); |
---|
74 | 75 | static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); |
---|
75 | 76 | |
---|
76 | | -static struct mmu_rb_ops tid_rb_ops = { |
---|
77 | | - .insert = tid_rb_insert, |
---|
78 | | - .remove = tid_rb_remove, |
---|
79 | | - .invalidate = tid_rb_invalidate |
---|
| 77 | +static const struct mmu_interval_notifier_ops tid_mn_ops = { |
---|
| 78 | + .invalidate = tid_rb_invalidate, |
---|
80 | 79 | }; |
---|
81 | 80 | |
---|
82 | 81 | /* |
---|
.. | .. |
---|
87 | 86 | int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, |
---|
88 | 87 | struct hfi1_ctxtdata *uctxt) |
---|
89 | 88 | { |
---|
90 | | - struct hfi1_devdata *dd = uctxt->dd; |
---|
91 | 89 | int ret = 0; |
---|
92 | 90 | |
---|
93 | 91 | fd->entry_to_rb = kcalloc(uctxt->expected_count, |
---|
.. | .. |
---|
106 | 104 | fd->entry_to_rb = NULL; |
---|
107 | 105 | return -ENOMEM; |
---|
108 | 106 | } |
---|
109 | | - |
---|
110 | | - /* |
---|
111 | | - * Register MMU notifier callbacks. If the registration |
---|
112 | | - * fails, continue without TID caching for this context. |
---|
113 | | - */ |
---|
114 | | - ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, |
---|
115 | | - dd->pport->hfi1_wq, |
---|
116 | | - &fd->handler); |
---|
117 | | - if (ret) { |
---|
118 | | - dd_dev_info(dd, |
---|
119 | | - "Failed MMU notifier registration %d\n", |
---|
120 | | - ret); |
---|
121 | | - ret = 0; |
---|
122 | | - } |
---|
| 107 | + fd->use_mn = true; |
---|
123 | 108 | } |
---|
124 | 109 | |
---|
125 | 110 | /* |
---|
.. | .. |
---|
136 | 121 | * init. |
---|
137 | 122 | */ |
---|
138 | 123 | spin_lock(&fd->tid_lock); |
---|
139 | | - if (uctxt->subctxt_cnt && fd->handler) { |
---|
| 124 | + if (uctxt->subctxt_cnt && fd->use_mn) { |
---|
140 | 125 | u16 remainder; |
---|
141 | 126 | |
---|
142 | 127 | fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; |
---|
.. | .. |
---|
155 | 140 | { |
---|
156 | 141 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
---|
157 | 142 | |
---|
158 | | - /* |
---|
159 | | - * The notifier would have been removed when the process'es mm |
---|
160 | | - * was freed. |
---|
161 | | - */ |
---|
162 | | - if (fd->handler) { |
---|
163 | | - hfi1_mmu_rb_unregister(fd->handler); |
---|
164 | | - } else { |
---|
165 | | - mutex_lock(&uctxt->exp_mutex); |
---|
166 | | - if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) |
---|
167 | | - unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); |
---|
168 | | - if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) |
---|
169 | | - unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); |
---|
170 | | - mutex_unlock(&uctxt->exp_mutex); |
---|
171 | | - } |
---|
| 143 | + mutex_lock(&uctxt->exp_mutex); |
---|
| 144 | + if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) |
---|
| 145 | + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); |
---|
| 146 | + if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) |
---|
| 147 | + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); |
---|
| 148 | + mutex_unlock(&uctxt->exp_mutex); |
---|
172 | 149 | |
---|
173 | 150 | kfree(fd->invalid_tids); |
---|
174 | 151 | fd->invalid_tids = NULL; |
---|
.. | .. |
---|
197 | 174 | { |
---|
198 | 175 | struct page **pages; |
---|
199 | 176 | struct hfi1_devdata *dd = fd->uctxt->dd; |
---|
| 177 | + struct mm_struct *mm; |
---|
200 | 178 | |
---|
201 | 179 | if (mapped) { |
---|
202 | 180 | pci_unmap_single(dd->pcidev, node->dma_addr, |
---|
203 | | - node->mmu.len, PCI_DMA_FROMDEVICE); |
---|
| 181 | + node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); |
---|
204 | 182 | pages = &node->pages[idx]; |
---|
| 183 | + mm = mm_from_tid_node(node); |
---|
205 | 184 | } else { |
---|
206 | 185 | pages = &tidbuf->pages[idx]; |
---|
| 186 | + mm = current->mm; |
---|
207 | 187 | } |
---|
208 | | - hfi1_release_user_pages(fd->mm, pages, npages, mapped); |
---|
| 188 | + hfi1_release_user_pages(mm, pages, npages, mapped); |
---|
209 | 189 | fd->tid_n_pinned -= npages; |
---|
210 | 190 | } |
---|
211 | 191 | |
---|
.. | .. |
---|
230 | 210 | return -EINVAL; |
---|
231 | 211 | } |
---|
232 | 212 | |
---|
233 | | - /* Verify that access is OK for the user buffer */ |
---|
234 | | - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, |
---|
235 | | - npages * PAGE_SIZE)) { |
---|
236 | | - dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", |
---|
237 | | - (void *)vaddr, npages); |
---|
238 | | - return -EFAULT; |
---|
239 | | - } |
---|
240 | 213 | /* Allocate the array of struct page pointers needed for pinning */ |
---|
241 | 214 | pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); |
---|
242 | 215 | if (!pages) |
---|
.. | .. |
---|
247 | 220 | * pages, accept the amount pinned so far and program only that. |
---|
248 | 221 | * User space knows how to deal with partially programmed buffers. |
---|
249 | 222 | */ |
---|
250 | | - if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { |
---|
| 223 | + if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) { |
---|
251 | 224 | kfree(pages); |
---|
252 | 225 | return -ENOMEM; |
---|
253 | 226 | } |
---|
254 | 227 | |
---|
255 | | - pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); |
---|
| 228 | + pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages); |
---|
256 | 229 | if (pinned <= 0) { |
---|
257 | 230 | kfree(pages); |
---|
258 | 231 | return pinned; |
---|
.. | .. |
---|
776 | 749 | return -EFAULT; |
---|
777 | 750 | } |
---|
778 | 751 | |
---|
779 | | - node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE); |
---|
780 | | - node->mmu.len = npages * PAGE_SIZE; |
---|
| 752 | + node->fdata = fd; |
---|
781 | 753 | node->phys = page_to_phys(pages[0]); |
---|
782 | 754 | node->npages = npages; |
---|
783 | 755 | node->rcventry = rcventry; |
---|
.. | .. |
---|
786 | 758 | node->freed = false; |
---|
787 | 759 | memcpy(node->pages, pages, sizeof(struct page *) * npages); |
---|
788 | 760 | |
---|
789 | | - if (!fd->handler) |
---|
790 | | - ret = tid_rb_insert(fd, &node->mmu); |
---|
791 | | - else |
---|
792 | | - ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); |
---|
793 | | - |
---|
794 | | - if (ret) { |
---|
795 | | - hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", |
---|
796 | | - node->rcventry, node->mmu.addr, node->phys, ret); |
---|
797 | | - pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, |
---|
798 | | - PCI_DMA_FROMDEVICE); |
---|
799 | | - kfree(node); |
---|
800 | | - return -EFAULT; |
---|
| 761 | + if (fd->use_mn) { |
---|
| 762 | + ret = mmu_interval_notifier_insert( |
---|
| 763 | + &node->notifier, current->mm, |
---|
| 764 | + tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, |
---|
| 765 | + &tid_mn_ops); |
---|
| 766 | + if (ret) |
---|
| 767 | + goto out_unmap; |
---|
| 768 | + /* |
---|
| 769 | + * FIXME: This is in the wrong order, the notifier should be |
---|
| 770 | + * established before the pages are pinned by pin_rcv_pages. |
---|
| 771 | + */ |
---|
| 772 | + mmu_interval_read_begin(&node->notifier); |
---|
801 | 773 | } |
---|
| 774 | + fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; |
---|
| 775 | + |
---|
802 | 776 | hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); |
---|
803 | 777 | trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, |
---|
804 | | - node->mmu.addr, node->phys, phys); |
---|
| 778 | + node->notifier.interval_tree.start, node->phys, |
---|
| 779 | + phys); |
---|
805 | 780 | return 0; |
---|
| 781 | + |
---|
| 782 | +out_unmap: |
---|
| 783 | + hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", |
---|
| 784 | + node->rcventry, node->notifier.interval_tree.start, |
---|
| 785 | + node->phys, ret); |
---|
| 786 | + pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, |
---|
| 787 | + PCI_DMA_FROMDEVICE); |
---|
| 788 | + kfree(node); |
---|
| 789 | + return -EFAULT; |
---|
806 | 790 | } |
---|
807 | 791 | |
---|
808 | 792 | static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, |
---|
.. | .. |
---|
832 | 816 | if (grp) |
---|
833 | 817 | *grp = node->grp; |
---|
834 | 818 | |
---|
835 | | - if (!fd->handler) |
---|
836 | | - cacheless_tid_rb_remove(fd, node); |
---|
837 | | - else |
---|
838 | | - hfi1_mmu_rb_remove(fd->handler, &node->mmu); |
---|
| 819 | + if (fd->use_mn) |
---|
| 820 | + mmu_interval_notifier_remove(&node->notifier); |
---|
| 821 | + cacheless_tid_rb_remove(fd, node); |
---|
839 | 822 | |
---|
840 | 823 | return 0; |
---|
841 | 824 | } |
---|
.. | .. |
---|
846 | 829 | struct hfi1_devdata *dd = uctxt->dd; |
---|
847 | 830 | |
---|
848 | 831 | trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, |
---|
849 | | - node->npages, node->mmu.addr, node->phys, |
---|
| 832 | + node->npages, |
---|
| 833 | + node->notifier.interval_tree.start, node->phys, |
---|
850 | 834 | node->dma_addr); |
---|
851 | 835 | |
---|
852 | 836 | /* |
---|
.. | .. |
---|
893 | 877 | if (!node || node->rcventry != rcventry) |
---|
894 | 878 | continue; |
---|
895 | 879 | |
---|
| 880 | + if (fd->use_mn) |
---|
| 881 | + mmu_interval_notifier_remove( |
---|
| 882 | + &node->notifier); |
---|
896 | 883 | cacheless_tid_rb_remove(fd, node); |
---|
897 | 884 | } |
---|
898 | 885 | } |
---|
899 | 886 | } |
---|
900 | 887 | } |
---|
901 | 888 | |
---|
902 | | -/* |
---|
903 | | - * Always return 0 from this function. A non-zero return indicates that the |
---|
904 | | - * remove operation will be called and that memory should be unpinned. |
---|
905 | | - * However, the driver cannot unpin out from under PSM. Instead, retain the |
---|
906 | | - * memory (by returning 0) and inform PSM that the memory is going away. PSM |
---|
907 | | - * will call back later when it has removed the memory from its list. |
---|
908 | | - */ |
---|
909 | | -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) |
---|
| 889 | +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, |
---|
| 890 | + const struct mmu_notifier_range *range, |
---|
| 891 | + unsigned long cur_seq) |
---|
910 | 892 | { |
---|
911 | | - struct hfi1_filedata *fdata = arg; |
---|
912 | | - struct hfi1_ctxtdata *uctxt = fdata->uctxt; |
---|
913 | 893 | struct tid_rb_node *node = |
---|
914 | | - container_of(mnode, struct tid_rb_node, mmu); |
---|
| 894 | + container_of(mni, struct tid_rb_node, notifier); |
---|
| 895 | + struct hfi1_filedata *fdata = node->fdata; |
---|
| 896 | + struct hfi1_ctxtdata *uctxt = fdata->uctxt; |
---|
915 | 897 | |
---|
916 | 898 | if (node->freed) |
---|
917 | | - return 0; |
---|
| 899 | + return true; |
---|
918 | 900 | |
---|
919 | | - trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr, |
---|
| 901 | + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, |
---|
| 902 | + node->notifier.interval_tree.start, |
---|
920 | 903 | node->rcventry, node->npages, node->dma_addr); |
---|
921 | 904 | node->freed = true; |
---|
922 | 905 | |
---|
.. | .. |
---|
945 | 928 | fdata->invalid_tid_idx++; |
---|
946 | 929 | } |
---|
947 | 930 | spin_unlock(&fdata->invalid_lock); |
---|
948 | | - return 0; |
---|
949 | | -} |
---|
950 | | - |
---|
951 | | -static int tid_rb_insert(void *arg, struct mmu_rb_node *node) |
---|
952 | | -{ |
---|
953 | | - struct hfi1_filedata *fdata = arg; |
---|
954 | | - struct tid_rb_node *tnode = |
---|
955 | | - container_of(node, struct tid_rb_node, mmu); |
---|
956 | | - u32 base = fdata->uctxt->expected_base; |
---|
957 | | - |
---|
958 | | - fdata->entry_to_rb[tnode->rcventry - base] = tnode; |
---|
959 | | - return 0; |
---|
| 931 | + return true; |
---|
960 | 932 | } |
---|
961 | 933 | |
---|
962 | 934 | static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, |
---|
.. | .. |
---|
966 | 938 | |
---|
967 | 939 | fdata->entry_to_rb[tnode->rcventry - base] = NULL; |
---|
968 | 940 | clear_tid_node(fdata, tnode); |
---|
969 | | -} |
---|
970 | | - |
---|
971 | | -static void tid_rb_remove(void *arg, struct mmu_rb_node *node) |
---|
972 | | -{ |
---|
973 | | - struct hfi1_filedata *fdata = arg; |
---|
974 | | - struct tid_rb_node *tnode = |
---|
975 | | - container_of(node, struct tid_rb_node, mmu); |
---|
976 | | - |
---|
977 | | - cacheless_tid_rb_remove(fdata, tnode); |
---|
978 | 941 | } |
---|