2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
@@ -59,11 +60,11 @@
 			      struct tid_user_buf *tbuf,
 			      u32 rcventry, struct tid_group *grp,
 			      u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 				    struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
 			    struct tid_group *grp,
 			    unsigned int start, u16 count,
@@ -73,10 +74,8 @@
 			    struct tid_group **grp);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-	.insert = tid_rb_insert,
-	.remove = tid_rb_remove,
-	.invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+	.invalidate = tid_rb_invalidate,
 };
 
 /*
@@ -87,7 +86,6 @@
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 			   struct hfi1_ctxtdata *uctxt)
 {
-	struct hfi1_devdata *dd = uctxt->dd;
 	int ret = 0;
 
 	fd->entry_to_rb = kcalloc(uctxt->expected_count,
@@ -106,20 +104,7 @@
 			fd->entry_to_rb = NULL;
 			return -ENOMEM;
 		}
-
-		/*
-		 * Register MMU notifier callbacks. If the registration
-		 * fails, continue without TID caching for this context.
-		 */
-		ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
-					   dd->pport->hfi1_wq,
-					   &fd->handler);
-		if (ret) {
-			dd_dev_info(dd,
-				    "Failed MMU notifier registration %d\n",
-				    ret);
-			ret = 0;
-		}
+		fd->use_mn = true;
 	}
 
 	/*
@@ -136,7 +121,7 @@
 	 * init.
 	 */
 	spin_lock(&fd->tid_lock);
-	if (uctxt->subctxt_cnt && fd->handler) {
+	if (uctxt->subctxt_cnt && fd->use_mn) {
 		u16 remainder;
 
 		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -155,20 +140,12 @@
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
-	/*
-	 * The notifier would have been removed when the process'es mm
-	 * was freed.
-	 */
-	if (fd->handler) {
-		hfi1_mmu_rb_unregister(fd->handler);
-	} else {
-		mutex_lock(&uctxt->exp_mutex);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-		mutex_unlock(&uctxt->exp_mutex);
-	}
+	mutex_lock(&uctxt->exp_mutex);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+	mutex_unlock(&uctxt->exp_mutex);
 
 	kfree(fd->invalid_tids);
 	fd->invalid_tids = NULL;
@@ -197,15 +174,18 @@
 {
 	struct page **pages;
 	struct hfi1_devdata *dd = fd->uctxt->dd;
+	struct mm_struct *mm;
 
 	if (mapped) {
 		pci_unmap_single(dd->pcidev, node->dma_addr,
-				 node->mmu.len, PCI_DMA_FROMDEVICE);
+				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
 		pages = &node->pages[idx];
+		mm = mm_from_tid_node(node);
 	} else {
 		pages = &tidbuf->pages[idx];
+		mm = current->mm;
 	}
-	hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+	hfi1_release_user_pages(mm, pages, npages, mapped);
 	fd->tid_n_pinned -= npages;
 }
 
@@ -230,13 +210,6 @@
 		return -EINVAL;
 	}
 
-	/* Verify that access is OK for the user buffer */
-	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-		       npages * PAGE_SIZE)) {
-		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-			   (void *)vaddr, npages);
-		return -EFAULT;
-	}
 	/* Allocate the array of struct page pointers needed for pinning */
 	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 	if (!pages)
@@ -247,12 +220,12 @@
 	 * pages, accept the amount pinned so far and program only that.
 	 * User space knows how to deal with partially programmed buffers.
 	 */
-	if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+	if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
 		kfree(pages);
 		return -ENOMEM;
 	}
 
-	pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+	pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
 	if (pinned <= 0) {
 		kfree(pages);
 		return pinned;
@@ -776,8 +749,7 @@
 		return -EFAULT;
 	}
 
-	node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
-	node->mmu.len = npages * PAGE_SIZE;
+	node->fdata = fd;
 	node->phys = page_to_phys(pages[0]);
 	node->npages = npages;
 	node->rcventry = rcventry;
@@ -786,23 +758,35 @@
 	node->freed = false;
 	memcpy(node->pages, pages, sizeof(struct page *) * npages);
 
-	if (!fd->handler)
-		ret = tid_rb_insert(fd, &node->mmu);
-	else
-		ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
-	if (ret) {
-		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
-			  node->rcventry, node->mmu.addr, node->phys, ret);
-		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-				 PCI_DMA_FROMDEVICE);
-		kfree(node);
-		return -EFAULT;
+	if (fd->use_mn) {
+		ret = mmu_interval_notifier_insert(
+			&node->notifier, current->mm,
+			tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+			&tid_mn_ops);
+		if (ret)
+			goto out_unmap;
+		/*
+		 * FIXME: This is in the wrong order, the notifier should be
+		 * established before the pages are pinned by pin_rcv_pages.
+		 */
+		mmu_interval_read_begin(&node->notifier);
 	}
+	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
 	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
 	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
-			       node->mmu.addr, node->phys, phys);
+			       node->notifier.interval_tree.start, node->phys,
+			       phys);
 	return 0;
+
+out_unmap:
+	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+		  node->rcventry, node->notifier.interval_tree.start,
+		  node->phys, ret);
+	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+			 PCI_DMA_FROMDEVICE);
+	kfree(node);
+	return -EFAULT;
 }
 
 static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
@@ -832,10 +816,9 @@
 	if (grp)
 		*grp = node->grp;
 
-	if (!fd->handler)
-		cacheless_tid_rb_remove(fd, node);
-	else
-		hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&node->notifier);
+	cacheless_tid_rb_remove(fd, node);
 
 	return 0;
 }
@@ -846,7 +829,8 @@
 	struct hfi1_devdata *dd = uctxt->dd;
 
 	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
-				 node->npages, node->mmu.addr, node->phys,
+				 node->npages,
+				 node->notifier.interval_tree.start, node->phys,
 				 node->dma_addr);
 
 	/*
@@ -893,30 +877,29 @@
 				if (!node || node->rcventry != rcventry)
 					continue;
 
+				if (fd->use_mn)
+					mmu_interval_notifier_remove(
+						&node->notifier);
 				cacheless_tid_rb_remove(fd, node);
 			}
 		}
 	}
 }
 
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq)
 {
-	struct hfi1_filedata *fdata = arg;
-	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 	struct tid_rb_node *node =
-		container_of(mnode, struct tid_rb_node, mmu);
+		container_of(mni, struct tid_rb_node, notifier);
+	struct hfi1_filedata *fdata = node->fdata;
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 
 	if (node->freed)
-		return 0;
+		return true;
 
-	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+				 node->notifier.interval_tree.start,
 				 node->rcventry, node->npages, node->dma_addr);
 	node->freed = true;
 
@@ -945,18 +928,7 @@
 		fdata->invalid_tid_idx++;
 	}
 	spin_unlock(&fdata->invalid_lock);
-	return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-	u32 base = fdata->uctxt->expected_base;
-
-	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
-	return 0;
+	return true;
 }
 
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -966,13 +938,4 @@
 
 	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
 	clear_tid_node(fdata, tnode);
-}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-
-	cacheless_tid_rb_remove(fdata, tnode);
 }
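
Note: the conversion above follows the generic pattern of the mmu_interval_notifier API (merged in mainline v5.5 and carried in this backport): embed a struct mmu_interval_notifier in the per-range object, register it against the owning mm with mmu_interval_notifier_insert(), report invalidations from a bool-returning .invalidate callback, and drop the registration with mmu_interval_notifier_remove(). The sketch below only illustrates that pattern as the patch uses it; my_node, my_mn_ops, my_node_track and my_node_untrack are hypothetical names, not hfi1 code.

/*
 * Minimal sketch of the mmu_interval_notifier usage pattern adopted by the
 * patch.  Assumes a kernel that provides the API; all my_* names are
 * illustrative only.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>

struct my_node {
	struct mmu_interval_notifier notifier;	/* embedded, like tid_rb_node */
	bool freed;				/* set once the range is invalidated */
};

/* Called when the covered VA range changes; only mark the node stale. */
static bool my_node_invalidate(struct mmu_interval_notifier *mni,
			       const struct mmu_notifier_range *range,
			       unsigned long cur_seq)
{
	struct my_node *node = container_of(mni, struct my_node, notifier);

	node->freed = true;	/* real teardown is deferred to the driver */
	return true;
}

static const struct mmu_interval_notifier_ops my_mn_ops = {
	.invalidate = my_node_invalidate,
};

/* Start tracking [start, start + len) of the current process. */
static struct my_node *my_node_track(unsigned long start, unsigned long len)
{
	struct my_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
	int ret;

	if (!node)
		return ERR_PTR(-ENOMEM);
	ret = mmu_interval_notifier_insert(&node->notifier, current->mm,
					   start, len, &my_mn_ops);
	if (ret) {
		kfree(node);
		return ERR_PTR(ret);
	}
	return node;
}

/* Stop tracking; waits for any running invalidate callback to finish. */
static void my_node_untrack(struct my_node *node)
{
	mmu_interval_notifier_remove(&node->notifier);
	kfree(node);
}

As in the patch, the invalidate callback only flags the node and returns true; the pinned pages are released later on the driver's own teardown path, since they cannot be unpinned out from under user space.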