commit 2f7c68cb55ecb7331f2381deb497c27155f32faf (2024-01-03)

--- a/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/kernel/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -65,17 +65,24 @@
 static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
			    struct tid_group *grp,
			    unsigned int start, u16 count,
			    u32 *tidlist, unsigned int *tididx,
			    unsigned int *pmapped);
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-			      struct tid_group **grp);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+			     struct tid_rb_node *node);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
 static const struct mmu_interval_notifier_ops tid_mn_ops = {
	.invalidate = tid_rb_invalidate,
+};
+
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+	.invalidate = tid_cover_invalidate,
 };
 
 /*
@@ -195,15 +202,10 @@
 static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
 {
	int pinned;
-	unsigned int npages;
+	unsigned int npages = tidbuf->npages;
	unsigned long vaddr = tidbuf->vaddr;
	struct page **pages = NULL;
	struct hfi1_devdata *dd = fd->uctxt->dd;
-
-	/* Get the number of pages the user buffer spans */
-	npages = num_user_pages(vaddr, tidbuf->length);
-	if (!npages)
-		return -EINVAL;
 
	if (npages > fd->uctxt->expected_count) {
		dd_dev_err(dd, "Expected buffer too big\n");
@@ -231,7 +233,6 @@
		return pinned;
	}
	tidbuf->pages = pages;
-	tidbuf->npages = npages;
	fd->tid_n_pinned += pinned;
	return pinned;
 }
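
With this change pin_rcv_pages() consumes a page count that hfi1_user_exp_rcv_setup() computes once, up front, in tidbuf->npages; the count has to exist before the covering MMU notifier is inserted over the buffer, which in turn has to happen before any page is pinned. The arithmetic itself is unchanged. A rough userspace sketch of the same page-span computation follows; span_pages() and the PAGE_* constants are illustrative stand-ins, not the driver's num_user_pages():

    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    /* Count the pages touched by [vaddr, vaddr + len), including
     * partial first and last pages. */
    static unsigned long span_pages(unsigned long vaddr, unsigned long len)
    {
        unsigned long first = vaddr & PAGE_MASK;
        unsigned long last = (vaddr + len - 1) & PAGE_MASK;

        return 1 + ((last - first) >> PAGE_SHIFT);
    }

    int main(void)
    {
        assert(span_pages(0x1fff, 1) == 1);        /* one byte, one page */
        assert(span_pages(0x1fff, 2) == 2);        /* straddles a boundary */
        printf("%lu\n", span_pages(0x800, 8192));  /* prints 3 */
        return 0;
    }

A zero-length buffer would underflow the last-byte calculation above, which is one reason the setup path in the next hunks now rejects tinfo->length == 0 before the count is taken.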
@@ -295,53 +296,66 @@
		tididx = 0, mapped, mapped_pages = 0;
	u32 *tidlist = NULL;
	struct tid_user_buf *tidbuf;
+	unsigned long mmu_seq = 0;
 
	if (!PAGE_ALIGNED(tinfo->vaddr))
+		return -EINVAL;
+	if (tinfo->length == 0)
		return -EINVAL;
 
	tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
	if (!tidbuf)
		return -ENOMEM;
 
+	mutex_init(&tidbuf->cover_mutex);
	tidbuf->vaddr = tinfo->vaddr;
	tidbuf->length = tinfo->length;
+	tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
	tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
				GFP_KERNEL);
	if (!tidbuf->psets) {
-		kfree(tidbuf);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fail_release_mem;
+	}
+
+	if (fd->use_mn) {
+		ret = mmu_interval_notifier_insert(
+			&tidbuf->notifier, current->mm,
+			tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+			&tid_cover_ops);
+		if (ret)
+			goto fail_release_mem;
+		mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
	}
 
	pinned = pin_rcv_pages(fd, tidbuf);
	if (pinned <= 0) {
-		kfree(tidbuf->psets);
-		kfree(tidbuf);
-		return pinned;
+		ret = (pinned < 0) ? pinned : -ENOSPC;
+		goto fail_unpin;
	}
 
	/* Find sets of physically contiguous pages */
	tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
 
-	/*
-	 * We don't need to access this under a lock since tid_used is per
-	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
-	 * and hfi1_user_exp_rcv_setup() at the same time.
-	 */
+	/* Reserve the number of expected tids to be used. */
	spin_lock(&fd->tid_lock);
	if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
		pageset_count = fd->tid_limit - fd->tid_used;
	else
		pageset_count = tidbuf->n_psets;
+	fd->tid_used += pageset_count;
	spin_unlock(&fd->tid_lock);
 
-	if (!pageset_count)
-		goto bail;
+	if (!pageset_count) {
+		ret = -ENOSPC;
+		goto fail_unreserve;
+	}
 
	ngroups = pageset_count / dd->rcv_entries.group_size;
	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
	if (!tidlist) {
		ret = -ENOMEM;
-		goto nomem;
+		goto fail_unreserve;
	}
 
	tididx = 0;
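
The accounting above switches from "add after programming" to reserve-then-trim: the limit check and the increment of fd->tid_used happen together under tid_lock, so concurrent setups can never collectively overshoot tid_limit, and the next hunk hands back whatever part of the reservation goes unused. A minimal userspace sketch of the pattern, with a pthread mutex standing in for the spinlock and all names hypothetical:

    #include <pthread.h>
    #include <stdio.h>

    struct tid_accounting {
        pthread_mutex_t lock;
        unsigned int used, limit;
    };

    /* Reserve up to want entries; returns how many were granted. */
    static unsigned int tid_reserve(struct tid_accounting *a, unsigned int want)
    {
        unsigned int got;

        pthread_mutex_lock(&a->lock);
        got = (a->used + want > a->limit) ? a->limit - a->used : want;
        a->used += got;
        pthread_mutex_unlock(&a->lock);
        return got;
    }

    /* Hand back the reserved entries that were never programmed. */
    static void tid_trim(struct tid_accounting *a, unsigned int reserved,
                         unsigned int programmed)
    {
        pthread_mutex_lock(&a->lock);
        a->used -= reserved - programmed;
        pthread_mutex_unlock(&a->lock);
    }

    int main(void)
    {
        struct tid_accounting a = { PTHREAD_MUTEX_INITIALIZER, 0, 10 };
        unsigned int got = tid_reserve(&a, 8);   /* got == 8 */

        tid_trim(&a, got, 5);                    /* 3 returned */
        printf("used=%u\n", a.used);             /* prints used=5 */
        return 0;
    }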
@@ -437,43 +451,78 @@
	}
 unlock:
	mutex_unlock(&uctxt->exp_mutex);
-nomem:
	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
		  mapped_pages, ret);
-	if (tididx) {
-		spin_lock(&fd->tid_lock);
-		fd->tid_used += tididx;
-		spin_unlock(&fd->tid_lock);
-		tinfo->tidcnt = tididx;
-		tinfo->length = mapped_pages * PAGE_SIZE;
 
-		if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
-				 tidlist, sizeof(tidlist[0]) * tididx)) {
-			/*
-			 * On failure to copy to the user level, we need to undo
-			 * everything done so far so we don't leak resources.
-			 */
-			tinfo->tidlist = (unsigned long)&tidlist;
-			hfi1_user_exp_rcv_clear(fd, tinfo);
-			tinfo->tidlist = 0;
-			ret = -EFAULT;
-			goto bail;
+	/* fail if nothing was programmed, set error if none provided */
+	if (tididx == 0) {
+		if (ret >= 0)
+			ret = -ENOSPC;
+		goto fail_unreserve;
+	}
+
+	/* adjust reserved tid_used to actual count */
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= pageset_count - tididx;
+	spin_unlock(&fd->tid_lock);
+
+	/* unpin all pages not covered by a TID */
+	unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+			false);
+
+	if (fd->use_mn) {
+		/* check for an invalidate during setup */
+		bool fail = false;
+
+		mutex_lock(&tidbuf->cover_mutex);
+		fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+		mutex_unlock(&tidbuf->cover_mutex);
+
+		if (fail) {
+			ret = -EBUSY;
+			goto fail_unprogram;
		}
	}
 
-	/*
-	 * If not everything was mapped (due to insufficient RcvArray entries,
-	 * for example), unpin all unmapped pages so we can pin them nex time.
-	 */
-	if (mapped_pages != pinned)
-		unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
-				(pinned - mapped_pages), false);
-bail:
-	kfree(tidbuf->psets);
-	kfree(tidlist);
+	tinfo->tidcnt = tididx;
+	tinfo->length = mapped_pages * PAGE_SIZE;
+
+	if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+			 tidlist, sizeof(tidlist[0]) * tididx)) {
+		ret = -EFAULT;
+		goto fail_unprogram;
+	}
+
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&tidbuf->notifier);
	kfree(tidbuf->pages);
+	kfree(tidbuf->psets);
	kfree(tidbuf);
-	return ret > 0 ? 0 : ret;
+	kfree(tidlist);
+	return 0;
+
+fail_unprogram:
+	/* unprogram, unmap, and unpin all allocated TIDs */
+	tinfo->tidlist = (unsigned long)tidlist;
+	hfi1_user_exp_rcv_clear(fd, tinfo);
+	tinfo->tidlist = 0;
+	pinned = 0;		/* nothing left to unpin */
+	pageset_count = 0;	/* nothing left reserved */
+fail_unreserve:
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= pageset_count;
+	spin_unlock(&fd->tid_lock);
+fail_unpin:
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&tidbuf->notifier);
+	if (pinned > 0)
+		unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
+	kfree(tidbuf->pages);
+	kfree(tidbuf->psets);
+	kfree(tidbuf);
+	kfree(tidlist);
+	return ret;
 }
 
 int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
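
Two things change in this hunk: errors now unwind through a fail_* ladder in reverse order of acquisition, and the mmu_interval_read_begin()/mmu_interval_read_retry() pair brackets the entire pin-and-program window, so an unmap that races with setup is detected and surfaced as -EBUSY instead of leaving TIDs pointing at stale pages. A self-contained userspace analogue of that sequence handshake follows; the cover_* names are invented, and the kernel primitives additionally interlock with cover_mutex, which this sketch collapses into a single atomic:

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the interval notifier's sequence counter. */
    static atomic_ulong invalidate_seq;

    /* Analogue of mmu_interval_read_begin(): sample the sequence. */
    static unsigned long cover_read_begin(void)
    {
        return atomic_load(&invalidate_seq);
    }

    /* Analogue of the invalidate callback bumping the sequence. */
    static void cover_invalidate(void)
    {
        atomic_fetch_add(&invalidate_seq, 1);
    }

    /* Analogue of mmu_interval_read_retry(): true if we raced. */
    static bool cover_read_retry(unsigned long seq)
    {
        return atomic_load(&invalidate_seq) != seq;
    }

    static int setup(bool simulate_race)
    {
        unsigned long seq = cover_read_begin();

        /* ... pin pages and program TIDs here ... */
        if (simulate_race)
            cover_invalidate();     /* a concurrent unmap arrives */

        if (cover_read_retry(seq))
            return -EBUSY;          /* unwind; caller may retry */
        return 0;
    }

    int main(void)
    {
        printf("clean: %d\n", setup(false));    /* 0 */
        printf("raced: %d\n", setup(true));     /* -EBUSY */
        return 0;
    }

On -EBUSY the real code jumps to fail_unprogram, which reuses hfi1_user_exp_rcv_clear() to unprogram, unmap, and unpin everything that was just set up, leaving the caller free to retry.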
@@ -494,7 +543,7 @@
 
	mutex_lock(&uctxt->exp_mutex);
	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
-		ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
+		ret = unprogram_rcvarray(fd, tidinfo[tididx]);
		if (ret) {
			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
				  ret);
@@ -750,6 +799,7 @@
	}
 
	node->fdata = fd;
+	mutex_init(&node->invalidate_mutex);
	node->phys = page_to_phys(pages[0]);
	node->npages = npages;
	node->rcventry = rcventry;
@@ -765,11 +815,6 @@
					 &tid_mn_ops);
		if (ret)
			goto out_unmap;
-		/*
-		 * FIXME: This is in the wrong order, the notifier should be
-		 * established before the pages are pinned by pin_rcv_pages.
-		 */
-		mmu_interval_read_begin(&node->notifier);
	}
	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
 
@@ -789,8 +834,7 @@
	return -EFAULT;
 }
 
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-			      struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
 {
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
@@ -813,9 +857,6 @@
	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
		return -EBADF;
 
-	if (grp)
-		*grp = node->grp;
-
	if (fd->use_mn)
		mmu_interval_notifier_remove(&node->notifier);
	cacheless_tid_rb_remove(fd, node);
@@ -823,23 +864,34 @@
	return 0;
 }
 
-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
 {
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
+
+	mutex_lock(&node->invalidate_mutex);
+	if (node->freed)
+		goto done;
+	node->freed = true;
 
	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
				 node->npages,
				 node->notifier.interval_tree.start, node->phys,
				 node->dma_addr);
 
-	/*
-	 * Make sure device has seen the write before we unpin the
-	 * pages.
-	 */
+	/* Make sure device has seen the write before pages are unpinned */
	hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
 
	unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+done:
+	mutex_unlock(&node->invalidate_mutex);
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
+	__clear_tid_node(fd, node);
 
	node->grp->used--;
	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
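
clear_tid_node() is split so that the invalidate callback in the next hunk can flush the hardware entry and unpin the pages immediately, while the group bookkeeping stays with the explicit-free path. Since both paths can reach the same node, __clear_tid_node() is made idempotent by the per-node invalidate_mutex and the freed flag. A small userspace sketch of that guard, using pthread primitives and hypothetical names:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* A node that may be torn down from two paths, as with
     * __clear_tid_node(): explicit free and the invalidate callback. */
    struct node {
        pthread_mutex_t invalidate_mutex;
        bool freed;
    };

    static void node_clear(struct node *n)
    {
        pthread_mutex_lock(&n->invalidate_mutex);
        if (n->freed)
            goto done;      /* the other path already did the work */
        n->freed = true;

        /* ... flush the device entry, then unpin the pages ... */
        puts("teardown ran");
    done:
        pthread_mutex_unlock(&n->invalidate_mutex);
    }

    int main(void)
    {
        struct node n = { PTHREAD_MUTEX_INITIALIZER, false };

        node_clear(&n);     /* performs the teardown */
        node_clear(&n);     /* safe no-op the second time */
        return 0;
    }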
@@ -898,10 +950,16 @@
	if (node->freed)
		return true;
 
+	/* take action only if unmapping */
+	if (range->event != MMU_NOTIFY_UNMAP)
+		return true;
+
	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
				 node->notifier.interval_tree.start,
				 node->rcventry, node->npages, node->dma_addr);
-	node->freed = true;
+
+	/* clear the hardware rcvarray entry */
+	__clear_tid_node(fdata, node);
 
	spin_lock(&fdata->invalid_lock);
	if (fdata->invalid_tid_idx < uctxt->expected_count) {
@@ -931,6 +989,23 @@
	return true;
 }
 
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq)
+{
+	struct tid_user_buf *tidbuf =
+		container_of(mni, struct tid_user_buf, notifier);
+
+	/* take action only if unmapping */
+	if (range->event == MMU_NOTIFY_UNMAP) {
+		mutex_lock(&tidbuf->cover_mutex);
+		mmu_interval_set_seq(mni, cur_seq);
+		mutex_unlock(&tidbuf->cover_mutex);
+	}
+
+	return true;
+}
+
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
				    struct tid_rb_node *tnode)
 {