.. | .. |
---|
65 | 65 | static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, |
---|
66 | 66 | const struct mmu_notifier_range *range, |
---|
67 | 67 | unsigned long cur_seq); |
---|
| 68 | +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, |
---|
| 69 | + const struct mmu_notifier_range *range, |
---|
| 70 | + unsigned long cur_seq); |
---|
68 | 71 | static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, |
---|
69 | 72 | struct tid_group *grp, |
---|
70 | 73 | unsigned int start, u16 count, |
---|
71 | 74 | u32 *tidlist, unsigned int *tididx, |
---|
72 | 75 | unsigned int *pmapped); |
---|
73 | | -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, |
---|
74 | | - struct tid_group **grp); |
---|
| 76 | +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo); |
---|
| 77 | +static void __clear_tid_node(struct hfi1_filedata *fd, |
---|
| 78 | + struct tid_rb_node *node); |
---|
75 | 79 | static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); |
---|
76 | 80 | |
---|
77 | 81 | static const struct mmu_interval_notifier_ops tid_mn_ops = { |
---|
78 | 82 | .invalidate = tid_rb_invalidate, |
---|
| 83 | +}; |
---|
| 84 | +static const struct mmu_interval_notifier_ops tid_cover_ops = { |
---|
| 85 | + .invalidate = tid_cover_invalidate, /* callback for the tid_user_buf notifier inserted before the pages are pinned */ |
---|
79 | 86 | }; |
---|
80 | 87 | |
---|
81 | 88 | /* |
---|
.. | .. |
---|
195 | 202 | static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) |
---|
196 | 203 | { |
---|
197 | 204 | int pinned; |
---|
198 | | - unsigned int npages; |
---|
| 205 | + unsigned int npages = tidbuf->npages; |
---|
199 | 206 | unsigned long vaddr = tidbuf->vaddr; |
---|
200 | 207 | struct page **pages = NULL; |
---|
201 | 208 | struct hfi1_devdata *dd = fd->uctxt->dd; |
---|
202 | | - |
---|
203 | | - /* Get the number of pages the user buffer spans */ |
---|
204 | | - npages = num_user_pages(vaddr, tidbuf->length); |
---|
205 | | - if (!npages) |
---|
206 | | - return -EINVAL; |
---|
207 | 209 | |
---|
208 | 210 | if (npages > fd->uctxt->expected_count) { |
---|
209 | 211 | dd_dev_err(dd, "Expected buffer too big\n"); |
---|
.. | .. |
---|
231 | 233 | return pinned; |
---|
232 | 234 | } |
---|
233 | 235 | tidbuf->pages = pages; |
---|
234 | | - tidbuf->npages = npages; |
---|
235 | 236 | fd->tid_n_pinned += pinned; |
---|
236 | 237 | return pinned; |
---|
237 | 238 | } |
---|
.. | .. |
---|
295 | 296 | tididx = 0, mapped, mapped_pages = 0; |
---|
296 | 297 | u32 *tidlist = NULL; |
---|
297 | 298 | struct tid_user_buf *tidbuf; |
---|
| 299 | + unsigned long mmu_seq = 0; |
---|
298 | 300 | |
---|
299 | 301 | if (!PAGE_ALIGNED(tinfo->vaddr)) |
---|
| 302 | + return -EINVAL; |
---|
| 303 | + if (tinfo->length == 0) |
---|
300 | 304 | return -EINVAL; |
---|
301 | 305 | |
---|
302 | 306 | tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); |
---|
303 | 307 | if (!tidbuf) |
---|
304 | 308 | return -ENOMEM; |
---|
305 | 309 | |
---|
| 310 | + mutex_init(&tidbuf->cover_mutex); |
---|
306 | 311 | tidbuf->vaddr = tinfo->vaddr; |
---|
307 | 312 | tidbuf->length = tinfo->length; |
---|
| 313 | + tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length); |
---|
308 | 314 | tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), |
---|
309 | 315 | GFP_KERNEL); |
---|
310 | 316 | if (!tidbuf->psets) { |
---|
311 | | - kfree(tidbuf); |
---|
312 | | - return -ENOMEM; |
---|
| 317 | + ret = -ENOMEM; |
---|
| 318 | + goto fail_release_mem; |
---|
| 319 | + } |
---|
| 320 | + |
---|
| 321 | + if (fd->use_mn) { |
---|
| 322 | + ret = mmu_interval_notifier_insert( |
---|
| 323 | + &tidbuf->notifier, current->mm, |
---|
| 324 | + tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, |
---|
| 325 | + &tid_cover_ops); |
---|
| 326 | + if (ret) |
---|
| 327 | + goto fail_release_mem; |
---|
| 328 | + mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); |
---|
313 | 329 | } |
---|
314 | 330 | |
---|
315 | 331 | pinned = pin_rcv_pages(fd, tidbuf); |
---|
316 | 332 | if (pinned <= 0) { |
---|
317 | | - kfree(tidbuf->psets); |
---|
318 | | - kfree(tidbuf); |
---|
319 | | - return pinned; |
---|
| 333 | + ret = (pinned < 0) ? pinned : -ENOSPC; |
---|
| 334 | + goto fail_unpin; |
---|
320 | 335 | } |
---|
321 | 336 | |
---|
322 | 337 | /* Find sets of physically contiguous pages */ |
---|
323 | 338 | tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); |
---|
324 | 339 | |
---|
325 | | - /* |
---|
326 | | - * We don't need to access this under a lock since tid_used is per |
---|
327 | | - * process and the same process cannot be in hfi1_user_exp_rcv_clear() |
---|
328 | | - * and hfi1_user_exp_rcv_setup() at the same time. |
---|
329 | | - */ |
---|
| 340 | + /* Reserve the number of expected tids to be used. */ |
---|
330 | 341 | spin_lock(&fd->tid_lock); |
---|
331 | 342 | if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) |
---|
332 | 343 | pageset_count = fd->tid_limit - fd->tid_used; |
---|
333 | 344 | else |
---|
334 | 345 | pageset_count = tidbuf->n_psets; |
---|
| 346 | + fd->tid_used += pageset_count; |
---|
335 | 347 | spin_unlock(&fd->tid_lock); |
---|
336 | 348 | |
---|
337 | | - if (!pageset_count) |
---|
338 | | - goto bail; |
---|
| 349 | + if (!pageset_count) { |
---|
| 350 | + ret = -ENOSPC; |
---|
| 351 | + goto fail_unreserve; |
---|
| 352 | + } |
---|
339 | 353 | |
---|
340 | 354 | ngroups = pageset_count / dd->rcv_entries.group_size; |
---|
341 | 355 | tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); |
---|
342 | 356 | if (!tidlist) { |
---|
343 | 357 | ret = -ENOMEM; |
---|
344 | | - goto nomem; |
---|
| 358 | + goto fail_unreserve; |
---|
345 | 359 | } |
---|
346 | 360 | |
---|
347 | 361 | tididx = 0; |
---|
.. | .. |
---|
437 | 451 | } |
---|
438 | 452 | unlock: |
---|
439 | 453 | mutex_unlock(&uctxt->exp_mutex); |
---|
440 | | -nomem: |
---|
441 | 454 | hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, |
---|
442 | 455 | mapped_pages, ret); |
---|
443 | | - if (tididx) { |
---|
444 | | - spin_lock(&fd->tid_lock); |
---|
445 | | - fd->tid_used += tididx; |
---|
446 | | - spin_unlock(&fd->tid_lock); |
---|
447 | | - tinfo->tidcnt = tididx; |
---|
448 | | - tinfo->length = mapped_pages * PAGE_SIZE; |
---|
449 | 456 | |
---|
450 | | - if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), |
---|
451 | | - tidlist, sizeof(tidlist[0]) * tididx)) { |
---|
452 | | - /* |
---|
453 | | - * On failure to copy to the user level, we need to undo |
---|
454 | | - * everything done so far so we don't leak resources. |
---|
455 | | - */ |
---|
456 | | - tinfo->tidlist = (unsigned long)&tidlist; |
---|
457 | | - hfi1_user_exp_rcv_clear(fd, tinfo); |
---|
458 | | - tinfo->tidlist = 0; |
---|
459 | | - ret = -EFAULT; |
---|
460 | | - goto bail; |
---|
| 457 | + /* fail if nothing was programmed, set error if none provided */ |
---|
| 458 | + if (tididx == 0) { |
---|
| 459 | + if (ret >= 0) |
---|
| 460 | + ret = -ENOSPC; |
---|
| 461 | + goto fail_unreserve; |
---|
| 462 | + } |
---|
| 463 | + |
---|
| 464 | + /* adjust reserved tid_used to actual count */ |
---|
| 465 | + spin_lock(&fd->tid_lock); |
---|
| 466 | + fd->tid_used -= pageset_count - tididx; |
---|
| 467 | + spin_unlock(&fd->tid_lock); |
---|
| 468 | + |
---|
| 469 | + /* unpin all pages not covered by a TID */ |
---|
| 470 | + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, |
---|
| 471 | + false); |
---|
| 472 | + |
---|
| 473 | + if (fd->use_mn) { |
---|
| 474 | + /* check for an invalidate during setup */ |
---|
| 475 | + bool fail = false; |
---|
| 476 | + |
---|
| 477 | + mutex_lock(&tidbuf->cover_mutex); |
---|
| 478 | + fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); |
---|
| 479 | + mutex_unlock(&tidbuf->cover_mutex); |
---|
| 480 | + |
---|
| 481 | + if (fail) { |
---|
| 482 | + ret = -EBUSY; |
---|
| 483 | + goto fail_unprogram; |
---|
461 | 484 | } |
---|
462 | 485 | } |
---|
463 | 486 | |
---|
464 | | - /* |
---|
465 | | - * If not everything was mapped (due to insufficient RcvArray entries, |
---|
466 | | - * for example), unpin all unmapped pages so we can pin them nex time. |
---|
467 | | - */ |
---|
468 | | - if (mapped_pages != pinned) |
---|
469 | | - unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, |
---|
470 | | - (pinned - mapped_pages), false); |
---|
471 | | -bail: |
---|
472 | | - kfree(tidbuf->psets); |
---|
473 | | - kfree(tidlist); |
---|
| 487 | + tinfo->tidcnt = tididx; |
---|
| 488 | + tinfo->length = mapped_pages * PAGE_SIZE; |
---|
| 489 | + |
---|
| 490 | + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), |
---|
| 491 | + tidlist, sizeof(tidlist[0]) * tididx)) { |
---|
| 492 | + ret = -EFAULT; |
---|
| 493 | + goto fail_unprogram; |
---|
| 494 | + } |
---|
| 495 | + |
---|
| 496 | + if (fd->use_mn) |
---|
| 497 | + mmu_interval_notifier_remove(&tidbuf->notifier); |
---|
474 | 498 | kfree(tidbuf->pages); |
---|
| 499 | + kfree(tidbuf->psets); |
---|
475 | 500 | kfree(tidbuf); |
---|
476 | | - return ret > 0 ? 0 : ret; |
---|
| 501 | + kfree(tidlist); |
---|
| 502 | + return 0; |
---|
| 503 | + |
---|
| 504 | +fail_unprogram: |
---|
| 505 | + /* unprogram, unmap, and unpin all allocated TIDs */ |
---|
| 506 | + tinfo->tidlist = (unsigned long)tidlist; |
---|
| 507 | + hfi1_user_exp_rcv_clear(fd, tinfo); |
---|
| 508 | + tinfo->tidlist = 0; |
---|
| 509 | + pinned = 0; /* nothing left to unpin */ |
---|
| 510 | + pageset_count = 0; /* nothing left reserved */ |
---|
| 511 | +fail_unreserve: |
---|
| 512 | + spin_lock(&fd->tid_lock); |
---|
| 513 | + fd->tid_used -= pageset_count; |
---|
| 514 | + spin_unlock(&fd->tid_lock); |
---|
| 515 | +fail_unpin: |
---|
| 516 | + if (fd->use_mn) |
---|
| 517 | + mmu_interval_notifier_remove(&tidbuf->notifier); |
---|
| 518 | + if (pinned > 0) |
---|
| 519 | + unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); |
---|
| 520 | +fail_release_mem: |
---|
| 521 | + kfree(tidbuf->pages); |
---|
| 522 | + kfree(tidbuf->psets); |
---|
| 523 | + kfree(tidbuf); |
---|
| 524 | + kfree(tidlist); |
---|
| 525 | + return ret; |
---|
477 | 526 | } |
---|
478 | 527 | |
---|
479 | 528 | int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, |
---|
.. | .. |
---|
494 | 543 | |
---|
495 | 544 | mutex_lock(&uctxt->exp_mutex); |
---|
496 | 545 | for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { |
---|
497 | | - ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); |
---|
| 546 | + ret = unprogram_rcvarray(fd, tidinfo[tididx]); |
---|
498 | 547 | if (ret) { |
---|
499 | 548 | hfi1_cdbg(TID, "Failed to unprogram rcv array %d", |
---|
500 | 549 | ret); |
---|
.. | .. |
---|
750 | 799 | } |
---|
751 | 800 | |
---|
752 | 801 | node->fdata = fd; |
---|
| 802 | + mutex_init(&node->invalidate_mutex); |
---|
753 | 803 | node->phys = page_to_phys(pages[0]); |
---|
754 | 804 | node->npages = npages; |
---|
755 | 805 | node->rcventry = rcventry; |
---|
.. | .. |
---|
765 | 815 | &tid_mn_ops); |
---|
766 | 816 | if (ret) |
---|
767 | 817 | goto out_unmap; |
---|
768 | | - /* |
---|
769 | | - * FIXME: This is in the wrong order, the notifier should be |
---|
770 | | - * established before the pages are pinned by pin_rcv_pages. |
---|
771 | | - */ |
---|
772 | | - mmu_interval_read_begin(&node->notifier); |
---|
773 | 818 | } |
---|
774 | 819 | fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; |
---|
775 | 820 | |
---|
.. | .. |
---|
789 | 834 | return -EFAULT; |
---|
790 | 835 | } |
---|
791 | 836 | |
---|
792 | | -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, |
---|
793 | | - struct tid_group **grp) |
---|
| 837 | +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) |
---|
794 | 838 | { |
---|
795 | 839 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
---|
796 | 840 | struct hfi1_devdata *dd = uctxt->dd; |
---|
.. | .. |
---|
813 | 857 | if (!node || node->rcventry != (uctxt->expected_base + rcventry)) |
---|
814 | 858 | return -EBADF; |
---|
815 | 859 | |
---|
816 | | - if (grp) |
---|
817 | | - *grp = node->grp; |
---|
818 | | - |
---|
819 | 860 | if (fd->use_mn) |
---|
820 | 861 | mmu_interval_notifier_remove(&node->notifier); |
---|
821 | 862 | cacheless_tid_rb_remove(fd, node); |
---|
.. | .. |
---|
823 | 864 | return 0; |
---|
824 | 865 | } |
---|
825 | 866 | |
---|
826 | | -static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) |
---|
| 867 | +static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) |
---|
827 | 868 | { /* invalidate the node's rcvarray entry and unpin its pages, at most once per node */ |
---|
828 | 869 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
---|
829 | 870 | struct hfi1_devdata *dd = uctxt->dd; |
---|
| 871 | + |
---|
| 872 | + mutex_lock(&node->invalidate_mutex); /* the freed check and the teardown happen under this lock */ |
---|
| 873 | + if (node->freed) |
---|
| 874 | + goto done; /* an earlier call already did the teardown below */ |
---|
| 875 | + node->freed = true; /* set before the teardown so any later call takes the early exit above */ |
---|
830 | 876 | |
---|
831 | 877 | trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, |
---|
832 | 878 | node->npages, |
---|
833 | 879 | node->notifier.interval_tree.start, node->phys, |
---|
834 | 880 | node->dma_addr); |
---|
835 | 881 | |
---|
836 | | - /* |
---|
837 | | - * Make sure device has seen the write before we unpin the |
---|
838 | | - * pages. |
---|
839 | | - */ |
---|
| 882 | + /* Write the entry as invalid first: the device must have seen that write before the pages are unpinned below */ |
---|
840 | 883 | hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); |
---|
841 | 884 | |
---|
842 | 885 | unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); |
---|
| 886 | +done: |
---|
| 887 | + mutex_unlock(&node->invalidate_mutex); |
---|
| 888 | +} |
---|
| 889 | + |
---|
| 890 | +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) |
---|
| 891 | +{ |
---|
| 892 | + struct hfi1_ctxtdata *uctxt = fd->uctxt; |
---|
| 893 | + |
---|
| 894 | + __clear_tid_node(fd, node); |
---|
843 | 895 | |
---|
844 | 896 | node->grp->used--; |
---|
845 | 897 | node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); |
---|
.. | .. |
---|
898 | 950 | if (node->freed) |
---|
899 | 951 | return true; |
---|
900 | 952 | |
---|
| 953 | + /* take action only if unmapping */ |
---|
| 954 | + if (range->event != MMU_NOTIFY_UNMAP) |
---|
| 955 | + return true; |
---|
| 956 | + |
---|
901 | 957 | trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, |
---|
902 | 958 | node->notifier.interval_tree.start, |
---|
903 | 959 | node->rcventry, node->npages, node->dma_addr); |
---|
904 | | - node->freed = true; |
---|
| 960 | + |
---|
| 961 | + /* clear the hardware rcvarray entry */ |
---|
| 962 | + __clear_tid_node(fdata, node); |
---|
905 | 963 | |
---|
906 | 964 | spin_lock(&fdata->invalid_lock); |
---|
907 | 965 | if (fdata->invalid_tid_idx < uctxt->expected_count) { |
---|
.. | .. |
---|
931 | 989 | return true; |
---|
932 | 990 | } |
---|
933 | 991 | |
---|
| 992 | +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, |
---|
| 993 | + const struct mmu_notifier_range *range, |
---|
| 994 | + unsigned long cur_seq) |
---|
| 995 | +{ /* invalidate callback for the notifier embedded in a struct tid_user_buf */ |
---|
| 996 | + struct tid_user_buf *tidbuf = |
---|
| 997 | + container_of(mni, struct tid_user_buf, notifier); |
---|
| 998 | + |
---|
| 999 | + /* record the new sequence only for an unmap; the setup path checks it with mmu_interval_read_retry() under the same cover_mutex */ |
---|
| 1000 | + if (range->event == MMU_NOTIFY_UNMAP) { |
---|
| 1001 | + mutex_lock(&tidbuf->cover_mutex); |
---|
| 1002 | + mmu_interval_set_seq(mni, cur_seq); |
---|
| 1003 | + mutex_unlock(&tidbuf->cover_mutex); |
---|
| 1004 | + } |
---|
| 1005 | + |
---|
| 1006 | + return true; |
---|
| 1007 | +} |
---|
| 1008 | + |
---|
934 | 1009 | static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, |
---|
935 | 1010 | struct tid_rb_node *tnode) |
---|
936 | 1011 | { |
---|