| .. | .. |
|---|
| 65 | 65 | static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, |
|---|
| 66 | 66 | const struct mmu_notifier_range *range, |
|---|
| 67 | 67 | unsigned long cur_seq); |
|---|
| 68 | +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, |
|---|
| 69 | + const struct mmu_notifier_range *range, |
|---|
| 70 | + unsigned long cur_seq); |
|---|
| 68 | 71 | static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, |
|---|
| 69 | 72 | struct tid_group *grp, |
|---|
| 70 | 73 | unsigned int start, u16 count, |
|---|
| 71 | 74 | u32 *tidlist, unsigned int *tididx, |
|---|
| 72 | 75 | unsigned int *pmapped); |
|---|
| 73 | | -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, |
|---|
| 74 | | - struct tid_group **grp); |
|---|
| 76 | +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo); |
|---|
| 77 | +static void __clear_tid_node(struct hfi1_filedata *fd, |
|---|
| 78 | + struct tid_rb_node *node); |
|---|
| 75 | 79 | static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); |
|---|
| 76 | 80 | |
|---|
| 77 | 81 | static const struct mmu_interval_notifier_ops tid_mn_ops = { |
|---|
| 78 | 82 | .invalidate = tid_rb_invalidate, |
|---|
| 83 | +}; |
|---|
| 84 | +static const struct mmu_interval_notifier_ops tid_cover_ops = { |
|---|
| 85 | + .invalidate = tid_cover_invalidate, |
|---|
| 79 | 86 | }; |
|---|
| 80 | 87 | |
|---|
| 81 | 88 | /* |
|---|
| .. | .. |
|---|
| 195 | 202 | static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) |
|---|
| 196 | 203 | { |
|---|
| 197 | 204 | int pinned; |
|---|
| 198 | | - unsigned int npages; |
|---|
| 205 | + unsigned int npages = tidbuf->npages; |
|---|
| 199 | 206 | unsigned long vaddr = tidbuf->vaddr; |
|---|
| 200 | 207 | struct page **pages = NULL; |
|---|
| 201 | 208 | struct hfi1_devdata *dd = fd->uctxt->dd; |
|---|
| 202 | | - |
|---|
| 203 | | - /* Get the number of pages the user buffer spans */ |
|---|
| 204 | | - npages = num_user_pages(vaddr, tidbuf->length); |
|---|
| 205 | | - if (!npages) |
|---|
| 206 | | - return -EINVAL; |
|---|
| 207 | 209 | |
|---|
| 208 | 210 | if (npages > fd->uctxt->expected_count) { |
|---|
| 209 | 211 | dd_dev_err(dd, "Expected buffer too big\n"); |
|---|
| .. | .. |
|---|
| 231 | 233 | return pinned; |
|---|
| 232 | 234 | } |
|---|
| 233 | 235 | tidbuf->pages = pages; |
|---|
| 234 | | - tidbuf->npages = npages; |
|---|
| 235 | 236 | fd->tid_n_pinned += pinned; |
|---|
| 236 | 237 | return pinned; |
|---|
| 237 | 238 | } |
|---|
| .. | .. |
|---|
| 295 | 296 | tididx = 0, mapped, mapped_pages = 0; |
|---|
| 296 | 297 | u32 *tidlist = NULL; |
|---|
| 297 | 298 | struct tid_user_buf *tidbuf; |
|---|
| 299 | + unsigned long mmu_seq = 0; |
|---|
| 298 | 300 | |
|---|
| 299 | 301 | if (!PAGE_ALIGNED(tinfo->vaddr)) |
|---|
| 302 | + return -EINVAL; |
|---|
| 303 | + if (tinfo->length == 0) |
|---|
| 300 | 304 | return -EINVAL; |
|---|
| 301 | 305 | |
|---|
| 302 | 306 | tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); |
|---|
| 303 | 307 | if (!tidbuf) |
|---|
| 304 | 308 | return -ENOMEM; |
|---|
| 305 | 309 | |
|---|
| 310 | + mutex_init(&tidbuf->cover_mutex); |
|---|
| 306 | 311 | tidbuf->vaddr = tinfo->vaddr; |
|---|
| 307 | 312 | tidbuf->length = tinfo->length; |
|---|
| 313 | + tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length); |
|---|
| 308 | 314 | tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), |
|---|
| 309 | 315 | GFP_KERNEL); |
|---|
| 310 | 316 | if (!tidbuf->psets) { |
|---|
| 311 | | - kfree(tidbuf); |
|---|
| 312 | | - return -ENOMEM; |
|---|
| 317 | + ret = -ENOMEM; |
|---|
| 318 | + goto fail_release_mem; |
|---|
| 319 | + } |
|---|
| 320 | + |
|---|
| 321 | + if (fd->use_mn) { |
|---|
| 322 | + ret = mmu_interval_notifier_insert( |
|---|
| 323 | + &tidbuf->notifier, current->mm, |
|---|
| 324 | + tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, |
|---|
| 325 | + &tid_cover_ops); |
|---|
| 326 | + if (ret) |
|---|
| 327 | + goto fail_release_mem; |
|---|
| 328 | + mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); |
|---|
| 313 | 329 | } |
|---|
| 314 | 330 | |
|---|
| 315 | 331 | pinned = pin_rcv_pages(fd, tidbuf); |
|---|
| 316 | 332 | if (pinned <= 0) { |
|---|
| 317 | | - kfree(tidbuf->psets); |
|---|
| 318 | | - kfree(tidbuf); |
|---|
| 319 | | - return pinned; |
|---|
| 333 | + ret = (pinned < 0) ? pinned : -ENOSPC; |
|---|
| 334 | + goto fail_unpin; |
|---|
| 320 | 335 | } |
|---|
| 321 | 336 | |
|---|
| 322 | 337 | /* Find sets of physically contiguous pages */ |
|---|
| 323 | 338 | tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); |
|---|
| 324 | 339 | |
|---|
| 325 | | - /* |
|---|
| 326 | | - * We don't need to access this under a lock since tid_used is per |
|---|
| 327 | | - * process and the same process cannot be in hfi1_user_exp_rcv_clear() |
|---|
| 328 | | - * and hfi1_user_exp_rcv_setup() at the same time. |
|---|
| 329 | | - */ |
|---|
| 340 | + /* Reserve the number of expected tids to be used. */ |
|---|
| 330 | 341 | spin_lock(&fd->tid_lock); |
|---|
| 331 | 342 | if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) |
|---|
| 332 | 343 | pageset_count = fd->tid_limit - fd->tid_used; |
|---|
| 333 | 344 | else |
|---|
| 334 | 345 | pageset_count = tidbuf->n_psets; |
|---|
| 346 | + fd->tid_used += pageset_count; |
|---|
| 335 | 347 | spin_unlock(&fd->tid_lock); |
|---|
| 336 | 348 | |
|---|
| 337 | | - if (!pageset_count) |
|---|
| 338 | | - goto bail; |
|---|
| 349 | + if (!pageset_count) { |
|---|
| 350 | + ret = -ENOSPC; |
|---|
| 351 | + goto fail_unreserve; |
|---|
| 352 | + } |
|---|
| 339 | 353 | |
|---|
| 340 | 354 | ngroups = pageset_count / dd->rcv_entries.group_size; |
|---|
| 341 | 355 | tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); |
|---|
| 342 | 356 | if (!tidlist) { |
|---|
| 343 | 357 | ret = -ENOMEM; |
|---|
| 344 | | - goto nomem; |
|---|
| 358 | + goto fail_unreserve; |
|---|
| 345 | 359 | } |
|---|
| 346 | 360 | |
|---|
| 347 | 361 | tididx = 0; |
|---|
| .. | .. |
|---|
| 437 | 451 | } |
|---|
| 438 | 452 | unlock: |
|---|
| 439 | 453 | mutex_unlock(&uctxt->exp_mutex); |
|---|
| 440 | | -nomem: |
|---|
| 441 | 454 | hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, |
|---|
| 442 | 455 | mapped_pages, ret); |
|---|
| 443 | | - if (tididx) { |
|---|
| 444 | | - spin_lock(&fd->tid_lock); |
|---|
| 445 | | - fd->tid_used += tididx; |
|---|
| 446 | | - spin_unlock(&fd->tid_lock); |
|---|
| 447 | | - tinfo->tidcnt = tididx; |
|---|
| 448 | | - tinfo->length = mapped_pages * PAGE_SIZE; |
|---|
| 449 | 456 | |
|---|
| 450 | | - if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), |
|---|
| 451 | | - tidlist, sizeof(tidlist[0]) * tididx)) { |
|---|
| 452 | | - /* |
|---|
| 453 | | - * On failure to copy to the user level, we need to undo |
|---|
| 454 | | - * everything done so far so we don't leak resources. |
|---|
| 455 | | - */ |
|---|
| 456 | | - tinfo->tidlist = (unsigned long)&tidlist; |
|---|
| 457 | | - hfi1_user_exp_rcv_clear(fd, tinfo); |
|---|
| 458 | | - tinfo->tidlist = 0; |
|---|
| 459 | | - ret = -EFAULT; |
|---|
| 460 | | - goto bail; |
|---|
| 457 | + /* fail if nothing was programmed, set error if none provided */ |
|---|
| 458 | + if (tididx == 0) { |
|---|
| 459 | + if (ret >= 0) |
|---|
| 460 | + ret = -ENOSPC; |
|---|
| 461 | + goto fail_unreserve; |
|---|
| 462 | + } |
|---|
| 463 | + |
|---|
| 464 | + /* adjust reserved tid_used to actual count */ |
|---|
| 465 | + spin_lock(&fd->tid_lock); |
|---|
| 466 | + fd->tid_used -= pageset_count - tididx; |
|---|
| 467 | + spin_unlock(&fd->tid_lock); |
|---|
| 468 | + |
|---|
| 469 | + /* unpin all pages not covered by a TID */ |
|---|
| 470 | + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, |
|---|
| 471 | + false); |
|---|
| 472 | + |
|---|
| 473 | + if (fd->use_mn) { |
|---|
| 474 | + /* check for an invalidate during setup */ |
|---|
| 475 | + bool fail = false; |
|---|
| 476 | + |
|---|
| 477 | + mutex_lock(&tidbuf->cover_mutex); |
|---|
| 478 | + fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); |
|---|
| 479 | + mutex_unlock(&tidbuf->cover_mutex); |
|---|
| 480 | + |
|---|
| 481 | + if (fail) { |
|---|
| 482 | + ret = -EBUSY; |
|---|
| 483 | + goto fail_unprogram; |
|---|
| 461 | 484 | } |
|---|
| 462 | 485 | } |
|---|
| 463 | 486 | |
|---|
| 464 | | - /* |
|---|
| 465 | | - * If not everything was mapped (due to insufficient RcvArray entries, |
|---|
| 466 | | - * for example), unpin all unmapped pages so we can pin them nex time. |
|---|
| 467 | | - */ |
|---|
| 468 | | - if (mapped_pages != pinned) |
|---|
| 469 | | - unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, |
|---|
| 470 | | - (pinned - mapped_pages), false); |
|---|
| 471 | | -bail: |
|---|
| 472 | | - kfree(tidbuf->psets); |
|---|
| 473 | | - kfree(tidlist); |
|---|
| 487 | + tinfo->tidcnt = tididx; |
|---|
| 488 | + tinfo->length = mapped_pages * PAGE_SIZE; |
|---|
| 489 | + |
|---|
| 490 | + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), |
|---|
| 491 | + tidlist, sizeof(tidlist[0]) * tididx)) { |
|---|
| 492 | + ret = -EFAULT; |
|---|
| 493 | + goto fail_unprogram; |
|---|
| 494 | + } |
|---|
| 495 | + |
|---|
| 496 | + if (fd->use_mn) |
|---|
| 497 | + mmu_interval_notifier_remove(&tidbuf->notifier); |
|---|
| 474 | 498 | kfree(tidbuf->pages); |
|---|
| 499 | + kfree(tidbuf->psets); |
|---|
| 475 | 500 | kfree(tidbuf); |
|---|
| 476 | | - return ret > 0 ? 0 : ret; |
|---|
| 501 | + kfree(tidlist); |
|---|
| 502 | + return 0; |
|---|
| 503 | + |
|---|
| 504 | +fail_unprogram: |
|---|
| 505 | + /* unprogram, unmap, and unpin all allocated TIDs */ |
|---|
| 506 | + tinfo->tidlist = (unsigned long)tidlist; |
|---|
| 507 | + hfi1_user_exp_rcv_clear(fd, tinfo); |
|---|
| 508 | + tinfo->tidlist = 0; |
|---|
| 509 | + pinned = 0; /* nothing left to unpin */ |
|---|
| 510 | + pageset_count = 0; /* nothing left reserved */ |
|---|
| 511 | +fail_unreserve: |
|---|
| 512 | + spin_lock(&fd->tid_lock); |
|---|
| 513 | + fd->tid_used -= pageset_count; |
|---|
| 514 | + spin_unlock(&fd->tid_lock); |
|---|
| 515 | +fail_unpin: |
|---|
| 516 | + if (fd->use_mn) |
|---|
| 517 | + mmu_interval_notifier_remove(&tidbuf->notifier); |
|---|
| 518 | + if (pinned > 0) |
|---|
| 519 | + unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); |
|---|
| 520 | +fail_release_mem: |
|---|
| 521 | + kfree(tidbuf->pages); |
|---|
| 522 | + kfree(tidbuf->psets); |
|---|
| 523 | + kfree(tidbuf); |
|---|
| 524 | + kfree(tidlist); |
|---|
| 525 | + return ret; |
|---|
| 477 | 526 | } |
|---|
| 478 | 527 | |
|---|
| 479 | 528 | int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, |
|---|
| .. | .. |
|---|
| 494 | 543 | |
|---|
| 495 | 544 | mutex_lock(&uctxt->exp_mutex); |
|---|
| 496 | 545 | for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { |
|---|
| 497 | | - ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); |
|---|
| 546 | + ret = unprogram_rcvarray(fd, tidinfo[tididx]); |
|---|
| 498 | 547 | if (ret) { |
|---|
| 499 | 548 | hfi1_cdbg(TID, "Failed to unprogram rcv array %d", |
|---|
| 500 | 549 | ret); |
|---|
| .. | .. |
|---|
| 750 | 799 | } |
|---|
| 751 | 800 | |
|---|
| 752 | 801 | node->fdata = fd; |
|---|
| 802 | + mutex_init(&node->invalidate_mutex); |
|---|
| 753 | 803 | node->phys = page_to_phys(pages[0]); |
|---|
| 754 | 804 | node->npages = npages; |
|---|
| 755 | 805 | node->rcventry = rcventry; |
|---|
| .. | .. |
|---|
| 765 | 815 | &tid_mn_ops); |
|---|
| 766 | 816 | if (ret) |
|---|
| 767 | 817 | goto out_unmap; |
|---|
| 768 | | - /* |
|---|
| 769 | | - * FIXME: This is in the wrong order, the notifier should be |
|---|
| 770 | | - * established before the pages are pinned by pin_rcv_pages. |
|---|
| 771 | | - */ |
|---|
| 772 | | - mmu_interval_read_begin(&node->notifier); |
|---|
| 773 | 818 | } |
|---|
| 774 | 819 | fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; |
|---|
| 775 | 820 | |
|---|
| .. | .. |
|---|
| 789 | 834 | return -EFAULT; |
|---|
| 790 | 835 | } |
|---|
| 791 | 836 | |
|---|
| 792 | | -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, |
|---|
| 793 | | - struct tid_group **grp) |
|---|
| 837 | +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) |
|---|
| 794 | 838 | { |
|---|
| 795 | 839 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
|---|
| 796 | 840 | struct hfi1_devdata *dd = uctxt->dd; |
|---|
| .. | .. |
|---|
| 813 | 857 | if (!node || node->rcventry != (uctxt->expected_base + rcventry)) |
|---|
| 814 | 858 | return -EBADF; |
|---|
| 815 | 859 | |
|---|
| 816 | | - if (grp) |
|---|
| 817 | | - *grp = node->grp; |
|---|
| 818 | | - |
|---|
| 819 | 860 | if (fd->use_mn) |
|---|
| 820 | 861 | mmu_interval_notifier_remove(&node->notifier); |
|---|
| 821 | 862 | cacheless_tid_rb_remove(fd, node); |
|---|
| .. | .. |
|---|
| 823 | 864 | return 0; |
|---|
| 824 | 865 | } |
|---|
| 825 | 866 | |
|---|
| 826 | | -static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) |
|---|
| 867 | +static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) |
|---|
| 827 | 868 | { |
|---|
| 828 | 869 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
|---|
| 829 | 870 | struct hfi1_devdata *dd = uctxt->dd; |
|---|
| 871 | + |
|---|
| 872 | + mutex_lock(&node->invalidate_mutex); |
|---|
| 873 | + if (node->freed) |
|---|
| 874 | + goto done; |
|---|
| 875 | + node->freed = true; |
|---|
| 830 | 876 | |
|---|
| 831 | 877 | trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, |
|---|
| 832 | 878 | node->npages, |
|---|
| 833 | 879 | node->notifier.interval_tree.start, node->phys, |
|---|
| 834 | 880 | node->dma_addr); |
|---|
| 835 | 881 | |
|---|
| 836 | | - /* |
|---|
| 837 | | - * Make sure device has seen the write before we unpin the |
|---|
| 838 | | - * pages. |
|---|
| 839 | | - */ |
|---|
| 882 | + /* Make sure device has seen the write before pages are unpinned */ |
|---|
| 840 | 883 | hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); |
|---|
| 841 | 884 | |
|---|
| 842 | 885 | unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); |
|---|
| 886 | +done: |
|---|
| 887 | + mutex_unlock(&node->invalidate_mutex); |
|---|
| 888 | +} |
|---|
| 889 | + |
|---|
| 890 | +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) |
|---|
| 891 | +{ |
|---|
| 892 | + struct hfi1_ctxtdata *uctxt = fd->uctxt; |
|---|
| 893 | + |
|---|
| 894 | + __clear_tid_node(fd, node); |
|---|
| 843 | 895 | |
|---|
| 844 | 896 | node->grp->used--; |
|---|
| 845 | 897 | node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); |
|---|
| .. | .. |
|---|
| 898 | 950 | if (node->freed) |
|---|
| 899 | 951 | return true; |
|---|
| 900 | 952 | |
|---|
| 953 | + /* take action only if unmapping */ |
|---|
| 954 | + if (range->event != MMU_NOTIFY_UNMAP) |
|---|
| 955 | + return true; |
|---|
| 956 | + |
|---|
| 901 | 957 | trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, |
|---|
| 902 | 958 | node->notifier.interval_tree.start, |
|---|
| 903 | 959 | node->rcventry, node->npages, node->dma_addr); |
|---|
| 904 | | - node->freed = true; |
|---|
| 960 | + |
|---|
| 961 | + /* clear the hardware rcvarray entry */ |
|---|
| 962 | + __clear_tid_node(fdata, node); |
|---|
| 905 | 963 | |
|---|
| 906 | 964 | spin_lock(&fdata->invalid_lock); |
|---|
| 907 | 965 | if (fdata->invalid_tid_idx < uctxt->expected_count) { |
|---|
| .. | .. |
|---|
| 931 | 989 | return true; |
|---|
| 932 | 990 | } |
|---|
| 933 | 991 | |
|---|
| 992 | +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, |
|---|
| 993 | + const struct mmu_notifier_range *range, |
|---|
| 994 | + unsigned long cur_seq) |
|---|
| 995 | +{ |
|---|
| 996 | + struct tid_user_buf *tidbuf = |
|---|
| 997 | + container_of(mni, struct tid_user_buf, notifier); |
|---|
| 998 | + |
|---|
| 999 | + /* take action only if unmapping */ |
|---|
| 1000 | + if (range->event == MMU_NOTIFY_UNMAP) { |
|---|
| 1001 | + mutex_lock(&tidbuf->cover_mutex); |
|---|
| 1002 | + mmu_interval_set_seq(mni, cur_seq); |
|---|
| 1003 | + mutex_unlock(&tidbuf->cover_mutex); |
|---|
| 1004 | + } |
|---|
| 1005 | + |
|---|
| 1006 | + return true; |
|---|
| 1007 | +} |
|---|
| 1008 | + |
|---|
| 934 | 1009 | static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, |
|---|
| 935 | 1010 | struct tid_rb_node *tnode) |
|---|
| 936 | 1011 | { |
|---|