@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
@@ -59,24 +60,29 @@
 				    struct tid_user_buf *tbuf,
 				    u32 rcventry, struct tid_group *grp,
 				    u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 				    struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
 			    struct tid_group *grp,
 			    unsigned int start, u16 count,
 			    u32 *tidlist, unsigned int *tididx,
 			    unsigned int *pmapped);
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-			      struct tid_group **grp);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+			     struct tid_rb_node *node);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-	.insert = tid_rb_insert,
-	.remove = tid_rb_remove,
-	.invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+	.invalidate = tid_rb_invalidate,
+};
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+	.invalidate = tid_cover_invalidate,
 };
 
 /*
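
The hunk above replaces the driver-private mmu_rb machinery (an address-keyed rbtree with insert/remove/invalidate callbacks, registered through hfi1_mmu_rb_register(), removed further down) with the core kernel's per-range interval notifiers: tid_mn_ops is attached to each programmed TID node, and tid_cover_ops to the whole user buffer while setup is in flight. For readers unfamiliar with the API, a minimal sketch of the pattern follows; my_range, my_invalidate() and my_watch() are illustrative names, not code from this patch:

#include <linux/mmu_notifier.h>

struct my_range {
	struct mmu_interval_notifier notifier;	/* embedded; must outlive the registration */
	bool stale;
};

/* Invoked by the MM core whenever the watched range is about to change. */
static bool my_invalidate(struct mmu_interval_notifier *mni,
			  const struct mmu_notifier_range *range,
			  unsigned long cur_seq)
{
	struct my_range *r = container_of(mni, struct my_range, notifier);

	r->stale = true;	/* driver-specific reaction */
	return true;		/* do not block the invalidation */
}

static const struct mmu_interval_notifier_ops my_ops = {
	.invalidate = my_invalidate,
};

/* Watch [start, start + len) of the current process's address space. */
static int my_watch(struct my_range *r, unsigned long start, unsigned long len)
{
	return mmu_interval_notifier_insert(&r->notifier, current->mm,
					    start, len, &my_ops);
}

Unlike the old mmu_rb handler there is no insert/remove pair to implement: each mmu_interval_notifier_insert()/mmu_interval_notifier_remove() call manages exactly one range, which is why tid_rb_insert() and tid_rb_remove() disappear later in this patch.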
---|
@@ -87,7 +93,6 @@
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 			   struct hfi1_ctxtdata *uctxt)
 {
-	struct hfi1_devdata *dd = uctxt->dd;
 	int ret = 0;
 
 	fd->entry_to_rb = kcalloc(uctxt->expected_count,
@@ -106,20 +111,7 @@
 		fd->entry_to_rb = NULL;
 		return -ENOMEM;
 	}
-
-	/*
-	 * Register MMU notifier callbacks. If the registration
-	 * fails, continue without TID caching for this context.
-	 */
-	ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
-				   dd->pport->hfi1_wq,
-				   &fd->handler);
-	if (ret) {
-		dd_dev_info(dd,
-			    "Failed MMU notifier registration %d\n",
-			    ret);
-		ret = 0;
-	}
+		fd->use_mn = true;
 	}
 
 	/*
@@ -136,7 +128,7 @@
 	 * init.
 	 */
 	spin_lock(&fd->tid_lock);
-	if (uctxt->subctxt_cnt && fd->handler) {
+	if (uctxt->subctxt_cnt && fd->use_mn) {
 		u16 remainder;
 
 		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -155,20 +147,12 @@
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
-	/*
-	 * The notifier would have been removed when the process'es mm
-	 * was freed.
-	 */
-	if (fd->handler) {
-		hfi1_mmu_rb_unregister(fd->handler);
-	} else {
-		mutex_lock(&uctxt->exp_mutex);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-		mutex_unlock(&uctxt->exp_mutex);
-	}
+	mutex_lock(&uctxt->exp_mutex);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+	mutex_unlock(&uctxt->exp_mutex);
 
 	kfree(fd->invalid_tids);
 	fd->invalid_tids = NULL;
@@ -197,15 +181,18 @@
 {
 	struct page **pages;
 	struct hfi1_devdata *dd = fd->uctxt->dd;
+	struct mm_struct *mm;
 
 	if (mapped) {
 		pci_unmap_single(dd->pcidev, node->dma_addr,
-				 node->mmu.len, PCI_DMA_FROMDEVICE);
+				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
 		pages = &node->pages[idx];
+		mm = mm_from_tid_node(node);
 	} else {
 		pages = &tidbuf->pages[idx];
+		mm = current->mm;
 	}
-	hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+	hfi1_release_user_pages(mm, pages, npages, mapped);
 	fd->tid_n_pinned -= npages;
 }
 
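
One subtlety in this hunk: when unpinning pages that back an already-programmed node, the mm can no longer be taken from the file data (the cached fd->mm is what this series removes); it is recovered from the node itself via mm_from_tid_node(), while pages that were never programmed still belong to current->mm. The helper is defined elsewhere in the series; presumably it is just a thin accessor over the embedded notifier, along these lines (a sketch, not quoted from this hunk):

static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
{
	/* mmu_interval_notifier records the mm it was registered against */
	return node->notifier.mm;
}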
---|
@@ -215,28 +202,16 @@
 static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
 {
 	int pinned;
-	unsigned int npages;
+	unsigned int npages = tidbuf->npages;
 	unsigned long vaddr = tidbuf->vaddr;
 	struct page **pages = NULL;
 	struct hfi1_devdata *dd = fd->uctxt->dd;
-
-	/* Get the number of pages the user buffer spans */
-	npages = num_user_pages(vaddr, tidbuf->length);
-	if (!npages)
-		return -EINVAL;
 
 	if (npages > fd->uctxt->expected_count) {
 		dd_dev_err(dd, "Expected buffer too big\n");
 		return -EINVAL;
 	}
 
-	/* Verify that access is OK for the user buffer */
-	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-		       npages * PAGE_SIZE)) {
-		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-			   (void *)vaddr, npages);
-		return -EFAULT;
-	}
 	/* Allocate the array of struct page pointers needed for pinning */
 	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 	if (!pages)
@@ -247,18 +222,17 @@
 	 * pages, accept the amount pinned so far and program only that.
 	 * User space knows how to deal with partially programmed buffers.
 	 */
-	if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+	if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
 		kfree(pages);
 		return -ENOMEM;
 	}
 
-	pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+	pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
 	if (pinned <= 0) {
 		kfree(pages);
 		return pinned;
 	}
 	tidbuf->pages = pages;
-	tidbuf->npages = npages;
 	fd->tid_n_pinned += pinned;
 	return pinned;
 }
@@ -322,53 +296,66 @@
 		tididx = 0, mapped, mapped_pages = 0;
 	u32 *tidlist = NULL;
 	struct tid_user_buf *tidbuf;
+	unsigned long mmu_seq = 0;
 
 	if (!PAGE_ALIGNED(tinfo->vaddr))
+		return -EINVAL;
+	if (tinfo->length == 0)
 		return -EINVAL;
 
 	tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
 	if (!tidbuf)
 		return -ENOMEM;
 
+	mutex_init(&tidbuf->cover_mutex);
 	tidbuf->vaddr = tinfo->vaddr;
 	tidbuf->length = tinfo->length;
+	tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
 	tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
 				GFP_KERNEL);
 	if (!tidbuf->psets) {
-		kfree(tidbuf);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fail_release_mem;
+	}
+
+	if (fd->use_mn) {
+		ret = mmu_interval_notifier_insert(
+			&tidbuf->notifier, current->mm,
+			tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+			&tid_cover_ops);
+		if (ret)
+			goto fail_release_mem;
+		mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
 	}
 
 	pinned = pin_rcv_pages(fd, tidbuf);
 	if (pinned <= 0) {
-		kfree(tidbuf->psets);
-		kfree(tidbuf);
-		return pinned;
+		ret = (pinned < 0) ? pinned : -ENOSPC;
+		goto fail_unpin;
 	}
 
 	/* Find sets of physically contiguous pages */
 	tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
 
-	/*
-	 * We don't need to access this under a lock since tid_used is per
-	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
-	 * and hfi1_user_exp_rcv_setup() at the same time.
-	 */
+	/* Reserve the number of expected tids to be used. */
 	spin_lock(&fd->tid_lock);
 	if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
 		pageset_count = fd->tid_limit - fd->tid_used;
 	else
 		pageset_count = tidbuf->n_psets;
+	fd->tid_used += pageset_count;
 	spin_unlock(&fd->tid_lock);
 
-	if (!pageset_count)
-		goto bail;
+	if (!pageset_count) {
+		ret = -ENOSPC;
+		goto fail_unreserve;
+	}
 
 	ngroups = pageset_count / dd->rcv_entries.group_size;
 	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
 	if (!tidlist) {
 		ret = -ENOMEM;
-		goto nomem;
+		goto fail_unreserve;
 	}
 
 	tididx = 0;
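
The new mmu_seq handshake follows the standard interval-notifier sequence protocol: the counter sampled with mmu_interval_read_begin() above is re-checked with mmu_interval_read_retry() in the next hunk, after pinning and programming are done, to learn whether an invalidation overlapped the window. In outline (an illustrative reduction of the surrounding code, not a separate API):

	unsigned long seq;

	seq = mmu_interval_read_begin(&tidbuf->notifier);

	/* ... pin the user pages and program RcvArray entries ... */

	if (mmu_interval_read_retry(&tidbuf->notifier, seq)) {
		/*
		 * The buffer was unmapped while we worked; everything
		 * just programmed must be torn down again.
		 */
		ret = -EBUSY;
	}

cover_mutex appears to exist so that this read_retry and the mmu_interval_set_seq() in tid_cover_invalidate() cannot interleave mid-check.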
---|
@@ -464,43 +451,78 @@
 	}
 unlock:
 	mutex_unlock(&uctxt->exp_mutex);
-nomem:
 	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
 		  mapped_pages, ret);
-	if (tididx) {
-		spin_lock(&fd->tid_lock);
-		fd->tid_used += tididx;
-		spin_unlock(&fd->tid_lock);
-		tinfo->tidcnt = tididx;
-		tinfo->length = mapped_pages * PAGE_SIZE;
 
-		if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
-				 tidlist, sizeof(tidlist[0]) * tididx)) {
-			/*
-			 * On failure to copy to the user level, we need to undo
-			 * everything done so far so we don't leak resources.
-			 */
-			tinfo->tidlist = (unsigned long)&tidlist;
-			hfi1_user_exp_rcv_clear(fd, tinfo);
-			tinfo->tidlist = 0;
-			ret = -EFAULT;
-			goto bail;
+	/* fail if nothing was programmed, set error if none provided */
+	if (tididx == 0) {
+		if (ret >= 0)
+			ret = -ENOSPC;
+		goto fail_unreserve;
+	}
+
+	/* adjust reserved tid_used to actual count */
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= pageset_count - tididx;
+	spin_unlock(&fd->tid_lock);
+
+	/* unpin all pages not covered by a TID */
+	unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+			false);
+
+	if (fd->use_mn) {
+		/* check for an invalidate during setup */
+		bool fail = false;
+
+		mutex_lock(&tidbuf->cover_mutex);
+		fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+		mutex_unlock(&tidbuf->cover_mutex);
+
+		if (fail) {
+			ret = -EBUSY;
+			goto fail_unprogram;
 		}
 	}
 
-	/*
-	 * If not everything was mapped (due to insufficient RcvArray entries,
-	 * for example), unpin all unmapped pages so we can pin them nex time.
-	 */
-	if (mapped_pages != pinned)
-		unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
-				(pinned - mapped_pages), false);
-bail:
-	kfree(tidbuf->psets);
-	kfree(tidlist);
+	tinfo->tidcnt = tididx;
+	tinfo->length = mapped_pages * PAGE_SIZE;
+
+	if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+			 tidlist, sizeof(tidlist[0]) * tididx)) {
+		ret = -EFAULT;
+		goto fail_unprogram;
+	}
+
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&tidbuf->notifier);
 	kfree(tidbuf->pages);
+	kfree(tidbuf->psets);
 	kfree(tidbuf);
-	return ret > 0 ? 0 : ret;
+	kfree(tidlist);
+	return 0;
+
+fail_unprogram:
+	/* unprogram, unmap, and unpin all allocated TIDs */
+	tinfo->tidlist = (unsigned long)tidlist;
+	hfi1_user_exp_rcv_clear(fd, tinfo);
+	tinfo->tidlist = 0;
+	pinned = 0;		/* nothing left to unpin */
+	pageset_count = 0;	/* nothing left reserved */
fail_unreserve:
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= pageset_count;
+	spin_unlock(&fd->tid_lock);
+fail_unpin:
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&tidbuf->notifier);
+	if (pinned > 0)
+		unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
+	kfree(tidbuf->pages);
+	kfree(tidbuf->psets);
+	kfree(tidbuf);
+	kfree(tidlist);
+	return ret;
 }
 
 int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
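
The exit path above is worth a second look: the single bail label, which freed different amounts of state depending on how far setup got, becomes a fall-through goto ladder in which each label undoes exactly one acquisition stage and drops into the next. fail_unprogram zeroes pinned and pageset_count before falling through because hfi1_user_exp_rcv_clear() has already unpinned the pages and returned the reservation. The shape, reduced to a skeleton (placeholder names, not driver code):

	if (grab_a() < 0)
		goto out;
	if (grab_b() < 0)
		goto undo_a;
	if (grab_c() < 0)
		goto undo_b;
	return 0;

undo_b:
	release_b();
undo_a:
	release_a();
out:
	return ret;

Note also that the success path now returns 0 explicitly instead of normalizing a possibly-positive ret with return ret > 0 ? 0 : ret;.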
---|
@@ -521,7 +543,7 @@
 
 	mutex_lock(&uctxt->exp_mutex);
 	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
-		ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
+		ret = unprogram_rcvarray(fd, tidinfo[tididx]);
 		if (ret) {
 			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
 				  ret);
@@ -776,8 +798,8 @@
 		return -EFAULT;
 	}
 
-	node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
-	node->mmu.len = npages * PAGE_SIZE;
+	node->fdata = fd;
+	mutex_init(&node->invalidate_mutex);
 	node->phys = page_to_phys(pages[0]);
 	node->npages = npages;
 	node->rcventry = rcventry;
@@ -786,27 +808,33 @@
 	node->freed = false;
 	memcpy(node->pages, pages, sizeof(struct page *) * npages);
 
-	if (!fd->handler)
-		ret = tid_rb_insert(fd, &node->mmu);
-	else
-		ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
-	if (ret) {
-		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
-			  node->rcventry, node->mmu.addr, node->phys, ret);
-		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-				 PCI_DMA_FROMDEVICE);
-		kfree(node);
-		return -EFAULT;
+	if (fd->use_mn) {
+		ret = mmu_interval_notifier_insert(
+			&node->notifier, current->mm,
+			tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+			&tid_mn_ops);
+		if (ret)
+			goto out_unmap;
 	}
+	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
 	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
 	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
-			       node->mmu.addr, node->phys, phys);
+			       node->notifier.interval_tree.start, node->phys,
+			       phys);
 	return 0;
+
+out_unmap:
+	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+		  node->rcventry, node->notifier.interval_tree.start,
+		  node->phys, ret);
+	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+			 PCI_DMA_FROMDEVICE);
+	kfree(node);
+	return -EFAULT;
 }
 
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-			      struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
@@ -829,33 +857,41 @@
 	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
 		return -EBADF;
 
-	if (grp)
-		*grp = node->grp;
-
-	if (!fd->handler)
-		cacheless_tid_rb_remove(fd, node);
-	else
-		hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&node->notifier);
+	cacheless_tid_rb_remove(fd, node);
 
 	return 0;
+}
+
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+
+	mutex_lock(&node->invalidate_mutex);
+	if (node->freed)
+		goto done;
+	node->freed = true;
+
+	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+				 node->npages,
+				 node->notifier.interval_tree.start, node->phys,
+				 node->dma_addr);
+
+	/* Make sure device has seen the write before pages are unpinned */
+	hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
+
+	unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
done:
+	mutex_unlock(&node->invalidate_mutex);
 }
 
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
 
-	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
-				 node->npages, node->mmu.addr, node->phys,
-				 node->dma_addr);
-
-	/*
-	 * Make sure device has seen the write before we unpin the
-	 * pages.
-	 */
-	hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
-
-	unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+	__clear_tid_node(fd, node);
 
 	node->grp->used--;
 	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
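
__clear_tid_node() makes the actual teardown idempotent: it can now be reached both from the normal unprogram path (via clear_tid_node()) and from the invalidate callback below, in either order, so the freed flag is tested and set under invalidate_mutex and the RcvArray flush plus unpin run at most once. The pattern in isolation (illustrative names, not driver code):

struct once_teardown {
	struct mutex lock;
	bool done;
};

static void teardown_once(struct once_teardown *t)
{
	mutex_lock(&t->lock);
	if (!t->done) {
		t->done = true;
		/* invalidate the hardware entry, then unpin the pages */
	}
	mutex_unlock(&t->lock);
}

clear_tid_node() keeps the bookkeeping that must happen exactly once on the unprogram side (the group accounting) outside the guarded region.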
---|
@@ -893,32 +929,37 @@
 			if (!node || node->rcventry != rcventry)
 				continue;
 
+			if (fd->use_mn)
+				mmu_interval_notifier_remove(
+						&node->notifier);
 			cacheless_tid_rb_remove(fd, node);
 		}
 	}
 }
 
 
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq)
 {
-	struct hfi1_filedata *fdata = arg;
-	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 	struct tid_rb_node *node =
-		container_of(mnode, struct tid_rb_node, mmu);
+		container_of(mni, struct tid_rb_node, notifier);
+	struct hfi1_filedata *fdata = node->fdata;
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 
 	if (node->freed)
-		return 0;
+		return true;
 
-	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+	/* take action only if unmapping */
+	if (range->event != MMU_NOTIFY_UNMAP)
+		return true;
+
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+				 node->notifier.interval_tree.start,
 				 node->rcventry, node->npages, node->dma_addr);
-	node->freed = true;
+
+	/* clear the hardware rcvarray entry */
+	__clear_tid_node(fdata, node);
 
 	spin_lock(&fdata->invalid_lock);
 	if (fdata->invalid_tid_idx < uctxt->expected_count) {
@@ -945,18 +986,24 @@
 		fdata->invalid_tid_idx++;
 	}
 	spin_unlock(&fdata->invalid_lock);
-	return 0;
+	return true;
 }
 
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq)
 {
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-	u32 base = fdata->uctxt->expected_base;
+	struct tid_user_buf *tidbuf =
+		container_of(mni, struct tid_user_buf, notifier);
 
-	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
-	return 0;
+	/* take action only if unmapping */
+	if (range->event == MMU_NOTIFY_UNMAP) {
+		mutex_lock(&tidbuf->cover_mutex);
+		mmu_interval_set_seq(mni, cur_seq);
+		mutex_unlock(&tidbuf->cover_mutex);
+	}
+
+	return true;
 }
 
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
---|
@@ -966,13 +1013,4 @@
 
 	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
 	clear_tid_node(fdata, tnode);
-}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-
-	cacheless_tid_rb_remove(fdata, tnode);
 }
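
A closing note on the two callbacks: both honor the interval-notifier contract by returning true (returning false is only allowed for non-blockable ranges, where it asks the core to retry), and both act only on MMU_NOTIFY_UNMAP, so events such as protection changes do not tear down live DMA mappings. The division of labor is deliberate: tid_rb_invalidate() eagerly clears the hardware entry for an already-programmed node, while tid_cover_invalidate() merely records that the buffer changed, roughly (in outline, mirroring the code above):

	/* cover notifier: publish "the range changed" */
	mmu_interval_set_seq(mni, cur_seq);

	/* setup side: notice it after the fact */
	if (mmu_interval_read_retry(&tidbuf->notifier, mmu_seq))
		/* unwind with -EBUSY */;

so that hfi1_user_exp_rcv_setup() can catch an unmap racing with the window between pinning and programming. With tid_rb_insert() and tid_rb_remove() gone (their bookkeeping is replaced by the direct entry_to_rb[] assignment in set_rcvarray_entry()), the driver no longer carries any private MMU-notifier infrastructure.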
---|