...
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
...
 			      struct tid_user_buf *tbuf,
 			      u32 rcventry, struct tid_group *grp,
 			      u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 				    struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
 			    struct tid_group *grp,
 			    unsigned int start, u16 count,
 			    u32 *tidlist, unsigned int *tididx,
 			    unsigned int *pmapped);
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-			      struct tid_group **grp);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+			     struct tid_rb_node *node);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-	.insert = tid_rb_insert,
-	.remove = tid_rb_remove,
-	.invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+	.invalidate = tid_rb_invalidate,
+};
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+	.invalidate = tid_cover_invalidate,
 };
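The rework replaces the driver-private mmu_rb tree (one registered handler per file descriptor, with insert/remove/invalidate callbacks) with core mmu_interval_notifier subscriptions: tid_mn_ops is attached to each programmed TID node, while tid_cover_ops watches the whole user buffer for the duration of setup. For orientation, a minimal sketch of how such a subscription is wired up; the wrapper name here is illustrative, only mmu_interval_notifier_insert() itself is the real API:

#include <linux/mmu_notifier.h>

/* Sketch: subscribe a watcher to [start, start + length) of an mm.
 * The core calls ops->invalidate() whenever that part of the address
 * space changes; the driver no longer maintains its own interval tree.
 */
static int watch_user_range(struct mmu_interval_notifier *mni,
			    struct mm_struct *mm,
			    unsigned long start, unsigned long length,
			    const struct mmu_interval_notifier_ops *ops)
{
	return mmu_interval_notifier_insert(mni, mm, start, length, ops);
}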
 
 /*
...
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 			   struct hfi1_ctxtdata *uctxt)
 {
-	struct hfi1_devdata *dd = uctxt->dd;
 	int ret = 0;
 
 	fd->entry_to_rb = kcalloc(uctxt->expected_count,
...
 			fd->entry_to_rb = NULL;
 			return -ENOMEM;
 		}
-
-		/*
-		 * Register MMU notifier callbacks. If the registration
-		 * fails, continue without TID caching for this context.
-		 */
-		ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
-					   dd->pport->hfi1_wq,
-					   &fd->handler);
-		if (ret) {
-			dd_dev_info(dd,
-				    "Failed MMU notifier registration %d\n",
-				    ret);
-			ret = 0;
-		}
+		fd->use_mn = true;
 	}
 
 	/*
...
 	 * init.
 	 */
 	spin_lock(&fd->tid_lock);
-	if (uctxt->subctxt_cnt && fd->handler) {
+	if (uctxt->subctxt_cnt && fd->use_mn) {
 		u16 remainder;
 
 		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
...
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
-	/*
-	 * The notifier would have been removed when the process'es mm
-	 * was freed.
-	 */
-	if (fd->handler) {
-		hfi1_mmu_rb_unregister(fd->handler);
-	} else {
-		mutex_lock(&uctxt->exp_mutex);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
-		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
-			unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-		mutex_unlock(&uctxt->exp_mutex);
-	}
+	mutex_lock(&uctxt->exp_mutex);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+	mutex_unlock(&uctxt->exp_mutex);
 
 	kfree(fd->invalid_tids);
 	fd->invalid_tids = NULL;
...
 {
 	struct page **pages;
 	struct hfi1_devdata *dd = fd->uctxt->dd;
+	struct mm_struct *mm;
 
 	if (mapped) {
 		pci_unmap_single(dd->pcidev, node->dma_addr,
-				 node->mmu.len, PCI_DMA_FROMDEVICE);
+				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
 		pages = &node->pages[idx];
+		mm = mm_from_tid_node(node);
 	} else {
 		pages = &tidbuf->pages[idx];
+		mm = current->mm;
 	}
-	hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+	hfi1_release_user_pages(mm, pages, npages, mapped);
 	fd->tid_n_pinned -= npages;
 }
 
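With fd->mm gone, the unmap path recovers the owning mm from the node itself: the interval notifier records the mm it was registered against. The mm_from_tid_node() helper introduced by this rework lives in the companion header and is, roughly, a one-line accessor over that field:

/* sketch of the helper assumed above (actual definition is in the
 * driver's user_exp_rcv.h); the notifier core stores the mm at
 * mmu_interval_notifier_insert() time.
 */
static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
{
	return node->notifier.mm;
}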
...
 static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
 {
 	int pinned;
-	unsigned int npages;
+	unsigned int npages = tidbuf->npages;
 	unsigned long vaddr = tidbuf->vaddr;
 	struct page **pages = NULL;
 	struct hfi1_devdata *dd = fd->uctxt->dd;
-
-	/* Get the number of pages the user buffer spans */
-	npages = num_user_pages(vaddr, tidbuf->length);
-	if (!npages)
-		return -EINVAL;
 
 	if (npages > fd->uctxt->expected_count) {
 		dd_dev_err(dd, "Expected buffer too big\n");
 		return -EINVAL;
 	}
 
-	/* Verify that access is OK for the user buffer */
-	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-		       npages * PAGE_SIZE)) {
-		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-			   (void *)vaddr, npages);
-		return -EFAULT;
-	}
 	/* Allocate the array of struct page pointers needed for pinning */
 	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 	if (!pages)
...
 	 * pages, accept the amount pinned so far and program only that.
 	 * User space knows how to deal with partially programmed buffers.
 	 */
-	if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+	if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
 		kfree(pages);
 		return -ENOMEM;
 	}
 
-	pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+	pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
 	if (pinned <= 0) {
 		kfree(pages);
 		return pinned;
 	}
 	tidbuf->pages = pages;
-	tidbuf->npages = npages;
 	fd->tid_n_pinned += pinned;
 	return pinned;
 }
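The page count now comes in precomputed via tidbuf->npages, and the explicit access_ok() check is dropped: the page-pinning core validates the user range itself when the pages are acquired. For reference, the page-span arithmetic behind num_user_pages() is the usual first-page/last-page computation, roughly:

/* sketch of the page-span helper used by the caller; the driver's own
 * version may differ in small details
 */
static unsigned long num_user_pages(unsigned long addr, unsigned long len)
{
	const unsigned long spage = addr & PAGE_MASK;		  /* first page */
	const unsigned long epage = (addr + len - 1) & PAGE_MASK; /* last page */

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}

Note that a zero len underflows here, which is one reason the setup path in the next hunk rejects zero-length buffers before computing the count.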
...
 		   tididx = 0, mapped, mapped_pages = 0;
 	u32 *tidlist = NULL;
 	struct tid_user_buf *tidbuf;
+	unsigned long mmu_seq = 0;
 
 	if (!PAGE_ALIGNED(tinfo->vaddr))
+		return -EINVAL;
+	if (tinfo->length == 0)
 		return -EINVAL;
 
 	tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
 	if (!tidbuf)
 		return -ENOMEM;
 
+	mutex_init(&tidbuf->cover_mutex);
 	tidbuf->vaddr = tinfo->vaddr;
 	tidbuf->length = tinfo->length;
+	tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
 	tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
 				GFP_KERNEL);
 	if (!tidbuf->psets) {
-		kfree(tidbuf);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fail_release_mem;
+	}
+
+	if (fd->use_mn) {
+		ret = mmu_interval_notifier_insert(
+			&tidbuf->notifier, current->mm,
+			tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+			&tid_cover_ops);
+		if (ret)
+			goto fail_release_mem;
+		mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
 	}
 
 	pinned = pin_rcv_pages(fd, tidbuf);
 	if (pinned <= 0) {
-		kfree(tidbuf->psets);
-		kfree(tidbuf);
-		return pinned;
+		ret = (pinned < 0) ? pinned : -ENOSPC;
+		goto fail_unpin;
 	}
 
 	/* Find sets of physically contiguous pages */
 	tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
 
-	/*
-	 * We don't need to access this under a lock since tid_used is per
-	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
-	 * and hfi1_user_exp_rcv_setup() at the same time.
-	 */
+	/* Reserve the number of expected tids to be used. */
 	spin_lock(&fd->tid_lock);
 	if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
 		pageset_count = fd->tid_limit - fd->tid_used;
 	else
 		pageset_count = tidbuf->n_psets;
+	fd->tid_used += pageset_count;
 	spin_unlock(&fd->tid_lock);
 
-	if (!pageset_count)
-		goto bail;
+	if (!pageset_count) {
+		ret = -ENOSPC;
+		goto fail_unreserve;
+	}
 
 	ngroups = pageset_count / dd->rcv_entries.group_size;
 	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
 	if (!tidlist) {
 		ret = -ENOMEM;
-		goto nomem;
+		goto fail_unreserve;
 	}
 
 	tididx = 0;
...
 	}
 unlock:
 	mutex_unlock(&uctxt->exp_mutex);
-nomem:
 	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
 		  mapped_pages, ret);
-	if (tididx) {
-		spin_lock(&fd->tid_lock);
-		fd->tid_used += tididx;
-		spin_unlock(&fd->tid_lock);
-		tinfo->tidcnt = tididx;
-		tinfo->length = mapped_pages * PAGE_SIZE;
 
-		if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
-				 tidlist, sizeof(tidlist[0]) * tididx)) {
-			/*
-			 * On failure to copy to the user level, we need to undo
-			 * everything done so far so we don't leak resources.
-			 */
-			tinfo->tidlist = (unsigned long)&tidlist;
-			hfi1_user_exp_rcv_clear(fd, tinfo);
-			tinfo->tidlist = 0;
-			ret = -EFAULT;
-			goto bail;
+	/* fail if nothing was programmed, set error if none provided */
+	if (tididx == 0) {
+		if (ret >= 0)
+			ret = -ENOSPC;
+		goto fail_unreserve;
+	}
+
+	/* adjust reserved tid_used to actual count */
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= pageset_count - tididx;
+	spin_unlock(&fd->tid_lock);
+
+	/* unpin all pages not covered by a TID */
+	unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+			false);
+
+	if (fd->use_mn) {
+		/* check for an invalidate during setup */
+		bool fail = false;
+
+		mutex_lock(&tidbuf->cover_mutex);
+		fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+		mutex_unlock(&tidbuf->cover_mutex);
+
+		if (fail) {
+			ret = -EBUSY;
+			goto fail_unprogram;
 		}
 	}
 
-	/*
-	 * If not everything was mapped (due to insufficient RcvArray entries,
-	 * for example), unpin all unmapped pages so we can pin them nex time.
-	 */
-	if (mapped_pages != pinned)
-		unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
-				(pinned - mapped_pages), false);
-bail:
-	kfree(tidbuf->psets);
-	kfree(tidlist);
+	tinfo->tidcnt = tididx;
+	tinfo->length = mapped_pages * PAGE_SIZE;
+
+	if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+			 tidlist, sizeof(tidlist[0]) * tididx)) {
+		ret = -EFAULT;
+		goto fail_unprogram;
+	}
+
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&tidbuf->notifier);
 	kfree(tidbuf->pages);
+	kfree(tidbuf->psets);
 	kfree(tidbuf);
-	return ret > 0 ? 0 : ret;
+	kfree(tidlist);
+	return 0;
+
+fail_unprogram:
+	/* unprogram, unmap, and unpin all allocated TIDs */
+	tinfo->tidlist = (unsigned long)tidlist;
+	hfi1_user_exp_rcv_clear(fd, tinfo);
+	tinfo->tidlist = 0;
+	pinned = 0;		/* nothing left to unpin */
+	pageset_count = 0;	/* nothing left reserved */
+fail_unreserve:
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= pageset_count;
+	spin_unlock(&fd->tid_lock);
+fail_unpin:
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&tidbuf->notifier);
+	if (pinned > 0)
+		unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
+	kfree(tidbuf->pages);
+	kfree(tidbuf->psets);
+	kfree(tidbuf);
+	kfree(tidlist);
+	return ret;
 }
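The setup path above follows the standard interval-notifier sequence-count idiom: sample the sequence before pinning, do the work, then check whether an invalidation overlapped it. Stripped of the driver specifics (names as in the hunk; cover_mutex pairs this retry with the mmu_interval_set_seq() call in tid_cover_invalidate() below, so the check and the bump cannot interleave):

	unsigned long mmu_seq;

	mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);	/* sample */
	/* ... pin the pages and program the RcvArray entries ... */
	mutex_lock(&tidbuf->cover_mutex);
	if (mmu_interval_read_retry(&tidbuf->notifier, mmu_seq)) {
		/* an unmap raced with setup: unprogram everything, -EBUSY */
	}
	mutex_unlock(&tidbuf->cover_mutex);

The restructured exit labels also encode a strict teardown order: fail_unprogram undoes programmed TIDs, fail_unreserve returns the tid_used reservation, fail_unpin drops the cover notifier and pinned pages, and fail_release_mem frees the allocations, so each earlier label falls through every later cleanup stage.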
 
 int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
...
 
 	mutex_lock(&uctxt->exp_mutex);
 	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
-		ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
+		ret = unprogram_rcvarray(fd, tidinfo[tididx]);
 		if (ret) {
 			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
 				  ret);
...
 		return -EFAULT;
 	}
 
-	node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
-	node->mmu.len = npages * PAGE_SIZE;
+	node->fdata = fd;
+	mutex_init(&node->invalidate_mutex);
 	node->phys = page_to_phys(pages[0]);
 	node->npages = npages;
 	node->rcventry = rcventry;
...
 	node->freed = false;
 	memcpy(node->pages, pages, sizeof(struct page *) * npages);
 
-	if (!fd->handler)
-		ret = tid_rb_insert(fd, &node->mmu);
-	else
-		ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
-	if (ret) {
-		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
-			  node->rcventry, node->mmu.addr, node->phys, ret);
-		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-				 PCI_DMA_FROMDEVICE);
-		kfree(node);
-		return -EFAULT;
+	if (fd->use_mn) {
+		ret = mmu_interval_notifier_insert(
+			&node->notifier, current->mm,
+			tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+			&tid_mn_ops);
+		if (ret)
+			goto out_unmap;
 	}
+	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
 	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
 	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
-			       node->mmu.addr, node->phys, phys);
+			       node->notifier.interval_tree.start, node->phys,
+			       phys);
 	return 0;
+
+out_unmap:
+	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+		  node->rcventry, node->notifier.interval_tree.start,
+		  node->phys, ret);
+	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+			 PCI_DMA_FROMDEVICE);
+	kfree(node);
+	return -EFAULT;
 }
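Note the lifetime rule this hunk establishes for each node: the per-node notifier is inserted only after the node is fully initialized, and on every teardown path the notifier must be removed before the node is unmapped and freed, since mmu_interval_notifier_remove() blocks until any invalidate callback still running against the node has finished. The pattern, repeated below in unprogram_rcvarray() and unlock_exp_tids():

	/* teardown order used throughout this patch */
	if (fd->use_mn)
		mmu_interval_notifier_remove(&node->notifier); /* may sleep */
	cacheless_tid_rb_remove(fd, node);	/* then unmap, unpin, free */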
 
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-			      struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
...
 	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
 		return -EBADF;
 
-	if (grp)
-		*grp = node->grp;
-
-	if (!fd->handler)
-		cacheless_tid_rb_remove(fd, node);
-	else
-		hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+	if (fd->use_mn)
+		mmu_interval_notifier_remove(&node->notifier);
+	cacheless_tid_rb_remove(fd, node);
 
 	return 0;
+}
+
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+
+	mutex_lock(&node->invalidate_mutex);
+	if (node->freed)
+		goto done;
+	node->freed = true;
+
+	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+				 node->npages,
+				 node->notifier.interval_tree.start, node->phys,
+				 node->dma_addr);
+
+	/* Make sure device has seen the write before pages are unpinned */
+	hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
+
+	unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+done:
+	mutex_unlock(&node->invalidate_mutex);
 }
 
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
 
-	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
-				 node->npages, node->mmu.addr, node->phys,
-				 node->dma_addr);
-
-	/*
-	 * Make sure device has seen the write before we unpin the
-	 * pages.
-	 */
-	hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
-
-	unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+	__clear_tid_node(fd, node);
 
 	node->grp->used--;
 	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
...
 			if (!node || node->rcventry != rcventry)
 				continue;
 
+			if (fd->use_mn)
+				mmu_interval_notifier_remove(
+					&node->notifier);
 			cacheless_tid_rb_remove(fd, node);
 		}
 	}
 }
 
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+			      const struct mmu_notifier_range *range,
+			      unsigned long cur_seq)
 {
-	struct hfi1_filedata *fdata = arg;
-	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 	struct tid_rb_node *node =
-		container_of(mnode, struct tid_rb_node, mmu);
+		container_of(mni, struct tid_rb_node, notifier);
+	struct hfi1_filedata *fdata = node->fdata;
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 
 	if (node->freed)
-		return 0;
+		return true;
 
-	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+	/* take action only if unmapping */
+	if (range->event != MMU_NOTIFY_UNMAP)
+		return true;
+
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+				 node->notifier.interval_tree.start,
 				 node->rcventry, node->npages, node->dma_addr);
-	node->freed = true;
+
+	/* clear the hardware rcvarray entry */
+	__clear_tid_node(fdata, node);
 
 	spin_lock(&fdata->invalid_lock);
 	if (fdata->invalid_tid_idx < uctxt->expected_count) {
...
 		fdata->invalid_tid_idx++;
 	}
 	spin_unlock(&fdata->invalid_lock);
-	return 0;
+	return true;
 }
 
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+				 const struct mmu_notifier_range *range,
+				 unsigned long cur_seq)
 {
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-	u32 base = fdata->uctxt->expected_base;
+	struct tid_user_buf *tidbuf =
+		container_of(mni, struct tid_user_buf, notifier);
 
-	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
-	return 0;
+	/* take action only if unmapping */
+	if (range->event == MMU_NOTIFY_UNMAP) {
+		mutex_lock(&tidbuf->cover_mutex);
+		mmu_interval_set_seq(mni, cur_seq);
+		mutex_unlock(&tidbuf->cover_mutex);
+	}
+
+	return true;
 }
 
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
...
 
 	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
 	clear_tid_node(fdata, tnode);
-}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
-	struct hfi1_filedata *fdata = arg;
-	struct tid_rb_node *tnode =
-		container_of(node, struct tid_rb_node, mmu);
-
-	cacheless_tid_rb_remove(fdata, tnode);
 }
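Both new callbacks return true. Under the mmu_interval_notifier contract, returning false is only permitted when the range is non-blockable and the callback could not take its locks, in which case the core retries the invalidation; a callback that may sleep would first check mmu_notifier_range_blockable(). A generic sketch of that contract, for context only (it is not exercised here, since these callbacks act solely on MMU_NOTIFY_UNMAP events):

static bool example_invalidate(struct mmu_interval_notifier *mni,
			       const struct mmu_notifier_range *range,
			       unsigned long cur_seq)
{
	if (!mmu_notifier_range_blockable(range))
		return false;		/* core retries the invalidation */

	mmu_interval_set_seq(mni, cur_seq);	/* publish: range changed */
	/* ... tear down device access to the range; may sleep ... */
	return true;
}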
|---|