| .. | .. |
|---|
| 1 | 1 | /* |
|---|
| 2 | | - * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. |
|---|
| 2 | + * Copyright (c) 2007, 2020 Oracle and/or its affiliates. |
|---|
| 3 | 3 | * |
|---|
| 4 | 4 | * This software is available to you under a choice of one of two |
|---|
| 5 | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
|---|
| .. | .. |
|---|
| 84 | 84 | if (insert) { |
|---|
| 85 | 85 | rb_link_node(&insert->r_rb_node, parent, p); |
|---|
| 86 | 86 | rb_insert_color(&insert->r_rb_node, root); |
|---|
| 87 | | - refcount_inc(&insert->r_refcount); |
|---|
| 87 | + kref_get(&insert->r_kref); |
|---|
| 88 | 88 | } |
|---|
| 89 | 89 | return NULL; |
|---|
| 90 | 90 | } |
|---|
| .. | .. |
|---|
| 99 | 99 | unsigned long flags; |
|---|
| 100 | 100 | |
|---|
| 101 | 101 | rdsdebug("RDS: destroy mr key is %x refcnt %u\n", |
|---|
| 102 | | - mr->r_key, refcount_read(&mr->r_refcount)); |
|---|
| 103 | | - |
|---|
| 104 | | - if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) |
|---|
| 105 | | - return; |
|---|
| 102 | + mr->r_key, kref_read(&mr->r_kref)); |
|---|
| 106 | 103 | |
|---|
| 107 | 104 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); |
|---|
| 108 | 105 | if (!RB_EMPTY_NODE(&mr->r_rb_node)) |
|---|
| .. | .. |
|---|
| 115 | 112 | mr->r_trans->free_mr(trans_private, mr->r_invalidate); |
|---|
| 116 | 113 | } |
|---|
| 117 | 114 | |
|---|
| 118 | | -void __rds_put_mr_final(struct rds_mr *mr) |
|---|
| 115 | +void __rds_put_mr_final(struct kref *kref) |
|---|
| 119 | 116 | { |
|---|
| 117 | + struct rds_mr *mr = container_of(kref, struct rds_mr, r_kref); |
|---|
| 118 | + |
|---|
| 120 | 119 | rds_destroy_mr(mr); |
|---|
| 121 | 120 | kfree(mr); |
|---|
| 122 | 121 | } |
|---|
| .. | .. |
|---|
| 140 | 139 | rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); |
|---|
| 141 | 140 | RB_CLEAR_NODE(&mr->r_rb_node); |
|---|
| 142 | 141 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); |
|---|
| 143 | | - rds_destroy_mr(mr); |
|---|
| 144 | | - rds_mr_put(mr); |
|---|
| 142 | + kref_put(&mr->r_kref, __rds_put_mr_final); |
|---|
| 145 | 143 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); |
|---|
| 146 | 144 | } |
|---|
| 147 | 145 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); |
|---|
| .. | .. |
|---|
| 156 | 154 | static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, |
|---|
| 157 | 155 | struct page **pages, int write) |
|---|
| 158 | 156 | { |
|---|
| 157 | + unsigned int gup_flags = FOLL_LONGTERM; |
|---|
| 159 | 158 | int ret; |
|---|
| 160 | 159 | |
|---|
| 161 | | - ret = get_user_pages_fast(user_addr, nr_pages, write, pages); |
|---|
| 160 | + if (write) |
|---|
| 161 | + gup_flags |= FOLL_WRITE; |
|---|
| 162 | 162 | |
|---|
| 163 | + ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages); |
|---|
| 163 | 164 | if (ret >= 0 && ret < nr_pages) { |
|---|
| 164 | | - while (ret--) |
|---|
| 165 | | - put_page(pages[ret]); |
|---|
| 165 | + unpin_user_pages(pages, ret); |
|---|
| 166 | 166 | ret = -EFAULT; |
|---|
| 167 | 167 | } |
|---|
| 168 | 168 | |
|---|
| .. | .. |
|---|
| 174 | 174 | struct rds_conn_path *cp) |
|---|
| 175 | 175 | { |
|---|
| 176 | 176 | struct rds_mr *mr = NULL, *found; |
|---|
| 177 | + struct scatterlist *sg = NULL; |
|---|
| 177 | 178 | unsigned int nr_pages; |
|---|
| 178 | 179 | struct page **pages = NULL; |
|---|
| 179 | | - struct scatterlist *sg; |
|---|
| 180 | 180 | void *trans_private; |
|---|
| 181 | 181 | unsigned long flags; |
|---|
| 182 | 182 | rds_rdma_cookie_t cookie; |
|---|
| 183 | | - unsigned int nents; |
|---|
| 183 | + unsigned int nents = 0; |
|---|
| 184 | + int need_odp = 0; |
|---|
| 184 | 185 | long i; |
|---|
| 185 | 186 | int ret; |
|---|
| 186 | 187 | |
|---|
| .. | .. |
|---|
| 191 | 192 | |
|---|
| 192 | 193 | if (!rs->rs_transport->get_mr) { |
|---|
| 193 | 194 | ret = -EOPNOTSUPP; |
|---|
| 195 | + goto out; |
|---|
| 196 | + } |
|---|
| 197 | + |
|---|
| 198 | + /* If the combination of the addr and size requested for this memory |
|---|
| 199 | + * region causes an integer overflow, return error. |
|---|
| 200 | + */ |
|---|
| 201 | + if (((args->vec.addr + args->vec.bytes) < args->vec.addr) || |
|---|
| 202 | + PAGE_ALIGN(args->vec.addr + args->vec.bytes) < |
|---|
| 203 | + (args->vec.addr + args->vec.bytes)) { |
|---|
| 204 | + ret = -EINVAL; |
|---|
| 205 | + goto out; |
|---|
| 206 | + } |
|---|
| 207 | + |
|---|
| 208 | + if (!can_do_mlock()) { |
|---|
| 209 | + ret = -EPERM; |
|---|
| 194 | 210 | goto out; |
|---|
| 195 | 211 | } |
|---|
| 196 | 212 | |
|---|
| .. | .. |
|---|
| 224 | 240 | goto out; |
|---|
| 225 | 241 | } |
|---|
| 226 | 242 | |
|---|
| 227 | | - refcount_set(&mr->r_refcount, 1); |
|---|
| 243 | + kref_init(&mr->r_kref); |
|---|
| 228 | 244 | RB_CLEAR_NODE(&mr->r_rb_node); |
|---|
| 229 | 245 | mr->r_trans = rs->rs_transport; |
|---|
| 230 | 246 | mr->r_sock = rs; |
|---|
| .. | .. |
|---|
| 247 | 263 | * the zero page. |
|---|
| 248 | 264 | */ |
|---|
| 249 | 265 | ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1); |
|---|
| 250 | | - if (ret < 0) |
|---|
| 266 | + if (ret == -EOPNOTSUPP) { |
|---|
| 267 | + need_odp = 1; |
|---|
| 268 | + } else if (ret <= 0) { |
|---|
| 251 | 269 | goto out; |
|---|
| 270 | + } else { |
|---|
| 271 | + nents = ret; |
|---|
| 272 | + sg = kmalloc_array(nents, sizeof(*sg), GFP_KERNEL); |
|---|
| 273 | + if (!sg) { |
|---|
| 274 | + ret = -ENOMEM; |
|---|
| 275 | + goto out; |
|---|
| 276 | + } |
|---|
| 277 | + WARN_ON(!nents); |
|---|
| 278 | + sg_init_table(sg, nents); |
|---|
| 252 | 279 | |
|---|
| 253 | | - nents = ret; |
|---|
| 254 | | - sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); |
|---|
| 255 | | - if (!sg) { |
|---|
| 256 | | - ret = -ENOMEM; |
|---|
| 257 | | - goto out; |
|---|
| 280 | + /* Stick all pages into the scatterlist */ |
|---|
| 281 | + for (i = 0 ; i < nents; i++) |
|---|
| 282 | + sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0); |
|---|
| 283 | + |
|---|
| 284 | + rdsdebug("RDS: trans_private nents is %u\n", nents); |
|---|
| 258 | 285 | } |
|---|
| 259 | | - WARN_ON(!nents); |
|---|
| 260 | | - sg_init_table(sg, nents); |
|---|
| 261 | | - |
|---|
| 262 | | - /* Stick all pages into the scatterlist */ |
|---|
| 263 | | - for (i = 0 ; i < nents; i++) |
|---|
| 264 | | - sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0); |
|---|
| 265 | | - |
|---|
| 266 | | - rdsdebug("RDS: trans_private nents is %u\n", nents); |
|---|
| 267 | | - |
|---|
| 268 | 286 | /* Obtain a transport specific MR. If this succeeds, the |
|---|
| 269 | 287 | * s/g list is now owned by the MR. |
|---|
| 270 | 288 | * Note that dma_map() implies that pending writes are |
|---|
| 271 | 289 | * flushed to RAM, so no dma_sync is needed here. */ |
|---|
| 272 | | - trans_private = rs->rs_transport->get_mr(sg, nents, rs, |
|---|
| 273 | | - &mr->r_key, |
|---|
| 274 | | - cp ? cp->cp_conn : NULL); |
|---|
| 290 | + trans_private = rs->rs_transport->get_mr( |
|---|
| 291 | + sg, nents, rs, &mr->r_key, cp ? cp->cp_conn : NULL, |
|---|
| 292 | + args->vec.addr, args->vec.bytes, |
|---|
| 293 | + need_odp ? ODP_ZEROBASED : ODP_NOT_NEEDED); |
|---|
| 275 | 294 | |
|---|
| 276 | 295 | if (IS_ERR(trans_private)) { |
|---|
| 277 | | - for (i = 0 ; i < nents; i++) |
|---|
| 278 | | - put_page(sg_page(&sg[i])); |
|---|
| 279 | | - kfree(sg); |
|---|
| 296 | + /* In ODP case, we don't GUP pages, so don't need |
|---|
| 297 | + * to release anything. |
|---|
| 298 | + */ |
|---|
| 299 | + if (!need_odp) { |
|---|
| 300 | + unpin_user_pages(pages, nr_pages); |
|---|
| 301 | + kfree(sg); |
|---|
| 302 | + } |
|---|
| 280 | 303 | ret = PTR_ERR(trans_private); |
|---|
| 281 | 304 | goto out; |
|---|
| 282 | 305 | } |
|---|
| .. | .. |
|---|
| 290 | 313 | * map page aligned regions. So we keep the offset, and build |
|---|
| 291 | 314 | * a 64bit cookie containing <R_Key, offset> and pass that |
|---|
| 292 | 315 | * around. */ |
|---|
| 293 | | - cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK); |
|---|
| 316 | + if (need_odp) |
|---|
| 317 | + cookie = rds_rdma_make_cookie(mr->r_key, 0); |
|---|
| 318 | + else |
|---|
| 319 | + cookie = rds_rdma_make_cookie(mr->r_key, |
|---|
| 320 | + args->vec.addr & ~PAGE_MASK); |
|---|
| 294 | 321 | if (cookie_ret) |
|---|
| 295 | 322 | *cookie_ret = cookie; |
|---|
| 296 | 323 | |
|---|
| 297 | | - if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) { |
|---|
| 324 | + if (args->cookie_addr && |
|---|
| 325 | + put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { |
|---|
| 326 | + if (!need_odp) { |
|---|
| 327 | + unpin_user_pages(pages, nr_pages); |
|---|
| 328 | + kfree(sg); |
|---|
| 329 | + } |
|---|
| 298 | 330 | ret = -EFAULT; |
|---|
| 299 | 331 | goto out; |
|---|
| 300 | 332 | } |
|---|
| .. | .. |
|---|
| 309 | 341 | |
|---|
| 310 | 342 | rdsdebug("RDS: get_mr key is %x\n", mr->r_key); |
|---|
| 311 | 343 | if (mr_ret) { |
|---|
| 312 | | - refcount_inc(&mr->r_refcount); |
|---|
| 344 | + kref_get(&mr->r_kref); |
|---|
| 313 | 345 | *mr_ret = mr; |
|---|
| 314 | 346 | } |
|---|
| 315 | 347 | |
|---|
| .. | .. |
|---|
| 317 | 349 | out: |
|---|
| 318 | 350 | kfree(pages); |
|---|
| 319 | 351 | if (mr) |
|---|
| 320 | | - rds_mr_put(mr); |
|---|
| 352 | + kref_put(&mr->r_kref, __rds_put_mr_final); |
|---|
| 321 | 353 | return ret; |
|---|
| 322 | 354 | } |
|---|
| 323 | 355 | |
|---|
| 324 | | -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) |
|---|
| 356 | +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen) |
|---|
| 325 | 357 | { |
|---|
| 326 | 358 | struct rds_get_mr_args args; |
|---|
| 327 | 359 | |
|---|
| 328 | 360 | if (optlen != sizeof(struct rds_get_mr_args)) |
|---|
| 329 | 361 | return -EINVAL; |
|---|
| 330 | 362 | |
|---|
| 331 | | - if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval, |
|---|
| 332 | | - sizeof(struct rds_get_mr_args))) |
|---|
| 363 | + if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_args))) |
|---|
| 333 | 364 | return -EFAULT; |
|---|
| 334 | 365 | |
|---|
| 335 | 366 | return __rds_rdma_map(rs, &args, NULL, NULL, NULL); |
|---|
| 336 | 367 | } |
|---|
| 337 | 368 | |
|---|
| 338 | | -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) |
|---|
| 369 | +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen) |
|---|
| 339 | 370 | { |
|---|
| 340 | 371 | struct rds_get_mr_for_dest_args args; |
|---|
| 341 | 372 | struct rds_get_mr_args new_args; |
|---|
| .. | .. |
|---|
| 343 | 374 | if (optlen != sizeof(struct rds_get_mr_for_dest_args)) |
|---|
| 344 | 375 | return -EINVAL; |
|---|
| 345 | 376 | |
|---|
| 346 | | - if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval, |
|---|
| 377 | + if (copy_from_sockptr(&args, optval, |
|---|
| 347 | 378 | sizeof(struct rds_get_mr_for_dest_args))) |
|---|
| 348 | 379 | return -EFAULT; |
|---|
| 349 | 380 | |
|---|
| .. | .. |
|---|
| 362 | 393 | /* |
|---|
| 363 | 394 | * Free the MR indicated by the given R_Key |
|---|
| 364 | 395 | */ |
|---|
| 365 | | -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) |
|---|
| 396 | +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen) |
|---|
| 366 | 397 | { |
|---|
| 367 | 398 | struct rds_free_mr_args args; |
|---|
| 368 | 399 | struct rds_mr *mr; |
|---|
| .. | .. |
|---|
| 371 | 402 | if (optlen != sizeof(struct rds_free_mr_args)) |
|---|
| 372 | 403 | return -EINVAL; |
|---|
| 373 | 404 | |
|---|
| 374 | | - if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval, |
|---|
| 375 | | - sizeof(struct rds_free_mr_args))) |
|---|
| 405 | + if (copy_from_sockptr(&args, optval, sizeof(struct rds_free_mr_args))) |
|---|
| 376 | 406 | return -EFAULT; |
|---|
| 377 | 407 | |
|---|
| 378 | 408 | /* Special case - a null cookie means flush all unused MRs */ |
|---|
| .. | .. |
|---|
| 400 | 430 | if (!mr) |
|---|
| 401 | 431 | return -EINVAL; |
|---|
| 402 | 432 | |
|---|
| 403 | | - /* |
|---|
| 404 | | - * call rds_destroy_mr() ourselves so that we're sure it's done by the time |
|---|
| 405 | | - * we return. If we let rds_mr_put() do it it might not happen until |
|---|
| 406 | | - * someone else drops their ref. |
|---|
| 407 | | - */ |
|---|
| 408 | | - rds_destroy_mr(mr); |
|---|
| 409 | | - rds_mr_put(mr); |
|---|
| 433 | + kref_put(&mr->r_kref, __rds_put_mr_final); |
|---|
| 410 | 434 | return 0; |
|---|
| 411 | 435 | } |
|---|
| 412 | 436 | |
|---|
| .. | .. |
|---|
| 430 | 454 | return; |
|---|
| 431 | 455 | } |
|---|
| 432 | 456 | |
|---|
| 457 | + /* Get a reference so that the MR won't go away before calling |
|---|
| 458 | + * sync_mr() below. |
|---|
| 459 | + */ |
|---|
| 460 | + kref_get(&mr->r_kref); |
|---|
| 461 | + |
|---|
| 462 | + /* If it is going to be freed, remove it from the tree now so |
|---|
| 463 | + * that no other thread can find it and free it. |
|---|
| 464 | + */ |
|---|
| 433 | 465 | if (mr->r_use_once || force) { |
|---|
| 434 | 466 | rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); |
|---|
| 435 | 467 | RB_CLEAR_NODE(&mr->r_rb_node); |
|---|
| .. | .. |
|---|
| 443 | 475 | if (mr->r_trans->sync_mr) |
|---|
| 444 | 476 | mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); |
|---|
| 445 | 477 | |
|---|
| 478 | + /* Release the reference held above. */ |
|---|
| 479 | + kref_put(&mr->r_kref, __rds_put_mr_final); |
|---|
| 480 | + |
|---|
| 446 | 481 | /* If the MR was marked as invalidate, this will |
|---|
| 447 | 482 | * trigger an async flush. */ |
|---|
| 448 | | - if (zot_me) { |
|---|
| 449 | | - rds_destroy_mr(mr); |
|---|
| 450 | | - rds_mr_put(mr); |
|---|
| 451 | | - } |
|---|
| 483 | + if (zot_me) |
|---|
| 484 | + kref_put(&mr->r_kref, __rds_put_mr_final); |
|---|
| 452 | 485 | } |
|---|
| 453 | 486 | |
|---|
| 454 | 487 | void rds_rdma_free_op(struct rm_rdma_op *ro) |
|---|
| 455 | 488 | { |
|---|
| 456 | 489 | unsigned int i; |
|---|
| 457 | 490 | |
|---|
| 458 | | - for (i = 0; i < ro->op_nents; i++) { |
|---|
| 459 | | - struct page *page = sg_page(&ro->op_sg[i]); |
|---|
| 491 | + if (ro->op_odp_mr) { |
|---|
| 492 | + kref_put(&ro->op_odp_mr->r_kref, __rds_put_mr_final); |
|---|
| 493 | + } else { |
|---|
| 494 | + for (i = 0; i < ro->op_nents; i++) { |
|---|
| 495 | + struct page *page = sg_page(&ro->op_sg[i]); |
|---|
| 460 | 496 | |
|---|
| 461 | | - /* Mark page dirty if it was possibly modified, which |
|---|
| 462 | | - * is the case for a RDMA_READ which copies from remote |
|---|
| 463 | | - * to local memory */ |
|---|
| 464 | | - if (!ro->op_write) { |
|---|
| 465 | | - WARN_ON(!page->mapping && irqs_disabled()); |
|---|
| 466 | | - set_page_dirty(page); |
|---|
| 497 | + /* Mark page dirty if it was possibly modified, which |
|---|
| 498 | + * is the case for a RDMA_READ which copies from remote |
|---|
| 499 | + * to local memory |
|---|
| 500 | + */ |
|---|
| 501 | + unpin_user_pages_dirty_lock(&page, 1, !ro->op_write); |
|---|
| 467 | 502 | } |
|---|
| 468 | | - put_page(page); |
|---|
| 469 | 503 | } |
|---|
| 470 | 504 | |
|---|
| 471 | 505 | kfree(ro->op_notifier); |
|---|
| 472 | 506 | ro->op_notifier = NULL; |
|---|
| 473 | 507 | ro->op_active = 0; |
|---|
| 508 | + ro->op_odp_mr = NULL; |
|---|
| 474 | 509 | } |
|---|
| 475 | 510 | |
|---|
| 476 | 511 | void rds_atomic_free_op(struct rm_atomic_op *ao) |
|---|
| .. | .. |
|---|
| 480 | 515 | /* Mark page dirty if it was possibly modified, which |
|---|
| 481 | 516 | * is the case for a RDMA_READ which copies from remote |
|---|
| 482 | 517 | * to local memory */ |
|---|
| 483 | | - set_page_dirty(page); |
|---|
| 484 | | - put_page(page); |
|---|
| 518 | + unpin_user_pages_dirty_lock(&page, 1, true); |
|---|
| 485 | 519 | |
|---|
| 486 | 520 | kfree(ao->op_notifier); |
|---|
| 487 | 521 | ao->op_notifier = NULL; |
|---|
| .. | .. |
|---|
| 583 | 617 | struct rds_iovec *iovs; |
|---|
| 584 | 618 | unsigned int i, j; |
|---|
| 585 | 619 | int ret = 0; |
|---|
| 620 | + bool odp_supported = true; |
|---|
| 586 | 621 | |
|---|
| 587 | 622 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) |
|---|
| 588 | 623 | || rm->rdma.op_active) |
|---|
| .. | .. |
|---|
| 604 | 639 | ret = -EINVAL; |
|---|
| 605 | 640 | goto out_ret; |
|---|
| 606 | 641 | } |
|---|
| 642 | + /* odp-mr is not supported for multiple requests within one message */ |
|---|
| 643 | + if (args->nr_local != 1) |
|---|
| 644 | + odp_supported = false; |
|---|
| 607 | 645 | |
|---|
| 608 | 646 | iovs = vec->iov; |
|---|
| 609 | 647 | |
|---|
| .. | .. |
|---|
| 625 | 663 | op->op_silent = !!(args->flags & RDS_RDMA_SILENT); |
|---|
| 626 | 664 | op->op_active = 1; |
|---|
| 627 | 665 | op->op_recverr = rs->rs_recverr; |
|---|
| 666 | + op->op_odp_mr = NULL; |
|---|
| 667 | + |
|---|
| 628 | 668 | WARN_ON(!nr_pages); |
|---|
| 629 | 669 | op->op_sg = rds_message_alloc_sgs(rm, nr_pages); |
|---|
| 630 | | - if (!op->op_sg) { |
|---|
| 631 | | - ret = -ENOMEM; |
|---|
| 670 | + if (IS_ERR(op->op_sg)) { |
|---|
| 671 | + ret = PTR_ERR(op->op_sg); |
|---|
| 632 | 672 | goto out_pages; |
|---|
| 633 | 673 | } |
|---|
| 634 | 674 | |
|---|
| .. | .. |
|---|
| 645 | 685 | } |
|---|
| 646 | 686 | op->op_notifier->n_user_token = args->user_token; |
|---|
| 647 | 687 | op->op_notifier->n_status = RDS_RDMA_SUCCESS; |
|---|
| 648 | | - |
|---|
| 649 | | - /* Enable rmda notification on data operation for composite |
|---|
| 650 | | - * rds messages and make sure notification is enabled only |
|---|
| 651 | | - * for the data operation which follows it so that application |
|---|
| 652 | | - * gets notified only after full message gets delivered. |
|---|
| 653 | | - */ |
|---|
| 654 | | - if (rm->data.op_sg) { |
|---|
| 655 | | - rm->rdma.op_notify = 0; |
|---|
| 656 | | - rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); |
|---|
| 657 | | - } |
|---|
| 658 | 688 | } |
|---|
| 659 | 689 | |
|---|
| 660 | 690 | /* The cookie contains the R_Key of the remote memory region, and |
|---|
| .. | .. |
|---|
| 686 | 716 | * If it's a READ operation, we need to pin the pages for writing. |
|---|
| 687 | 717 | */ |
|---|
| 688 | 718 | ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write); |
|---|
| 689 | | - if (ret < 0) |
|---|
| 719 | + if ((!odp_supported && ret <= 0) || |
|---|
| 720 | + (odp_supported && ret <= 0 && ret != -EOPNOTSUPP)) |
|---|
| 690 | 721 | goto out_pages; |
|---|
| 691 | | - else |
|---|
| 692 | | - ret = 0; |
|---|
| 722 | + |
|---|
| 723 | + if (ret == -EOPNOTSUPP) { |
|---|
| 724 | + struct rds_mr *local_odp_mr; |
|---|
| 725 | + |
|---|
| 726 | + if (!rs->rs_transport->get_mr) { |
|---|
| 727 | + ret = -EOPNOTSUPP; |
|---|
| 728 | + goto out_pages; |
|---|
| 729 | + } |
|---|
| 730 | + local_odp_mr = |
|---|
| 731 | + kzalloc(sizeof(*local_odp_mr), GFP_KERNEL); |
|---|
| 732 | + if (!local_odp_mr) { |
|---|
| 733 | + ret = -ENOMEM; |
|---|
| 734 | + goto out_pages; |
|---|
| 735 | + } |
|---|
| 736 | + RB_CLEAR_NODE(&local_odp_mr->r_rb_node); |
|---|
| 737 | + kref_init(&local_odp_mr->r_kref); |
|---|
| 738 | + local_odp_mr->r_trans = rs->rs_transport; |
|---|
| 739 | + local_odp_mr->r_sock = rs; |
|---|
| 740 | + local_odp_mr->r_trans_private = |
|---|
| 741 | + rs->rs_transport->get_mr( |
|---|
| 742 | + NULL, 0, rs, &local_odp_mr->r_key, NULL, |
|---|
| 743 | + iov->addr, iov->bytes, ODP_VIRTUAL); |
|---|
| 744 | + if (IS_ERR(local_odp_mr->r_trans_private)) { |
|---|
| 745 | + ret = IS_ERR(local_odp_mr->r_trans_private); |
|---|
| 746 | + rdsdebug("get_mr ret %d %p\"", ret, |
|---|
| 747 | + local_odp_mr->r_trans_private); |
|---|
| 748 | + kfree(local_odp_mr); |
|---|
| 749 | + ret = -EOPNOTSUPP; |
|---|
| 750 | + goto out_pages; |
|---|
| 751 | + } |
|---|
| 752 | + rdsdebug("Need odp; local_odp_mr %p trans_private %p\n", |
|---|
| 753 | + local_odp_mr, local_odp_mr->r_trans_private); |
|---|
| 754 | + op->op_odp_mr = local_odp_mr; |
|---|
| 755 | + op->op_odp_addr = iov->addr; |
|---|
| 756 | + } |
|---|
| 693 | 757 | |
|---|
| 694 | 758 | rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n", |
|---|
| 695 | 759 | nr_bytes, nr, iov->bytes, iov->addr); |
|---|
| .. | .. |
|---|
| 705 | 769 | min_t(unsigned int, iov->bytes, PAGE_SIZE - offset), |
|---|
| 706 | 770 | offset); |
|---|
| 707 | 771 | |
|---|
| 772 | + sg_dma_len(sg) = sg->length; |
|---|
| 708 | 773 | rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n", |
|---|
| 709 | 774 | sg->offset, sg->length, iov->addr, iov->bytes); |
|---|
| 710 | 775 | |
|---|
| .. | .. |
|---|
| 723 | 788 | goto out_pages; |
|---|
| 724 | 789 | } |
|---|
| 725 | 790 | op->op_bytes = nr_bytes; |
|---|
| 791 | + ret = 0; |
|---|
| 726 | 792 | |
|---|
| 727 | 793 | out_pages: |
|---|
| 728 | 794 | kfree(pages); |
|---|
| .. | .. |
|---|
| 765 | 831 | if (!mr) |
|---|
| 766 | 832 | err = -EINVAL; /* invalid r_key */ |
|---|
| 767 | 833 | else |
|---|
| 768 | | - refcount_inc(&mr->r_refcount); |
|---|
| 834 | + kref_get(&mr->r_kref); |
|---|
| 769 | 835 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); |
|---|
| 770 | 836 | |
|---|
| 771 | 837 | if (mr) { |
|---|
| 772 | | - mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); |
|---|
| 838 | + mr->r_trans->sync_mr(mr->r_trans_private, |
|---|
| 839 | + DMA_TO_DEVICE); |
|---|
| 773 | 840 | rm->rdma.op_rdma_mr = mr; |
|---|
| 774 | 841 | } |
|---|
| 775 | 842 | return err; |
|---|
| .. | .. |
|---|
| 843 | 910 | rm->atomic.op_active = 1; |
|---|
| 844 | 911 | rm->atomic.op_recverr = rs->rs_recverr; |
|---|
| 845 | 912 | rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); |
|---|
| 846 | | - if (!rm->atomic.op_sg) { |
|---|
| 847 | | - ret = -ENOMEM; |
|---|
| 913 | + if (IS_ERR(rm->atomic.op_sg)) { |
|---|
| 914 | + ret = PTR_ERR(rm->atomic.op_sg); |
|---|
| 848 | 915 | goto err; |
|---|
| 849 | 916 | } |
|---|
| 850 | 917 | |
|---|
| .. | .. |
|---|
| 883 | 950 | return ret; |
|---|
| 884 | 951 | err: |
|---|
| 885 | 952 | if (page) |
|---|
| 886 | | - put_page(page); |
|---|
| 953 | + unpin_user_page(page); |
|---|
| 887 | 954 | rm->atomic.op_active = 0; |
|---|
| 888 | 955 | kfree(rm->atomic.op_notifier); |
|---|
| 889 | 956 | |
|---|