| Old | New | Code |
|---|---|---|
| .. | .. | |
| 1 | 1 | /* |
| 2 | | - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
| | 2 | + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. |
| 3 | 3 | * |
| 4 | 4 | * This software is available to you under a choice of one of two |
| 5 | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
| .. | .. | |
| 36 | 36 | #include <linux/vmalloc.h> |
| 37 | 37 | #include <linux/ratelimit.h> |
| 38 | 38 | #include <net/addrconf.h> |
| | 39 | +#include <rdma/ib_cm.h> |
| 39 | 40 | |
| 40 | 41 | #include "rds_single_path.h" |
| 41 | 42 | #include "rds.h" |
| 42 | 43 | #include "ib.h" |
| | 44 | +#include "ib_mr.h" |
| 43 | 45 | |
| 44 | 46 | /* |
| 45 | 47 | * Set the selected protocol version |
| .. | .. | |
| 133 | 135 | rds_ib_set_flow_control(conn, be32_to_cpu(credit)); |
| 134 | 136 | } |
| 135 | 137 | |
| 136 | | - if (conn->c_version < RDS_PROTOCOL(3, 1)) { |
| 137 | | - pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", |
| 138 | | - &conn->c_laddr, &conn->c_faddr, |
| 139 | | - RDS_PROTOCOL_MAJOR(conn->c_version), |
| 140 | | - RDS_PROTOCOL_MINOR(conn->c_version)); |
| 141 | | - set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags); |
| 142 | | - rds_conn_destroy(conn); |
| 143 | | - return; |
| 144 | | - } else { |
| 145 | | - pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", |
| 146 | | - ic->i_active_side ? "Active" : "Passive", |
| 147 | | - &conn->c_laddr, &conn->c_faddr, |
| 148 | | - RDS_PROTOCOL_MAJOR(conn->c_version), |
| 149 | | - RDS_PROTOCOL_MINOR(conn->c_version), |
| 150 | | - ic->i_flowctl ? ", flow control" : ""); |
| | 138 | + if (conn->c_version < RDS_PROTOCOL_VERSION) { |
| | 139 | + if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { |
| | 140 | + pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", |
| | 141 | + &conn->c_laddr, &conn->c_faddr, |
| | 142 | + RDS_PROTOCOL_MAJOR(conn->c_version), |
| | 143 | + RDS_PROTOCOL_MINOR(conn->c_version)); |
| | 144 | + rds_conn_destroy(conn); |
| | 145 | + return; |
| | 146 | + } |
| 151 | 147 | } |
| | 148 | + |
| | 149 | + pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n", |
| | 150 | + ic->i_active_side ? "Active" : "Passive", |
| | 151 | + &conn->c_laddr, &conn->c_faddr, conn->c_tos, |
| | 152 | + RDS_PROTOCOL_MAJOR(conn->c_version), |
| | 153 | + RDS_PROTOCOL_MINOR(conn->c_version), |
| | 154 | + ic->i_flowctl ? ", flow control" : ""); |
| | 155 | + |
| | 156 | + /* receive sl from the peer */ |
| | 157 | + ic->i_sl = ic->i_cm_id->route.path_rec->sl; |
| 152 | 158 | |
| 153 | 159 | atomic_set(&ic->i_cq_quiesce, 0); |
| 154 | 160 | |
| .. | .. | |
| 184 | 190 | NULL); |
| 185 | 191 | } |
| 186 | 192 | |
| | 193 | + conn->c_proposed_version = conn->c_version; |
| 187 | 194 | rds_connect_complete(conn); |
| 188 | 195 | } |
| 189 | 196 | |
| .. | .. | |
| 220 | 227 | cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); |
| 221 | 228 | dp->ricp_v6.dp_ack_seq = |
| 222 | 229 | cpu_to_be64(rds_ib_piggyb_ack(ic)); |
| | 230 | + dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos; |
| 223 | 231 | |
| 224 | 232 | conn_param->private_data = &dp->ricp_v6; |
| 225 | 233 | conn_param->private_data_len = sizeof(dp->ricp_v6); |
| .. | .. | |
| 234 | 242 | cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); |
| 235 | 243 | dp->ricp_v4.dp_ack_seq = |
| 236 | 244 | cpu_to_be64(rds_ib_piggyb_ack(ic)); |
| | 245 | + dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos; |
| 237 | 246 | |
| 238 | 247 | conn_param->private_data = &dp->ricp_v4; |
| 239 | 248 | conn_param->private_data_len = sizeof(dp->ricp_v4); |
| .. | .. | |
| 389 | 398 | rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); |
| 390 | 399 | break; |
| 391 | 400 | default: |
| 392 | | - rdsdebug("Fatal QP Event %u (%s) " |
| 393 | | - "- connection %pI6c->%pI6c, reconnecting\n", |
| 394 | | - event->event, ib_event_msg(event->event), |
| 395 | | - &conn->c_laddr, &conn->c_faddr); |
| | 401 | + rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n", |
| | 402 | + event->event, ib_event_msg(event->event), |
| | 403 | + &conn->c_laddr, &conn->c_faddr); |
| 396 | 404 | rds_conn_drop(conn); |
| 397 | 405 | break; |
| 398 | 406 | } |
| .. | .. | |
| 432 | 440 | rds_ibdev->vector_load[index]--; |
| 433 | 441 | } |
| 434 | 442 | |
| | 443 | +static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, |
| | 444 | + dma_addr_t dma_addr, enum dma_data_direction dir) |
| | 445 | +{ |
| | 446 | + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); |
| | 447 | + kfree(hdr); |
| | 448 | +} |
| | 449 | + |
| | 450 | +static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, |
| | 451 | + dma_addr_t *dma_addr, enum dma_data_direction dir) |
| | 452 | +{ |
| | 453 | + struct rds_header *hdr; |
| | 454 | + |
| | 455 | + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); |
| | 456 | + if (!hdr) |
| | 457 | + return NULL; |
| | 458 | + |
| | 459 | + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), |
| | 460 | + DMA_BIDIRECTIONAL); |
| | 461 | + if (ib_dma_mapping_error(dev, *dma_addr)) { |
| | 462 | + kfree(hdr); |
| | 463 | + return NULL; |
| | 464 | + } |
| | 465 | + |
| | 466 | + return hdr; |
| | 467 | +} |
| | 468 | + |
| | 469 | +/* Free the DMA memory used to store struct rds_header. |
| | 470 | + * |
| | 471 | + * @dev: the RDS IB device |
| | 472 | + * @hdrs: pointer to the array storing DMA memory pointers |
| | 473 | + * @dma_addrs: pointer to the array storing DMA addresses |
| | 474 | + * @num_hdars: number of headers to free. |
| | 475 | + */ |
| | 476 | +static void rds_dma_hdrs_free(struct rds_ib_device *dev, |
| | 477 | + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, |
| | 478 | + enum dma_data_direction dir) |
| | 479 | +{ |
| | 480 | + u32 i; |
| | 481 | + |
| | 482 | + for (i = 0; i < num_hdrs; i++) |
| | 483 | + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); |
| | 484 | + kvfree(hdrs); |
| | 485 | + kvfree(dma_addrs); |
| | 486 | +} |
| | 487 | + |
| | 488 | + |
| | 489 | +/* Allocate DMA coherent memory to be used to store struct rds_header for |
| | 490 | + * sending/receiving packets. The pointers to the DMA memory and the |
| | 491 | + * associated DMA addresses are stored in two arrays. |
| | 492 | + * |
| | 493 | + * @dev: the RDS IB device |
| | 494 | + * @dma_addrs: pointer to the array for storing DMA addresses |
| | 495 | + * @num_hdrs: number of headers to allocate |
| | 496 | + * |
| | 497 | + * It returns the pointer to the array storing the DMA memory pointers. On |
| | 498 | + * error, NULL pointer is returned. |
| | 499 | + */ |
| | 500 | +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, |
| | 501 | + dma_addr_t **dma_addrs, u32 num_hdrs, |
| | 502 | + enum dma_data_direction dir) |
| | 503 | +{ |
| | 504 | + struct rds_header **hdrs; |
| | 505 | + dma_addr_t *hdr_daddrs; |
| | 506 | + u32 i; |
| | 507 | + |
| | 508 | + hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, |
| | 509 | + ibdev_to_node(dev->dev)); |
| | 510 | + if (!hdrs) |
| | 511 | + return NULL; |
| | 512 | + |
| | 513 | + hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, |
| | 514 | + ibdev_to_node(dev->dev)); |
| | 515 | + if (!hdr_daddrs) { |
| | 516 | + kvfree(hdrs); |
| | 517 | + return NULL; |
| | 518 | + } |
| | 519 | + |
| | 520 | + for (i = 0; i < num_hdrs; i++) { |
| | 521 | + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); |
| | 522 | + if (!hdrs[i]) { |
| | 523 | + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); |
| | 524 | + return NULL; |
| | 525 | + } |
| | 526 | + } |
| | 527 | + |
| | 528 | + *dma_addrs = hdr_daddrs; |
| | 529 | + return hdrs; |
| | 530 | +} |
| | 531 | + |
| 435 | 532 | /* |
| 436 | 533 | * This needs to be very careful to not leave IS_ERR pointers around for |
| 437 | 534 | * cleanup to trip over. |
| .. | .. | |
| 443 | 540 | struct ib_qp_init_attr attr; |
| 444 | 541 | struct ib_cq_init_attr cq_attr = {}; |
| 445 | 542 | struct rds_ib_device *rds_ibdev; |
| | 543 | + unsigned long max_wrs; |
| 446 | 544 | int ret, fr_queue_space; |
| 447 | 545 | |
| 448 | 546 | /* |
| .. | .. | |
| 454 | 552 | return -EOPNOTSUPP; |
| 455 | 553 | |
| 456 | 554 | /* The fr_queue_space is currently set to 512, to add extra space on |
| 457 | | - * completion queue and send queue. This extra space is used for FRMR |
| | 555 | + * completion queue and send queue. This extra space is used for FRWR |
| 458 | 556 | * registration and invalidation work requests |
| 459 | 557 | */ |
| 460 | | - fr_queue_space = rds_ibdev->use_fastreg ? |
| 461 | | - (RDS_IB_DEFAULT_FR_WR + 1) + |
| 462 | | - (RDS_IB_DEFAULT_FR_INV_WR + 1) |
| 463 | | - : 0; |
| | 558 | + fr_queue_space = RDS_IB_DEFAULT_FR_WR; |
| 464 | 559 | |
| 465 | 560 | /* add the conn now so that connection establishment has the dev */ |
| 466 | 561 | rds_ib_add_conn(rds_ibdev, conn); |
| 467 | 562 | |
| 468 | | - if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) |
| 469 | | - rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); |
| 470 | | - if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) |
| 471 | | - rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); |
| | 563 | + max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ? |
| | 564 | + rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr; |
| | 565 | + if (ic->i_send_ring.w_nr != max_wrs) |
| | 566 | + rds_ib_ring_resize(&ic->i_send_ring, max_wrs); |
| | 567 | + |
| | 568 | + max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ? |
| | 569 | + rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr; |
| | 570 | + if (ic->i_recv_ring.w_nr != max_wrs) |
| | 571 | + rds_ib_ring_resize(&ic->i_recv_ring, max_wrs); |
| 472 | 572 | |
| 473 | 573 | /* Protection domain and memory range */ |
| 474 | 574 | ic->i_pd = rds_ibdev->pd; |
| .. | .. | |
| 526 | 626 | attr.qp_type = IB_QPT_RC; |
| 527 | 627 | attr.send_cq = ic->i_send_cq; |
| 528 | 628 | attr.recv_cq = ic->i_recv_cq; |
| 529 | | - atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); |
| 530 | | - atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR); |
| 531 | 629 | |
| 532 | 630 | /* |
| 533 | 631 | * XXX this can fail if max_*_wr is too large? Are we supposed |
| .. | .. | |
| 539 | 637 | goto recv_cq_out; |
| 540 | 638 | } |
| 541 | 639 | |
| 542 | | - ic->i_send_hdrs = ib_dma_alloc_coherent(dev, |
| 543 | | - ic->i_send_ring.w_nr * |
| 544 | | - sizeof(struct rds_header), |
| 545 | | - &ic->i_send_hdrs_dma, GFP_KERNEL); |
| | 640 | + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, |
| | 641 | + ic->i_send_ring.w_nr, |
| | 642 | + DMA_TO_DEVICE); |
| 546 | 643 | if (!ic->i_send_hdrs) { |
| 547 | 644 | ret = -ENOMEM; |
| 548 | | - rdsdebug("ib_dma_alloc_coherent send failed\n"); |
| | 645 | + rdsdebug("DMA send hdrs alloc failed\n"); |
| 549 | 646 | goto qp_out; |
| 550 | 647 | } |
| 551 | 648 | |
| 552 | | - ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, |
| 553 | | - ic->i_recv_ring.w_nr * |
| 554 | | - sizeof(struct rds_header), |
| 555 | | - &ic->i_recv_hdrs_dma, GFP_KERNEL); |
| | 649 | + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, |
| | 650 | + ic->i_recv_ring.w_nr, |
| | 651 | + DMA_FROM_DEVICE); |
| 556 | 652 | if (!ic->i_recv_hdrs) { |
| 557 | 653 | ret = -ENOMEM; |
| 558 | | - rdsdebug("ib_dma_alloc_coherent recv failed\n"); |
| | 654 | + rdsdebug("DMA recv hdrs alloc failed\n"); |
| 559 | 655 | goto send_hdrs_dma_out; |
| 560 | 656 | } |
| 561 | 657 | |
| 562 | | - ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), |
| 563 | | - &ic->i_ack_dma, GFP_KERNEL); |
| | 658 | + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, |
| | 659 | + DMA_TO_DEVICE); |
| 564 | 660 | if (!ic->i_ack) { |
| 565 | 661 | ret = -ENOMEM; |
| 566 | | - rdsdebug("ib_dma_alloc_coherent ack failed\n"); |
| | 662 | + rdsdebug("DMA ack header alloc failed\n"); |
| 567 | 663 | goto recv_hdrs_dma_out; |
| 568 | 664 | } |
| 569 | 665 | |
| .. | .. | |
| 594 | 690 | |
| 595 | 691 | sends_out: |
| 596 | 692 | vfree(ic->i_sends); |
| | 693 | + |
| 597 | 694 | ack_dma_out: |
| 598 | | - ib_dma_free_coherent(dev, sizeof(struct rds_header), |
| 599 | | - ic->i_ack, ic->i_ack_dma); |
| | 695 | + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, |
| | 696 | + DMA_TO_DEVICE); |
| | 697 | + ic->i_ack = NULL; |
| | 698 | + |
| 600 | 699 | recv_hdrs_dma_out: |
| 601 | | - ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * |
| 602 | | - sizeof(struct rds_header), |
| 603 | | - ic->i_recv_hdrs, ic->i_recv_hdrs_dma); |
| | 700 | + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, |
| | 701 | + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); |
| | 702 | + ic->i_recv_hdrs = NULL; |
| | 703 | + ic->i_recv_hdrs_dma = NULL; |
| | 704 | + |
| 604 | 705 | send_hdrs_dma_out: |
| 605 | | - ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * |
| 606 | | - sizeof(struct rds_header), |
| 607 | | - ic->i_send_hdrs, ic->i_send_hdrs_dma); |
| | 706 | + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, |
| | 707 | + ic->i_send_ring.w_nr, DMA_TO_DEVICE); |
| | 708 | + ic->i_send_hdrs = NULL; |
| | 709 | + ic->i_send_hdrs_dma = NULL; |
| | 710 | + |
| 608 | 711 | qp_out: |
| 609 | 712 | rdma_destroy_qp(ic->i_cm_id); |
| 610 | 713 | recv_cq_out: |
| 611 | | - if (!ib_destroy_cq(ic->i_recv_cq)) |
| 612 | | - ic->i_recv_cq = NULL; |
| | 714 | + ib_destroy_cq(ic->i_recv_cq); |
| | 715 | + ic->i_recv_cq = NULL; |
| 613 | 716 | send_cq_out: |
| 614 | | - if (!ib_destroy_cq(ic->i_send_cq)) |
| 615 | | - ic->i_send_cq = NULL; |
| | 717 | + ib_destroy_cq(ic->i_send_cq); |
| | 718 | + ic->i_send_cq = NULL; |
| 616 | 719 | rds_ibdev_out: |
| 617 | 720 | rds_ib_remove_conn(rds_ibdev, conn); |
| 618 | 721 | out: |
| .. | .. | |
| 635 | 738 | * original size. The only way to tell the difference is by looking at |
| 636 | 739 | * the contents, which are initialized to zero. |
| 637 | 740 | * If the protocol version fields aren't set, this is a connection attempt |
| 638 | | - * from an older version. This could could be 3.0 or 2.0 - we can't tell. |
| | 741 | + * from an older version. This could be 3.0 or 2.0 - we can't tell. |
| 639 | 742 | * We really should have changed this for OFED 1.3 :-( |
| 640 | 743 | */ |
| 641 | 744 | |
| .. | .. | |
| 660 | 763 | |
| 661 | 764 | /* Even if len is crap *now* I still want to check it. -ASG */ |
| 662 | 765 | | if (event->param.conn.private_data_len < data_len \|\| major == 0) |
| 663 | | - return RDS_PROTOCOL_3_0; |
| | 766 | + return RDS_PROTOCOL_4_0; |
| 664 | 767 | |
| 665 | 768 | common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS; |
| 666 | | - if (major == 3 && common) { |
| 667 | | - version = RDS_PROTOCOL_3_0; |
| | 769 | + if (major == 4 && common) { |
| | 770 | + version = RDS_PROTOCOL_4_0; |
| 668 | 771 | while ((common >>= 1) != 0) |
| 669 | 772 | version++; |
| | 773 | + } else if (RDS_PROTOCOL_COMPAT_VERSION == |
| | 774 | + RDS_PROTOCOL(major, minor)) { |
| | 775 | + version = RDS_PROTOCOL_COMPAT_VERSION; |
| 670 | 776 | } else { |
| 671 | 777 | if (isv6) |
| 672 | 778 | printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n", |
| .. | .. | |
| 729 | 835 | |
| 730 | 836 | /* Check whether the remote protocol version matches ours. */ |
| 731 | 837 | version = rds_ib_protocol_compatible(event, isv6); |
| 732 | | - if (!version) |
| | 838 | + if (!version) { |
| | 839 | + err = RDS_RDMA_REJ_INCOMPAT; |
| 733 | 840 | goto out; |
| | 841 | + } |
| 734 | 842 | |
| 735 | 843 | dp = event->param.conn.private_data; |
| 736 | 844 | if (isv6) { |
| .. | .. | |
| 771 | 879 | daddr6 = &d_mapped_addr; |
| 772 | 880 | } |
| 773 | 881 | |
| 774 | | - rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid " |
| 775 | | - "0x%llx\n", saddr6, daddr6, |
| 776 | | - RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), |
| | 882 | + rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n", |
| | 883 | + saddr6, daddr6, RDS_PROTOCOL_MAJOR(version), |
| | 884 | + RDS_PROTOCOL_MINOR(version), |
| 777 | 885 | (unsigned long long)be64_to_cpu(lguid), |
| 778 | | - (unsigned long long)be64_to_cpu(fguid)); |
| | 886 | + (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss); |
| 779 | 887 | |
| 780 | 888 | /* RDS/IB is not currently netns aware, thus init_net */ |
| 781 | 889 | conn = rds_conn_create(&init_net, daddr6, saddr6, |
| 782 | | - &rds_ib_transport, GFP_KERNEL, ifindex); |
| | 890 | + &rds_ib_transport, dp_cmn->ricpc_dp_toss, |
| | 891 | + GFP_KERNEL, ifindex); |
| 783 | 892 | if (IS_ERR(conn)) { |
| 784 | 893 | rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); |
| 785 | 894 | conn = NULL; |
| .. | .. | |
| 846 | 955 | if (conn) |
| 847 | 956 | mutex_unlock(&conn->c_cm_lock); |
| 848 | 957 | if (err) |
| 849 | | - rdma_reject(cm_id, NULL, 0); |
| | 958 | + rdma_reject(cm_id, &err, sizeof(int), |
| | 959 | + IB_CM_REJ_CONSUMER_DEFINED); |
| 850 | 960 | return destroy; |
| 851 | 961 | } |
| 852 | 962 | |
| .. | .. | |
| 861 | 971 | |
| 862 | 972 | /* If the peer doesn't do protocol negotiation, we must |
| 863 | 973 | * default to RDSv3.0 */ |
| 864 | | - rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); |
| | 974 | + rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1); |
| 865 | 975 | ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ |
| 866 | 976 | |
| 867 | 977 | ret = rds_ib_setup_qp(conn); |
| .. | .. | |
| 870 | 980 | goto out; |
| 871 | 981 | } |
| 872 | 982 | |
| 873 | | - rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, |
| | 983 | + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, |
| | 984 | + conn->c_proposed_version, |
| 874 | 985 | UINT_MAX, UINT_MAX, isv6); |
| 875 | | - ret = rdma_connect(cm_id, &conn_param); |
| | 986 | + ret = rdma_connect_locked(cm_id, &conn_param); |
| 876 | 987 | if (ret) |
| 877 | | - rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); |
| | 988 | + rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n", |
| | 989 | + ret); |
| 878 | 990 | |
| 879 | 991 | out: |
| 880 | 992 | /* Beware - returning non-zero tells the rdma_cm to destroy |
| .. | .. | |
| 975 | 1087 | ic->i_cm_id ? ic->i_cm_id->qp : NULL); |
| 976 | 1088 | |
| 977 | 1089 | if (ic->i_cm_id) { |
| 978 | | - struct ib_device *dev = ic->i_cm_id->device; |
| 979 | | - |
| 980 | 1090 | rdsdebug("disconnecting cm %p\n", ic->i_cm_id); |
| 981 | 1091 | err = rdma_disconnect(ic->i_cm_id); |
| 982 | 1092 | if (err) { |
| .. | .. | |
| 986 | 1096 | rdsdebug("failed to disconnect, cm: %p err %d\n", |
| 987 | 1097 | ic->i_cm_id, err); |
| 988 | 1098 | } |
| | 1099 | + |
| | 1100 | + /* kick off "flush_worker" for all pools in order to reap |
| | 1101 | + * all FRMR registrations that are still marked "FRMR_IS_INUSE" |
| | 1102 | + */ |
| | 1103 | + rds_ib_flush_mrs(); |
| 989 | 1104 | |
| 990 | 1105 | /* |
| 991 | 1106 | * We want to wait for tx and rx completion to finish |
| .. | .. | |
| 999 | 1114 | wait_event(rds_ib_ring_empty_wait, |
| 1000 | 1115 | rds_ib_ring_empty(&ic->i_recv_ring) && |
| 1001 | 1116 | (atomic_read(&ic->i_signaled_sends) == 0) && |
| 1002 | | - (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) && |
| 1003 | | - (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR)); |
| | 1117 | + (atomic_read(&ic->i_fastreg_inuse_count) == 0) && |
| | 1118 | + (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); |
| 1004 | 1119 | tasklet_kill(&ic->i_send_tasklet); |
| 1005 | 1120 | tasklet_kill(&ic->i_recv_tasklet); |
| 1006 | 1121 | |
| .. | .. | |
| 1021 | 1136 | ib_destroy_cq(ic->i_recv_cq); |
| 1022 | 1137 | } |
| 1023 | 1138 | |
| 1024 | | - /* then free the resources that ib callbacks use */ |
| 1025 | | - if (ic->i_send_hdrs) |
| 1026 | | - ib_dma_free_coherent(dev, |
| 1027 | | - ic->i_send_ring.w_nr * |
| 1028 | | - sizeof(struct rds_header), |
| 1029 | | - ic->i_send_hdrs, |
| 1030 | | - ic->i_send_hdrs_dma); |
| | 1139 | + if (ic->rds_ibdev) { |
| | 1140 | + /* then free the resources that ib callbacks use */ |
| | 1141 | + if (ic->i_send_hdrs) { |
| | 1142 | + rds_dma_hdrs_free(ic->rds_ibdev, |
| | 1143 | + ic->i_send_hdrs, |
| | 1144 | + ic->i_send_hdrs_dma, |
| | 1145 | + ic->i_send_ring.w_nr, |
| | 1146 | + DMA_TO_DEVICE); |
| | 1147 | + ic->i_send_hdrs = NULL; |
| | 1148 | + ic->i_send_hdrs_dma = NULL; |
| | 1149 | + } |
| 1031 | 1150 | |
| 1032 | | - if (ic->i_recv_hdrs) |
| 1033 | | - ib_dma_free_coherent(dev, |
| 1034 | | - ic->i_recv_ring.w_nr * |
| 1035 | | - sizeof(struct rds_header), |
| 1036 | | - ic->i_recv_hdrs, |
| 1037 | | - ic->i_recv_hdrs_dma); |
| | 1151 | + if (ic->i_recv_hdrs) { |
| | 1152 | + rds_dma_hdrs_free(ic->rds_ibdev, |
| | 1153 | + ic->i_recv_hdrs, |
| | 1154 | + ic->i_recv_hdrs_dma, |
| | 1155 | + ic->i_recv_ring.w_nr, |
| | 1156 | + DMA_FROM_DEVICE); |
| | 1157 | + ic->i_recv_hdrs = NULL; |
| | 1158 | + ic->i_recv_hdrs_dma = NULL; |
| | 1159 | + } |
| 1038 | 1160 | |
| 1039 | | - if (ic->i_ack) |
| 1040 | | - ib_dma_free_coherent(dev, sizeof(struct rds_header), |
| 1041 | | - ic->i_ack, ic->i_ack_dma); |
| | 1161 | + if (ic->i_ack) { |
| | 1162 | + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, |
| | 1163 | + ic->i_ack_dma, DMA_TO_DEVICE); |
| | 1164 | + ic->i_ack = NULL; |
| | 1165 | + } |
| | 1166 | + } else { |
| | 1167 | + WARN_ON(ic->i_send_hdrs); |
| | 1168 | + WARN_ON(ic->i_send_hdrs_dma); |
| | 1169 | + WARN_ON(ic->i_recv_hdrs); |
| | 1170 | + WARN_ON(ic->i_recv_hdrs_dma); |
| | 1171 | + WARN_ON(ic->i_ack); |
| | 1172 | + } |
| 1042 | 1173 | |
| 1043 | 1174 | if (ic->i_sends) |
| 1044 | 1175 | rds_ib_send_clear_ring(ic); |
| .. | .. | |
| 1057 | 1188 | ic->i_pd = NULL; |
| 1058 | 1189 | ic->i_send_cq = NULL; |
| 1059 | 1190 | ic->i_recv_cq = NULL; |
| 1060 | | - ic->i_send_hdrs = NULL; |
| 1061 | | - ic->i_recv_hdrs = NULL; |
| 1062 | | - ic->i_ack = NULL; |
| 1063 | 1191 | } |
| 1064 | 1192 | BUG_ON(ic->rds_ibdev); |
| 1065 | 1193 | |
| .. | .. | |
| 1085 | 1213 | ic->i_flowctl = 0; |
| 1086 | 1214 | atomic_set(&ic->i_credits, 0); |
| 1087 | 1215 | |
| 1088 | | - rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); |
| 1089 | | - rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); |
| | 1216 | + /* Re-init rings, but retain sizes. */ |
| | 1217 | + rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr); |
| | 1218 | + rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr); |
| 1090 | 1219 | |
| 1091 | 1220 | if (ic->i_ibinc) { |
| 1092 | 1221 | rds_inc_put(&ic->i_ibinc->ii_inc); |
| .. | .. | |
| 1127 | 1256 | spin_lock_init(&ic->i_ack_lock); |
| 1128 | 1257 | #endif |
| 1129 | 1258 | atomic_set(&ic->i_signaled_sends, 0); |
| | 1259 | + atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); |
| 1130 | 1260 | |
| 1131 | 1261 | /* |
| 1132 | 1262 | * rds_ib_conn_shutdown() waits for these to be emptied so they |
| 1133 | 1263 | * must be initialized before it can be called. |
| 1134 | 1264 | */ |
| 1135 | | - rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); |
| 1136 | | - rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); |
| | 1265 | + rds_ib_ring_init(&ic->i_send_ring, 0); |
| | 1266 | + rds_ib_ring_init(&ic->i_recv_ring, 0); |
| 1137 | 1267 | |
| 1138 | 1268 | ic->conn = conn; |
| 1139 | 1269 | conn->c_transport_data = ic; |
|---|