@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -36,10 +36,12 @@
 #include <linux/vmalloc.h>
 #include <linux/ratelimit.h>
 #include <net/addrconf.h>
+#include <rdma/ib_cm.h>
 
 #include "rds_single_path.h"
 #include "rds.h"
 #include "ib.h"
+#include "ib_mr.h"
 
 /*
  * Set the selected protocol version
@@ -133,22 +135,26 @@
                 rds_ib_set_flow_control(conn, be32_to_cpu(credit));
         }
 
-        if (conn->c_version < RDS_PROTOCOL(3, 1)) {
-                pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
-                          &conn->c_laddr, &conn->c_faddr,
-                          RDS_PROTOCOL_MAJOR(conn->c_version),
-                          RDS_PROTOCOL_MINOR(conn->c_version));
-                set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
-                rds_conn_destroy(conn);
-                return;
-        } else {
-                pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
-                          ic->i_active_side ? "Active" : "Passive",
-                          &conn->c_laddr, &conn->c_faddr,
-                          RDS_PROTOCOL_MAJOR(conn->c_version),
-                          RDS_PROTOCOL_MINOR(conn->c_version),
-                          ic->i_flowctl ? ", flow control" : "");
+        if (conn->c_version < RDS_PROTOCOL_VERSION) {
+                if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
+                        pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
+                                  &conn->c_laddr, &conn->c_faddr,
+                                  RDS_PROTOCOL_MAJOR(conn->c_version),
+                                  RDS_PROTOCOL_MINOR(conn->c_version));
+                        rds_conn_destroy(conn);
+                        return;
+                }
         }
+
+        pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
+                  ic->i_active_side ? "Active" : "Passive",
+                  &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                  RDS_PROTOCOL_MAJOR(conn->c_version),
+                  RDS_PROTOCOL_MINOR(conn->c_version),
+                  ic->i_flowctl ? ", flow control" : "");
+
+        /* receive sl from the peer */
+        ic->i_sl = ic->i_cm_id->route.path_rec->sl;
 
         atomic_set(&ic->i_cq_quiesce, 0);
 
@@ -184,6 +190,7 @@
                                             NULL);
         }
 
+        conn->c_proposed_version = conn->c_version;
         rds_connect_complete(conn);
 }
 
@@ -220,6 +227,7 @@
                         cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
                 dp->ricp_v6.dp_ack_seq =
                         cpu_to_be64(rds_ib_piggyb_ack(ic));
+                dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
 
                 conn_param->private_data = &dp->ricp_v6;
                 conn_param->private_data_len = sizeof(dp->ricp_v6);
@@ -234,6 +242,7 @@
                         cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
                 dp->ricp_v4.dp_ack_seq =
                         cpu_to_be64(rds_ib_piggyb_ack(ic));
+                dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
 
                 conn_param->private_data = &dp->ricp_v4;
                 conn_param->private_data_len = sizeof(dp->ricp_v4);
@@ -389,10 +398,9 @@
                 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
                 break;
         default:
-                rdsdebug("Fatal QP Event %u (%s) "
-                        "- connection %pI6c->%pI6c, reconnecting\n",
-                        event->event, ib_event_msg(event->event),
-                        &conn->c_laddr, &conn->c_faddr);
+                rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
+                         event->event, ib_event_msg(event->event),
+                         &conn->c_laddr, &conn->c_faddr);
                 rds_conn_drop(conn);
                 break;
         }
@@ -432,6 +440,95 @@
         rds_ibdev->vector_load[index]--;
 }
 
+static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr,
+                             dma_addr_t dma_addr, enum dma_data_direction dir)
+{
+        ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir);
+        kfree(hdr);
+}
+
+static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev,
+                                            dma_addr_t *dma_addr, enum dma_data_direction dir)
+{
+        struct rds_header *hdr;
+
+        hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev));
+        if (!hdr)
+                return NULL;
+
+        *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr),
+                                      DMA_BIDIRECTIONAL);
+        if (ib_dma_mapping_error(dev, *dma_addr)) {
+                kfree(hdr);
+                return NULL;
+        }
+
+        return hdr;
+}
+
+/* Free the DMA memory used to store struct rds_header.
+ *
+ * @dev: the RDS IB device
+ * @hdrs: pointer to the array storing DMA memory pointers
+ * @dma_addrs: pointer to the array storing DMA addresses
+ * @num_hdrs: number of headers to free.
| 475 | + */ |
---|
| 476 | +static void rds_dma_hdrs_free(struct rds_ib_device *dev, |
---|
| 477 | + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, |
---|
| 478 | + enum dma_data_direction dir) |
---|
| 479 | +{ |
---|
| 480 | + u32 i; |
---|
| 481 | + |
---|
| 482 | + for (i = 0; i < num_hdrs; i++) |
---|
| 483 | + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); |
---|
| 484 | + kvfree(hdrs); |
---|
| 485 | + kvfree(dma_addrs); |
---|
| 486 | +} |
---|
| 487 | + |
---|
| 488 | + |
---|
| 489 | +/* Allocate DMA coherent memory to be used to store struct rds_header for |
---|
| 490 | + * sending/receiving packets. The pointers to the DMA memory and the |
---|
| 491 | + * associated DMA addresses are stored in two arrays. |
---|
| 492 | + * |
---|
| 493 | + * @dev: the RDS IB device |
---|
| 494 | + * @dma_addrs: pointer to the array for storing DMA addresses |
---|
| 495 | + * @num_hdrs: number of headers to allocate |
---|
| 496 | + * |
---|
| 497 | + * It returns the pointer to the array storing the DMA memory pointers. On |
---|
| 498 | + * error, NULL pointer is returned. |
---|
| 499 | + */ |
---|
| 500 | +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, |
---|
| 501 | + dma_addr_t **dma_addrs, u32 num_hdrs, |
---|
| 502 | + enum dma_data_direction dir) |
---|
| 503 | +{ |
---|
| 504 | + struct rds_header **hdrs; |
---|
| 505 | + dma_addr_t *hdr_daddrs; |
---|
| 506 | + u32 i; |
---|
| 507 | + |
---|
| 508 | + hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, |
---|
| 509 | + ibdev_to_node(dev->dev)); |
---|
| 510 | + if (!hdrs) |
---|
| 511 | + return NULL; |
---|
| 512 | + |
---|
| 513 | + hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, |
---|
| 514 | + ibdev_to_node(dev->dev)); |
---|
| 515 | + if (!hdr_daddrs) { |
---|
| 516 | + kvfree(hdrs); |
---|
| 517 | + return NULL; |
---|
| 518 | + } |
---|
| 519 | + |
---|
| 520 | + for (i = 0; i < num_hdrs; i++) { |
---|
| 521 | + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); |
---|
| 522 | + if (!hdrs[i]) { |
---|
| 523 | + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); |
---|
| 524 | + return NULL; |
---|
| 525 | + } |
---|
| 526 | + } |
---|
| 527 | + |
---|
| 528 | + *dma_addrs = hdr_daddrs; |
---|
| 529 | + return hdrs; |
---|
| 530 | +} |
---|
| 531 | + |
---|
435 | 532 | /* |
---|
436 | 533 | * This needs to be very careful to not leave IS_ERR pointers around for |
---|
437 | 534 | * cleanup to trip over. |
---|
@@ -443,6 +540,7 @@
         struct ib_qp_init_attr attr;
         struct ib_cq_init_attr cq_attr = {};
         struct rds_ib_device *rds_ibdev;
+        unsigned long max_wrs;
         int ret, fr_queue_space;
 
         /*
@@ -454,21 +552,23 @@
                 return -EOPNOTSUPP;
 
         /* The fr_queue_space is currently set to 512, to add extra space on
-         * completion queue and send queue. This extra space is used for FRMR
+         * completion queue and send queue. This extra space is used for FRWR
          * registration and invalidation work requests
          */
-        fr_queue_space = rds_ibdev->use_fastreg ?
-                         (RDS_IB_DEFAULT_FR_WR + 1) +
-                         (RDS_IB_DEFAULT_FR_INV_WR + 1)
-                         : 0;
+        fr_queue_space = RDS_IB_DEFAULT_FR_WR;
 
         /* add the conn now so that connection establishment has the dev */
         rds_ib_add_conn(rds_ibdev, conn);
 
-        if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
-                rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
-        if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
-                rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
+        max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
+                  rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
+        if (ic->i_send_ring.w_nr != max_wrs)
+                rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
+
+        max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
+                  rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
+        if (ic->i_recv_ring.w_nr != max_wrs)
+                rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
 
         /* Protection domain and memory range */
         ic->i_pd = rds_ibdev->pd;
@@ -526,8 +626,6 @@
         attr.qp_type = IB_QPT_RC;
         attr.send_cq = ic->i_send_cq;
         attr.recv_cq = ic->i_recv_cq;
-        atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
-        atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR);
 
         /*
          * XXX this can fail if max_*_wr is too large? Are we supposed
@@ -539,31 +637,29 @@
                 goto recv_cq_out;
         }
 
-        ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
-                                                ic->i_send_ring.w_nr *
-                                                sizeof(struct rds_header),
-                                                &ic->i_send_hdrs_dma, GFP_KERNEL);
+        ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma,
+                                             ic->i_send_ring.w_nr,
+                                             DMA_TO_DEVICE);
         if (!ic->i_send_hdrs) {
                 ret = -ENOMEM;
-                rdsdebug("ib_dma_alloc_coherent send failed\n");
+                rdsdebug("DMA send hdrs alloc failed\n");
                 goto qp_out;
         }
 
-        ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
-                                                ic->i_recv_ring.w_nr *
-                                                sizeof(struct rds_header),
-                                                &ic->i_recv_hdrs_dma, GFP_KERNEL);
+        ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma,
+                                             ic->i_recv_ring.w_nr,
+                                             DMA_FROM_DEVICE);
         if (!ic->i_recv_hdrs) {
                 ret = -ENOMEM;
-                rdsdebug("ib_dma_alloc_coherent recv failed\n");
+                rdsdebug("DMA recv hdrs alloc failed\n");
                 goto send_hdrs_dma_out;
         }
 
-        ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
-                                          &ic->i_ack_dma, GFP_KERNEL);
+        ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma,
+                                      DMA_TO_DEVICE);
         if (!ic->i_ack) {
                 ret = -ENOMEM;
-                rdsdebug("ib_dma_alloc_coherent ack failed\n");
+                rdsdebug("DMA ack header alloc failed\n");
                 goto recv_hdrs_dma_out;
         }
 
@@ -594,25 +690,32 @@
 
 sends_out:
         vfree(ic->i_sends);
+
 ack_dma_out:
-        ib_dma_free_coherent(dev, sizeof(struct rds_header),
-                             ic->i_ack, ic->i_ack_dma);
+        rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma,
+                         DMA_TO_DEVICE);
+        ic->i_ack = NULL;
+
 recv_hdrs_dma_out:
-        ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
-                             sizeof(struct rds_header),
-                             ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+        rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
+                          ic->i_recv_ring.w_nr, DMA_FROM_DEVICE);
+        ic->i_recv_hdrs = NULL;
+        ic->i_recv_hdrs_dma = NULL;
+
 send_hdrs_dma_out:
-        ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
-                             sizeof(struct rds_header),
-                             ic->i_send_hdrs, ic->i_send_hdrs_dma);
+        rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma,
+                          ic->i_send_ring.w_nr, DMA_TO_DEVICE);
+        ic->i_send_hdrs = NULL;
+        ic->i_send_hdrs_dma = NULL;
+
 qp_out:
         rdma_destroy_qp(ic->i_cm_id);
 recv_cq_out:
-        if (!ib_destroy_cq(ic->i_recv_cq))
-                ic->i_recv_cq = NULL;
+        ib_destroy_cq(ic->i_recv_cq);
+        ic->i_recv_cq = NULL;
 send_cq_out:
-        if (!ib_destroy_cq(ic->i_send_cq))
-                ic->i_send_cq = NULL;
+        ib_destroy_cq(ic->i_send_cq);
+        ic->i_send_cq = NULL;
 rds_ibdev_out:
         rds_ib_remove_conn(rds_ibdev, conn);
 out:
@@ -635,7 +738,7 @@
  * original size. The only way to tell the difference is by looking at
  * the contents, which are initialized to zero.
  * If the protocol version fields aren't set, this is a connection attempt
- * from an older version. This could could be 3.0 or 2.0 - we can't tell.
+ * from an older version. This could be 3.0 or 2.0 - we can't tell.
  * We really should have changed this for OFED 1.3 :-(
  */
 
@@ -660,13 +763,16 @@
 
         /* Even if len is crap *now* I still want to check it. -ASG */
         if (event->param.conn.private_data_len < data_len || major == 0)
-                return RDS_PROTOCOL_3_0;
+                return RDS_PROTOCOL_4_0;
 
         common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
-        if (major == 3 && common) {
-                version = RDS_PROTOCOL_3_0;
+        if (major == 4 && common) {
+                version = RDS_PROTOCOL_4_0;
                 while ((common >>= 1) != 0)
                         version++;
+        } else if (RDS_PROTOCOL_COMPAT_VERSION ==
+                   RDS_PROTOCOL(major, minor)) {
+                version = RDS_PROTOCOL_COMPAT_VERSION;
         } else {
                 if (isv6)
                         printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
@@ -729,8 +835,10 @@
 
         /* Check whether the remote protocol version matches ours. */
         version = rds_ib_protocol_compatible(event, isv6);
-        if (!version)
+        if (!version) {
+                err = RDS_RDMA_REJ_INCOMPAT;
                 goto out;
+        }
 
         dp = event->param.conn.private_data;
         if (isv6) {
@@ -771,15 +879,16 @@
                 daddr6 = &d_mapped_addr;
         }
 
-        rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
-                 "0x%llx\n", saddr6, daddr6,
-                 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
+        rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
+                 saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
+                 RDS_PROTOCOL_MINOR(version),
                  (unsigned long long)be64_to_cpu(lguid),
-                 (unsigned long long)be64_to_cpu(fguid));
+                 (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
 
         /* RDS/IB is not currently netns aware, thus init_net */
         conn = rds_conn_create(&init_net, daddr6, saddr6,
-                               &rds_ib_transport, GFP_KERNEL, ifindex);
+                               &rds_ib_transport, dp_cmn->ricpc_dp_toss,
+                               GFP_KERNEL, ifindex);
         if (IS_ERR(conn)) {
                 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
                 conn = NULL;
@@ -846,7 +955,8 @@
         if (conn)
                 mutex_unlock(&conn->c_cm_lock);
         if (err)
-                rdma_reject(cm_id, NULL, 0);
+                rdma_reject(cm_id, &err, sizeof(int),
+                            IB_CM_REJ_CONSUMER_DEFINED);
         return destroy;
 }
 
@@ -861,7 +971,7 @@
 
         /* If the peer doesn't do protocol negotiation, we must
          * default to RDSv3.0 */
-        rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
+        rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
         ic->i_flowctl = rds_ib_sysctl_flow_control;     /* advertise flow control */
 
         ret = rds_ib_setup_qp(conn);
@@ -870,11 +980,13 @@
                 goto out;
         }
 
-        rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
+        rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
+                                  conn->c_proposed_version,
                                   UINT_MAX, UINT_MAX, isv6);
-        ret = rdma_connect(cm_id, &conn_param);
+        ret = rdma_connect_locked(cm_id, &conn_param);
         if (ret)
-                rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
+                rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n",
+                                  ret);
 
 out:
         /* Beware - returning non-zero tells the rdma_cm to destroy
@@ -975,8 +1087,6 @@
                  ic->i_cm_id ? ic->i_cm_id->qp : NULL);
 
         if (ic->i_cm_id) {
-                struct ib_device *dev = ic->i_cm_id->device;
-
                 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
                 err = rdma_disconnect(ic->i_cm_id);
                 if (err) {
@@ -986,6 +1096,11 @@
                         rdsdebug("failed to disconnect, cm: %p err %d\n",
                                  ic->i_cm_id, err);
                 }
+
+                /* kick off "flush_worker" for all pools in order to reap
+                 * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+                 */
+                rds_ib_flush_mrs();
 
                 /*
                  * We want to wait for tx and rx completion to finish
999 | 1114 | wait_event(rds_ib_ring_empty_wait, |
---|
1000 | 1115 | rds_ib_ring_empty(&ic->i_recv_ring) && |
---|
1001 | 1116 | (atomic_read(&ic->i_signaled_sends) == 0) && |
---|
1002 | | - (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) && |
---|
1003 | | - (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR)); |
---|
| 1117 | + (atomic_read(&ic->i_fastreg_inuse_count) == 0) && |
---|
| 1118 | + (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); |
---|
1004 | 1119 | tasklet_kill(&ic->i_send_tasklet); |
---|
1005 | 1120 | tasklet_kill(&ic->i_recv_tasklet); |
---|
1006 | 1121 | |
---|
@@ -1021,24 +1136,40 @@
                         ib_destroy_cq(ic->i_recv_cq);
                 }
 
-                /* then free the resources that ib callbacks use */
-                if (ic->i_send_hdrs)
-                        ib_dma_free_coherent(dev,
-                                             ic->i_send_ring.w_nr *
-                                             sizeof(struct rds_header),
-                                             ic->i_send_hdrs,
-                                             ic->i_send_hdrs_dma);
+                if (ic->rds_ibdev) {
+                        /* then free the resources that ib callbacks use */
+                        if (ic->i_send_hdrs) {
+                                rds_dma_hdrs_free(ic->rds_ibdev,
+                                                  ic->i_send_hdrs,
+                                                  ic->i_send_hdrs_dma,
+                                                  ic->i_send_ring.w_nr,
+                                                  DMA_TO_DEVICE);
+                                ic->i_send_hdrs = NULL;
+                                ic->i_send_hdrs_dma = NULL;
+                        }
 
-                if (ic->i_recv_hdrs)
-                        ib_dma_free_coherent(dev,
-                                             ic->i_recv_ring.w_nr *
-                                             sizeof(struct rds_header),
-                                             ic->i_recv_hdrs,
-                                             ic->i_recv_hdrs_dma);
+                        if (ic->i_recv_hdrs) {
+                                rds_dma_hdrs_free(ic->rds_ibdev,
+                                                  ic->i_recv_hdrs,
+                                                  ic->i_recv_hdrs_dma,
+                                                  ic->i_recv_ring.w_nr,
+                                                  DMA_FROM_DEVICE);
+                                ic->i_recv_hdrs = NULL;
+                                ic->i_recv_hdrs_dma = NULL;
+                        }
 
-                if (ic->i_ack)
-                        ib_dma_free_coherent(dev, sizeof(struct rds_header),
-                                             ic->i_ack, ic->i_ack_dma);
+                        if (ic->i_ack) {
+                                rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack,
+                                                 ic->i_ack_dma, DMA_TO_DEVICE);
+                                ic->i_ack = NULL;
+                        }
+                } else {
+                        WARN_ON(ic->i_send_hdrs);
+                        WARN_ON(ic->i_send_hdrs_dma);
+                        WARN_ON(ic->i_recv_hdrs);
+                        WARN_ON(ic->i_recv_hdrs_dma);
+                        WARN_ON(ic->i_ack);
+                }
 
                 if (ic->i_sends)
                         rds_ib_send_clear_ring(ic);
@@ -1057,9 +1188,6 @@
                 ic->i_pd = NULL;
                 ic->i_send_cq = NULL;
                 ic->i_recv_cq = NULL;
-                ic->i_send_hdrs = NULL;
-                ic->i_recv_hdrs = NULL;
-                ic->i_ack = NULL;
         }
         BUG_ON(ic->rds_ibdev);
 
@@ -1085,8 +1213,9 @@
         ic->i_flowctl = 0;
         atomic_set(&ic->i_credits, 0);
 
-        rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
-        rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+        /* Re-init rings, but retain sizes. */
+        rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
+        rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
 
         if (ic->i_ibinc) {
                 rds_inc_put(&ic->i_ibinc->ii_inc);
@@ -1127,13 +1256,14 @@
         spin_lock_init(&ic->i_ack_lock);
 #endif
         atomic_set(&ic->i_signaled_sends, 0);
+        atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
 
         /*
          * rds_ib_conn_shutdown() waits for these to be emptied so they
          * must be initialized before it can be called.
          */
-        rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
-        rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+        rds_ib_ring_init(&ic->i_send_ring, 0);
+        rds_ib_ring_init(&ic->i_recv_ring, 0);
 
         ic->conn = conn;
         conn->c_transport_data = ic;
---|