hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/rds/ib_send.c
....@@ -1,5 +1,5 @@
11 /*
2
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
2
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
33 *
44 * This software is available to you under a choice of one of two
55 * licenses. You may choose to be licensed under the terms of the GNU
....@@ -39,6 +39,7 @@
3939 #include "rds_single_path.h"
4040 #include "rds.h"
4141 #include "ib.h"
42
+#include "ib_mr.h"
4243
4344 /*
4445 * Convert IB-specific error message to RDS error message and call core
....@@ -67,6 +68,16 @@
6768 break;
6869 }
6970 complete(rm, notify_status);
71
+}
72
+
73
+static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
74
+ struct rm_data_op *op,
75
+ int wc_status)
76
+{
77
+ if (op->op_nents)
78
+ ib_dma_unmap_sg(ic->i_cm_id->device,
79
+ op->op_sg, op->op_nents,
80
+ DMA_TO_DEVICE);
7081 }
7182
7283 static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
....@@ -127,21 +138,6 @@
127138 rds_ib_stats_inc(s_ib_atomic_cswp);
128139 else
129140 rds_ib_stats_inc(s_ib_atomic_fadd);
130
-}
131
-
132
-static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
133
- struct rm_data_op *op,
134
- int wc_status)
135
-{
136
- struct rds_message *rm = container_of(op, struct rds_message, data);
137
-
138
- if (op->op_nents)
139
- ib_dma_unmap_sg(ic->i_cm_id->device,
140
- op->op_sg, op->op_nents,
141
- DMA_TO_DEVICE);
142
-
143
- if (rm->rdma.op_active && rm->data.op_notify)
144
- rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status);
145141 }
146142
147143 /*
....@@ -206,7 +202,8 @@
206202 send->s_wr.ex.imm_data = 0;
207203
208204 sge = &send->s_sge[0];
209
- sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
205
+ sge->addr = ic->i_send_hdrs_dma[i];
206
+
210207 sge->length = sizeof(struct rds_header);
211208 sge->lkey = ic->i_pd->local_dma_lkey;
212209
....@@ -305,9 +302,10 @@
305302
306303 /* We expect errors as the qp is drained during shutdown */
307304 if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
308
- rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
309
- &conn->c_laddr, &conn->c_faddr, wc->status,
310
- ib_wc_status_msg(wc->status));
305
+ rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
306
+ &conn->c_laddr, &conn->c_faddr,
307
+ conn->c_tos, wc->status,
308
+ ib_wc_status_msg(wc->status), wc->vendor_err);
311309 }
312310 }
313311
....@@ -522,7 +520,7 @@
522520 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
523521 i = 1;
524522 else
525
- i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
523
+ i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
526524
527525 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
528526 if (work_alloc == 0) {
....@@ -635,26 +633,34 @@
635633 send->s_queued = jiffies;
636634 send->s_op = NULL;
637635
638
- send->s_sge[0].addr = ic->i_send_hdrs_dma
639
- + (pos * sizeof(struct rds_header));
640
- send->s_sge[0].length = sizeof(struct rds_header);
636
+ send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
641637
642
- memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
638
+ send->s_sge[0].length = sizeof(struct rds_header);
639
+ send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
640
+
641
+ ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev,
642
+ ic->i_send_hdrs_dma[pos],
643
+ sizeof(struct rds_header),
644
+ DMA_TO_DEVICE);
645
+ memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
646
+ sizeof(struct rds_header));
647
+
643648
644649 /* Set up the data, if present */
645650 if (i < work_alloc
646651 && scat != &rm->data.op_sg[rm->data.op_count]) {
647652 len = min(RDS_FRAG_SIZE,
648
- ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
653
+ sg_dma_len(scat) - rm->data.op_dmaoff);
649654 send->s_wr.num_sge = 2;
650655
651
- send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
656
+ send->s_sge[1].addr = sg_dma_address(scat);
652657 send->s_sge[1].addr += rm->data.op_dmaoff;
653658 send->s_sge[1].length = len;
659
+ send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
654660
655661 bytes_sent += len;
656662 rm->data.op_dmaoff += len;
657
- if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
663
+ if (rm->data.op_dmaoff == sg_dma_len(scat)) {
658664 scat++;
659665 rm->data.op_dmasg++;
660666 rm->data.op_dmaoff = 0;
....@@ -678,7 +684,7 @@
678684 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
679685
680686 if (ic->i_flowctl && adv_credits) {
681
- struct rds_header *hdr = &ic->i_send_hdrs[pos];
687
+ struct rds_header *hdr = ic->i_send_hdrs[pos];
682688
683689 /* add credit and redo the header checksum */
684690 hdr->h_credit = adv_credits;
....@@ -686,6 +692,10 @@
686692 adv_credits = 0;
687693 rds_ib_stats_inc(s_ib_tx_credit_updates);
688694 }
695
+ ib_dma_sync_single_for_device(ic->rds_ibdev->dev,
696
+ ic->i_send_hdrs_dma[pos],
697
+ sizeof(struct rds_header),
698
+ DMA_TO_DEVICE);
689699
690700 if (prev)
691701 prev->s_wr.next = &send->s_wr;
....@@ -808,8 +818,8 @@
808818 }
809819
810820 /* Convert our struct scatterlist to struct ib_sge */
811
- send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
812
- send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
821
+ send->s_sge[0].addr = sg_dma_address(op->op_sg);
822
+ send->s_sge[0].length = sg_dma_len(op->op_sg);
813823 send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
814824
815825 rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
....@@ -859,27 +869,36 @@
859869 int ret;
860870 int num_sge;
861871 int nr_sig = 0;
872
+ u64 odp_addr = op->op_odp_addr;
873
+ u32 odp_lkey = 0;
862874
863875 /* map the op the first time we see it */
864
- if (!op->op_mapped) {
865
- op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
866
- op->op_sg, op->op_nents, (op->op_write) ?
867
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
868
- rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
869
- if (op->op_count == 0) {
870
- rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
871
- ret = -ENOMEM; /* XXX ? */
872
- goto out;
876
+ if (!op->op_odp_mr) {
877
+ if (!op->op_mapped) {
878
+ op->op_count =
879
+ ib_dma_map_sg(ic->i_cm_id->device, op->op_sg,
880
+ op->op_nents,
881
+ (op->op_write) ? DMA_TO_DEVICE :
882
+ DMA_FROM_DEVICE);
883
+ rdsdebug("ic %p mapping op %p: %d\n", ic, op,
884
+ op->op_count);
885
+ if (op->op_count == 0) {
886
+ rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
887
+ ret = -ENOMEM; /* XXX ? */
888
+ goto out;
889
+ }
890
+ op->op_mapped = 1;
873891 }
874
-
875
- op->op_mapped = 1;
892
+ } else {
893
+ op->op_count = op->op_nents;
894
+ odp_lkey = rds_ib_get_lkey(op->op_odp_mr->r_trans_private);
876895 }
877896
878897 /*
879898 * Instead of knowing how to return a partial rdma read/write we insist that there
880899 * be enough work requests to send the entire message.
881900 */
882
- i = ceil(op->op_count, max_sge);
901
+ i = DIV_ROUND_UP(op->op_count, max_sge);
883902
884903 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
885904 if (work_alloc != i) {
....@@ -901,7 +920,9 @@
901920 send->s_queued = jiffies;
902921 send->s_op = NULL;
903922
904
- nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
923
+ if (!op->op_notify)
924
+ nr_sig += rds_ib_set_wr_signal_state(ic, send,
925
+ op->op_notify);
905926
906927 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
907928 send->s_rdma_wr.remote_addr = remote_addr;
....@@ -921,16 +942,21 @@
921942
922943 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
923944 scat != &op->op_sg[op->op_count]; j++) {
924
- len = ib_sg_dma_len(ic->i_cm_id->device, scat);
925
- send->s_sge[j].addr =
926
- ib_sg_dma_address(ic->i_cm_id->device, scat);
945
+ len = sg_dma_len(scat);
946
+ if (!op->op_odp_mr) {
947
+ send->s_sge[j].addr = sg_dma_address(scat);
948
+ send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
949
+ } else {
950
+ send->s_sge[j].addr = odp_addr;
951
+ send->s_sge[j].lkey = odp_lkey;
952
+ }
927953 send->s_sge[j].length = len;
928
- send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
929954
930955 sent += len;
931956 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
932957
933958 remote_addr += len;
959
+ odp_addr += len;
934960 scat++;
935961 }
936962