2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -55,7 +55,6 @@
 
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include <linux/sunrpc/svc_rdma.h>
 
@@ -81,10 +80,10 @@
         .xpo_create = svc_rdma_create,
         .xpo_recvfrom = svc_rdma_recvfrom,
         .xpo_sendto = svc_rdma_sendto,
+        .xpo_read_payload = svc_rdma_read_payload,
         .xpo_release_rqst = svc_rdma_release_rqst,
         .xpo_detach = svc_rdma_detach,
         .xpo_free = svc_rdma_free,
-        .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
         .xpo_has_wspace = svc_rdma_has_wspace,
         .xpo_accept = svc_rdma_accept,
         .xpo_secure_port = svc_rdma_secure_port,
@@ -98,64 +97,6 @@
         .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
         .xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
-
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
-                                           struct sockaddr *, int, int);
-static void svc_rdma_bc_detach(struct svc_xprt *);
-static void svc_rdma_bc_free(struct svc_xprt *);
-
-static const struct svc_xprt_ops svc_rdma_bc_ops = {
-        .xpo_create = svc_rdma_bc_create,
-        .xpo_detach = svc_rdma_bc_detach,
-        .xpo_free = svc_rdma_bc_free,
-        .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
-        .xpo_secure_port = svc_rdma_secure_port,
-};
-
-struct svc_xprt_class svc_rdma_bc_class = {
-        .xcl_name = "rdma-bc",
-        .xcl_owner = THIS_MODULE,
-        .xcl_ops = &svc_rdma_bc_ops,
-        .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
-};
-
-static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
-                                           struct net *net,
-                                           struct sockaddr *sa, int salen,
-                                           int flags)
-{
-        struct svcxprt_rdma *cma_xprt;
-        struct svc_xprt *xprt;
-
-        cma_xprt = svc_rdma_create_xprt(serv, net);
-        if (!cma_xprt)
-                return ERR_PTR(-ENOMEM);
-        xprt = &cma_xprt->sc_xprt;
-
-        svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
-        set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
-        serv->sv_bc_xprt = xprt;
-
-        dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-        return xprt;
-}
-
-static void svc_rdma_bc_detach(struct svc_xprt *xprt)
-{
-        dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-}
-
-static void svc_rdma_bc_free(struct svc_xprt *xprt)
-{
-        struct svcxprt_rdma *rdma =
-                container_of(xprt, struct svcxprt_rdma, sc_xprt);
-
-        dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-        if (xprt)
-                kfree(rdma);
-}
-#endif  /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
@@ -198,14 +139,13 @@
         INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
         INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
         INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-        INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+        init_llist_head(&cma_xprt->sc_recv_ctxts);
         INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
         init_waitqueue_head(&cma_xprt->sc_send_wait);
 
         spin_lock_init(&cma_xprt->sc_lock);
         spin_lock_init(&cma_xprt->sc_rq_dto_lock);
         spin_lock_init(&cma_xprt->sc_send_lock);
-        spin_lock_init(&cma_xprt->sc_recv_lock);
         spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
         /*
@@ -270,7 +210,12 @@
         newxprt->sc_ord = param->initiator_depth;
 
         sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
-        svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+        newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa);
+        memcpy(&newxprt->sc_xprt.xpt_remote, sa,
+               newxprt->sc_xprt.xpt_remotelen);
+        snprintf(newxprt->sc_xprt.xpt_remotebuf,
+                 sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa);
+
         /* The remote port is arbitrary and not under the control of the
          * client ULP. Set it to a fixed value so that the DRC continues
          * to be effective after a reconnect.
@@ -284,80 +229,64 @@
          * Enqueue the new transport on the accept queue of the listening
          * transport
          */
-        spin_lock_bh(&listen_xprt->sc_lock);
+        spin_lock(&listen_xprt->sc_lock);
         list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
-        spin_unlock_bh(&listen_xprt->sc_lock);
+        spin_unlock(&listen_xprt->sc_lock);
 
         set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
         svc_xprt_enqueue(&listen_xprt->sc_xprt);
 }
 
-/*
- * Handles events generated on the listening endpoint. These events will be
- * either be incoming connect requests or adapter removal events.
+/**
+ * svc_rdma_listen_handler - Handle CM events generated on a listening endpoint
+ * @cma_id: the server's listener rdma_cm_id
+ * @event: details of the event
+ *
+ * Return values:
+ *     %0: Do not destroy @cma_id
+ *     %1: Destroy @cma_id (never returned here)
+ *
+ * NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
  */
-static int rdma_listen_handler(struct rdma_cm_id *cma_id,
-                               struct rdma_cm_event *event)
+static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
+                                   struct rdma_cm_event *event)
 {
-        struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
-
-        trace_svcrdma_cm_event(event, sap);
-
         switch (event->event) {
         case RDMA_CM_EVENT_CONNECT_REQUEST:
-                dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
-                        "event = %s (%d)\n", cma_id, cma_id->context,
-                        rdma_event_msg(event->event), event->event);
                 handle_connect_req(cma_id, &event->param.conn);
                 break;
         default:
-                /* NB: No device removal upcall for INADDR_ANY listeners */
-                dprintk("svcrdma: Unexpected event on listening endpoint %p, "
-                        "event = %s (%d)\n", cma_id,
-                        rdma_event_msg(event->event), event->event);
                 break;
         }
-
         return 0;
 }
 
-static int rdma_cma_handler(struct rdma_cm_id *cma_id,
-                            struct rdma_cm_event *event)
+/**
+ * svc_rdma_cma_handler - Handle CM events on client connections
+ * @cma_id: the server's listener rdma_cm_id
+ * @event: details of the event
+ *
+ * Return values:
+ *     %0: Do not destroy @cma_id
+ *     %1: Destroy @cma_id (never returned here)
+ */
+static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
+                                struct rdma_cm_event *event)
 {
-        struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr;
         struct svcxprt_rdma *rdma = cma_id->context;
         struct svc_xprt *xprt = &rdma->sc_xprt;
 
-        trace_svcrdma_cm_event(event, sap);
-
         switch (event->event) {
         case RDMA_CM_EVENT_ESTABLISHED:
-                /* Accept complete */
-                svc_xprt_get(xprt);
-                dprintk("svcrdma: Connection completed on DTO xprt=%p, "
-                        "cm_id=%p\n", xprt, cma_id);
                 clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
                 svc_xprt_enqueue(xprt);
                 break;
         case RDMA_CM_EVENT_DISCONNECTED:
-                dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
-                        xprt, cma_id);
-                set_bit(XPT_CLOSE, &xprt->xpt_flags);
-                svc_xprt_enqueue(xprt);
-                svc_xprt_put(xprt);
-                break;
         case RDMA_CM_EVENT_DEVICE_REMOVAL:
-                dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
-                        "event = %s (%d)\n", cma_id, xprt,
-                        rdma_event_msg(event->event), event->event);
                 set_bit(XPT_CLOSE, &xprt->xpt_flags);
                 svc_xprt_enqueue(xprt);
-                svc_xprt_put(xprt);
                 break;
         default:
-                dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
-                        "event = %s (%d)\n", cma_id,
-                        rdma_event_msg(event->event), event->event);
                 break;
         }
         return 0;
@@ -375,22 +304,18 @@
         struct svcxprt_rdma *cma_xprt;
         int ret;
 
-        dprintk("svcrdma: Creating RDMA listener\n");
-        if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
-                dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
+        if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
                 return ERR_PTR(-EAFNOSUPPORT);
-        }
         cma_xprt = svc_rdma_create_xprt(serv, net);
         if (!cma_xprt)
                 return ERR_PTR(-ENOMEM);
         set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
         strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
 
-        listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt,
+        listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
                                    RDMA_PS_TCP, IB_QPT_RC);
         if (IS_ERR(listen_id)) {
                 ret = PTR_ERR(listen_id);
-                dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
                 goto err0;
         }
 
@@ -399,23 +324,17 @@
          */
 #if IS_ENABLED(CONFIG_IPV6)
         ret = rdma_set_afonly(listen_id, 1);
-        if (ret) {
-                dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
+        if (ret)
                 goto err1;
-        }
 #endif
         ret = rdma_bind_addr(listen_id, sa);
-        if (ret) {
-                dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
+        if (ret)
                 goto err1;
-        }
         cma_xprt->sc_cm_id = listen_id;
 
         ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
-        if (ret) {
-                dprintk("svcrdma: rdma_listen failed = %d\n", ret);
+        if (ret)
                 goto err1;
-        }
 
         /*
          * We need to use the address from the cm_id in case the
@@ -453,13 +372,13 @@
         struct ib_qp_init_attr qp_attr;
         unsigned int ctxts, rq_depth;
         struct ib_device *dev;
-        struct sockaddr *sap;
         int ret = 0;
+        RPC_IFDEBUG(struct sockaddr *sap);
 
         listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
         clear_bit(XPT_CONN, &xprt->xpt_flags);
         /* Get the next entry off the accept list */
-        spin_lock_bh(&listen_rdma->sc_lock);
+        spin_lock(&listen_rdma->sc_lock);
         if (!list_empty(&listen_rdma->sc_accept_q)) {
                 newxprt = list_entry(listen_rdma->sc_accept_q.next,
                                      struct svcxprt_rdma, sc_accept_q);
@@ -467,12 +386,9 @@
         }
         if (!list_empty(&listen_rdma->sc_accept_q))
                 set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
-        spin_unlock_bh(&listen_rdma->sc_lock);
+        spin_unlock(&listen_rdma->sc_lock);
         if (!newxprt)
                 return NULL;
-
-        dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
-                newxprt, newxprt->sc_cm_id);
 
         dev = newxprt->sc_cm_id->device;
         newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
@@ -509,21 +425,17 @@
 
         newxprt->sc_pd = ib_alloc_pd(dev, 0);
         if (IS_ERR(newxprt->sc_pd)) {
-                dprintk("svcrdma: error creating PD for connect request\n");
+                trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd));
                 goto errout;
         }
-        newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-                                        0, IB_POLL_WORKQUEUE);
-        if (IS_ERR(newxprt->sc_sq_cq)) {
-                dprintk("svcrdma: error creating SQ CQ for connect request\n");
+        newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
+                                            IB_POLL_WORKQUEUE);
+        if (IS_ERR(newxprt->sc_sq_cq))
                 goto errout;
-        }
-        newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-                                        0, IB_POLL_WORKQUEUE);
-        if (IS_ERR(newxprt->sc_rq_cq)) {
-                dprintk("svcrdma: error creating RQ CQ for connect request\n");
+        newxprt->sc_rq_cq =
+                ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
+        if (IS_ERR(newxprt->sc_rq_cq))
                 goto errout;
-        }
 
         memset(&qp_attr, 0, sizeof qp_attr);
         qp_attr.event_handler = qp_event_handler;
@@ -547,7 +459,7 @@
 
         ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
         if (ret) {
-                dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
+                trace_svcrdma_qp_err(newxprt, ret);
                 goto errout;
         }
         newxprt->sc_qp = newxprt->sc_cm_id->qp;
@@ -555,14 +467,13 @@
         if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
                 newxprt->sc_snd_w_inv = false;
         if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
-            !rdma_ib_or_roce(dev, newxprt->sc_port_num))
+            !rdma_ib_or_roce(dev, newxprt->sc_port_num)) {
+                trace_svcrdma_fabric_err(newxprt, -EINVAL);
                 goto errout;
+        }
 
         if (!svc_rdma_post_recvs(newxprt))
                 goto errout;
-
-        /* Swap out the handler */
-        newxprt->sc_cm_id->event_handler = rdma_cma_handler;
 
         /* Construct RDMA-CM private message */
         pmsg.cp_magic = rpcrdma_cmp_magic;
@@ -578,16 +489,22 @@
         conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
                                            dev->attrs.max_qp_init_rd_atom);
         if (!conn_param.initiator_depth) {
-                dprintk("svcrdma: invalid ORD setting\n");
                 ret = -EINVAL;
+                trace_svcrdma_initdepth_err(newxprt, ret);
                 goto errout;
         }
         conn_param.private_data = &pmsg;
         conn_param.private_data_len = sizeof(pmsg);
+        rdma_lock_handler(newxprt->sc_cm_id);
+        newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
         ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
-        if (ret)
+        rdma_unlock_handler(newxprt->sc_cm_id);
+        if (ret) {
+                trace_svcrdma_accept_err(newxprt, ret);
                 goto errout;
+        }
 
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
         dprintk("svcrdma: new connection %p accepted:\n", newxprt);
         sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
         dprintk("    local address   : %pIS:%u\n", sap, rpc_get_port(sap));
@@ -598,13 +515,11 @@
         dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
         dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
         dprintk("    ord             : %d\n", conn_param.initiator_depth);
+#endif
 
-        trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
         return &newxprt->sc_xprt;
 
  errout:
-        dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
-        trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
         /* Take a reference in case the DTO handler runs */
         svc_xprt_get(&newxprt->sc_xprt);
         if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -615,24 +530,11 @@
         return NULL;
 }
 
-/*
- * When connected, an svc_xprt has at least two references:
- *
- * - A reference held by the cm_id between the ESTABLISHED and
- *   DISCONNECTED events. If the remote peer disconnected first, this
- *   reference could be gone.
- *
- * - A reference held by the svc_recv code that called this function
- *   as part of close processing.
- *
- * At a minimum one references should still be held.
- */
 static void svc_rdma_detach(struct svc_xprt *xprt)
 {
         struct svcxprt_rdma *rdma =
                 container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
-        /* Disconnect and flush posted WQE */
         rdma_disconnect(rdma->sc_cm_id);
 }
 
@@ -642,15 +544,9 @@
                 container_of(work, struct svcxprt_rdma, sc_work);
         struct svc_xprt *xprt = &rdma->sc_xprt;
 
-        trace_svcrdma_xprt_free(xprt);
-
+        /* This blocks until the Completion Queues are empty */
         if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
                 ib_drain_qp(rdma->sc_qp);
-
-        /* We should only be called from kref_put */
-        if (kref_read(&xprt->xpt_ref) != 0)
-                pr_err("svcrdma: sc_xprt still in use? (%d)\n",
-                       kref_read(&xprt->xpt_ref));
 
         svc_rdma_flush_recv_queues(rdma);
 
@@ -687,8 +583,9 @@
 {
         struct svcxprt_rdma *rdma =
                 container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
         INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-        queue_work(svc_rdma_wq, &rdma->sc_work);
+        schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)