2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/sunrpc/xprtrdma/transport.c
@@ -70,7 +70,7 @@
 
 static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
 unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
 int xprt_rdma_pad_optimize;
 
@@ -80,7 +80,6 @@
 static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
 static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
 static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
-static unsigned int zero;
 static unsigned int max_padding = PAGE_SIZE;
 static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
 static unsigned int max_memreg = RPCRDMA_LAST - 1;
@@ -122,7 +121,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &max_padding,
 	},
 	{
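Note: SYSCTL_ZERO is a shared, read-only bound exported by <linux/sysctl.h>, so each file no longer needs its own static zero variable for proc_dointvec_minmax(). A minimal sketch of the pattern, with a hypothetical tunable (example_padding is illustrative, not part of this commit):

#include <linux/sysctl.h>

/* proc_dointvec_minmax() clamps writes to [*extra1, *extra2];
 * SYSCTL_ZERO supplies the lower bound without a local variable.
 */
static unsigned int example_padding;
static unsigned int example_max_padding = 4096;

static struct ctl_table example_table[] = {
	{
		.procname	= "rdma_pad_example",
		.data		= &example_padding,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= &example_max_padding,
	},
	{ },
};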
@@ -225,88 +224,71 @@
 	}
 }
 
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
-	schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
-void
-rpcrdma_connect_worker(struct work_struct *work)
-{
-	struct rpcrdma_ep *ep =
-		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
-	struct rpcrdma_xprt *r_xprt =
-		container_of(ep, struct rpcrdma_xprt, rx_ep);
-	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
-	spin_lock_bh(&xprt->transport_lock);
-	if (ep->rep_connected > 0) {
-		if (!xprt_test_and_set_connected(xprt)) {
-			xprt->stat.connect_count++;
-			xprt->stat.connect_time += (long)jiffies -
-						   xprt->stat.connect_start;
-			xprt_wake_pending_tasks(xprt, 0);
-		}
-	} else {
-		if (xprt_test_and_clear_connected(xprt))
-			xprt_wake_pending_tasks(xprt, -ENOTCONN);
-	}
-	spin_unlock_bh(&xprt->transport_lock);
-}
-
+/**
+ * xprt_rdma_connect_worker - establish connection in the background
+ * @work: worker thread context
+ *
+ * Requester holds the xprt's send lock to prevent activity on this
+ * transport while a fresh connection is being established. RPC tasks
+ * sleep on the xprt's pending queue waiting for connect to complete.
+ */
 static void
 xprt_rdma_connect_worker(struct work_struct *work)
 {
 	struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
 						   rx_connect_worker.work);
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-	int rc = 0;
+	int rc;
 
-	xprt_clear_connected(xprt);
-
-	rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
-	if (rc)
-		xprt_wake_pending_tasks(xprt, rc);
-
+	rc = rpcrdma_xprt_connect(r_xprt);
 	xprt_clear_connecting(xprt);
+	if (!rc) {
+		xprt->connect_cookie++;
+		xprt->stat.connect_count++;
+		xprt->stat.connect_time += (long)jiffies -
+					   xprt->stat.connect_start;
+		xprt_set_connected(xprt);
+		rc = -EAGAIN;
+	} else
+		rpcrdma_xprt_disconnect(r_xprt);
+	xprt_unlock_connect(xprt, r_xprt);
+	xprt_wake_pending_tasks(xprt, rc);
 }
 
+/**
+ * xprt_rdma_inject_disconnect - inject a connection fault
+ * @xprt: transport context
+ *
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
+ */
 static void
 xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
 {
-	struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
-						   rx_xprt);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
-	trace_xprtrdma_inject_dsc(r_xprt);
-	rdma_disconnect(r_xprt->rx_ia.ri_id);
+	trace_xprtrdma_op_inject_dsc(r_xprt);
+	rdma_disconnect(r_xprt->rx_ep->re_id);
 }
 
-/*
- * xprt_rdma_destroy
+/**
+ * xprt_rdma_destroy - Full tear down of transport
+ * @xprt: doomed transport context
  *
- * Destroy the xprt.
- * Free all memory associated with the object, including its own.
- * NOTE: none of the *destroy methods free memory for their top-level
- * objects, even though they may have allocated it (they do free
- * private memory). It's up to the caller to handle it. In this
- * case (RDMA transport), all structure memory is inlined with the
- * struct rpcrdma_xprt.
+ * Caller guarantees there will be no more calls to us with
+ * this @xprt.
  */
 static void
 xprt_rdma_destroy(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
-	trace_xprtrdma_destroy(r_xprt);
-
 	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
 
-	xprt_clear_connected(xprt);
-
-	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	rpcrdma_xprt_disconnect(r_xprt);
 	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
-	rpcrdma_ia_close(&r_xprt->rx_ia);
 
 	xprt_rdma_free_addresses(xprt);
 	xprt_free(xprt);
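Note: on success the new worker deliberately wakes pending tasks with -EAGAIN, so they retry transmission on the fresh connection instead of treating the wake-up as a final status. A generic sketch of the delayed-work plumbing the worker relies on, with hypothetical type names (example_xprt is illustrative only):

#include <linux/workqueue.h>

struct example_xprt {
	struct delayed_work connect_worker;
	/* ... connection state ... */
};

static void example_connect_worker(struct work_struct *work)
{
	/* container_of() recovers the embedding object from the
	 * work_struct that the workqueue hands back.
	 */
	struct example_xprt *xprt =
		container_of(work, struct example_xprt, connect_worker.work);

	/* drive the connect attempt for xprt here */
}

static void example_init(struct example_xprt *xprt)
{
	INIT_DELAYED_WORK(&xprt->connect_worker, example_connect_worker);
}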
@@ -314,6 +296,7 @@
 	module_put(THIS_MODULE);
 }
 
+/* 60 second timeout, no retries */
 static const struct rpc_timeout xprt_rdma_default_timeout = {
 	.to_initval = 60 * HZ,
 	.to_maxval = 60 * HZ,
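Note: with to_initval equal to to_maxval and no retries configured, a request gets a single 60 second major timeout and then fails rather than being retransmitted with backoff. A sketch of the equivalent explicit initialization (assuming struct rpc_timeout from <linux/sunrpc/xprt.h>; the zero fields are implicit in the real code):

#include <linux/sunrpc/xprt.h>

static const struct rpc_timeout example_timeout = {
	.to_initval	= 60 * HZ,	/* first and only timeout */
	.to_maxval	= 60 * HZ,	/* backoff ceiling (never reached) */
	.to_retries	= 0,		/* no retransmits */
};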
@@ -327,33 +310,32 @@
 static struct rpc_xprt *
 xprt_setup_rdma(struct xprt_create *args)
 {
-	struct rpcrdma_create_data_internal cdata;
 	struct rpc_xprt *xprt;
 	struct rpcrdma_xprt *new_xprt;
-	struct rpcrdma_ep *new_ep;
 	struct sockaddr *sap;
 	int rc;
 
-	if (args->addrlen > sizeof(xprt->addr)) {
-		dprintk("RPC: %s: address too large\n", __func__);
+	if (args->addrlen > sizeof(xprt->addr))
 		return ERR_PTR(-EBADF);
-	}
 
-	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
-	if (xprt == NULL) {
-		dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
-			__func__);
+	if (!try_module_get(THIS_MODULE))
+		return ERR_PTR(-EIO);
+
+	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0,
+			  xprt_rdma_slot_table_entries);
+	if (!xprt) {
+		module_put(THIS_MODULE);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/* 60 second timeout, no retries */
 	xprt->timeout = &xprt_rdma_default_timeout;
+	xprt->connect_timeout = xprt->timeout->to_initval;
+	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
 	xprt->bind_timeout = RPCRDMA_BIND_TO;
 	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
 	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
 	xprt->resvport = 0;	/* privileged port not needed */
-	xprt->tsh_size = 0;	/* RPC-RDMA handles framing */
 	xprt->ops = &xprt_rdma_procs;
 
 	/*
@@ -371,113 +353,41 @@
 	xprt_set_bound(xprt);
 	xprt_rdma_format_addresses(xprt, sap);
 
-	cdata.max_requests = xprt_rdma_slot_table_entries;
-
-	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
-	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
-
-	cdata.inline_wsize = xprt_rdma_max_inline_write;
-	if (cdata.inline_wsize > cdata.wsize)
-		cdata.inline_wsize = cdata.wsize;
-
-	cdata.inline_rsize = xprt_rdma_max_inline_read;
-	if (cdata.inline_rsize > cdata.rsize)
-		cdata.inline_rsize = cdata.rsize;
-
-	/*
-	 * Create new transport instance, which includes initialized
-	 *  o ia
-	 *  o endpoint
-	 *  o buffers
-	 */
-
 	new_xprt = rpcx_to_rdmax(xprt);
-
-	rc = rpcrdma_ia_open(new_xprt);
-	if (rc)
-		goto out1;
-
-	/*
-	 * initialize and create ep
-	 */
-	new_xprt->rx_data = cdata;
-	new_ep = &new_xprt->rx_ep;
-
-	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
-			       &new_xprt->rx_ia, &new_xprt->rx_data);
-	if (rc)
-		goto out2;
-
 	rc = rpcrdma_buffer_create(new_xprt);
-	if (rc)
-		goto out3;
+	if (rc) {
+		xprt_rdma_free_addresses(xprt);
+		xprt_free(xprt);
+		module_put(THIS_MODULE);
+		return ERR_PTR(rc);
+	}
 
 	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
 			  xprt_rdma_connect_worker);
 
-	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
-	if (xprt->max_payload == 0)
-		goto out4;
-	xprt->max_payload <<= PAGE_SHIFT;
-	dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
-		__func__, xprt->max_payload);
+	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
 
-	if (!try_module_get(THIS_MODULE))
-		goto out4;
-
-	dprintk("RPC: %s: %s:%s\n", __func__,
-		xprt->address_strings[RPC_DISPLAY_ADDR],
-		xprt->address_strings[RPC_DISPLAY_PORT]);
-	trace_xprtrdma_create(new_xprt);
 	return xprt;
-
-out4:
-	rpcrdma_buffer_destroy(&new_xprt->rx_buf);
-	rc = -ENODEV;
-out3:
-	rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
-out2:
-	rpcrdma_ia_close(&new_xprt->rx_ia);
-out1:
-	trace_xprtrdma_destroy(new_xprt);
-	xprt_rdma_free_addresses(xprt);
-	xprt_free(xprt);
-	return ERR_PTR(rc);
 }
 
 /**
- * xprt_rdma_close - Close down RDMA connection
- * @xprt: generic transport to be closed
+ * xprt_rdma_close - close a transport connection
+ * @xprt: transport context
  *
- * Called during transport shutdown reconnect, or device
- * removal. Caller holds the transport's write lock.
+ * Called during autoclose or device removal.
+ *
+ * Caller holds @xprt's send lock to prevent activity on this
+ * transport while the connection is torn down.
  */
-static void
-xprt_rdma_close(struct rpc_xprt *xprt)
+void xprt_rdma_close(struct rpc_xprt *xprt)
{
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-	dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
+	rpcrdma_xprt_disconnect(r_xprt);
 
-	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
-		xprt_clear_connected(xprt);
-		rpcrdma_ia_remove(ia);
-		return;
-	}
-	if (ep->rep_connected == -ENODEV)
-		return;
-	if (ep->rep_connected > 0)
-		xprt->reestablish_timeout = 0;
+	xprt->reestablish_timeout = 0;
+	++xprt->connect_cookie;
 	xprt_disconnect_done(xprt);
-	rpcrdma_ep_disconnect(ep, ia);
-
-	/* Prepare @xprt for the next connection by reinitializing
-	 * its credit grant to one (see RFC 8166, Section 3.3.3).
-	 */
-	r_xprt->rx_buf.rb_credits = 1;
-	xprt->cwnd = RPC_CWNDSHIFT;
 }
 
 /**
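Note: the reworked xprt_setup_rdma() takes the module reference before any allocation and drops it on every failure path, so the transport module cannot be unloaded while a transport instance exists; the paired module_put() happens in xprt_rdma_destroy(). A condensed sketch of that ownership pattern (example_setup() and its kzalloc() payload are hypothetical):

#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>

static void *example_setup(void)
{
	void *obj;

	/* Pin the module first: after this, unload waits for us. */
	if (!try_module_get(THIS_MODULE))
		return ERR_PTR(-EIO);

	obj = kzalloc(128, GFP_KERNEL);
	if (!obj) {
		module_put(THIS_MODULE);	/* unwind on failure */
		return ERR_PTR(-ENOMEM);
	}
	return obj;	/* released by the destroy path, not here */
}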
@@ -492,12 +402,6 @@
 {
 	struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
 	char buf[8];
-
-	dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
-		__func__, xprt,
-		xprt->address_strings[RPC_DISPLAY_ADDR],
-		xprt->address_strings[RPC_DISPLAY_PORT],
-		port);
 
 	rpc_set_port(sap, port);
 
@@ -529,25 +433,67 @@
 	xprt_force_disconnect(xprt);
 }
 
+/**
+ * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection
+ * @xprt: controlling transport instance
+ * @connect_timeout: reconnect timeout after client disconnects
+ * @reconnect_timeout: reconnect timeout after server disconnects
+ *
+ */
+static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt,
+					  unsigned long connect_timeout,
+					  unsigned long reconnect_timeout)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout);
+
+	spin_lock(&xprt->transport_lock);
+
+	if (connect_timeout < xprt->connect_timeout) {
+		struct rpc_timeout to;
+		unsigned long initval;
+
+		to = *xprt->timeout;
+		initval = connect_timeout;
+		if (initval < RPCRDMA_INIT_REEST_TO << 1)
+			initval = RPCRDMA_INIT_REEST_TO << 1;
+		to.to_initval = initval;
+		to.to_maxval = initval;
+		r_xprt->rx_timeout = to;
+		xprt->timeout = &r_xprt->rx_timeout;
+		xprt->connect_timeout = connect_timeout;
+	}
+
+	if (reconnect_timeout < xprt->max_reconnect_timeout)
+		xprt->max_reconnect_timeout = reconnect_timeout;
+
+	spin_unlock(&xprt->transport_lock);
+}
+
+/**
+ * xprt_rdma_connect - schedule an attempt to reconnect
+ * @xprt: transport state
+ * @task: RPC scheduler context (unused)
+ *
+ */
 static void
 xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_ep *ep = r_xprt->rx_ep;
+	unsigned long delay;
 
-	if (r_xprt->rx_ep.rep_connected != 0) {
-		/* Reconnect */
-		schedule_delayed_work(&r_xprt->rx_connect_worker,
-				      xprt->reestablish_timeout);
-		xprt->reestablish_timeout <<= 1;
-		if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
-			xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
-		else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
-			xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
-	} else {
-		schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
-		if (!RPC_IS_ASYNC(task))
-			flush_delayed_work(&r_xprt->rx_connect_worker);
+	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
+
+	delay = 0;
+	if (ep && ep->re_connect_status != 0) {
+		delay = xprt_reconnect_delay(xprt);
+		xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
 	}
+	trace_xprtrdma_op_connect(r_xprt, delay);
+	queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
+			   delay);
 }
 
 /**
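Note: the open-coded doubling of reestablish_timeout moved into the generic helpers xprt_reconnect_delay() and xprt_reconnect_backoff() in net/sunrpc/xprt.c. An illustrative (not verbatim) model of the behavior, with example_backoff() standing in for the pair:

#include <linux/jiffies.h>

static unsigned long example_backoff(unsigned long *reest_to,
				     unsigned long init_to,
				     unsigned long max_to)
{
	unsigned long delay = *reest_to;	/* wait this long now */

	*reest_to <<= 1;			/* then double it ... */
	if (*reest_to > max_to)
		*reest_to = max_to;		/* ... up to a ceiling */
	else if (*reest_to < init_to)
		*reest_to = init_to;		/* ... with a floor */
	return delay;
}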
@@ -573,8 +519,8 @@
 	return;
 
 out_sleep:
-	rpc_sleep_on(&xprt->backlog, task, NULL);
-	task->tk_status = -EAGAIN;
+	task->tk_status = -ENOMEM;
+	xprt_add_backlog(xprt, task);
 }
 
 /**
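Note: xprt_add_backlog() pairs with xprt_wake_up_backlog() in xprt_rdma_free_slot() below: a task that finds no free rqst is parked, and a slot being freed is handed straight to the oldest waiter; only when no one is waiting does the rqst go back to the pool. A sketch of the free side (the pool-recycle step is left as a placeholder comment):

#include <linux/string.h>
#include <linux/sunrpc/xprt.h>

static void example_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
{
	/* Hand the slot to a parked task if one is waiting. */
	if (!xprt_wake_up_backlog(xprt, rqst)) {
		memset(rqst, 0, sizeof(*rqst));
		/* no waiter: recycle rqst into the transport's pool */
	}
}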
@@ -586,57 +532,25 @@
 static void
 xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
 {
-	memset(rqst, 0, sizeof(*rqst));
-	rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
-	rpc_wake_up_next(&xprt->backlog);
+	struct rpcrdma_xprt *r_xprt =
+		container_of(xprt, struct rpcrdma_xprt, rx_xprt);
+
+	rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+	if (!xprt_wake_up_backlog(xprt, rqst)) {
+		memset(rqst, 0, sizeof(*rqst));
+		rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+	}
 }
 
-static bool
-rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		    size_t size, gfp_t flags)
+static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
+				 struct rpcrdma_regbuf *rb, size_t size,
+				 gfp_t flags)
 {
-	struct rpcrdma_regbuf *rb;
-
-	if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
-		return true;
-
-	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
-	if (IS_ERR(rb))
-		return false;
-
-	rpcrdma_free_regbuf(req->rl_sendbuf);
-	r_xprt->rx_stats.hardway_register_count += size;
-	req->rl_sendbuf = rb;
-	return true;
-}
-
-/* The rq_rcv_buf is used only if a Reply chunk is necessary.
- * The decision to use a Reply chunk is made later in
- * rpcrdma_marshal_req. This buffer is registered at that time.
- *
- * Otherwise, the associated RPC Reply arrives in a separate
- * Receive buffer, arbitrarily chosen by the HCA. The buffer
- * allocated here for the RPC Reply is not utilized in that
- * case. See rpcrdma_inline_fixup.
- *
- * A regbuf is used here to remember the buffer size.
- */
-static bool
-rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		    size_t size, gfp_t flags)
-{
-	struct rpcrdma_regbuf *rb;
-
-	if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
-		return true;
-
-	rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
-	if (IS_ERR(rb))
-		return false;
-
-	rpcrdma_free_regbuf(req->rl_recvbuf);
-	r_xprt->rx_stats.hardway_register_count += size;
-	req->rl_recvbuf = rb;
+	if (unlikely(rdmab_length(rb) < size)) {
+		if (!rpcrdma_regbuf_realloc(rb, size, flags))
+			return false;
+		r_xprt->rx_stats.hardway_register_count += size;
+	}
 	return true;
 }
 
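Note: the two copy-paste helpers collapse into one size check: reallocate the registered buffer only when it is too small, and count the bytes as a "hardway" registration in the transport stats. A generic sketch with a hypothetical buffer type (example_regbuf stands in for struct rpcrdma_regbuf):

#include <linux/slab.h>

struct example_regbuf {
	size_t	rg_len;
	void	*rg_data;
};

static bool example_check_regbuf(struct example_regbuf *rb, size_t size,
				 unsigned long *hardway_count, gfp_t flags)
{
	if (unlikely(rb->rg_len < size)) {
		void *p = krealloc(rb->rg_data, size, flags);

		if (!p)
			return false;		/* caller maps to -ENOMEM */
		rb->rg_data = p;
		rb->rg_len = size;
		*hardway_count += size;		/* grown the hard way */
	}
	return true;
}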
@@ -648,13 +562,6 @@
  *        0:	Success; rq_buffer points to RPC buffer to use
  *   ENOMEM:	Out of memory, call again later
  *      EIO:	A permanent error occurred, do not retry
- *
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual
- * send/recv sequence.
- *
- * xprt_rdma_allocate provides buffers that are already mapped for
- * DMA, and a local DMA lkey is provided for each.
  */
 static int
 xprt_rdma_allocate(struct rpc_task *task)
@@ -665,21 +572,23 @@
 	gfp_t flags;
 
 	flags = RPCRDMA_DEF_GFP;
+	if (RPC_IS_ASYNC(task))
+		flags = GFP_NOWAIT | __GFP_NOWARN;
 	if (RPC_IS_SWAPPER(task))
-		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+		flags |= __GFP_MEMALLOC;
 
-	if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
+	if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
+				  flags))
 		goto out_fail;
-	if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
+	if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
+				  flags))
 		goto out_fail;
 
-	rqst->rq_buffer = req->rl_sendbuf->rg_base;
-	rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
-	trace_xprtrdma_allocate(task, req);
+	rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
+	rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
 	return 0;
 
 out_fail:
-	trace_xprtrdma_allocate(task, NULL);
 	return -ENOMEM;
 }
 
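Note: the gfp logic now composes properties instead of overwriting them: an async RPC must never sleep in the allocator, and a swap-out RPC may additionally dip into the memory reserves. A sketch of the combination (the GFP_NOIO default is meant to mirror RPCRDMA_DEF_GFP, but that expansion is an assumption here):

#include <linux/gfp.h>

static gfp_t example_rpc_gfp(bool is_async, bool is_swapper)
{
	gfp_t flags = GFP_NOIO | __GFP_NOWARN;

	if (is_async)
		flags = GFP_NOWAIT | __GFP_NOWARN;	/* never sleep */
	if (is_swapper)
		flags |= __GFP_MEMALLOC;		/* may use reserves */
	return flags;
}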
@@ -696,14 +605,19 @@
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 
-	if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
-		rpcrdma_release_rqst(r_xprt, req);
-	trace_xprtrdma_rpc_done(task, req);
+	if (!list_empty(&req->rl_registered))
+		frwr_unmap_sync(r_xprt, req);
+
+	/* XXX: If the RPC is completing because of a signal and
+	 * not because a reply was received, we ought to ensure
+	 * that the Send completion has fired, so that memory
+	 * involved with the Send is not still visible to the NIC.
+	 */
 }
 
 /**
  * xprt_rdma_send_request - marshal and send an RPC request
- * @task: RPC task with an RPC message in rq_snd_buf
+ * @rqst: RPC message in rq_snd_buf
  *
  * Caller holds the transport's write lock.
  *
@@ -712,13 +626,14 @@
  * %-ENOTCONN if the caller should reconnect and call again
  * %-EAGAIN if the caller should call again
  * %-ENOBUFS if the caller should call again after a delay
- * %-EIO if a permanent error occurred and the request was not
- *	sent. Do not try to send this message again.
+ * %-EMSGSIZE if encoding ran out of buffer space. The request
+ *	was not sent. Do not try to send this message again.
+ * %-EIO if an I/O error occurred. The request was not sent.
+ *	Do not try to send this message again.
  */
 static int
-xprt_rdma_send_request(struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst)
 {
-	struct rpc_rqst *rqst = task->tk_rqstp;
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -730,7 +645,10 @@
 #endif	/* CONFIG_SUNRPC_BACKCHANNEL */
 
 	if (!xprt_connected(xprt))
-		goto drop_connection;
+		return -ENOTCONN;
+
+	if (!xprt_request_get_cong(xprt, rqst))
+		return -EBADSLT;
 
 	rc = rpcrdma_marshal_req(r_xprt, rqst);
 	if (rc < 0)
@@ -741,17 +659,15 @@
 		goto drop_connection;
 	rqst->rq_xtime = ktime_get();
 
-	__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
-	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+	if (rpcrdma_post_sends(r_xprt, req))
 		goto drop_connection;
 
 	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
-	rqst->rq_bytes_sent = 0;
 
 	/* An RPC with no reply will throw off credit accounting,
 	 * so drop the connection to reset the credit grant.
 	 */
-	if (!rpc_reply_expected(task))
+	if (!rpc_reply_expected(rqst->rq_task))
 		goto drop_connection;
 	return 0;
 
@@ -759,8 +675,8 @@
 	if (rc != -ENOTCONN)
 		return rc;
 drop_connection:
-	xprt_disconnect_done(xprt);
-	return -ENOTCONN;	/* implies disconnect */
+	xprt_rdma_close(xprt);
+	return -ENOTCONN;
 }
 
 void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -776,7 +692,7 @@
 		   0,	/* need a local port? */
 		   xprt->stat.bind_count,
 		   xprt->stat.connect_count,
-		   xprt->stat.connect_time,
+		   xprt->stat.connect_time / HZ,
 		   idle_time,
 		   xprt->stat.sends,
 		   xprt->stat.recvs,
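Note: connect_time accumulates (jiffies - connect_start) deltas, so it is stored in jiffies, while this stats line reports seconds; hence the new "/ HZ". A trivial sketch of the conversion (example values only: with HZ == 250, an accumulated 15000 jiffies prints as 60):

#include <linux/jiffies.h>

static unsigned long example_connect_secs(unsigned long connect_time_jiffies)
{
	return connect_time_jiffies / HZ;	/* jiffies -> seconds */
}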
@@ -796,7 +712,7 @@
 		   r_xprt->rx_stats.bad_reply_count,
 		   r_xprt->rx_stats.nomsg_call_count);
 	seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
-		   r_xprt->rx_stats.mrs_recovered,
+		   r_xprt->rx_stats.mrs_recycled,
 		   r_xprt->rx_stats.mrs_orphaned,
 		   r_xprt->rx_stats.mrs_allocated,
 		   r_xprt->rx_stats.local_inv_needed,
@@ -825,7 +741,7 @@
 	.alloc_slot		= xprt_rdma_alloc_slot,
 	.free_slot		= xprt_rdma_free_slot,
 	.release_request	= xprt_release_rqst_cong,	/* ditto */
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def, /* ditto */
 	.timer			= xprt_rdma_timer,
 	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
 	.set_port		= xprt_rdma_set_port,
@@ -835,14 +751,15 @@
 	.send_request		= xprt_rdma_send_request,
 	.close			= xprt_rdma_close,
 	.destroy		= xprt_rdma_destroy,
+	.set_connect_timeout	= xprt_rdma_set_connect_timeout,
 	.print_stats		= xprt_rdma_print_stats,
 	.enable_swap		= xprt_rdma_enable_swap,
 	.disable_swap		= xprt_rdma_disable_swap,
 	.inject_disconnect	= xprt_rdma_inject_disconnect,
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	.bc_setup		= xprt_rdma_bc_setup,
-	.bc_up			= xprt_rdma_bc_up,
 	.bc_maxpayload		= xprt_rdma_bc_maxpayload,
+	.bc_num_slots		= xprt_rdma_bc_max_slots,
 	.bc_free_rqst		= xprt_rdma_bc_free_rqst,
 	.bc_destroy		= xprt_rdma_bc_destroy,
 #endif
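Note: the new .set_connect_timeout op is presumably reached through rpc_set_connect_timeout() in net/sunrpc/clnt.c, which upper layers such as NFSv4 use to tighten connection timeouts. A sketch of that call (the 15 second values are arbitrary):

#include <linux/sunrpc/clnt.h>

static void example_tune_timeouts(struct rpc_clnt *clnt)
{
	/* Dispatches to the client transport's op, here
	 * xprt_rdma_set_connect_timeout().
	 */
	rpc_set_connect_timeout(clnt, 15 * HZ, 15 * HZ);
}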
@@ -859,57 +776,30 @@
 
 void xprt_rdma_cleanup(void)
 {
-	int rc;
-
-	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	if (sunrpc_table_header) {
 		unregister_sysctl_table(sunrpc_table_header);
 		sunrpc_table_header = NULL;
 	}
 #endif
-	rc = xprt_unregister_transport(&xprt_rdma);
-	if (rc)
-		dprintk("RPC: %s: xprt_unregister returned %i\n",
-			__func__, rc);
 
-	rpcrdma_destroy_wq();
-
-	rc = xprt_unregister_transport(&xprt_rdma_bc);
-	if (rc)
-		dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
-			__func__, rc);
+	xprt_unregister_transport(&xprt_rdma);
+	xprt_unregister_transport(&xprt_rdma_bc);
 }
 
 int xprt_rdma_init(void)
 {
 	int rc;
 
-	rc = rpcrdma_alloc_wq();
+	rc = xprt_register_transport(&xprt_rdma);
 	if (rc)
 		return rc;
-
-	rc = xprt_register_transport(&xprt_rdma);
-	if (rc) {
-		rpcrdma_destroy_wq();
-		return rc;
-	}
 
 	rc = xprt_register_transport(&xprt_rdma_bc);
 	if (rc) {
 		xprt_unregister_transport(&xprt_rdma);
-		rpcrdma_destroy_wq();
 		return rc;
 	}
-
-	dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
-
-	dprintk("Defaults:\n");
-	dprintk("\tSlots %d\n"
-		"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
-		xprt_rdma_slot_table_entries,
-		xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
-	dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	if (!sunrpc_table_header)