hc
2024-09-20 cf4ce59b3b70238352c7f1729f0f7223214828ad
kernel/net/smc/smc_tx.c
....@@ -24,10 +24,11 @@
2424 #include "smc.h"
2525 #include "smc_wr.h"
2626 #include "smc_cdc.h"
27
+#include "smc_close.h"
2728 #include "smc_ism.h"
2829 #include "smc_tx.h"
2930
30
-#define SMC_TX_WORK_DELAY HZ
31
+#define SMC_TX_WORK_DELAY 0
3132 #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
3233
3334 /***************************** sndbuf producer *******************************/
....@@ -85,6 +86,7 @@
8586 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
8687 if (sk->sk_err ||
8788 (sk->sk_shutdown & SEND_SHUTDOWN) ||
89
+ conn->killed ||
8890 conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
8991 rc = -EPIPE;
9092 break;
....@@ -108,8 +110,8 @@
108110 break; /* at least 1 byte of free & no urgent data */
109111 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
110112 sk_wait_event(sk, &timeo,
111
- sk->sk_err ||
112
- (sk->sk_shutdown & SEND_SHUTDOWN) ||
113
+ READ_ONCE(sk->sk_err) ||
114
+ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
113115 smc_cdc_rxed_any_close(conn) ||
114116 (atomic_read(&conn->sndbuf_space) &&
115117 !conn->urg_tx_pend),
....@@ -154,7 +156,7 @@
154156 return -ENOTCONN;
155157 if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
156158 (smc->sk.sk_err == ECONNABORTED) ||
157
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
159
+ conn->killed)
158160 return -EPIPE;
159161 if (smc_cdc_rxed_any_close(conn))
160162 return send_done ?: -ECONNRESET;
....@@ -226,8 +228,8 @@
226228 /* for a corked socket defer the RDMA writes if there
227229 * is still sufficient sndbuf_space available
228230 */
229
- schedule_delayed_work(&conn->tx_work,
230
- SMC_TX_CORK_DELAY);
231
+ queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
232
+ SMC_TX_CORK_DELAY);
231233 else
232234 smc_tx_sndbuf_nonempty(conn);
233235 } /* while (msg_data_left(msg)) */
....@@ -264,31 +266,24 @@
264266
265267 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
266268 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
267
- int num_sges, struct ib_sge sges[])
269
+ int num_sges, struct ib_rdma_wr *rdma_wr)
268270 {
269271 struct smc_link_group *lgr = conn->lgr;
270
- struct ib_rdma_wr rdma_wr;
271
- struct smc_link *link;
272
+ struct smc_link *link = conn->lnk;
272273 int rc;
273274
274
- memset(&rdma_wr, 0, sizeof(rdma_wr));
275
- link = &lgr->lnk[SMC_SINGLE_LINK];
276
- rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
277
- rdma_wr.wr.sg_list = sges;
278
- rdma_wr.wr.num_sge = num_sges;
279
- rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
280
- rdma_wr.remote_addr =
281
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
275
+ rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
276
+ rdma_wr->wr.num_sge = num_sges;
277
+ rdma_wr->remote_addr =
278
+ lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
282279 /* RMBE within RMB */
283280 conn->tx_off +
284281 /* offset within RMBE */
285282 peer_rmbe_offset;
286
- rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
287
- rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
288
- if (rc) {
289
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
290
- smc_lgr_terminate(lgr);
291
- }
283
+ rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
284
+ rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
285
+ if (rc)
286
+ smcr_link_down_cond_sched(link);
292287 return rc;
293288 }
294289
....@@ -311,24 +306,27 @@
311306 /* SMC-R helper for smc_tx_rdma_writes() */
312307 static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
313308 size_t src_off, size_t src_len,
314
- size_t dst_off, size_t dst_len)
309
+ size_t dst_off, size_t dst_len,
310
+ struct smc_rdma_wr *wr_rdma_buf)
315311 {
312
+ struct smc_link *link = conn->lnk;
313
+
316314 dma_addr_t dma_addr =
317
- sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
318
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
315
+ sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
319316 int src_len_sum = src_len, dst_len_sum = dst_len;
320
- struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
321317 int sent_count = src_off;
322318 int srcchunk, dstchunk;
323319 int num_sges;
324320 int rc;
325321
326322 for (dstchunk = 0; dstchunk < 2; dstchunk++) {
323
+ struct ib_sge *sge =
324
+ wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
325
+
327326 num_sges = 0;
328327 for (srcchunk = 0; srcchunk < 2; srcchunk++) {
329
- sges[srcchunk].addr = dma_addr + src_off;
330
- sges[srcchunk].length = src_len;
331
- sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
328
+ sge[srcchunk].addr = dma_addr + src_off;
329
+ sge[srcchunk].length = src_len;
332330 num_sges++;
333331
334332 src_off += src_len;
....@@ -341,7 +339,8 @@
341339 src_len = dst_len - src_len; /* remainder */
342340 src_len_sum += src_len;
343341 }
344
- rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
342
+ rc = smc_tx_rdma_write(conn, dst_off, num_sges,
343
+ &wr_rdma_buf->wr_tx_rdma[dstchunk]);
345344 if (rc)
346345 return rc;
347346 if (dst_len_sum == len)
....@@ -400,7 +399,8 @@
400399 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
401400 * usable snd_wnd as max transmit
402401 */
403
-static int smc_tx_rdma_writes(struct smc_connection *conn)
402
+static int smc_tx_rdma_writes(struct smc_connection *conn,
403
+ struct smc_rdma_wr *wr_rdma_buf)
404404 {
405405 size_t len, src_len, dst_off, dst_len; /* current chunk values */
406406 union smc_host_cursor sent, prep, prod, cons;
....@@ -461,7 +461,7 @@
461461 dst_off, dst_len);
462462 else
463463 rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
464
- dst_off, dst_len);
464
+ dst_off, dst_len, wr_rdma_buf);
465465 if (rc)
466466 return rc;
467467
....@@ -481,39 +481,51 @@
481481 */
482482 static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
483483 {
484
- struct smc_cdc_producer_flags *pflags;
484
+ struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
485
+ struct smc_link *link = conn->lnk;
486
+ struct smc_rdma_wr *wr_rdma_buf;
485487 struct smc_cdc_tx_pend *pend;
486488 struct smc_wr_buf *wr_buf;
487489 int rc;
488490
489
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
491
+ if (!link || !smc_wr_tx_link_hold(link))
492
+ return -ENOLINK;
493
+ rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
490494 if (rc < 0) {
495
+ smc_wr_tx_link_put(link);
491496 if (rc == -EBUSY) {
492497 struct smc_sock *smc =
493498 container_of(conn, struct smc_sock, conn);
494499
495500 if (smc->sk.sk_err == ECONNABORTED)
496501 return sock_error(&smc->sk);
502
+ if (conn->killed)
503
+ return -EPIPE;
497504 rc = 0;
498
- if (conn->alert_token_local) /* connection healthy */
499
- mod_delayed_work(system_wq, &conn->tx_work,
500
- SMC_TX_WORK_DELAY);
505
+ mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
506
+ SMC_TX_WORK_DELAY);
501507 }
502508 return rc;
503509 }
504510
505511 spin_lock_bh(&conn->send_lock);
506
- if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
507
- rc = smc_tx_rdma_writes(conn);
512
+ if (link != conn->lnk) {
513
+ /* link of connection changed, tx_work will restart */
514
+ smc_wr_tx_put_slot(link,
515
+ (struct smc_wr_tx_pend_priv *)pend);
516
+ rc = -ENOLINK;
517
+ goto out_unlock;
518
+ }
519
+ if (!pflags->urg_data_present) {
520
+ rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
508521 if (rc) {
509
- smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
522
+ smc_wr_tx_put_slot(link,
510523 (struct smc_wr_tx_pend_priv *)pend);
511524 goto out_unlock;
512525 }
513526 }
514527
515528 rc = smc_cdc_msg_send(conn, wr_buf, pend);
516
- pflags = &conn->local_tx_ctrl.prod_flags;
517529 if (!rc && pflags->urg_data_present) {
518530 pflags->urg_data_pending = 0;
519531 pflags->urg_data_present = 0;
....@@ -521,6 +533,7 @@
521533
522534 out_unlock:
523535 spin_unlock_bh(&conn->send_lock);
536
+ smc_wr_tx_link_put(link);
524537 return rc;
525538 }
526539
....@@ -531,7 +544,7 @@
531544
532545 spin_lock_bh(&conn->send_lock);
533546 if (!pflags->urg_data_present)
534
- rc = smc_tx_rdma_writes(conn);
547
+ rc = smc_tx_rdma_writes(conn, NULL);
535548 if (!rc)
536549 rc = smcd_cdc_msg_send(conn);
537550
....@@ -547,11 +560,20 @@
547560 {
548561 int rc;
549562
563
+ if (conn->killed ||
564
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
565
+ return -EPIPE; /* connection being aborted */
550566 if (conn->lgr->is_smcd)
551567 rc = smcd_tx_sndbuf_nonempty(conn);
552568 else
553569 rc = smcr_tx_sndbuf_nonempty(conn);
554570
571
+ if (!rc) {
572
+ /* trigger socket release if connection is closing */
573
+ struct smc_sock *smc = container_of(conn, struct smc_sock,
574
+ conn);
575
+ smc_close_wake_tx_prepared(smc);
576
+ }
555577 return rc;
556578 }
557579
....@@ -567,9 +589,7 @@
567589 int rc;
568590
569591 lock_sock(&smc->sk);
570
- if (smc->sk.sk_err ||
571
- !conn->alert_token_local ||
572
- conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
592
+ if (smc->sk.sk_err)
573593 goto out;
574594
575595 rc = smc_tx_sndbuf_nonempty(conn);
....@@ -602,15 +622,15 @@
602622 ((to_confirm > conn->rmbe_update_limit) &&
603623 ((sender_free <= (conn->rmb_desc->len / 2)) ||
604624 conn->local_rx_ctrl.prod_flags.write_blocked))) {
625
+ if (conn->killed ||
626
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
627
+ return;
605628 if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
606
- conn->alert_token_local) { /* connection healthy */
607
- schedule_delayed_work(&conn->tx_work,
608
- SMC_TX_WORK_DELAY);
629
+ !conn->killed) {
630
+ queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
631
+ SMC_TX_WORK_DELAY);
609632 return;
610633 }
611
- smc_curs_copy(&conn->rx_curs_confirmed,
612
- &conn->local_tx_ctrl.cons, conn);
613
- conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
614634 }
615635 if (conn->local_rx_ctrl.prod_flags.write_blocked &&
616636 !atomic_read(&conn->bytes_to_rcv))