.. | .. |
---|
24 | 24 | #include "smc.h" |
---|
25 | 25 | #include "smc_wr.h" |
---|
26 | 26 | #include "smc_cdc.h" |
---|
| 27 | +#include "smc_close.h" |
---|
27 | 28 | #include "smc_ism.h" |
---|
28 | 29 | #include "smc_tx.h" |
---|
29 | 30 | |
---|
30 | | -#define SMC_TX_WORK_DELAY HZ |
---|
| 31 | +#define SMC_TX_WORK_DELAY 0 |
---|
31 | 32 | #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ |
---|
32 | 33 | |
---|
33 | 34 | /***************************** sndbuf producer *******************************/ |
---|
.. | .. |
---|
85 | 86 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); |
---|
86 | 87 | if (sk->sk_err || |
---|
87 | 88 | (sk->sk_shutdown & SEND_SHUTDOWN) || |
---|
| 89 | + conn->killed || |
---|
88 | 90 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing) { |
---|
89 | 91 | rc = -EPIPE; |
---|
90 | 92 | break; |
---|
.. | .. |
---|
108 | 110 | break; /* at least 1 byte of free & no urgent data */ |
---|
109 | 111 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
---|
110 | 112 | sk_wait_event(sk, &timeo, |
---|
111 | | - sk->sk_err || |
---|
112 | | - (sk->sk_shutdown & SEND_SHUTDOWN) || |
---|
| 113 | + READ_ONCE(sk->sk_err) || |
---|
| 114 | + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || |
---|
113 | 115 | smc_cdc_rxed_any_close(conn) || |
---|
114 | 116 | (atomic_read(&conn->sndbuf_space) && |
---|
115 | 117 | !conn->urg_tx_pend), |
---|
.. | .. |
---|
154 | 156 | return -ENOTCONN; |
---|
155 | 157 | if (smc->sk.sk_shutdown & SEND_SHUTDOWN || |
---|
156 | 158 | (smc->sk.sk_err == ECONNABORTED) || |
---|
157 | | - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) |
---|
| 159 | + conn->killed) |
---|
158 | 160 | return -EPIPE; |
---|
159 | 161 | if (smc_cdc_rxed_any_close(conn)) |
---|
160 | 162 | return send_done ?: -ECONNRESET; |
---|
.. | .. |
---|
226 | 228 | /* for a corked socket defer the RDMA writes if there |
---|
227 | 229 | * is still sufficient sndbuf_space available |
---|
228 | 230 | */ |
---|
229 | | - schedule_delayed_work(&conn->tx_work, |
---|
230 | | - SMC_TX_CORK_DELAY); |
---|
| 231 | + queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, |
---|
| 232 | + SMC_TX_CORK_DELAY); |
---|
231 | 233 | else |
---|
232 | 234 | smc_tx_sndbuf_nonempty(conn); |
---|
233 | 235 | } /* while (msg_data_left(msg)) */ |
---|
.. | .. |
---|
264 | 266 | |
---|
265 | 267 | /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ |
---|
266 | 268 | static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, |
---|
267 | | - int num_sges, struct ib_sge sges[]) |
---|
| 269 | + int num_sges, struct ib_rdma_wr *rdma_wr) |
---|
268 | 270 | { |
---|
269 | 271 | struct smc_link_group *lgr = conn->lgr; |
---|
270 | | - struct ib_rdma_wr rdma_wr; |
---|
271 | | - struct smc_link *link; |
---|
| 272 | + struct smc_link *link = conn->lnk; |
---|
272 | 273 | int rc; |
---|
273 | 274 | |
---|
274 | | - memset(&rdma_wr, 0, sizeof(rdma_wr)); |
---|
275 | | - link = &lgr->lnk[SMC_SINGLE_LINK]; |
---|
276 | | - rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link); |
---|
277 | | - rdma_wr.wr.sg_list = sges; |
---|
278 | | - rdma_wr.wr.num_sge = num_sges; |
---|
279 | | - rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; |
---|
280 | | - rdma_wr.remote_addr = |
---|
281 | | - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + |
---|
| 275 | + rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); |
---|
| 276 | + rdma_wr->wr.num_sge = num_sges; |
---|
| 277 | + rdma_wr->remote_addr = |
---|
| 278 | + lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr + |
---|
282 | 279 | /* RMBE within RMB */ |
---|
283 | 280 | conn->tx_off + |
---|
284 | 281 | /* offset within RMBE */ |
---|
285 | 282 | peer_rmbe_offset; |
---|
286 | | - rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; |
---|
287 | | - rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); |
---|
288 | | - if (rc) { |
---|
289 | | - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
---|
290 | | - smc_lgr_terminate(lgr); |
---|
291 | | - } |
---|
| 283 | + rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey; |
---|
| 284 | + rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); |
---|
| 285 | + if (rc) |
---|
| 286 | + smcr_link_down_cond_sched(link); |
---|
292 | 287 | return rc; |
---|
293 | 288 | } |
---|
294 | 289 | |
---|
.. | .. |
---|
311 | 306 | /* SMC-R helper for smc_tx_rdma_writes() */ |
---|
312 | 307 | static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, |
---|
313 | 308 | size_t src_off, size_t src_len, |
---|
314 | | - size_t dst_off, size_t dst_len) |
---|
| 309 | + size_t dst_off, size_t dst_len, |
---|
| 310 | + struct smc_rdma_wr *wr_rdma_buf) |
---|
315 | 311 | { |
---|
| 312 | + struct smc_link *link = conn->lnk; |
---|
| 313 | + |
---|
316 | 314 | dma_addr_t dma_addr = |
---|
317 | | - sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); |
---|
318 | | - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; |
---|
| 315 | + sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl); |
---|
319 | 316 | int src_len_sum = src_len, dst_len_sum = dst_len; |
---|
320 | | - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; |
---|
321 | 317 | int sent_count = src_off; |
---|
322 | 318 | int srcchunk, dstchunk; |
---|
323 | 319 | int num_sges; |
---|
324 | 320 | int rc; |
---|
325 | 321 | |
---|
326 | 322 | for (dstchunk = 0; dstchunk < 2; dstchunk++) { |
---|
| 323 | + struct ib_sge *sge = |
---|
| 324 | + wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; |
---|
| 325 | + |
---|
327 | 326 | num_sges = 0; |
---|
328 | 327 | for (srcchunk = 0; srcchunk < 2; srcchunk++) { |
---|
329 | | - sges[srcchunk].addr = dma_addr + src_off; |
---|
330 | | - sges[srcchunk].length = src_len; |
---|
331 | | - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; |
---|
| 328 | + sge[srcchunk].addr = dma_addr + src_off; |
---|
| 329 | + sge[srcchunk].length = src_len; |
---|
332 | 330 | num_sges++; |
---|
333 | 331 | |
---|
334 | 332 | src_off += src_len; |
---|
.. | .. |
---|
341 | 339 | src_len = dst_len - src_len; /* remainder */ |
---|
342 | 340 | src_len_sum += src_len; |
---|
343 | 341 | } |
---|
344 | | - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); |
---|
| 342 | + rc = smc_tx_rdma_write(conn, dst_off, num_sges, |
---|
| 343 | + &wr_rdma_buf->wr_tx_rdma[dstchunk]); |
---|
345 | 344 | if (rc) |
---|
346 | 345 | return rc; |
---|
347 | 346 | if (dst_len_sum == len) |
---|
.. | .. |
---|
400 | 399 | /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; |
---|
401 | 400 | * usable snd_wnd as max transmit |
---|
402 | 401 | */ |
---|
403 | | -static int smc_tx_rdma_writes(struct smc_connection *conn) |
---|
| 402 | +static int smc_tx_rdma_writes(struct smc_connection *conn, |
---|
| 403 | + struct smc_rdma_wr *wr_rdma_buf) |
---|
404 | 404 | { |
---|
405 | 405 | size_t len, src_len, dst_off, dst_len; /* current chunk values */ |
---|
406 | 406 | union smc_host_cursor sent, prep, prod, cons; |
---|
.. | .. |
---|
461 | 461 | dst_off, dst_len); |
---|
462 | 462 | else |
---|
463 | 463 | rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, |
---|
464 | | - dst_off, dst_len); |
---|
| 464 | + dst_off, dst_len, wr_rdma_buf); |
---|
465 | 465 | if (rc) |
---|
466 | 466 | return rc; |
---|
467 | 467 | |
---|
.. | .. |
---|
481 | 481 | */ |
---|
482 | 482 | static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) |
---|
483 | 483 | { |
---|
484 | | - struct smc_cdc_producer_flags *pflags; |
---|
| 484 | + struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; |
---|
| 485 | + struct smc_link *link = conn->lnk; |
---|
| 486 | + struct smc_rdma_wr *wr_rdma_buf; |
---|
485 | 487 | struct smc_cdc_tx_pend *pend; |
---|
486 | 488 | struct smc_wr_buf *wr_buf; |
---|
487 | 489 | int rc; |
---|
488 | 490 | |
---|
489 | | - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); |
---|
| 491 | + if (!link || !smc_wr_tx_link_hold(link)) |
---|
| 492 | + return -ENOLINK; |
---|
| 493 | + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); |
---|
490 | 494 | if (rc < 0) { |
---|
| 495 | + smc_wr_tx_link_put(link); |
---|
491 | 496 | if (rc == -EBUSY) { |
---|
492 | 497 | struct smc_sock *smc = |
---|
493 | 498 | container_of(conn, struct smc_sock, conn); |
---|
494 | 499 | |
---|
495 | 500 | if (smc->sk.sk_err == ECONNABORTED) |
---|
496 | 501 | return sock_error(&smc->sk); |
---|
| 502 | + if (conn->killed) |
---|
| 503 | + return -EPIPE; |
---|
497 | 504 | rc = 0; |
---|
498 | | - if (conn->alert_token_local) /* connection healthy */ |
---|
499 | | - mod_delayed_work(system_wq, &conn->tx_work, |
---|
500 | | - SMC_TX_WORK_DELAY); |
---|
| 505 | + mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work, |
---|
| 506 | + SMC_TX_WORK_DELAY); |
---|
501 | 507 | } |
---|
502 | 508 | return rc; |
---|
503 | 509 | } |
---|
504 | 510 | |
---|
505 | 511 | spin_lock_bh(&conn->send_lock); |
---|
506 | | - if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { |
---|
507 | | - rc = smc_tx_rdma_writes(conn); |
---|
| 512 | + if (link != conn->lnk) { |
---|
| 513 | + /* link of connection changed, tx_work will restart */ |
---|
| 514 | + smc_wr_tx_put_slot(link, |
---|
| 515 | + (struct smc_wr_tx_pend_priv *)pend); |
---|
| 516 | + rc = -ENOLINK; |
---|
| 517 | + goto out_unlock; |
---|
| 518 | + } |
---|
| 519 | + if (!pflags->urg_data_present) { |
---|
| 520 | + rc = smc_tx_rdma_writes(conn, wr_rdma_buf); |
---|
508 | 521 | if (rc) { |
---|
509 | | - smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], |
---|
| 522 | + smc_wr_tx_put_slot(link, |
---|
510 | 523 | (struct smc_wr_tx_pend_priv *)pend); |
---|
511 | 524 | goto out_unlock; |
---|
512 | 525 | } |
---|
513 | 526 | } |
---|
514 | 527 | |
---|
515 | 528 | rc = smc_cdc_msg_send(conn, wr_buf, pend); |
---|
516 | | - pflags = &conn->local_tx_ctrl.prod_flags; |
---|
517 | 529 | if (!rc && pflags->urg_data_present) { |
---|
518 | 530 | pflags->urg_data_pending = 0; |
---|
519 | 531 | pflags->urg_data_present = 0; |
---|
.. | .. |
---|
521 | 533 | |
---|
522 | 534 | out_unlock: |
---|
523 | 535 | spin_unlock_bh(&conn->send_lock); |
---|
| 536 | + smc_wr_tx_link_put(link); |
---|
524 | 537 | return rc; |
---|
525 | 538 | } |
---|
526 | 539 | |
---|
.. | .. |
---|
531 | 544 | |
---|
532 | 545 | spin_lock_bh(&conn->send_lock); |
---|
533 | 546 | if (!pflags->urg_data_present) |
---|
534 | | - rc = smc_tx_rdma_writes(conn); |
---|
| 547 | + rc = smc_tx_rdma_writes(conn, NULL); |
---|
535 | 548 | if (!rc) |
---|
536 | 549 | rc = smcd_cdc_msg_send(conn); |
---|
537 | 550 | |
---|
.. | .. |
---|
547 | 560 | { |
---|
548 | 561 | int rc; |
---|
549 | 562 | |
---|
| 563 | + if (conn->killed || |
---|
| 564 | + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) |
---|
| 565 | + return -EPIPE; /* connection being aborted */ |
---|
550 | 566 | if (conn->lgr->is_smcd) |
---|
551 | 567 | rc = smcd_tx_sndbuf_nonempty(conn); |
---|
552 | 568 | else |
---|
553 | 569 | rc = smcr_tx_sndbuf_nonempty(conn); |
---|
554 | 570 | |
---|
| 571 | + if (!rc) { |
---|
| 572 | + /* trigger socket release if connection is closing */ |
---|
| 573 | + struct smc_sock *smc = container_of(conn, struct smc_sock, |
---|
| 574 | + conn); |
---|
| 575 | + smc_close_wake_tx_prepared(smc); |
---|
| 576 | + } |
---|
555 | 577 | return rc; |
---|
556 | 578 | } |
---|
557 | 579 | |
---|
.. | .. |
---|
567 | 589 | int rc; |
---|
568 | 590 | |
---|
569 | 591 | lock_sock(&smc->sk); |
---|
570 | | - if (smc->sk.sk_err || |
---|
571 | | - !conn->alert_token_local || |
---|
572 | | - conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) |
---|
| 592 | + if (smc->sk.sk_err) |
---|
573 | 593 | goto out; |
---|
574 | 594 | |
---|
575 | 595 | rc = smc_tx_sndbuf_nonempty(conn); |
---|
.. | .. |
---|
602 | 622 | ((to_confirm > conn->rmbe_update_limit) && |
---|
603 | 623 | ((sender_free <= (conn->rmb_desc->len / 2)) || |
---|
604 | 624 | conn->local_rx_ctrl.prod_flags.write_blocked))) { |
---|
| 625 | + if (conn->killed || |
---|
| 626 | + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) |
---|
| 627 | + return; |
---|
605 | 628 | if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && |
---|
606 | | - conn->alert_token_local) { /* connection healthy */ |
---|
607 | | - schedule_delayed_work(&conn->tx_work, |
---|
608 | | - SMC_TX_WORK_DELAY); |
---|
| 629 | + !conn->killed) { |
---|
| 630 | + queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, |
---|
| 631 | + SMC_TX_WORK_DELAY); |
---|
609 | 632 | return; |
---|
610 | 633 | } |
---|
611 | | - smc_curs_copy(&conn->rx_curs_confirmed, |
---|
612 | | - &conn->local_tx_ctrl.cons, conn); |
---|
613 | | - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; |
---|
614 | 634 | } |
---|
615 | 635 | if (conn->local_rx_ctrl.prod_flags.write_blocked && |
---|
616 | 636 | !atomic_read(&conn->bytes_to_rcv)) |
---|