hc
2024-05-10 10ebd8556b7990499c896a550e3d416b444211e6
kernel/net/smc/af_smc.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Shared Memory Communications over RDMA (SMC-R) and RoCE
34 *
....@@ -24,11 +25,17 @@
2425 #include <linux/in.h>
2526 #include <linux/sched/signal.h>
2627 #include <linux/if_vlan.h>
28
+#include <linux/rcupdate_wait.h>
29
+#include <linux/ctype.h>
2730
2831 #include <net/sock.h>
2932 #include <net/tcp.h>
3033 #include <net/smc.h>
3134 #include <asm/ioctls.h>
35
+
36
+#include <net/net_namespace.h>
37
+#include <net/netns/generic.h>
38
+#include "smc_netns.h"
3239
3340 #include "smc.h"
3441 #include "smc_clc.h"
....@@ -42,9 +49,15 @@
4249 #include "smc_rx.h"
4350 #include "smc_close.h"
4451
45
-static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
46
- * creation
52
+static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
53
+ * creation on server
4754 */
55
+static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
56
+ * creation on client
57
+ */
58
+
59
+struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
60
+struct workqueue_struct *smc_close_wq; /* wq for close work */
4861
4962 static void smc_tcp_listen_work(struct work_struct *);
5063 static void smc_connect_work(struct work_struct *);
....@@ -115,21 +128,74 @@
115128 };
116129 EXPORT_SYMBOL_GPL(smc_proto6);
117130
131
+static void smc_restore_fallback_changes(struct smc_sock *smc)
132
+{
133
+ if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
134
+ smc->clcsock->file->private_data = smc->sk.sk_socket;
135
+ smc->clcsock->file = NULL;
136
+ }
137
+}
138
+
139
+static int __smc_release(struct smc_sock *smc)
140
+{
141
+ struct sock *sk = &smc->sk;
142
+ int rc = 0;
143
+
144
+ if (!smc->use_fallback) {
145
+ rc = smc_close_active(smc);
146
+ sock_set_flag(sk, SOCK_DEAD);
147
+ sk->sk_shutdown |= SHUTDOWN_MASK;
148
+ } else {
149
+ if (sk->sk_state != SMC_CLOSED) {
150
+ if (sk->sk_state != SMC_LISTEN &&
151
+ sk->sk_state != SMC_INIT)
152
+ sock_put(sk); /* passive closing */
153
+ if (sk->sk_state == SMC_LISTEN) {
154
+ /* wake up clcsock accept */
155
+ rc = kernel_sock_shutdown(smc->clcsock,
156
+ SHUT_RDWR);
157
+ }
158
+ sk->sk_state = SMC_CLOSED;
159
+ sk->sk_state_change(sk);
160
+ }
161
+ smc_restore_fallback_changes(smc);
162
+ }
163
+
164
+ sk->sk_prot->unhash(sk);
165
+
166
+ if (sk->sk_state == SMC_CLOSED) {
167
+ if (smc->clcsock) {
168
+ release_sock(sk);
169
+ smc_clcsock_release(smc);
170
+ lock_sock(sk);
171
+ }
172
+ if (!smc->use_fallback)
173
+ smc_conn_free(&smc->conn);
174
+ }
175
+
176
+ return rc;
177
+}
178
+
118179 static int smc_release(struct socket *sock)
119180 {
120181 struct sock *sk = sock->sk;
121182 struct smc_sock *smc;
122
- int rc = 0;
183
+ int old_state, rc = 0;
123184
124185 if (!sk)
125186 goto out;
126187
188
+ sock_hold(sk); /* sock_put below */
127189 smc = smc_sk(sk);
128190
191
+ old_state = sk->sk_state;
192
+
129193 /* cleanup for a dangling non-blocking connect */
130
- flush_work(&smc->connect_work);
131
- kfree(smc->connect_info);
132
- smc->connect_info = NULL;
194
+ if (smc->connect_nonblock && old_state == SMC_INIT)
195
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
196
+
197
+ if (cancel_work_sync(&smc->connect_work))
198
+ sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
133199
134200 if (sk->sk_state == SMC_LISTEN)
135201 /* smc_close_non_accepted() is called and acquires
....@@ -139,38 +205,18 @@
139205 else
140206 lock_sock(sk);
141207
142
- if (!smc->use_fallback) {
143
- rc = smc_close_active(smc);
144
- sock_set_flag(sk, SOCK_DEAD);
145
- sk->sk_shutdown |= SHUTDOWN_MASK;
146
- }
208
+ if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
209
+ !smc->use_fallback)
210
+ smc_close_active_abort(smc);
147211
148
- sk->sk_prot->unhash(sk);
149
-
150
- if (smc->clcsock) {
151
- if (smc->use_fallback && sk->sk_state == SMC_LISTEN) {
152
- /* wake up clcsock accept */
153
- rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
154
- }
155
- mutex_lock(&smc->clcsock_release_lock);
156
- sock_release(smc->clcsock);
157
- smc->clcsock = NULL;
158
- mutex_unlock(&smc->clcsock_release_lock);
159
- }
160
- if (smc->use_fallback) {
161
- if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
162
- sock_put(sk); /* passive closing */
163
- sk->sk_state = SMC_CLOSED;
164
- sk->sk_state_change(sk);
165
- }
212
+ rc = __smc_release(smc);
166213
167214 /* detach socket */
168215 sock_orphan(sk);
169216 sock->sk = NULL;
170
- if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
171
- smc_conn_free(&smc->conn);
172217 release_sock(sk);
173218
219
+ sock_put(sk); /* sock_hold above */
174220 sock_put(sk); /* final sock_put */
175221 out:
176222 return rc;
....@@ -245,7 +291,7 @@
245291
246292 /* Check if socket is already active */
247293 rc = -EINVAL;
248
- if (sk->sk_state != SMC_INIT)
294
+ if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
249295 goto out_rel;
250296
251297 smc->clcsock->sk->sk_reuse = sk->sk_reuse;
....@@ -289,7 +335,8 @@
289335 (1UL << SOCK_RXQ_OVFL) | \
290336 (1UL << SOCK_WIFI_STATUS) | \
291337 (1UL << SOCK_NOFCS) | \
292
- (1UL << SOCK_FILTER_LOCKED))
338
+ (1UL << SOCK_FILTER_LOCKED) | \
339
+ (1UL << SOCK_TSTAMP_NEW))
293340 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
294341 * clc socket (since smc is not called for these options from net/core)
295342 */
....@@ -308,47 +355,61 @@
308355 smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
309356 }
310357
311
-/* register a new rmb, optionally send confirm_rkey msg to register with peer */
312
-static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
313
- bool conf_rkey)
358
+/* register the new rmb on all links */
359
+static int smcr_lgr_reg_rmbs(struct smc_link *link,
360
+ struct smc_buf_desc *rmb_desc)
314361 {
315
- /* register memory region for new rmb */
316
- if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
317
- rmb_desc->regerr = 1;
318
- return -EFAULT;
362
+ struct smc_link_group *lgr = link->lgr;
363
+ int i, rc = 0;
364
+
365
+ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
366
+ if (rc)
367
+ return rc;
368
+ /* protect against parallel smc_llc_cli_rkey_exchange() and
369
+ * parallel smcr_link_reg_rmb()
370
+ */
371
+ mutex_lock(&lgr->llc_conf_mutex);
372
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
373
+ if (!smc_link_active(&lgr->lnk[i]))
374
+ continue;
375
+ rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
376
+ if (rc)
377
+ goto out;
319378 }
320
- if (!conf_rkey)
321
- return 0;
379
+
322380 /* exchange confirm_rkey msg with peer */
323
- if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
324
- rmb_desc->regerr = 1;
325
- return -EFAULT;
381
+ rc = smc_llc_do_confirm_rkey(link, rmb_desc);
382
+ if (rc) {
383
+ rc = -EFAULT;
384
+ goto out;
326385 }
327
- return 0;
386
+ rmb_desc->is_conf_rkey = true;
387
+out:
388
+ mutex_unlock(&lgr->llc_conf_mutex);
389
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
390
+ return rc;
328391 }
329392
330
-static int smc_clnt_conf_first_link(struct smc_sock *smc)
393
+static int smcr_clnt_conf_first_link(struct smc_sock *smc)
331394 {
332
- struct net *net = sock_net(smc->clcsock->sk);
333
- struct smc_link_group *lgr = smc->conn.lgr;
334
- struct smc_link *link;
335
- int rest;
395
+ struct smc_link *link = smc->conn.lnk;
396
+ struct smc_llc_qentry *qentry;
336397 int rc;
337398
338
- link = &lgr->lnk[SMC_SINGLE_LINK];
339399 /* receive CONFIRM LINK request from server over RoCE fabric */
340
- rest = wait_for_completion_interruptible_timeout(
341
- &link->llc_confirm,
342
- SMC_LLC_WAIT_FIRST_TIME);
343
- if (rest <= 0) {
400
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
401
+ SMC_LLC_CONFIRM_LINK);
402
+ if (!qentry) {
344403 struct smc_clc_msg_decline dclc;
345404
346405 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
347
- SMC_CLC_DECLINE);
348
- return rc;
406
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
407
+ return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
349408 }
350
-
351
- if (link->llc_confirm_rc)
409
+ smc_llc_save_peer_uid(qentry);
410
+ rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
411
+ smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
412
+ if (rc)
352413 return SMC_CLC_DECL_RMBE_EC;
353414
354415 rc = smc_ib_modify_qp_rts(link);
....@@ -357,60 +418,86 @@
357418
358419 smc_wr_remember_qp_attr(link);
359420
360
- if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
421
+ if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
361422 return SMC_CLC_DECL_ERR_REGRMB;
423
+
424
+ /* confirm_rkey is implicit on 1st contact */
425
+ smc->conn.rmb_desc->is_conf_rkey = true;
362426
363427 /* send CONFIRM LINK response over RoCE fabric */
364428 rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
365429 if (rc < 0)
366430 return SMC_CLC_DECL_TIMEOUT_CL;
367431
368
- /* receive ADD LINK request from server over RoCE fabric */
369
- rest = wait_for_completion_interruptible_timeout(&link->llc_add,
370
- SMC_LLC_WAIT_TIME);
371
- if (rest <= 0) {
432
+ smc_llc_link_active(link);
433
+ smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
434
+
435
+ /* optional 2nd link, receive ADD LINK request from server */
436
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
437
+ SMC_LLC_ADD_LINK);
438
+ if (!qentry) {
372439 struct smc_clc_msg_decline dclc;
373440
374441 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
375
- SMC_CLC_DECLINE);
442
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
443
+ if (rc == -EAGAIN)
444
+ rc = 0; /* no DECLINE received, go with one link */
376445 return rc;
377446 }
378
-
379
- /* send add link reject message, only one link supported for now */
380
- rc = smc_llc_send_add_link(link,
381
- link->smcibdev->mac[link->ibport - 1],
382
- link->gid, SMC_LLC_RESP);
383
- if (rc < 0)
384
- return SMC_CLC_DECL_TIMEOUT_AL;
385
-
386
- smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
387
-
447
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
448
+ smc_llc_cli_add_link(link, qentry);
388449 return 0;
389450 }
390451
391452 static void smcr_conn_save_peer_info(struct smc_sock *smc,
392453 struct smc_clc_msg_accept_confirm *clc)
393454 {
394
- int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
455
+ int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
395456
396
- smc->conn.peer_rmbe_idx = clc->rmbe_idx;
397
- smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
457
+ smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
458
+ smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
398459 smc->conn.peer_rmbe_size = bufsize;
399460 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
400461 smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
401462 }
402463
464
+static bool smc_isascii(char *hostname)
465
+{
466
+ int i;
467
+
468
+ for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
469
+ if (!isascii(hostname[i]))
470
+ return false;
471
+ return true;
472
+}
473
+
403474 static void smcd_conn_save_peer_info(struct smc_sock *smc,
404475 struct smc_clc_msg_accept_confirm *clc)
405476 {
406
- int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
477
+ int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
407478
408
- smc->conn.peer_rmbe_idx = clc->dmbe_idx;
409
- smc->conn.peer_token = clc->token;
479
+ smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
480
+ smc->conn.peer_token = clc->d0.token;
410481 /* msg header takes up space in the buffer */
411482 smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
412483 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
413484 smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
485
+ if (clc->hdr.version > SMC_V1 &&
486
+ (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
487
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
488
+ (struct smc_clc_msg_accept_confirm_v2 *)clc;
489
+ struct smc_clc_first_contact_ext *fce =
490
+ (struct smc_clc_first_contact_ext *)
491
+ (((u8 *)clc_v2) + sizeof(*clc_v2));
492
+
493
+ memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
494
+ SMC_MAX_EID_LEN);
495
+ smc->conn.lgr->peer_os = fce->os_type;
496
+ smc->conn.lgr->peer_smc_release = fce->release;
497
+ if (smc_isascii(fce->hostname))
498
+ memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
499
+ SMC_MAX_HOSTNAME_LEN);
500
+ }
414501 }
415502
416503 static void smc_conn_save_peer_info(struct smc_sock *smc,
....@@ -425,26 +512,53 @@
425512 static void smc_link_save_peer_info(struct smc_link *link,
426513 struct smc_clc_msg_accept_confirm *clc)
427514 {
428
- link->peer_qpn = ntoh24(clc->qpn);
429
- memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
430
- memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
431
- link->peer_psn = ntoh24(clc->psn);
432
- link->peer_mtu = clc->qp_mtu;
515
+ link->peer_qpn = ntoh24(clc->r0.qpn);
516
+ memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
517
+ memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
518
+ link->peer_psn = ntoh24(clc->r0.psn);
519
+ link->peer_mtu = clc->r0.qp_mtu;
520
+}
521
+
522
+static void smc_switch_to_fallback(struct smc_sock *smc)
523
+{
524
+ wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
525
+ wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
526
+ unsigned long flags;
527
+
528
+ smc->use_fallback = true;
529
+ if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
530
+ smc->clcsock->file = smc->sk.sk_socket->file;
531
+ smc->clcsock->file->private_data = smc->clcsock;
532
+ smc->clcsock->wq.fasync_list =
533
+ smc->sk.sk_socket->wq.fasync_list;
534
+
535
+ /* There may be some entries remaining in
536
+ * smc socket->wq, which should be removed
537
+ * to clcsocket->wq during the fallback.
538
+ */
539
+ spin_lock_irqsave(&smc_wait->lock, flags);
540
+ spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
541
+ list_splice_init(&smc_wait->head, &clc_wait->head);
542
+ spin_unlock(&clc_wait->lock);
543
+ spin_unlock_irqrestore(&smc_wait->lock, flags);
544
+ }
433545 }
434546
435547 /* fall back during connect */
436548 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
437549 {
438
- smc->use_fallback = true;
550
+ smc_switch_to_fallback(smc);
439551 smc->fallback_rsn = reason_code;
440552 smc_copy_sock_settings_to_clc(smc);
553
+ smc->connect_nonblock = 0;
441554 if (smc->sk.sk_state == SMC_INIT)
442555 smc->sk.sk_state = SMC_ACTIVE;
443556 return 0;
444557 }
445558
446559 /* decline and fall back during connect */
447
-static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
560
+static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
561
+ u8 version)
448562 {
449563 int rc;
450564
....@@ -454,7 +568,7 @@
454568 return reason_code;
455569 }
456570 if (reason_code != SMC_CLC_DECL_PEERDECL) {
457
- rc = smc_clc_send_decline(smc, reason_code);
571
+ rc = smc_clc_send_decline(smc, reason_code, version);
458572 if (rc < 0) {
459573 if (smc->sk.sk_state == SMC_INIT)
460574 sock_put(&smc->sk); /* passive closing */
....@@ -465,190 +579,367 @@
465579 }
466580
467581 /* abort connecting */
468
-static int smc_connect_abort(struct smc_sock *smc, int reason_code,
469
- int local_contact)
582
+static void smc_connect_abort(struct smc_sock *smc, int local_first)
470583 {
471
- if (local_contact == SMC_FIRST_CONTACT)
472
- smc_lgr_forget(smc->conn.lgr);
473
- mutex_unlock(&smc_create_lgr_pending);
474
- smc_conn_free(&smc->conn);
475
- return reason_code;
584
+ if (local_first)
585
+ smc_lgr_cleanup_early(&smc->conn);
586
+ else
587
+ smc_conn_free(&smc->conn);
476588 }
477589
478590 /* check if there is a rdma device available for this connection. */
479591 /* called for connect and listen */
480
-static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
481
- u8 *ibport, unsigned short vlan_id, u8 gid[])
592
+static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
482593 {
483
- int reason_code = 0;
484
-
485594 /* PNET table look up: search active ib_device and port
486595 * within same PNETID that also contains the ethernet device
487596 * used for the internal TCP socket
488597 */
489
- smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
490
- gid);
491
- if (!(*ibdev))
492
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
493
-
494
- return reason_code;
598
+ smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
599
+ if (!ini->ib_dev)
600
+ return SMC_CLC_DECL_NOSMCRDEV;
601
+ return 0;
495602 }
496603
497604 /* check if there is an ISM device available for this connection. */
498605 /* called for connect and listen */
499
-static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
606
+static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
500607 {
501608 /* Find ISM device with same PNETID as connecting interface */
502
- smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
503
- if (!(*ismdev))
504
- return SMC_CLC_DECL_CNFERR; /* configuration error */
609
+ smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
610
+ if (!ini->ism_dev[0])
611
+ return SMC_CLC_DECL_NOSMCDDEV;
612
+ else
613
+ ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
505614 return 0;
615
+}
616
+
617
+/* is chid unique for the ism devices that are already determined? */
618
+static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
619
+ int cnt)
620
+{
621
+ int i = (!ini->ism_dev[0]) ? 1 : 0;
622
+
623
+ for (; i < cnt; i++)
624
+ if (ini->ism_chid[i] == chid)
625
+ return false;
626
+ return true;
627
+}
628
+
629
+/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
630
+ * PNETID matching net_device)
631
+ */
632
+static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
633
+ struct smc_init_info *ini)
634
+{
635
+ int rc = SMC_CLC_DECL_NOSMCDDEV;
636
+ struct smcd_dev *smcd;
637
+ int i = 1;
638
+ u16 chid;
639
+
640
+ if (smcd_indicated(ini->smc_type_v1))
641
+ rc = 0; /* already initialized for V1 */
642
+ mutex_lock(&smcd_dev_list.mutex);
643
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
644
+ if (smcd->going_away || smcd == ini->ism_dev[0])
645
+ continue;
646
+ chid = smc_ism_get_chid(smcd);
647
+ if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
648
+ continue;
649
+ if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
650
+ smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
651
+ ini->ism_dev[i] = smcd;
652
+ ini->ism_chid[i] = chid;
653
+ ini->is_smcd = true;
654
+ rc = 0;
655
+ i++;
656
+ if (i > SMC_MAX_ISM_DEVS)
657
+ break;
658
+ }
659
+ }
660
+ mutex_unlock(&smcd_dev_list.mutex);
661
+ ini->ism_offered_cnt = i - 1;
662
+ if (!ini->ism_dev[0] && !ini->ism_dev[1])
663
+ ini->smcd_version = 0;
664
+
665
+ return rc;
506666 }
507667
508668 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
509669 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
510
- struct smcd_dev *ismdev,
511
- unsigned short vlan_id)
670
+ struct smc_init_info *ini)
512671 {
513
- if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
514
- return SMC_CLC_DECL_CNFERR;
672
+ if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
673
+ return SMC_CLC_DECL_ISMVLANERR;
515674 return 0;
675
+}
676
+
677
+static int smc_find_proposal_devices(struct smc_sock *smc,
678
+ struct smc_init_info *ini)
679
+{
680
+ int rc = 0;
681
+
682
+ /* check if there is an ism device available */
683
+ if (ini->smcd_version & SMC_V1) {
684
+ if (smc_find_ism_device(smc, ini) ||
685
+ smc_connect_ism_vlan_setup(smc, ini)) {
686
+ if (ini->smc_type_v1 == SMC_TYPE_B)
687
+ ini->smc_type_v1 = SMC_TYPE_R;
688
+ else
689
+ ini->smc_type_v1 = SMC_TYPE_N;
690
+ } /* else ISM V1 is supported for this connection */
691
+ if (smc_find_rdma_device(smc, ini)) {
692
+ if (ini->smc_type_v1 == SMC_TYPE_B)
693
+ ini->smc_type_v1 = SMC_TYPE_D;
694
+ else
695
+ ini->smc_type_v1 = SMC_TYPE_N;
696
+ } /* else RDMA is supported for this connection */
697
+ }
698
+ if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini))
699
+ ini->smc_type_v2 = SMC_TYPE_N;
700
+
701
+ /* if neither ISM nor RDMA are supported, fallback */
702
+ if (!smcr_indicated(ini->smc_type_v1) &&
703
+ ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
704
+ rc = SMC_CLC_DECL_NOSMCDEV;
705
+
706
+ return rc;
516707 }
517708
518709 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
519710 * used, the VLAN ID will be registered again during the connection setup.
520711 */
521
-static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
522
- struct smcd_dev *ismdev,
523
- unsigned short vlan_id)
712
+static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
713
+ struct smc_init_info *ini)
524714 {
525
- if (!is_smcd)
715
+ if (!smcd_indicated(ini->smc_type_v1))
526716 return 0;
527
- if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
717
+ if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
528718 return SMC_CLC_DECL_CNFERR;
529719 return 0;
530720 }
531721
722
+#define SMC_CLC_MAX_ACCEPT_LEN \
723
+ (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
724
+ sizeof(struct smc_clc_first_contact_ext) + \
725
+ sizeof(struct smc_clc_msg_trail))
726
+
532727 /* CLC handshake during connect */
533
-static int smc_connect_clc(struct smc_sock *smc, int smc_type,
534
- struct smc_clc_msg_accept_confirm *aclc,
535
- struct smc_ib_device *ibdev, u8 ibport,
536
- u8 gid[], struct smcd_dev *ismdev)
728
+static int smc_connect_clc(struct smc_sock *smc,
729
+ struct smc_clc_msg_accept_confirm_v2 *aclc2,
730
+ struct smc_init_info *ini)
537731 {
538732 int rc = 0;
539733
540734 /* do inband token exchange */
541
- rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
735
+ rc = smc_clc_send_proposal(smc, ini);
542736 if (rc)
543737 return rc;
544738 /* receive SMC Accept CLC message */
545
- return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
739
+ return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
740
+ SMC_CLC_ACCEPT, CLC_WAIT_TIME);
546741 }
547742
548743 /* setup for RDMA connection of client */
549744 static int smc_connect_rdma(struct smc_sock *smc,
550745 struct smc_clc_msg_accept_confirm *aclc,
551
- struct smc_ib_device *ibdev, u8 ibport)
746
+ struct smc_init_info *ini)
552747 {
553
- int local_contact = SMC_FIRST_CONTACT;
748
+ int i, reason_code = 0;
554749 struct smc_link *link;
555
- int reason_code = 0;
556750
557
- mutex_lock(&smc_create_lgr_pending);
558
- local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
559
- ibport, &aclc->lcl, NULL, 0);
560
- if (local_contact < 0) {
561
- if (local_contact == -ENOMEM)
562
- reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
563
- else if (local_contact == -ENOLINK)
564
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
565
- else
566
- reason_code = SMC_CLC_DECL_INTERR; /* other error */
567
- return smc_connect_abort(smc, reason_code, 0);
751
+ ini->is_smcd = false;
752
+ ini->ib_lcl = &aclc->r0.lcl;
753
+ ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
754
+ ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
755
+
756
+ mutex_lock(&smc_client_lgr_pending);
757
+ reason_code = smc_conn_create(smc, ini);
758
+ if (reason_code) {
759
+ mutex_unlock(&smc_client_lgr_pending);
760
+ return reason_code;
568761 }
569
- link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
570762
571763 smc_conn_save_peer_info(smc, aclc);
572764
573
- /* create send buffer and rmb */
574
- if (smc_buf_create(smc, false))
575
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
765
+ if (ini->first_contact_local) {
766
+ link = smc->conn.lnk;
767
+ } else {
768
+ /* set link that was assigned by server */
769
+ link = NULL;
770
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
771
+ struct smc_link *l = &smc->conn.lgr->lnk[i];
576772
577
- if (local_contact == SMC_FIRST_CONTACT)
773
+ if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
774
+ !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
775
+ SMC_GID_SIZE) &&
776
+ !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
777
+ sizeof(l->peer_mac))) {
778
+ link = l;
779
+ break;
780
+ }
781
+ }
782
+ if (!link) {
783
+ reason_code = SMC_CLC_DECL_NOSRVLINK;
784
+ goto connect_abort;
785
+ }
786
+ smc->conn.lnk = link;
787
+ }
788
+
789
+ /* create send buffer and rmb */
790
+ if (smc_buf_create(smc, false)) {
791
+ reason_code = SMC_CLC_DECL_MEM;
792
+ goto connect_abort;
793
+ }
794
+
795
+ if (ini->first_contact_local)
578796 smc_link_save_peer_info(link, aclc);
579797
580
- if (smc_rmb_rtoken_handling(&smc->conn, aclc))
581
- return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
582
- local_contact);
798
+ if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
799
+ reason_code = SMC_CLC_DECL_ERR_RTOK;
800
+ goto connect_abort;
801
+ }
583802
584803 smc_close_init(smc);
585804 smc_rx_init(smc);
586805
587
- if (local_contact == SMC_FIRST_CONTACT) {
588
- if (smc_ib_ready_link(link))
589
- return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
590
- local_contact);
806
+ if (ini->first_contact_local) {
807
+ if (smc_ib_ready_link(link)) {
808
+ reason_code = SMC_CLC_DECL_ERR_RDYLNK;
809
+ goto connect_abort;
810
+ }
591811 } else {
592
- if (!smc->conn.rmb_desc->reused &&
593
- smc_reg_rmb(link, smc->conn.rmb_desc, true))
594
- return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
595
- local_contact);
812
+ if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
813
+ reason_code = SMC_CLC_DECL_ERR_REGRMB;
814
+ goto connect_abort;
815
+ }
596816 }
597817 smc_rmb_sync_sg_for_device(&smc->conn);
598818
599
- reason_code = smc_clc_send_confirm(smc);
819
+ reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
820
+ SMC_V1);
600821 if (reason_code)
601
- return smc_connect_abort(smc, reason_code, local_contact);
822
+ goto connect_abort;
602823
603824 smc_tx_init(smc);
604825
605
- if (local_contact == SMC_FIRST_CONTACT) {
826
+ if (ini->first_contact_local) {
606827 /* QP confirmation over RoCE fabric */
607
- reason_code = smc_clnt_conf_first_link(smc);
828
+ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
829
+ reason_code = smcr_clnt_conf_first_link(smc);
830
+ smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
608831 if (reason_code)
609
- return smc_connect_abort(smc, reason_code,
610
- local_contact);
832
+ goto connect_abort;
611833 }
612
- mutex_unlock(&smc_create_lgr_pending);
834
+ mutex_unlock(&smc_client_lgr_pending);
613835
614836 smc_copy_sock_settings_to_clc(smc);
837
+ smc->connect_nonblock = 0;
615838 if (smc->sk.sk_state == SMC_INIT)
616839 smc->sk.sk_state = SMC_ACTIVE;
617840
618841 return 0;
842
+connect_abort:
843
+ smc_connect_abort(smc, ini->first_contact_local);
844
+ mutex_unlock(&smc_client_lgr_pending);
845
+ smc->connect_nonblock = 0;
846
+
847
+ return reason_code;
848
+}
849
+
850
+/* The server has chosen one of the proposed ISM devices for the communication.
851
+ * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
852
+ */
853
+static int
854
+smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
855
+ struct smc_init_info *ini)
856
+{
857
+ int i;
858
+
859
+ for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
860
+ if (ini->ism_chid[i] == ntohs(aclc->chid)) {
861
+ ini->ism_selected = i;
862
+ return 0;
863
+ }
864
+ }
865
+
866
+ return -EPROTO;
619867 }
620868
621869 /* setup for ISM connection of client */
622870 static int smc_connect_ism(struct smc_sock *smc,
623871 struct smc_clc_msg_accept_confirm *aclc,
624
- struct smcd_dev *ismdev)
872
+ struct smc_init_info *ini)
625873 {
626
- int local_contact = SMC_FIRST_CONTACT;
627874 int rc = 0;
628875
629
- mutex_lock(&smc_create_lgr_pending);
630
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
631
- NULL, ismdev, aclc->gid);
632
- if (local_contact < 0)
633
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
876
+ ini->is_smcd = true;
877
+ ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
878
+
879
+ if (aclc->hdr.version == SMC_V2) {
880
+ struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
881
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
882
+
883
+ rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
884
+ if (rc)
885
+ return rc;
886
+ }
887
+ ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
888
+
889
+ /* there is only one lgr role for SMC-D; use server lock */
890
+ mutex_lock(&smc_server_lgr_pending);
891
+ rc = smc_conn_create(smc, ini);
892
+ if (rc) {
893
+ mutex_unlock(&smc_server_lgr_pending);
894
+ return rc;
895
+ }
634896
635897 /* Create send and receive buffers */
636
- if (smc_buf_create(smc, true))
637
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
898
+ rc = smc_buf_create(smc, true);
899
+ if (rc) {
900
+ rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
901
+ goto connect_abort;
902
+ }
638903
639904 smc_conn_save_peer_info(smc, aclc);
640905 smc_close_init(smc);
641906 smc_rx_init(smc);
642907 smc_tx_init(smc);
643908
644
- rc = smc_clc_send_confirm(smc);
909
+ rc = smc_clc_send_confirm(smc, ini->first_contact_local,
910
+ aclc->hdr.version);
645911 if (rc)
646
- return smc_connect_abort(smc, rc, local_contact);
647
- mutex_unlock(&smc_create_lgr_pending);
912
+ goto connect_abort;
913
+ mutex_unlock(&smc_server_lgr_pending);
648914
649915 smc_copy_sock_settings_to_clc(smc);
916
+ smc->connect_nonblock = 0;
650917 if (smc->sk.sk_state == SMC_INIT)
651918 smc->sk.sk_state = SMC_ACTIVE;
919
+
920
+ return 0;
921
+connect_abort:
922
+ smc_connect_abort(smc, ini->first_contact_local);
923
+ mutex_unlock(&smc_server_lgr_pending);
924
+ smc->connect_nonblock = 0;
925
+
926
+ return rc;
927
+}
928
+
929
+/* check if received accept type and version matches a proposed one */
930
+static int smc_connect_check_aclc(struct smc_init_info *ini,
931
+ struct smc_clc_msg_accept_confirm *aclc)
932
+{
933
+ if ((aclc->hdr.typev1 == SMC_TYPE_R &&
934
+ !smcr_indicated(ini->smc_type_v1)) ||
935
+ (aclc->hdr.typev1 == SMC_TYPE_D &&
936
+ ((!smcd_indicated(ini->smc_type_v1) &&
937
+ !smcd_indicated(ini->smc_type_v2)) ||
938
+ (aclc->hdr.version == SMC_V1 &&
939
+ !smcd_indicated(ini->smc_type_v1)) ||
940
+ (aclc->hdr.version == SMC_V2 &&
941
+ !smcd_indicated(ini->smc_type_v2)))))
942
+ return SMC_CLC_DECL_MODEUNSUPP;
652943
653944 return 0;
654945 }
....@@ -656,17 +947,12 @@
656947 /* perform steps before actually connecting */
657948 static int __smc_connect(struct smc_sock *smc)
658949 {
659
- bool ism_supported = false, rdma_supported = false;
660
- struct smc_clc_msg_accept_confirm aclc;
661
- struct smc_ib_device *ibdev;
662
- struct smcd_dev *ismdev;
663
- u8 gid[SMC_GID_SIZE];
664
- unsigned short vlan;
665
- int smc_type;
950
+ u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
951
+ struct smc_clc_msg_accept_confirm_v2 *aclc2;
952
+ struct smc_clc_msg_accept_confirm *aclc;
953
+ struct smc_init_info *ini = NULL;
954
+ u8 *buf = NULL;
666955 int rc = 0;
667
- u8 ibport;
668
-
669
- sock_hold(&smc->sk); /* sock put in passive closing */
670956
671957 if (smc->use_fallback)
672958 return smc_connect_fallback(smc, smc->fallback_rsn);
....@@ -675,74 +961,107 @@
675961 if (!tcp_sk(smc->clcsock->sk)->syn_smc)
676962 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
677963
678
- /* IPSec connections opt out of SMC-R optimizations */
964
+ /* IPSec connections opt out of SMC optimizations */
679965 if (using_ipsec(smc))
680
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
966
+ return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
967
+ version);
681968
682
- /* check for VLAN ID */
683
- if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
684
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
969
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
970
+ if (!ini)
971
+ return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
972
+ version);
685973
686
- /* check if there is an ism device available */
687
- if (!smc_check_ism(smc, &ismdev) &&
688
- !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
689
- /* ISM is supported for this connection */
690
- ism_supported = true;
691
- smc_type = SMC_TYPE_D;
974
+ ini->smcd_version = SMC_V1;
975
+ ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0;
976
+ ini->smc_type_v1 = SMC_TYPE_B;
977
+ ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N;
978
+
979
+ /* get vlan id from IP device */
980
+ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
981
+ ini->smcd_version &= ~SMC_V1;
982
+ ini->smc_type_v1 = SMC_TYPE_N;
983
+ if (!ini->smcd_version) {
984
+ rc = SMC_CLC_DECL_GETVLANERR;
985
+ goto fallback;
986
+ }
692987 }
693988
694
- /* check if there is a rdma device available */
695
- if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
696
- /* RDMA is supported for this connection */
697
- rdma_supported = true;
698
- if (ism_supported)
699
- smc_type = SMC_TYPE_B; /* both */
700
- else
701
- smc_type = SMC_TYPE_R; /* only RDMA */
702
- }
989
+ rc = smc_find_proposal_devices(smc, ini);
990
+ if (rc)
991
+ goto fallback;
703992
704
- /* if neither ISM nor RDMA are supported, fallback */
705
- if (!rdma_supported && !ism_supported)
706
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
993
+ buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
994
+ if (!buf) {
995
+ rc = SMC_CLC_DECL_MEM;
996
+ goto fallback;
997
+ }
998
+ aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
999
+ aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
7071000
7081001 /* perform CLC handshake */
709
- rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
710
- if (rc) {
711
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
712
- return smc_connect_decline_fallback(smc, rc);
713
- }
1002
+ rc = smc_connect_clc(smc, aclc2, ini);
1003
+ if (rc)
1004
+ goto vlan_cleanup;
1005
+
1006
+ /* check if smc modes and versions of CLC proposal and accept match */
1007
+ rc = smc_connect_check_aclc(ini, aclc);
1008
+ version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
1009
+ ini->smcd_version = version;
1010
+ if (rc)
1011
+ goto vlan_cleanup;
7141012
7151013 /* depending on previous steps, connect using rdma or ism */
716
- if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
717
- rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
718
- else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
719
- rc = smc_connect_ism(smc, &aclc, ismdev);
720
- else
721
- rc = SMC_CLC_DECL_MODEUNSUPP;
722
- if (rc) {
723
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
724
- return smc_connect_decline_fallback(smc, rc);
725
- }
1014
+ if (aclc->hdr.typev1 == SMC_TYPE_R)
1015
+ rc = smc_connect_rdma(smc, aclc, ini);
1016
+ else if (aclc->hdr.typev1 == SMC_TYPE_D)
1017
+ rc = smc_connect_ism(smc, aclc, ini);
1018
+ if (rc)
1019
+ goto vlan_cleanup;
7261020
727
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
1021
+ smc_connect_ism_vlan_cleanup(smc, ini);
1022
+ kfree(buf);
1023
+ kfree(ini);
7281024 return 0;
1025
+
1026
+vlan_cleanup:
1027
+ smc_connect_ism_vlan_cleanup(smc, ini);
1028
+ kfree(buf);
1029
+fallback:
1030
+ kfree(ini);
1031
+ return smc_connect_decline_fallback(smc, rc, version);
7291032 }
7301033
7311034 static void smc_connect_work(struct work_struct *work)
7321035 {
7331036 struct smc_sock *smc = container_of(work, struct smc_sock,
7341037 connect_work);
735
- int rc;
1038
+ long timeo = smc->sk.sk_sndtimeo;
1039
+ int rc = 0;
7361040
737
- lock_sock(&smc->sk);
738
- rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
739
- smc->connect_info->alen, smc->connect_info->flags);
1041
+ if (!timeo)
1042
+ timeo = MAX_SCHEDULE_TIMEOUT;
1043
+ lock_sock(smc->clcsock->sk);
7401044 if (smc->clcsock->sk->sk_err) {
7411045 smc->sk.sk_err = smc->clcsock->sk->sk_err;
742
- goto out;
1046
+ } else if ((1 << smc->clcsock->sk->sk_state) &
1047
+ (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
1048
+ rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
1049
+ if ((rc == -EPIPE) &&
1050
+ ((1 << smc->clcsock->sk->sk_state) &
1051
+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
1052
+ rc = 0;
7431053 }
744
- if (rc < 0) {
745
- smc->sk.sk_err = -rc;
1054
+ release_sock(smc->clcsock->sk);
1055
+ lock_sock(&smc->sk);
1056
+ if (rc != 0 || smc->sk.sk_err) {
1057
+ smc->sk.sk_state = SMC_CLOSED;
1058
+ if (rc == -EPIPE || rc == -EAGAIN)
1059
+ smc->sk.sk_err = EPIPE;
1060
+ else if (rc == -ECONNREFUSED)
1061
+ smc->sk.sk_err = ECONNREFUSED;
1062
+ else if (signal_pending(current))
1063
+ smc->sk.sk_err = -sock_intr_errno(timeo);
1064
+ sock_put(&smc->sk); /* passive closing */
7461065 goto out;
7471066 }
7481067
....@@ -751,12 +1070,14 @@
7511070 smc->sk.sk_err = -rc;
7521071
7531072 out:
754
- if (smc->sk.sk_err)
755
- smc->sk.sk_state_change(&smc->sk);
756
- else
757
- smc->sk.sk_write_space(&smc->sk);
758
- kfree(smc->connect_info);
759
- smc->connect_info = NULL;
1073
+ if (!sock_flag(&smc->sk, SOCK_DEAD)) {
1074
+ if (smc->sk.sk_err) {
1075
+ smc->sk.sk_state_change(&smc->sk);
1076
+ } else { /* allow polling before and after fallback decision */
1077
+ smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
1078
+ smc->sk.sk_write_space(&smc->sk);
1079
+ }
1080
+ }
7601081 release_sock(&smc->sk);
7611082 }
7621083
....@@ -789,26 +1110,22 @@
7891110
7901111 smc_copy_sock_settings_to_clc(smc);
7911112 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1113
+ if (smc->connect_nonblock) {
1114
+ rc = -EALREADY;
1115
+ goto out;
1116
+ }
1117
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
1118
+ if (rc && rc != -EINPROGRESS)
1119
+ goto out;
1120
+
1121
+ if (smc->use_fallback)
1122
+ goto out;
1123
+ sock_hold(&smc->sk); /* sock put in passive closing */
7921124 if (flags & O_NONBLOCK) {
793
- if (smc->connect_info) {
794
- rc = -EALREADY;
795
- goto out;
796
- }
797
- smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
798
- if (!smc->connect_info) {
799
- rc = -ENOMEM;
800
- goto out;
801
- }
802
- smc->connect_info->alen = alen;
803
- smc->connect_info->flags = flags ^ O_NONBLOCK;
804
- memcpy(&smc->connect_info->addr, addr, alen);
805
- schedule_work(&smc->connect_work);
1125
+ if (queue_work(smc_hs_wq, &smc->connect_work))
1126
+ smc->connect_nonblock = 1;
8061127 rc = -EINPROGRESS;
8071128 } else {
808
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
809
- if (rc)
810
- goto out;
811
-
8121129 rc = __smc_connect(smc);
8131130 if (rc < 0)
8141131 goto out;
....@@ -842,10 +1159,10 @@
8421159
8431160 mutex_lock(&lsmc->clcsock_release_lock);
8441161 if (lsmc->clcsock)
845
- rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
1162
+ rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
8461163 mutex_unlock(&lsmc->clcsock_release_lock);
8471164 lock_sock(lsk);
848
- if (rc < 0)
1165
+ if (rc < 0 && rc != -EAGAIN)
8491166 lsk->sk_err = -rc;
8501167 if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
8511168 new_sk->sk_prot->unhash(new_sk);
....@@ -858,6 +1175,10 @@
8581175 goto out;
8591176 }
8601177
1178
+ /* new clcsock has inherited the smc listen-specific sk_data_ready
1179
+ * function; switch it back to the original sk_data_ready function
1180
+ */
1181
+ new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
8611182 (*new_smc)->clcsock = new_clcsock;
8621183 out:
8631184 return rc;
....@@ -911,8 +1232,13 @@
9111232 sock_put(new_sk); /* final */
9121233 continue;
9131234 }
914
- if (new_sock)
1235
+ if (new_sock) {
9151236 sock_graft(new_sk, new_sock);
1237
+ if (isk->use_fallback) {
1238
+ smc_sk(new_sk)->clcsock->file = new_sock->file;
1239
+ isk->clcsock->file->private_data = isk->clcsock;
1240
+ }
1241
+ }
9161242 return new_sk;
9171243 }
9181244 return NULL;
....@@ -923,45 +1249,24 @@
9231249 {
9241250 struct smc_sock *smc = smc_sk(sk);
9251251
1252
+ sock_hold(sk); /* sock_put below */
9261253 lock_sock(sk);
9271254 if (!sk->sk_lingertime)
9281255 /* wait for peer closing */
9291256 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
930
- if (!smc->use_fallback) {
931
- smc_close_active(smc);
932
- sock_set_flag(sk, SOCK_DEAD);
933
- sk->sk_shutdown |= SHUTDOWN_MASK;
934
- }
935
- sk->sk_prot->unhash(sk);
936
- if (smc->clcsock) {
937
- struct socket *tcp;
938
-
939
- tcp = smc->clcsock;
940
- smc->clcsock = NULL;
941
- sock_release(tcp);
942
- }
943
- if (smc->use_fallback) {
944
- sock_put(sk); /* passive closing */
945
- sk->sk_state = SMC_CLOSED;
946
- } else {
947
- if (sk->sk_state == SMC_CLOSED)
948
- smc_conn_free(&smc->conn);
949
- }
1257
+ __smc_release(smc);
9501258 release_sock(sk);
1259
+ sock_put(sk); /* sock_hold above */
9511260 sock_put(sk); /* final sock_put */
9521261 }
9531262
954
-static int smc_serv_conf_first_link(struct smc_sock *smc)
1263
+static int smcr_serv_conf_first_link(struct smc_sock *smc)
9551264 {
956
- struct net *net = sock_net(smc->clcsock->sk);
957
- struct smc_link_group *lgr = smc->conn.lgr;
958
- struct smc_link *link;
959
- int rest;
1265
+ struct smc_link *link = smc->conn.lnk;
1266
+ struct smc_llc_qentry *qentry;
9601267 int rc;
9611268
962
- link = &lgr->lnk[SMC_SINGLE_LINK];
963
-
964
- if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
1269
+ if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
9651270 return SMC_CLC_DECL_ERR_REGRMB;
9661271
9671272 /* send CONFIRM LINK request to client over the RoCE fabric */
....@@ -970,40 +1275,29 @@
9701275 return SMC_CLC_DECL_TIMEOUT_CL;
9711276
9721277 /* receive CONFIRM LINK response from client over the RoCE fabric */
973
- rest = wait_for_completion_interruptible_timeout(
974
- &link->llc_confirm_resp,
975
- SMC_LLC_WAIT_FIRST_TIME);
976
- if (rest <= 0) {
1278
+ qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
1279
+ SMC_LLC_CONFIRM_LINK);
1280
+ if (!qentry) {
9771281 struct smc_clc_msg_decline dclc;
9781282
9791283 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
980
- SMC_CLC_DECLINE);
981
- return rc;
1284
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1285
+ return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
9821286 }
983
-
984
- if (link->llc_confirm_resp_rc)
1287
+ smc_llc_save_peer_uid(qentry);
1288
+ rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
1289
+ smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
1290
+ if (rc)
9851291 return SMC_CLC_DECL_RMBE_EC;
9861292
987
- /* send ADD LINK request to client over the RoCE fabric */
988
- rc = smc_llc_send_add_link(link,
989
- link->smcibdev->mac[link->ibport - 1],
990
- link->gid, SMC_LLC_REQ);
991
- if (rc < 0)
992
- return SMC_CLC_DECL_TIMEOUT_AL;
1293
+ /* confirm_rkey is implicit on 1st contact */
1294
+ smc->conn.rmb_desc->is_conf_rkey = true;
9931295
994
- /* receive ADD LINK response from client over the RoCE fabric */
995
- rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
996
- SMC_LLC_WAIT_TIME);
997
- if (rest <= 0) {
998
- struct smc_clc_msg_decline dclc;
1296
+ smc_llc_link_active(link);
1297
+ smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
9991298
1000
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1001
- SMC_CLC_DECLINE);
1002
- return rc;
1003
- }
1004
-
1005
- smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
1006
-
1299
+ /* initial contact - try to establish second link */
1300
+ smc_llc_srv_add_link(link);
10071301 return 0;
10081302 }
10091303
....@@ -1013,13 +1307,13 @@
10131307 struct smc_sock *lsmc = new_smc->listen_smc;
10141308 struct sock *newsmcsk = &new_smc->sk;
10151309
1016
- lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
10171310 if (lsmc->sk.sk_state == SMC_LISTEN) {
1311
+ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
10181312 smc_accept_enqueue(&lsmc->sk, newsmcsk);
1313
+ release_sock(&lsmc->sk);
10191314 } else { /* no longer listening */
10201315 smc_close_non_accepted(newsmcsk);
10211316 }
1022
- release_sock(&lsmc->sk);
10231317
10241318 /* Wake up accept */
10251319 lsmc->sk.sk_data_ready(&lsmc->sk);
....@@ -1031,7 +1325,6 @@
10311325 {
10321326 struct sock *newsmcsk = &new_smc->sk;
10331327
1034
- sk_refcnt_debug_inc(newsmcsk);
10351328 if (newsmcsk->sk_state == SMC_INIT)
10361329 newsmcsk->sk_state = SMC_ACTIVE;
10371330
....@@ -1046,27 +1339,27 @@
10461339 if (newsmcsk->sk_state == SMC_INIT)
10471340 sock_put(&new_smc->sk); /* passive closing */
10481341 newsmcsk->sk_state = SMC_CLOSED;
1049
- smc_conn_free(&new_smc->conn);
10501342
10511343 smc_listen_out(new_smc);
10521344 }
10531345
10541346 /* listen worker: decline and fall back if possible */
10551347 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1056
- int local_contact)
1348
+ int local_first, u8 version)
10571349 {
10581350 /* RDMA setup failed, switch back to TCP */
1059
- if (local_contact == SMC_FIRST_CONTACT)
1060
- smc_lgr_forget(new_smc->conn.lgr);
1351
+ if (local_first)
1352
+ smc_lgr_cleanup_early(&new_smc->conn);
1353
+ else
1354
+ smc_conn_free(&new_smc->conn);
10611355 if (reason_code < 0) { /* error, no fallback possible */
10621356 smc_listen_out_err(new_smc);
10631357 return;
10641358 }
1065
- smc_conn_free(&new_smc->conn);
1066
- new_smc->use_fallback = true;
1359
+ smc_switch_to_fallback(new_smc);
10671360 new_smc->fallback_rsn = reason_code;
10681361 if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1069
- if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1362
+ if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
10701363 smc_listen_out_err(new_smc);
10711364 return;
10721365 }
....@@ -1074,34 +1367,73 @@
10741367 smc_listen_out_connected(new_smc);
10751368 }
10761369
1370
+/* listen worker: version checking */
1371
+static int smc_listen_v2_check(struct smc_sock *new_smc,
1372
+ struct smc_clc_msg_proposal *pclc,
1373
+ struct smc_init_info *ini)
1374
+{
1375
+ struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
1376
+ struct smc_clc_v2_extension *pclc_v2_ext;
1377
+
1378
+ ini->smc_type_v1 = pclc->hdr.typev1;
1379
+ ini->smc_type_v2 = pclc->hdr.typev2;
1380
+ ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
1381
+ if (pclc->hdr.version > SMC_V1)
1382
+ ini->smcd_version |=
1383
+ ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
1384
+ if (!smc_ism_v2_capable) {
1385
+ ini->smcd_version &= ~SMC_V2;
1386
+ goto out;
1387
+ }
1388
+ pclc_v2_ext = smc_get_clc_v2_ext(pclc);
1389
+ if (!pclc_v2_ext) {
1390
+ ini->smcd_version &= ~SMC_V2;
1391
+ goto out;
1392
+ }
1393
+ pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
1394
+ if (!pclc_smcd_v2_ext)
1395
+ ini->smcd_version &= ~SMC_V2;
1396
+
1397
+out:
1398
+ if (!ini->smcd_version) {
1399
+ if (pclc->hdr.typev1 == SMC_TYPE_B ||
1400
+ pclc->hdr.typev2 == SMC_TYPE_B)
1401
+ return SMC_CLC_DECL_NOSMCDEV;
1402
+ if (pclc->hdr.typev1 == SMC_TYPE_D ||
1403
+ pclc->hdr.typev2 == SMC_TYPE_D)
1404
+ return SMC_CLC_DECL_NOSMCDDEV;
1405
+ return SMC_CLC_DECL_NOSMCRDEV;
1406
+ }
1407
+
1408
+ return 0;
1409
+}
1410
+
10771411 /* listen worker: check prefixes */
1078
-static int smc_listen_rdma_check(struct smc_sock *new_smc,
1412
+static int smc_listen_prfx_check(struct smc_sock *new_smc,
10791413 struct smc_clc_msg_proposal *pclc)
10801414 {
10811415 struct smc_clc_msg_proposal_prefix *pclc_prfx;
10821416 struct socket *newclcsock = new_smc->clcsock;
10831417
1418
+ if (pclc->hdr.typev1 == SMC_TYPE_N)
1419
+ return 0;
10841420 pclc_prfx = smc_clc_proposal_get_prefix(pclc);
10851421 if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1086
- return SMC_CLC_DECL_CNFERR;
1422
+ return SMC_CLC_DECL_DIFFPREFIX;
10871423
10881424 return 0;
10891425 }
10901426
10911427 /* listen worker: initialize connection and buffers */
10921428 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1093
- struct smc_clc_msg_proposal *pclc,
1094
- struct smc_ib_device *ibdev, u8 ibport,
1095
- int *local_contact)
1429
+ struct smc_init_info *ini)
10961430 {
1431
+ int rc;
1432
+
10971433 /* allocate connection / link group */
1098
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
1099
- &pclc->lcl, NULL, 0);
1100
- if (*local_contact < 0) {
1101
- if (*local_contact == -ENOMEM)
1102
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
1103
- return SMC_CLC_DECL_INTERR; /* other error */
1104
- }
1434
+ rc = smc_conn_create(new_smc, ini);
1435
+ if (rc)
1436
+ return rc;
11051437
11061438 /* create send buffer and rmb */
11071439 if (smc_buf_create(new_smc, false))
....@@ -1112,109 +1444,266 @@
11121444
11131445 /* listen worker: initialize connection and buffers for SMC-D */
11141446 static int smc_listen_ism_init(struct smc_sock *new_smc,
1115
- struct smc_clc_msg_proposal *pclc,
1116
- struct smcd_dev *ismdev,
1117
- int *local_contact)
1447
+ struct smc_init_info *ini)
11181448 {
1119
- struct smc_clc_msg_smcd *pclc_smcd;
1449
+ int rc;
11201450
1121
- pclc_smcd = smc_get_clc_msg_smcd(pclc);
1122
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
1123
- ismdev, pclc_smcd->gid);
1124
- if (*local_contact < 0) {
1125
- if (*local_contact == -ENOMEM)
1126
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
1127
- return SMC_CLC_DECL_INTERR; /* other error */
1128
- }
1129
-
1130
- /* Check if peer can be reached via ISM device */
1131
- if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1132
- new_smc->conn.lgr->vlan_id,
1133
- new_smc->conn.lgr->smcd)) {
1134
- if (*local_contact == SMC_FIRST_CONTACT)
1135
- smc_lgr_forget(new_smc->conn.lgr);
1136
- smc_conn_free(&new_smc->conn);
1137
- return SMC_CLC_DECL_CNFERR;
1138
- }
1451
+ rc = smc_conn_create(new_smc, ini);
1452
+ if (rc)
1453
+ return rc;
11391454
11401455 /* Create send and receive buffers */
1141
- if (smc_buf_create(new_smc, true)) {
1142
- if (*local_contact == SMC_FIRST_CONTACT)
1143
- smc_lgr_forget(new_smc->conn.lgr);
1144
- smc_conn_free(&new_smc->conn);
1145
- return SMC_CLC_DECL_MEM;
1456
+ rc = smc_buf_create(new_smc, true);
1457
+ if (rc) {
1458
+ if (ini->first_contact_local)
1459
+ smc_lgr_cleanup_early(&new_smc->conn);
1460
+ else
1461
+ smc_conn_free(&new_smc->conn);
1462
+ return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
1463
+ SMC_CLC_DECL_MEM;
11461464 }
11471465
11481466 return 0;
11491467 }
11501468
1151
-/* listen worker: register buffers */
1152
-static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
1469
+static bool smc_is_already_selected(struct smcd_dev *smcd,
1470
+ struct smc_init_info *ini,
1471
+ int matches)
11531472 {
1154
- struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1473
+ int i;
11551474
1156
- if (local_contact != SMC_FIRST_CONTACT) {
1157
- if (!new_smc->conn.rmb_desc->reused) {
1158
- if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
1159
- return SMC_CLC_DECL_ERR_REGRMB;
1475
+ for (i = 0; i < matches; i++)
1476
+ if (smcd == ini->ism_dev[i])
1477
+ return true;
1478
+
1479
+ return false;
1480
+}
1481
+
1482
+/* check for ISM devices matching proposed ISM devices */
1483
+static void smc_check_ism_v2_match(struct smc_init_info *ini,
1484
+ u16 proposed_chid, u64 proposed_gid,
1485
+ unsigned int *matches)
1486
+{
1487
+ struct smcd_dev *smcd;
1488
+
1489
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1490
+ if (smcd->going_away)
1491
+ continue;
1492
+ if (smc_is_already_selected(smcd, ini, *matches))
1493
+ continue;
1494
+ if (smc_ism_get_chid(smcd) == proposed_chid &&
1495
+ !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
1496
+ ini->ism_peer_gid[*matches] = proposed_gid;
1497
+ ini->ism_dev[*matches] = smcd;
1498
+ (*matches)++;
1499
+ break;
11601500 }
1501
+ }
1502
+}
1503
+
1504
+static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
1505
+ struct smc_clc_msg_proposal *pclc,
1506
+ struct smc_init_info *ini)
1507
+{
1508
+ struct smc_clc_smcd_v2_extension *smcd_v2_ext;
1509
+ struct smc_clc_v2_extension *smc_v2_ext;
1510
+ struct smc_clc_msg_smcd *pclc_smcd;
1511
+ unsigned int matches = 0;
1512
+ u8 smcd_version;
1513
+ u8 *eid = NULL;
1514
+ int i;
1515
+
1516
+ if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
1517
+ goto not_found;
1518
+
1519
+ pclc_smcd = smc_get_clc_msg_smcd(pclc);
1520
+ smc_v2_ext = smc_get_clc_v2_ext(pclc);
1521
+ smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
1522
+ if (!smcd_v2_ext ||
1523
+ !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */
1524
+ goto not_found;
1525
+
1526
+ mutex_lock(&smcd_dev_list.mutex);
1527
+ if (pclc_smcd->ism.chid)
1528
+ /* check for ISM device matching proposed native ISM device */
1529
+ smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
1530
+ ntohll(pclc_smcd->ism.gid), &matches);
1531
+ for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
1532
+ /* check for ISM devices matching proposed non-native ISM
1533
+ * devices
1534
+ */
1535
+ smc_check_ism_v2_match(ini,
1536
+ ntohs(smcd_v2_ext->gidchid[i - 1].chid),
1537
+ ntohll(smcd_v2_ext->gidchid[i - 1].gid),
1538
+ &matches);
1539
+ }
1540
+ mutex_unlock(&smcd_dev_list.mutex);
1541
+
1542
+ if (ini->ism_dev[0]) {
1543
+ smc_ism_get_system_eid(ini->ism_dev[0], &eid);
1544
+ if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN))
1545
+ goto not_found;
1546
+ } else {
1547
+ goto not_found;
1548
+ }
1549
+
1550
+ /* separate - outside the smcd_dev_list.lock */
1551
+ smcd_version = ini->smcd_version;
1552
+ for (i = 0; i < matches; i++) {
1553
+ ini->smcd_version = SMC_V2;
1554
+ ini->is_smcd = true;
1555
+ ini->ism_selected = i;
1556
+ if (smc_listen_ism_init(new_smc, ini))
1557
+ /* try next active ISM device */
1558
+ continue;
1559
+ return; /* matching and usable V2 ISM device found */
1560
+ }
1561
+ /* no V2 ISM device could be initialized */
1562
+ ini->smcd_version = smcd_version; /* restore original value */
1563
+
1564
+not_found:
1565
+ ini->smcd_version &= ~SMC_V2;
1566
+ ini->ism_dev[0] = NULL;
1567
+ ini->is_smcd = false;
1568
+}
1569
+
1570
+static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
1571
+ struct smc_clc_msg_proposal *pclc,
1572
+ struct smc_init_info *ini)
1573
+{
1574
+ struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
1575
+
1576
+ /* check if ISM V1 is available */
1577
+ if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
1578
+ goto not_found;
1579
+ ini->is_smcd = true; /* prepare ISM check */
1580
+ ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
1581
+ if (smc_find_ism_device(new_smc, ini))
1582
+ goto not_found;
1583
+ ini->ism_selected = 0;
1584
+ if (!smc_listen_ism_init(new_smc, ini))
1585
+ return; /* V1 ISM device found */
1586
+
1587
+not_found:
1588
+ ini->ism_dev[0] = NULL;
1589
+ ini->is_smcd = false;
1590
+}
1591
+
1592
+/* listen worker: register buffers */
1593
+static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
1594
+{
1595
+ struct smc_connection *conn = &new_smc->conn;
1596
+
1597
+ if (!local_first) {
1598
+ if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
1599
+ return SMC_CLC_DECL_ERR_REGRMB;
11611600 }
11621601 smc_rmb_sync_sg_for_device(&new_smc->conn);
11631602
11641603 return 0;
11651604 }
11661605
1606
+static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
1607
+ struct smc_clc_msg_proposal *pclc,
1608
+ struct smc_init_info *ini)
1609
+{
1610
+ int rc;
1611
+
1612
+ if (!smcr_indicated(ini->smc_type_v1))
1613
+ return SMC_CLC_DECL_NOSMCDEV;
1614
+
1615
+ /* prepare RDMA check */
1616
+ ini->ib_lcl = &pclc->lcl;
1617
+ rc = smc_find_rdma_device(new_smc, ini);
1618
+ if (rc) {
1619
+ /* no RDMA device found */
1620
+ if (ini->smc_type_v1 == SMC_TYPE_B)
1621
+ /* neither ISM nor RDMA device found */
1622
+ rc = SMC_CLC_DECL_NOSMCDEV;
1623
+ return rc;
1624
+ }
1625
+ rc = smc_listen_rdma_init(new_smc, ini);
1626
+ if (rc)
1627
+ return rc;
1628
+ return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
1629
+}
1630
+
1631
+/* determine the local device matching to proposal */
1632
+static int smc_listen_find_device(struct smc_sock *new_smc,
1633
+ struct smc_clc_msg_proposal *pclc,
1634
+ struct smc_init_info *ini)
1635
+{
1636
+ int rc;
1637
+
1638
+ /* check for ISM device matching V2 proposed device */
1639
+ smc_find_ism_v2_device_serv(new_smc, pclc, ini);
1640
+ if (ini->ism_dev[0])
1641
+ return 0;
1642
+
1643
+ if (!(ini->smcd_version & SMC_V1))
1644
+ return SMC_CLC_DECL_NOSMCDEV;
1645
+
1646
+ /* check for matching IP prefix and subnet length */
1647
+ rc = smc_listen_prfx_check(new_smc, pclc);
1648
+ if (rc)
1649
+ return rc;
1650
+
1651
+ /* get vlan id from IP device */
1652
+ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
1653
+ return SMC_CLC_DECL_GETVLANERR;
1654
+
1655
+ /* check for ISM device matching V1 proposed device */
1656
+ smc_find_ism_v1_device_serv(new_smc, pclc, ini);
1657
+ if (ini->ism_dev[0])
1658
+ return 0;
1659
+
1660
+ if (pclc->hdr.typev1 == SMC_TYPE_D)
1661
+ return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */
1662
+
1663
+ /* check if RDMA is available */
1664
+ return smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
1665
+}
1666
+
11671667 /* listen worker: finish RDMA setup */
11681668 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
11691669 struct smc_clc_msg_accept_confirm *cclc,
1170
- int local_contact)
1670
+ bool local_first)
11711671 {
1172
- struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1672
+ struct smc_link *link = new_smc->conn.lnk;
11731673 int reason_code = 0;
11741674
1175
- if (local_contact == SMC_FIRST_CONTACT)
1675
+ if (local_first)
11761676 smc_link_save_peer_info(link, cclc);
11771677
1178
- if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
1179
- reason_code = SMC_CLC_DECL_ERR_RTOK;
1180
- goto decline;
1181
- }
1678
+ if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
1679
+ return SMC_CLC_DECL_ERR_RTOK;
11821680
1183
- if (local_contact == SMC_FIRST_CONTACT) {
1184
- if (smc_ib_ready_link(link)) {
1185
- reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1186
- goto decline;
1187
- }
1681
+ if (local_first) {
1682
+ if (smc_ib_ready_link(link))
1683
+ return SMC_CLC_DECL_ERR_RDYLNK;
11881684 /* QP confirmation over RoCE fabric */
1189
- reason_code = smc_serv_conf_first_link(new_smc);
1190
- if (reason_code)
1191
- goto decline;
1685
+ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
1686
+ reason_code = smcr_serv_conf_first_link(new_smc);
1687
+ smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
11921688 }
1193
- return 0;
1194
-
1195
-decline:
1196
- mutex_unlock(&smc_create_lgr_pending);
1197
- smc_listen_decline(new_smc, reason_code, local_contact);
11981689 return reason_code;
11991690 }
12001691
1201
-/* setup for RDMA connection of server */
1692
+/* setup for connection of server */
12021693 static void smc_listen_work(struct work_struct *work)
12031694 {
12041695 struct smc_sock *new_smc = container_of(work, struct smc_sock,
12051696 smc_listen_work);
1697
+ u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
12061698 struct socket *newclcsock = new_smc->clcsock;
1207
- struct smc_clc_msg_accept_confirm cclc;
1699
+ struct smc_clc_msg_accept_confirm *cclc;
1700
+ struct smc_clc_msg_proposal_area *buf;
12081701 struct smc_clc_msg_proposal *pclc;
1209
- struct smc_ib_device *ibdev;
1210
- bool ism_supported = false;
1211
- struct smcd_dev *ismdev;
1212
- u8 buf[SMC_CLC_MAX_LEN];
1213
- int local_contact = 0;
1214
- unsigned short vlan;
1215
- int reason_code = 0;
1702
+ struct smc_init_info *ini = NULL;
12161703 int rc = 0;
1217
- u8 ibport;
1704
+
1705
+ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
1706
+ return smc_listen_out_err(new_smc);
12181707
12191708 if (new_smc->use_fallback) {
12201709 smc_listen_out_connected(new_smc);
....@@ -1223,7 +1712,7 @@
12231712
12241713 /* check if peer is smc capable */
12251714 if (!tcp_sk(newclcsock->sk)->syn_smc) {
1226
- new_smc->use_fallback = true;
1715
+ smc_switch_to_fallback(new_smc);
12271716 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
12281717 smc_listen_out_connected(new_smc);
12291718 return;
....@@ -1232,73 +1721,86 @@
12321721 /* do inband token exchange -
12331722 * wait for and receive SMC Proposal CLC message
12341723 */
1235
- pclc = (struct smc_clc_msg_proposal *)&buf;
1236
- reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
1237
- SMC_CLC_PROPOSAL);
1238
- if (reason_code) {
1239
- smc_listen_decline(new_smc, reason_code, 0);
1240
- return;
1724
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL);
1725
+ if (!buf) {
1726
+ rc = SMC_CLC_DECL_MEM;
1727
+ goto out_decl;
12411728 }
1729
+ pclc = (struct smc_clc_msg_proposal *)buf;
1730
+ rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
1731
+ SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
1732
+ if (rc)
1733
+ goto out_decl;
1734
+ version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
12421735
1243
- /* IPSec connections opt out of SMC-R optimizations */
1736
+ /* IPSec connections opt out of SMC optimizations */
12441737 if (using_ipsec(new_smc)) {
1245
- smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
1246
- return;
1738
+ rc = SMC_CLC_DECL_IPSEC;
1739
+ goto out_decl;
12471740 }
12481741
1249
- mutex_lock(&smc_create_lgr_pending);
1742
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
1743
+ if (!ini) {
1744
+ rc = SMC_CLC_DECL_MEM;
1745
+ goto out_decl;
1746
+ }
1747
+
1748
+ /* initial version checking */
1749
+ rc = smc_listen_v2_check(new_smc, pclc, ini);
1750
+ if (rc)
1751
+ goto out_decl;
1752
+
1753
+ mutex_lock(&smc_server_lgr_pending);
12501754 smc_close_init(new_smc);
12511755 smc_rx_init(new_smc);
12521756 smc_tx_init(new_smc);
12531757
1254
- /* check if ISM is available */
1255
- if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
1256
- !smc_check_ism(new_smc, &ismdev) &&
1257
- !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
1258
- ism_supported = true;
1259
- }
1260
-
1261
- /* check if RDMA is available */
1262
- if (!ism_supported &&
1263
- ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
1264
- smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
1265
- smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
1266
- smc_listen_rdma_check(new_smc, pclc) ||
1267
- smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
1268
- &local_contact) ||
1269
- smc_listen_rdma_reg(new_smc, local_contact))) {
1270
- /* SMC not supported, decline */
1271
- mutex_unlock(&smc_create_lgr_pending);
1272
- smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
1273
- local_contact);
1274
- return;
1275
- }
1758
+ /* determine ISM or RoCE device used for connection */
1759
+ rc = smc_listen_find_device(new_smc, pclc, ini);
1760
+ if (rc)
1761
+ goto out_unlock;
12761762
12771763 /* send SMC Accept CLC message */
1278
- rc = smc_clc_send_accept(new_smc, local_contact);
1279
- if (rc) {
1280
- mutex_unlock(&smc_create_lgr_pending);
1281
- smc_listen_decline(new_smc, rc, local_contact);
1282
- return;
1283
- }
1764
+ rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
1765
+ ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1);
1766
+ if (rc)
1767
+ goto out_unlock;
1768
+
1769
+ /* SMC-D does not need this lock any more */
1770
+ if (ini->is_smcd)
1771
+ mutex_unlock(&smc_server_lgr_pending);
12841772
12851773 /* receive SMC Confirm CLC message */
1286
- reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
1287
- SMC_CLC_CONFIRM);
1288
- if (reason_code) {
1289
- mutex_unlock(&smc_create_lgr_pending);
1290
- smc_listen_decline(new_smc, reason_code, local_contact);
1291
- return;
1774
+ memset(buf, 0, sizeof(*buf));
1775
+ cclc = (struct smc_clc_msg_accept_confirm *)buf;
1776
+ rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
1777
+ SMC_CLC_CONFIRM, CLC_WAIT_TIME);
1778
+ if (rc) {
1779
+ if (!ini->is_smcd)
1780
+ goto out_unlock;
1781
+ goto out_decl;
12921782 }
12931783
12941784 /* finish worker */
1295
- if (!ism_supported) {
1296
- if (smc_listen_rdma_finish(new_smc, &cclc, local_contact))
1297
- return;
1785
+ if (!ini->is_smcd) {
1786
+ rc = smc_listen_rdma_finish(new_smc, cclc,
1787
+ ini->first_contact_local);
1788
+ if (rc)
1789
+ goto out_unlock;
1790
+ mutex_unlock(&smc_server_lgr_pending);
12981791 }
1299
- smc_conn_save_peer_info(new_smc, &cclc);
1300
- mutex_unlock(&smc_create_lgr_pending);
1792
+ smc_conn_save_peer_info(new_smc, cclc);
13011793 smc_listen_out_connected(new_smc);
1794
+ goto out_free;
1795
+
1796
+out_unlock:
1797
+ mutex_unlock(&smc_server_lgr_pending);
1798
+out_decl:
1799
+ smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
1800
+ version);
1801
+out_free:
1802
+ kfree(ini);
1803
+ kfree(buf);
13021804 }
13031805
13041806 static void smc_tcp_listen_work(struct work_struct *work)
....@@ -1312,7 +1814,7 @@
13121814 lock_sock(lsk);
13131815 while (lsk->sk_state == SMC_LISTEN) {
13141816 rc = smc_clcsock_accept(lsmc, &new_smc);
1315
- if (rc)
1817
+ if (rc) /* clcsock accept queue empty or error */
13161818 goto out;
13171819 if (!new_smc)
13181820 continue;
....@@ -1326,13 +1828,29 @@
13261828 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
13271829 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
13281830 sock_hold(&new_smc->sk); /* sock_put in passive closing */
1329
- if (!schedule_work(&new_smc->smc_listen_work))
1831
+ if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
13301832 sock_put(&new_smc->sk);
13311833 }
13321834
13331835 out:
13341836 release_sock(lsk);
1335
- sock_put(&lsmc->sk); /* sock_hold in smc_listen */
1837
+ sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
1838
+}
1839
+
1840
+static void smc_clcsock_data_ready(struct sock *listen_clcsock)
1841
+{
1842
+ struct smc_sock *lsmc;
1843
+
1844
+ lsmc = (struct smc_sock *)
1845
+ ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
1846
+ if (!lsmc)
1847
+ return;
1848
+ lsmc->clcsk_data_ready(listen_clcsock);
1849
+ if (lsmc->sk.sk_state == SMC_LISTEN) {
1850
+ sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
1851
+ if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work))
1852
+ sock_put(&lsmc->sk);
1853
+ }
13361854 }
13371855
13381856 static int smc_listen(struct socket *sock, int backlog)
....@@ -1345,7 +1863,8 @@
13451863 lock_sock(sk);
13461864
13471865 rc = -EINVAL;
1348
- if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
1866
+ if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
1867
+ smc->connect_nonblock)
13491868 goto out;
13501869
13511870 rc = 0;
....@@ -1360,16 +1879,21 @@
13601879 if (!smc->use_fallback)
13611880 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
13621881
1882
+ /* save original sk_data_ready function and establish
1883
+ * smc-specific sk_data_ready function
1884
+ */
1885
+ smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
1886
+ smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
1887
+ smc->clcsock->sk->sk_user_data =
1888
+ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
13631889 rc = kernel_listen(smc->clcsock, backlog);
1364
- if (rc)
1890
+ if (rc) {
1891
+ smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
13651892 goto out;
1893
+ }
13661894 sk->sk_max_ack_backlog = backlog;
13671895 sk->sk_ack_backlog = 0;
13681896 sk->sk_state = SMC_LISTEN;
1369
- INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
1370
- sock_hold(sk); /* sock_hold in tcp_listen_worker */
1371
- if (!schedule_work(&smc->tcp_listen_work))
1372
- sock_put(sk);
13731897
13741898 out:
13751899 release_sock(sk);
....@@ -1464,23 +1988,26 @@
14641988 {
14651989 struct sock *sk = sock->sk;
14661990 struct smc_sock *smc;
1467
- int rc = -EPIPE;
1991
+ int rc;
14681992
14691993 smc = smc_sk(sk);
14701994 lock_sock(sk);
1471
- if ((sk->sk_state != SMC_ACTIVE) &&
1472
- (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1473
- (sk->sk_state != SMC_INIT))
1474
- goto out;
14751995
1996
+ /* SMC does not support connect with fastopen */
14761997 if (msg->msg_flags & MSG_FASTOPEN) {
1477
- if (sk->sk_state == SMC_INIT) {
1478
- smc->use_fallback = true;
1998
+ /* not connected yet, fallback */
1999
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2000
+ smc_switch_to_fallback(smc);
14792001 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
14802002 } else {
14812003 rc = -EINVAL;
14822004 goto out;
14832005 }
2006
+ } else if ((sk->sk_state != SMC_ACTIVE) &&
2007
+ (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2008
+ (sk->sk_state != SMC_INIT)) {
2009
+ rc = -EPIPE;
2010
+ goto out;
14842011 }
14852012
14862013 if (smc->use_fallback)
....@@ -1501,6 +2028,11 @@
15012028
15022029 smc = smc_sk(sk);
15032030 lock_sock(sk);
2031
+ if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
2032
+ /* socket was connected before, no more data to read */
2033
+ rc = 0;
2034
+ goto out;
2035
+ }
15042036 if ((sk->sk_state == SMC_INIT) ||
15052037 (sk->sk_state == SMC_LISTEN) ||
15062038 (sk->sk_state == SMC_CLOSED))
....@@ -1540,8 +2072,8 @@
15402072 poll_table *wait)
15412073 {
15422074 struct sock *sk = sock->sk;
1543
- __poll_t mask = 0;
15442075 struct smc_sock *smc;
2076
+ __poll_t mask = 0;
15452077
15462078 if (!sk)
15472079 return EPOLLNVAL;
....@@ -1551,8 +2083,6 @@
15512083 /* delegate to CLC child sock */
15522084 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
15532085 sk->sk_err = smc->clcsock->sk->sk_err;
1554
- if (sk->sk_err)
1555
- mask |= EPOLLERR;
15562086 } else {
15572087 if (sk->sk_state != SMC_CLOSED)
15582088 sock_poll_wait(file, sock, wait);
....@@ -1563,9 +2093,14 @@
15632093 mask |= EPOLLHUP;
15642094 if (sk->sk_state == SMC_LISTEN) {
15652095 /* woken up by sk_data_ready in smc_listen_work() */
1566
- mask = smc_accept_poll(sk);
2096
+ mask |= smc_accept_poll(sk);
2097
+ } else if (smc->use_fallback) { /* as result of connect_work()*/
2098
+ mask |= smc->clcsock->ops->poll(file, smc->clcsock,
2099
+ wait);
2100
+ sk->sk_err = smc->clcsock->sk->sk_err;
15672101 } else {
1568
- if (atomic_read(&smc->conn.sndbuf_space) ||
2102
+ if ((sk->sk_state != SMC_INIT &&
2103
+ atomic_read(&smc->conn.sndbuf_space)) ||
15692104 sk->sk_shutdown & SEND_SHUTDOWN) {
15702105 mask |= EPOLLOUT | EPOLLWRNORM;
15712106 } else {
....@@ -1613,8 +2148,10 @@
16132148 if (smc->use_fallback) {
16142149 rc = kernel_sock_shutdown(smc->clcsock, how);
16152150 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1616
- if (sk->sk_shutdown == SHUTDOWN_MASK)
2151
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
16172152 sk->sk_state = SMC_CLOSED;
2153
+ sock_put(sk);
2154
+ }
16182155 goto out;
16192156 }
16202157 switch (how) {
....@@ -1644,7 +2181,7 @@
16442181 }
16452182
16462183 static int smc_setsockopt(struct socket *sock, int level, int optname,
1647
- char __user *optval, unsigned int optlen)
2184
+ sockptr_t optval, unsigned int optlen)
16482185 {
16492186 struct sock *sk = sock->sk;
16502187 struct smc_sock *smc;
....@@ -1658,51 +2195,53 @@
16582195 /* generic setsockopts reaching us here always apply to the
16592196 * CLC socket
16602197 */
1661
- rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1662
- optval, optlen);
2198
+ if (unlikely(!smc->clcsock->ops->setsockopt))
2199
+ rc = -EOPNOTSUPP;
2200
+ else
2201
+ rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
2202
+ optval, optlen);
16632203 if (smc->clcsock->sk->sk_err) {
16642204 sk->sk_err = smc->clcsock->sk->sk_err;
16652205 sk->sk_error_report(sk);
16662206 }
1667
- if (rc)
1668
- return rc;
16692207
16702208 if (optlen < sizeof(int))
16712209 return -EINVAL;
1672
- if (get_user(val, (int __user *)optval))
2210
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
16732211 return -EFAULT;
16742212
16752213 lock_sock(sk);
2214
+ if (rc || smc->use_fallback)
2215
+ goto out;
16762216 switch (optname) {
16772217 case TCP_FASTOPEN:
16782218 case TCP_FASTOPEN_CONNECT:
16792219 case TCP_FASTOPEN_KEY:
16802220 case TCP_FASTOPEN_NO_COOKIE:
16812221 /* option not supported by SMC */
1682
- if (sk->sk_state == SMC_INIT) {
1683
- smc->use_fallback = true;
2222
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2223
+ smc_switch_to_fallback(smc);
16842224 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
16852225 } else {
1686
- if (!smc->use_fallback)
1687
- rc = -EINVAL;
2226
+ rc = -EINVAL;
16882227 }
16892228 break;
16902229 case TCP_NODELAY:
16912230 if (sk->sk_state != SMC_INIT &&
16922231 sk->sk_state != SMC_LISTEN &&
16932232 sk->sk_state != SMC_CLOSED) {
1694
- if (val && !smc->use_fallback)
1695
- mod_delayed_work(system_wq, &smc->conn.tx_work,
1696
- 0);
2233
+ if (val)
2234
+ mod_delayed_work(smc->conn.lgr->tx_wq,
2235
+ &smc->conn.tx_work, 0);
16972236 }
16982237 break;
16992238 case TCP_CORK:
17002239 if (sk->sk_state != SMC_INIT &&
17012240 sk->sk_state != SMC_LISTEN &&
17022241 sk->sk_state != SMC_CLOSED) {
1703
- if (!val && !smc->use_fallback)
1704
- mod_delayed_work(system_wq, &smc->conn.tx_work,
1705
- 0);
2242
+ if (!val)
2243
+ mod_delayed_work(smc->conn.lgr->tx_wq,
2244
+ &smc->conn.tx_work, 0);
17062245 }
17072246 break;
17082247 case TCP_DEFER_ACCEPT:
....@@ -1711,6 +2250,7 @@
17112250 default:
17122251 break;
17132252 }
2253
+out:
17142254 release_sock(sk);
17152255
17162256 return rc;
....@@ -1723,6 +2263,8 @@
17232263
17242264 smc = smc_sk(sock->sk);
17252265 /* socket options apply to the CLC socket */
2266
+ if (unlikely(!smc->clcsock->ops->getsockopt))
2267
+ return -EOPNOTSUPP;
17262268 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
17272269 optval, optlen);
17282270 }
....@@ -1848,7 +2390,11 @@
18482390
18492391 smc = smc_sk(sk);
18502392 lock_sock(sk);
1851
-
2393
+ if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
2394
+ /* socket was connected before, no more data to read */
2395
+ rc = 0;
2396
+ goto out;
2397
+ }
18522398 if (sk->sk_state == SMC_INIT ||
18532399 sk->sk_state == SMC_LISTEN ||
18542400 sk->sk_state == SMC_CLOSED)
....@@ -1947,30 +2493,71 @@
19472493 .create = smc_create,
19482494 };
19492495
2496
+unsigned int smc_net_id;
2497
+
2498
+static __net_init int smc_net_init(struct net *net)
2499
+{
2500
+ return smc_pnet_net_init(net);
2501
+}
2502
+
2503
+static void __net_exit smc_net_exit(struct net *net)
2504
+{
2505
+ smc_pnet_net_exit(net);
2506
+}
2507
+
2508
+static struct pernet_operations smc_net_ops = {
2509
+ .init = smc_net_init,
2510
+ .exit = smc_net_exit,
2511
+ .id = &smc_net_id,
2512
+ .size = sizeof(struct smc_net),
2513
+};
2514
+
19502515 static int __init smc_init(void)
19512516 {
19522517 int rc;
19532518
1954
- rc = smc_pnet_init();
2519
+ rc = register_pernet_subsys(&smc_net_ops);
19552520 if (rc)
19562521 return rc;
2522
+
2523
+ smc_ism_init();
2524
+ smc_clc_init();
2525
+
2526
+ rc = smc_pnet_init();
2527
+ if (rc)
2528
+ goto out_pernet_subsys;
2529
+
2530
+ rc = -ENOMEM;
2531
+ smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
2532
+ if (!smc_hs_wq)
2533
+ goto out_pnet;
2534
+
2535
+ smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
2536
+ if (!smc_close_wq)
2537
+ goto out_alloc_hs_wq;
2538
+
2539
+ rc = smc_core_init();
2540
+ if (rc) {
2541
+ pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
2542
+ goto out_alloc_wqs;
2543
+ }
19572544
19582545 rc = smc_llc_init();
19592546 if (rc) {
19602547 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
1961
- goto out_pnet;
2548
+ goto out_core;
19622549 }
19632550
19642551 rc = smc_cdc_init();
19652552 if (rc) {
19662553 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
1967
- goto out_pnet;
2554
+ goto out_core;
19682555 }
19692556
19702557 rc = proto_register(&smc_proto, 1);
19712558 if (rc) {
19722559 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
1973
- goto out_pnet;
2560
+ goto out_core;
19742561 }
19752562
19762563 rc = proto_register(&smc_proto6, 1);
....@@ -2002,20 +2589,33 @@
20022589 proto_unregister(&smc_proto6);
20032590 out_proto:
20042591 proto_unregister(&smc_proto);
2592
+out_core:
2593
+ smc_core_exit();
2594
+out_alloc_wqs:
2595
+ destroy_workqueue(smc_close_wq);
2596
+out_alloc_hs_wq:
2597
+ destroy_workqueue(smc_hs_wq);
20052598 out_pnet:
20062599 smc_pnet_exit();
2600
+out_pernet_subsys:
2601
+ unregister_pernet_subsys(&smc_net_ops);
2602
+
20072603 return rc;
20082604 }
20092605
20102606 static void __exit smc_exit(void)
20112607 {
2012
- smc_core_exit();
20132608 static_branch_disable(&tcp_have_smc);
2014
- smc_ib_unregister_client();
20152609 sock_unregister(PF_SMC);
2610
+ smc_core_exit();
2611
+ smc_ib_unregister_client();
2612
+ destroy_workqueue(smc_close_wq);
2613
+ destroy_workqueue(smc_hs_wq);
20162614 proto_unregister(&smc_proto6);
20172615 proto_unregister(&smc_proto);
20182616 smc_pnet_exit();
2617
+ unregister_pernet_subsys(&smc_net_ops);
2618
+ rcu_barrier();
20192619 }
20202620
20212621 module_init(smc_init);