forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/drivers/infiniband/core/cma.c
....@@ -1,36 +1,9 @@
1
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
12 /*
23 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
34 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
56 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
6
- *
7
- * This software is available to you under a choice of one of two
8
- * licenses. You may choose to be licensed under the terms of the GNU
9
- * General Public License (GPL) Version 2, available from the file
10
- * COPYING in the main directory of this source tree, or the
11
- * OpenIB.org BSD license below:
12
- *
13
- * Redistribution and use in source and binary forms, with or
14
- * without modification, are permitted provided that the following
15
- * conditions are met:
16
- *
17
- * - Redistributions of source code must retain the above
18
- * copyright notice, this list of conditions and the following
19
- * disclaimer.
20
- *
21
- * - Redistributions in binary form must reproduce the above
22
- * copyright notice, this list of conditions and the following
23
- * disclaimer in the documentation and/or other materials
24
- * provided with the distribution.
25
- *
26
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33
- * SOFTWARE.
347 */
358
369 #include <linux/completion.h>
....@@ -39,7 +12,7 @@
3912 #include <linux/mutex.h>
4013 #include <linux/random.h>
4114 #include <linux/igmp.h>
42
-#include <linux/idr.h>
15
+#include <linux/xarray.h>
4316 #include <linux/inetdevice.h>
4417 #include <linux/slab.h>
4518 #include <linux/module.h>
....@@ -63,6 +36,7 @@
6336
6437 #include "core_priv.h"
6538 #include "cma_priv.h"
39
+#include "cma_trace.h"
6640
6741 MODULE_AUTHOR("Sean Hefty");
6842 MODULE_DESCRIPTION("Generic RDMA CM Agent");
....@@ -94,6 +68,9 @@
9468 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
9569 };
9670
71
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
72
+ enum ib_gid_type gid_type);
73
+
9774 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
9875 {
9976 size_t index = event;
....@@ -117,7 +94,13 @@
11794 }
11895 EXPORT_SYMBOL(rdma_reject_msg);
11996
120
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
97
+/**
98
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
99
+ * request.
100
+ * @id: Communication identifier that received the REJECT event.
101
+ * @reason: Value returned in the REJECT event status field.
102
+ */
103
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
121104 {
122105 if (rdma_ib_or_roce(id->device, id->port_num))
123106 return reason == IB_CM_REJ_CONSUMER_DEFINED;
....@@ -128,7 +111,6 @@
128111 WARN_ON_ONCE(1);
129112 return false;
130113 }
131
-EXPORT_SYMBOL(rdma_is_consumer_reject);
132114
133115 const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
134116 struct rdma_cm_event *ev, u8 *data_len)
....@@ -174,7 +156,7 @@
174156 }
175157 EXPORT_SYMBOL(rdma_res_to_id);
176158
177
-static void cma_add_one(struct ib_device *device);
159
+static int cma_add_one(struct ib_device *device);
178160 static void cma_remove_one(struct ib_device *device, void *client_data);
179161
180162 static struct ib_client cma_client = {
....@@ -191,10 +173,10 @@
191173 static unsigned int cma_pernet_id;
192174
193175 struct cma_pernet {
194
- struct idr tcp_ps;
195
- struct idr udp_ps;
196
- struct idr ipoib_ps;
197
- struct idr ib_ps;
176
+ struct xarray tcp_ps;
177
+ struct xarray udp_ps;
178
+ struct xarray ipoib_ps;
179
+ struct xarray ib_ps;
198180 };
199181
200182 static struct cma_pernet *cma_pernet(struct net *net)
....@@ -202,7 +184,8 @@
202184 return net_generic(net, cma_pernet_id);
203185 }
204186
205
-static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
187
+static
188
+struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
206189 {
207190 struct cma_pernet *pernet = cma_pernet(net);
208191
....@@ -224,7 +207,7 @@
224207 struct list_head list;
225208 struct ib_device *device;
226209 struct completion comp;
227
- atomic_t refcount;
210
+ refcount_t refcount;
228211 struct list_head id_list;
229212 enum ib_gid_type *default_gid_type;
230213 u8 *default_roce_tos;
....@@ -247,34 +230,40 @@
247230 static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
248231 struct rdma_bind_list *bind_list, int snum)
249232 {
250
- struct idr *idr = cma_pernet_idr(net, ps);
233
+ struct xarray *xa = cma_pernet_xa(net, ps);
251234
252
- return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
235
+ return xa_insert(xa, snum, bind_list, GFP_KERNEL);
253236 }
254237
255238 static struct rdma_bind_list *cma_ps_find(struct net *net,
256239 enum rdma_ucm_port_space ps, int snum)
257240 {
258
- struct idr *idr = cma_pernet_idr(net, ps);
241
+ struct xarray *xa = cma_pernet_xa(net, ps);
259242
260
- return idr_find(idr, snum);
243
+ return xa_load(xa, snum);
261244 }
262245
263246 static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
264247 int snum)
265248 {
266
- struct idr *idr = cma_pernet_idr(net, ps);
249
+ struct xarray *xa = cma_pernet_xa(net, ps);
267250
268
- idr_remove(idr, snum);
251
+ xa_erase(xa, snum);
269252 }
270253
271254 enum {
272255 CMA_OPTION_AFONLY,
273256 };
274257
275
-void cma_ref_dev(struct cma_device *cma_dev)
258
+void cma_dev_get(struct cma_device *cma_dev)
276259 {
277
- atomic_inc(&cma_dev->refcount);
260
+ refcount_inc(&cma_dev->refcount);
261
+}
262
+
263
+void cma_dev_put(struct cma_device *cma_dev)
264
+{
265
+ if (refcount_dec_and_test(&cma_dev->refcount))
266
+ complete(&cma_dev->comp);
278267 }
279268
280269 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
....@@ -292,7 +281,7 @@
292281 }
293282
294283 if (found_cma_dev)
295
- cma_ref_dev(found_cma_dev);
284
+ cma_dev_get(found_cma_dev);
296285 mutex_unlock(&lock);
297286 return found_cma_dev;
298287 }
....@@ -314,6 +303,10 @@
314303
315304 if (!rdma_is_port_valid(cma_dev->device, port))
316305 return -EINVAL;
306
+
307
+ if (default_gid_type == IB_GID_TYPE_IB &&
308
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
309
+ default_gid_type = IB_GID_TYPE_ROCE;
317310
318311 supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
319312
....@@ -360,12 +353,15 @@
360353 struct cma_multicast {
361354 struct rdma_id_private *id_priv;
362355 union {
363
- struct ib_sa_multicast *ib;
364
- } multicast;
356
+ struct ib_sa_multicast *sa_mc;
357
+ struct {
358
+ struct work_struct work;
359
+ struct rdma_cm_event event;
360
+ } iboe_join;
361
+ };
365362 struct list_head list;
366363 void *context;
367364 struct sockaddr_storage addr;
368
- struct kref mcref;
369365 u8 join_state;
370366 };
371367
....@@ -375,18 +371,6 @@
375371 enum rdma_cm_state old_state;
376372 enum rdma_cm_state new_state;
377373 struct rdma_cm_event event;
378
-};
379
-
380
-struct cma_ndev_work {
381
- struct work_struct work;
382
- struct rdma_id_private *id;
383
- struct rdma_cm_event event;
384
-};
385
-
386
-struct iboe_mcast_work {
387
- struct work_struct work;
388
- struct rdma_id_private *id;
389
- struct cma_multicast *mc;
390374 };
391375
392376 union cma_ip_addr {
....@@ -418,41 +402,26 @@
418402 u16 pkey;
419403 };
420404
421
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
422
-{
423
- unsigned long flags;
424
- int ret;
425
-
426
- spin_lock_irqsave(&id_priv->lock, flags);
427
- ret = (id_priv->state == comp);
428
- spin_unlock_irqrestore(&id_priv->lock, flags);
429
- return ret;
430
-}
431
-
432405 static int cma_comp_exch(struct rdma_id_private *id_priv,
433406 enum rdma_cm_state comp, enum rdma_cm_state exch)
434407 {
435408 unsigned long flags;
436409 int ret;
437410
411
+ /*
412
+ * The FSM uses a funny double locking where state is protected by both
413
+ * the handler_mutex and the spinlock. State is not allowed to change
414
+ * to/from a handler_mutex protected value without also holding
415
+ * handler_mutex.
416
+ */
417
+ if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT)
418
+ lockdep_assert_held(&id_priv->handler_mutex);
419
+
438420 spin_lock_irqsave(&id_priv->lock, flags);
439421 if ((ret = (id_priv->state == comp)))
440422 id_priv->state = exch;
441423 spin_unlock_irqrestore(&id_priv->lock, flags);
442424 return ret;
443
-}
444
-
445
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
446
- enum rdma_cm_state exch)
447
-{
448
- unsigned long flags;
449
- enum rdma_cm_state old;
450
-
451
- spin_lock_irqsave(&id_priv->lock, flags);
452
- old = id_priv->state;
453
- id_priv->state = exch;
454
- spin_unlock_irqrestore(&id_priv->lock, flags);
455
- return old;
456425 }
457426
458427 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
....@@ -488,13 +457,14 @@
488457 static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
489458 struct cma_device *cma_dev)
490459 {
491
- cma_ref_dev(cma_dev);
460
+ cma_dev_get(cma_dev);
492461 id_priv->cma_dev = cma_dev;
493462 id_priv->id.device = cma_dev->device;
494463 id_priv->id.route.addr.dev_addr.transport =
495464 rdma_node_get_transport(cma_dev->device->node_type);
496465 list_add_tail(&id_priv->list, &cma_dev->id_list);
497
- rdma_restrack_add(&id_priv->res);
466
+
467
+ trace_cm_id_attach(id_priv, cma_dev->device);
498468 }
499469
500470 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
....@@ -506,26 +476,17 @@
506476 rdma_start_port(cma_dev->device)];
507477 }
508478
509
-void cma_deref_dev(struct cma_device *cma_dev)
510
-{
511
- if (atomic_dec_and_test(&cma_dev->refcount))
512
- complete(&cma_dev->comp);
513
-}
514
-
515
-static inline void release_mc(struct kref *kref)
516
-{
517
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
518
-
519
- kfree(mc->multicast.ib);
520
- kfree(mc);
521
-}
522
-
523479 static void cma_release_dev(struct rdma_id_private *id_priv)
524480 {
525481 mutex_lock(&lock);
526482 list_del(&id_priv->list);
527
- cma_deref_dev(id_priv->cma_dev);
483
+ cma_dev_put(id_priv->cma_dev);
528484 id_priv->cma_dev = NULL;
485
+ id_priv->id.device = NULL;
486
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
487
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
488
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
489
+ }
529490 mutex_unlock(&lock);
530491 }
531492
....@@ -612,6 +573,9 @@
612573 int dev_type = dev_addr->dev_type;
613574 struct net_device *ndev = NULL;
614575
576
+ if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
577
+ return ERR_PTR(-ENODEV);
578
+
615579 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
616580 return ERR_PTR(-ENODEV);
617581
....@@ -639,56 +603,148 @@
639603 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
640604 }
641605
642
-static int cma_acquire_dev(struct rdma_id_private *id_priv,
643
- const struct rdma_id_private *listen_id_priv)
606
+/**
607
+ * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
608
+ * based on source ip address.
609
+ * @id_priv: cm_id which should be bound to cma device
610
+ *
611
+ * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
612
+ * based on source IP address. It returns 0 on success or error code otherwise.
613
+ * It is applicable to active and passive side cm_id.
614
+ */
615
+static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
644616 {
645617 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
646618 const struct ib_gid_attr *sgid_attr;
647
- struct cma_device *cma_dev;
648619 union ib_gid gid, iboe_gid, *gidp;
620
+ struct cma_device *cma_dev;
649621 enum ib_gid_type gid_type;
650622 int ret = -ENODEV;
651
- u8 port;
623
+ unsigned int port;
652624
653625 if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
654626 id_priv->id.ps == RDMA_PS_IPOIB)
655627 return -EINVAL;
656628
657
- mutex_lock(&lock);
658629 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
659630 &iboe_gid);
660631
661632 memcpy(&gid, dev_addr->src_dev_addr +
662
- rdma_addr_gid_offset(dev_addr), sizeof gid);
633
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
663634
664
- if (listen_id_priv) {
665
- cma_dev = listen_id_priv->cma_dev;
666
- port = listen_id_priv->id.port_num;
667
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
668
- &iboe_gid : &gid;
669
- gid_type = listen_id_priv->gid_type;
670
- sgid_attr = cma_validate_port(cma_dev->device, port,
671
- gid_type, gidp, id_priv);
672
- if (!IS_ERR(sgid_attr)) {
673
- id_priv->id.port_num = port;
674
- cma_bind_sgid_attr(id_priv, sgid_attr);
675
- ret = 0;
676
- goto out;
677
- }
678
- }
679
-
635
+ mutex_lock(&lock);
680636 list_for_each_entry(cma_dev, &dev_list, list) {
681
- for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
682
- if (listen_id_priv &&
683
- listen_id_priv->cma_dev == cma_dev &&
684
- listen_id_priv->id.port_num == port)
685
- continue;
686
-
637
+ rdma_for_each_port (cma_dev->device, port) {
687638 gidp = rdma_protocol_roce(cma_dev->device, port) ?
688639 &iboe_gid : &gid;
689640 gid_type = cma_dev->default_gid_type[port - 1];
690641 sgid_attr = cma_validate_port(cma_dev->device, port,
691642 gid_type, gidp, id_priv);
643
+ if (!IS_ERR(sgid_attr)) {
644
+ id_priv->id.port_num = port;
645
+ cma_bind_sgid_attr(id_priv, sgid_attr);
646
+ cma_attach_to_dev(id_priv, cma_dev);
647
+ ret = 0;
648
+ goto out;
649
+ }
650
+ }
651
+ }
652
+out:
653
+ mutex_unlock(&lock);
654
+ return ret;
655
+}
656
+
657
+/**
658
+ * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
659
+ * @id_priv: cm id to bind to cma device
660
+ * @listen_id_priv: listener cm id to match against
661
+ * @req: Pointer to req structure containaining incoming
662
+ * request information
663
+ * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
664
+ * rdma device matches for listen_id and incoming request. It also verifies
665
+ * that a GID table entry is present for the source address.
666
+ * Returns 0 on success, or returns error code otherwise.
667
+ */
668
+static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
669
+ const struct rdma_id_private *listen_id_priv,
670
+ struct cma_req_info *req)
671
+{
672
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
673
+ const struct ib_gid_attr *sgid_attr;
674
+ enum ib_gid_type gid_type;
675
+ union ib_gid gid;
676
+
677
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
678
+ id_priv->id.ps == RDMA_PS_IPOIB)
679
+ return -EINVAL;
680
+
681
+ if (rdma_protocol_roce(req->device, req->port))
682
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
683
+ &gid);
684
+ else
685
+ memcpy(&gid, dev_addr->src_dev_addr +
686
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
687
+
688
+ gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
689
+ sgid_attr = cma_validate_port(req->device, req->port,
690
+ gid_type, &gid, id_priv);
691
+ if (IS_ERR(sgid_attr))
692
+ return PTR_ERR(sgid_attr);
693
+
694
+ id_priv->id.port_num = req->port;
695
+ cma_bind_sgid_attr(id_priv, sgid_attr);
696
+ /* Need to acquire lock to protect against reader
697
+ * of cma_dev->id_list such as cma_netdev_callback() and
698
+ * cma_process_remove().
699
+ */
700
+ mutex_lock(&lock);
701
+ cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
702
+ mutex_unlock(&lock);
703
+ rdma_restrack_add(&id_priv->res);
704
+ return 0;
705
+}
706
+
707
+static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
708
+ const struct rdma_id_private *listen_id_priv)
709
+{
710
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
711
+ const struct ib_gid_attr *sgid_attr;
712
+ struct cma_device *cma_dev;
713
+ enum ib_gid_type gid_type;
714
+ int ret = -ENODEV;
715
+ unsigned int port;
716
+ union ib_gid gid;
717
+
718
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
719
+ id_priv->id.ps == RDMA_PS_IPOIB)
720
+ return -EINVAL;
721
+
722
+ memcpy(&gid, dev_addr->src_dev_addr +
723
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
724
+
725
+ mutex_lock(&lock);
726
+
727
+ cma_dev = listen_id_priv->cma_dev;
728
+ port = listen_id_priv->id.port_num;
729
+ gid_type = listen_id_priv->gid_type;
730
+ sgid_attr = cma_validate_port(cma_dev->device, port,
731
+ gid_type, &gid, id_priv);
732
+ if (!IS_ERR(sgid_attr)) {
733
+ id_priv->id.port_num = port;
734
+ cma_bind_sgid_attr(id_priv, sgid_attr);
735
+ ret = 0;
736
+ goto out;
737
+ }
738
+
739
+ list_for_each_entry(cma_dev, &dev_list, list) {
740
+ rdma_for_each_port (cma_dev->device, port) {
741
+ if (listen_id_priv->cma_dev == cma_dev &&
742
+ listen_id_priv->id.port_num == port)
743
+ continue;
744
+
745
+ gid_type = cma_dev->default_gid_type[port - 1];
746
+ sgid_attr = cma_validate_port(cma_dev->device, port,
747
+ gid_type, &gid, id_priv);
692748 if (!IS_ERR(sgid_attr)) {
693749 id_priv->id.port_num = port;
694750 cma_bind_sgid_attr(id_priv, sgid_attr);
....@@ -699,8 +755,10 @@
699755 }
700756
701757 out:
702
- if (!ret)
758
+ if (!ret) {
703759 cma_attach_to_dev(id_priv, cma_dev);
760
+ rdma_restrack_add(&id_priv->res);
761
+ }
704762
705763 mutex_unlock(&lock);
706764 return ret;
....@@ -714,9 +772,10 @@
714772 struct cma_device *cma_dev, *cur_dev;
715773 struct sockaddr_ib *addr;
716774 union ib_gid gid, sgid, *dgid;
775
+ unsigned int p;
717776 u16 pkey, index;
718
- u8 p;
719777 enum ib_port_state port_state;
778
+ int ret;
720779 int i;
721780
722781 cma_dev = NULL;
....@@ -726,7 +785,7 @@
726785
727786 mutex_lock(&lock);
728787 list_for_each_entry(cur_dev, &dev_list, list) {
729
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
788
+ rdma_for_each_port (cur_dev->device, p) {
730789 if (!rdma_cap_af_ib(cur_dev->device, p))
731790 continue;
732791
....@@ -735,9 +794,14 @@
735794
736795 if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
737796 continue;
738
- for (i = 0; !rdma_query_gid(cur_dev->device,
739
- p, i, &gid);
740
- i++) {
797
+
798
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
799
+ ++i) {
800
+ ret = rdma_query_gid(cur_dev->device, p, i,
801
+ &gid);
802
+ if (ret)
803
+ continue;
804
+
741805 if (!memcmp(&gid, dgid, sizeof(gid))) {
742806 cma_dev = cur_dev;
743807 sgid = gid;
....@@ -761,6 +825,7 @@
761825
762826 found:
763827 cma_attach_to_dev(id_priv, cma_dev);
828
+ rdma_restrack_add(&id_priv->res);
764829 mutex_unlock(&lock);
765830 addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
766831 memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
....@@ -768,16 +833,21 @@
768833 return 0;
769834 }
770835
771
-static void cma_deref_id(struct rdma_id_private *id_priv)
836
+static void cma_id_get(struct rdma_id_private *id_priv)
772837 {
773
- if (atomic_dec_and_test(&id_priv->refcount))
838
+ refcount_inc(&id_priv->refcount);
839
+}
840
+
841
+static void cma_id_put(struct rdma_id_private *id_priv)
842
+{
843
+ if (refcount_dec_and_test(&id_priv->refcount))
774844 complete(&id_priv->comp);
775845 }
776846
777
-struct rdma_cm_id *__rdma_create_id(struct net *net,
778
- rdma_cm_event_handler event_handler,
779
- void *context, enum rdma_ucm_port_space ps,
780
- enum ib_qp_type qp_type, const char *caller)
847
+static struct rdma_id_private *
848
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
849
+ void *context, enum rdma_ucm_port_space ps,
850
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
781851 {
782852 struct rdma_id_private *id_priv;
783853
....@@ -785,22 +855,18 @@
785855 if (!id_priv)
786856 return ERR_PTR(-ENOMEM);
787857
788
- if (caller)
789
- id_priv->res.kern_name = caller;
790
- else
791
- rdma_restrack_set_task(&id_priv->res, current);
792
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
793858 id_priv->state = RDMA_CM_IDLE;
794859 id_priv->id.context = context;
795860 id_priv->id.event_handler = event_handler;
796861 id_priv->id.ps = ps;
797862 id_priv->id.qp_type = qp_type;
798863 id_priv->tos_set = false;
864
+ id_priv->timeout_set = false;
799865 id_priv->gid_type = IB_GID_TYPE_IB;
800866 spin_lock_init(&id_priv->lock);
801867 mutex_init(&id_priv->qp_mutex);
802868 init_completion(&id_priv->comp);
803
- atomic_set(&id_priv->refcount, 1);
869
+ refcount_set(&id_priv->refcount, 1);
804870 mutex_init(&id_priv->handler_mutex);
805871 INIT_LIST_HEAD(&id_priv->listen_list);
806872 INIT_LIST_HEAD(&id_priv->mc_list);
....@@ -808,9 +874,45 @@
808874 id_priv->id.route.addr.dev_addr.net = get_net(net);
809875 id_priv->seq_num &= 0x00ffffff;
810876
811
- return &id_priv->id;
877
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
878
+ if (parent)
879
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
880
+
881
+ return id_priv;
812882 }
813
-EXPORT_SYMBOL(__rdma_create_id);
883
+
884
+struct rdma_cm_id *
885
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
886
+ void *context, enum rdma_ucm_port_space ps,
887
+ enum ib_qp_type qp_type, const char *caller)
888
+{
889
+ struct rdma_id_private *ret;
890
+
891
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
892
+ if (IS_ERR(ret))
893
+ return ERR_CAST(ret);
894
+
895
+ rdma_restrack_set_name(&ret->res, caller);
896
+ return &ret->id;
897
+}
898
+EXPORT_SYMBOL(__rdma_create_kernel_id);
899
+
900
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
901
+ void *context,
902
+ enum rdma_ucm_port_space ps,
903
+ enum ib_qp_type qp_type)
904
+{
905
+ struct rdma_id_private *ret;
906
+
907
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
908
+ ps, qp_type, NULL);
909
+ if (IS_ERR(ret))
910
+ return ERR_CAST(ret);
911
+
912
+ rdma_restrack_set_name(&ret->res, NULL);
913
+ return &ret->id;
914
+}
915
+EXPORT_SYMBOL(rdma_create_user_id);
814916
815917 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
816918 {
....@@ -859,27 +961,34 @@
859961 int ret;
860962
861963 id_priv = container_of(id, struct rdma_id_private, id);
862
- if (id->device != pd->device)
863
- return -EINVAL;
964
+ if (id->device != pd->device) {
965
+ ret = -EINVAL;
966
+ goto out_err;
967
+ }
864968
865969 qp_init_attr->port_num = id->port_num;
866970 qp = ib_create_qp(pd, qp_init_attr);
867
- if (IS_ERR(qp))
868
- return PTR_ERR(qp);
971
+ if (IS_ERR(qp)) {
972
+ ret = PTR_ERR(qp);
973
+ goto out_err;
974
+ }
869975
870976 if (id->qp_type == IB_QPT_UD)
871977 ret = cma_init_ud_qp(id_priv, qp);
872978 else
873979 ret = cma_init_conn_qp(id_priv, qp);
874980 if (ret)
875
- goto err;
981
+ goto out_destroy;
876982
877983 id->qp = qp;
878984 id_priv->qp_num = qp->qp_num;
879985 id_priv->srq = (qp->srq != NULL);
986
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
880987 return 0;
881
-err:
988
+out_destroy:
882989 ib_destroy_qp(qp);
990
+out_err:
991
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
883992 return ret;
884993 }
885994 EXPORT_SYMBOL(rdma_create_qp);
....@@ -889,6 +998,7 @@
889998 struct rdma_id_private *id_priv;
890999
8911000 id_priv = container_of(id, struct rdma_id_private, id);
1001
+ trace_cm_qp_destroy(id_priv);
8921002 mutex_lock(&id_priv->qp_mutex);
8931003 ib_destroy_qp(id_priv->id.qp);
8941004 id_priv->id.qp = NULL;
....@@ -1037,6 +1147,9 @@
10371147 *qp_attr_mask |= IB_QP_PORT;
10381148 } else
10391149 ret = -ENOSYS;
1150
+
1151
+ if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
1152
+ qp_attr->timeout = id_priv->timeout;
10401153
10411154 return ret;
10421155 }
....@@ -1324,7 +1437,7 @@
13241437 return false;
13251438
13261439 memset(&fl4, 0, sizeof(fl4));
1327
- fl4.flowi4_iif = net_dev->ifindex;
1440
+ fl4.flowi4_oif = net_dev->ifindex;
13281441 fl4.daddr = daddr;
13291442 fl4.saddr = saddr;
13301443
....@@ -1387,6 +1500,7 @@
13871500 roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
13881501 {
13891502 const struct ib_gid_attr *sgid_attr = NULL;
1503
+ struct net_device *ndev;
13901504
13911505 if (ib_event->event == IB_CM_REQ_RECEIVED)
13921506 sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
....@@ -1395,8 +1509,15 @@
13951509
13961510 if (!sgid_attr)
13971511 return NULL;
1398
- dev_hold(sgid_attr->ndev);
1399
- return sgid_attr->ndev;
1512
+
1513
+ rcu_read_lock();
1514
+ ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
1515
+ if (IS_ERR(ndev))
1516
+ ndev = NULL;
1517
+ else
1518
+ dev_hold(ndev);
1519
+ rcu_read_unlock();
1520
+ return ndev;
14001521 }
14011522
14021523 static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
....@@ -1475,17 +1596,34 @@
14751596 return rdma_protocol_roce(device, port_num);
14761597 }
14771598
1599
+static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
1600
+{
1601
+ const struct sockaddr *daddr =
1602
+ (const struct sockaddr *)&req->listen_addr_storage;
1603
+ const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1604
+
1605
+ /* Returns true if the req is for IPv6 link local */
1606
+ return (daddr->sa_family == AF_INET6 &&
1607
+ (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
1608
+}
1609
+
14781610 static bool cma_match_net_dev(const struct rdma_cm_id *id,
14791611 const struct net_device *net_dev,
1480
- u8 port_num)
1612
+ const struct cma_req_info *req)
14811613 {
14821614 const struct rdma_addr *addr = &id->route.addr;
14831615
14841616 if (!net_dev)
14851617 /* This request is an AF_IB request */
1486
- return (!id->port_num || id->port_num == port_num) &&
1618
+ return (!id->port_num || id->port_num == req->port) &&
14871619 (addr->src_addr.ss_family == AF_IB);
14881620
1621
+ /*
1622
+ * If the request is not for IPv6 link local, allow matching
1623
+ * request to any netdevice of the one or multiport rdma device.
1624
+ */
1625
+ if (!cma_is_req_ipv6_ll(req))
1626
+ return true;
14891627 /*
14901628 * Net namespaces must match, and if the listner is listening
14911629 * on a specific netdevice than netdevice must match as well.
....@@ -1515,13 +1653,14 @@
15151653 hlist_for_each_entry(id_priv, &bind_list->owners, node) {
15161654 if (cma_match_private_data(id_priv, ib_event->private_data)) {
15171655 if (id_priv->id.device == cm_id->device &&
1518
- cma_match_net_dev(&id_priv->id, net_dev, req->port))
1656
+ cma_match_net_dev(&id_priv->id, net_dev, req))
15191657 return id_priv;
15201658 list_for_each_entry(id_priv_dev,
15211659 &id_priv->listen_list,
15221660 listen_list) {
15231661 if (id_priv_dev->id.device == cm_id->device &&
1524
- cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
1662
+ cma_match_net_dev(&id_priv_dev->id,
1663
+ net_dev, req))
15251664 return id_priv_dev;
15261665 }
15271666 }
....@@ -1533,18 +1672,18 @@
15331672 static struct rdma_id_private *
15341673 cma_ib_id_from_event(struct ib_cm_id *cm_id,
15351674 const struct ib_cm_event *ib_event,
1675
+ struct cma_req_info *req,
15361676 struct net_device **net_dev)
15371677 {
1538
- struct cma_req_info req;
15391678 struct rdma_bind_list *bind_list;
15401679 struct rdma_id_private *id_priv;
15411680 int err;
15421681
1543
- err = cma_save_req_info(ib_event, &req);
1682
+ err = cma_save_req_info(ib_event, req);
15441683 if (err)
15451684 return ERR_PTR(err);
15461685
1547
- *net_dev = cma_get_net_dev(ib_event, &req);
1686
+ *net_dev = cma_get_net_dev(ib_event, req);
15481687 if (IS_ERR(*net_dev)) {
15491688 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
15501689 /* Assuming the protocol is AF_IB */
....@@ -1583,17 +1722,17 @@
15831722 }
15841723
15851724 if (!validate_net_dev(*net_dev,
1586
- (struct sockaddr *)&req.listen_addr_storage,
1587
- (struct sockaddr *)&req.src_addr_storage)) {
1725
+ (struct sockaddr *)&req->src_addr_storage,
1726
+ (struct sockaddr *)&req->listen_addr_storage)) {
15881727 id_priv = ERR_PTR(-EHOSTUNREACH);
15891728 goto err;
15901729 }
15911730 }
15921731
15931732 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1594
- rdma_ps_from_service_id(req.service_id),
1595
- cma_port_from_service_id(req.service_id));
1596
- id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1733
+ rdma_ps_from_service_id(req->service_id),
1734
+ cma_port_from_service_id(req->service_id));
1735
+ id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
15971736 err:
15981737 rcu_read_unlock();
15991738 mutex_unlock(&lock);
....@@ -1617,15 +1756,16 @@
16171756 }
16181757 }
16191758
1620
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
1759
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
16211760 {
16221761 struct rdma_id_private *dev_id_priv;
1762
+
1763
+ lockdep_assert_held(&lock);
16231764
16241765 /*
16251766 * Remove from listen_any_list to prevent added devices from spawning
16261767 * additional listen requests.
16271768 */
1628
- mutex_lock(&lock);
16291769 list_del(&id_priv->list);
16301770
16311771 while (!list_empty(&id_priv->listen_list)) {
....@@ -1639,6 +1779,12 @@
16391779 rdma_destroy_id(&dev_id_priv->id);
16401780 mutex_lock(&lock);
16411781 }
1782
+}
1783
+
1784
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
1785
+{
1786
+ mutex_lock(&lock);
1787
+ _cma_cancel_listens(id_priv);
16421788 mutex_unlock(&lock);
16431789 }
16441790
....@@ -1681,14 +1827,12 @@
16811827 static void destroy_mc(struct rdma_id_private *id_priv,
16821828 struct cma_multicast *mc)
16831829 {
1684
- if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) {
1685
- ib_sa_free_multicast(mc->multicast.ib);
1686
- kfree(mc);
1687
- return;
1688
- }
1830
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
16891831
1690
- if (rdma_protocol_roce(id_priv->id.device,
1691
- id_priv->id.port_num)) {
1832
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
1833
+ ib_sa_free_multicast(mc->sa_mc);
1834
+
1835
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
16921836 struct rdma_dev_addr *dev_addr =
16931837 &id_priv->id.route.addr.dev_addr;
16941838 struct net_device *ndev = NULL;
....@@ -1696,12 +1840,23 @@
16961840 if (dev_addr->bound_dev_if)
16971841 ndev = dev_get_by_index(dev_addr->net,
16981842 dev_addr->bound_dev_if);
1699
- if (ndev) {
1700
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
1701
- dev_put(ndev);
1843
+ if (ndev && !send_only) {
1844
+ enum ib_gid_type gid_type;
1845
+ union ib_gid mgid;
1846
+
1847
+ gid_type = id_priv->cma_dev->default_gid_type
1848
+ [id_priv->id.port_num -
1849
+ rdma_start_port(
1850
+ id_priv->cma_dev->device)];
1851
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
1852
+ gid_type);
1853
+ cma_igmp_send(ndev, &mgid, false);
17021854 }
1703
- kref_put(&mc->mcref, release_mc);
1855
+ dev_put(ndev);
1856
+
1857
+ cancel_work_sync(&mc->iboe_join.work);
17041858 }
1859
+ kfree(mc);
17051860 }
17061861
17071862 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
....@@ -1716,21 +1871,10 @@
17161871 }
17171872 }
17181873
1719
-void rdma_destroy_id(struct rdma_cm_id *id)
1874
+static void _destroy_id(struct rdma_id_private *id_priv,
1875
+ enum rdma_cm_state state)
17201876 {
1721
- struct rdma_id_private *id_priv;
1722
- enum rdma_cm_state state;
1723
-
1724
- id_priv = container_of(id, struct rdma_id_private, id);
1725
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
17261877 cma_cancel_operation(id_priv, state);
1727
-
1728
- /*
1729
- * Wait for any active callback to finish. New callbacks will find
1730
- * the id_priv state set to destroying and abort.
1731
- */
1732
- mutex_lock(&id_priv->handler_mutex);
1733
- mutex_unlock(&id_priv->handler_mutex);
17341878
17351879 rdma_restrack_del(&id_priv->res);
17361880 if (id_priv->cma_dev) {
....@@ -1746,19 +1890,52 @@
17461890 }
17471891
17481892 cma_release_port(id_priv);
1749
- cma_deref_id(id_priv);
1893
+ cma_id_put(id_priv);
17501894 wait_for_completion(&id_priv->comp);
17511895
17521896 if (id_priv->internal_id)
1753
- cma_deref_id(id_priv->id.context);
1897
+ cma_id_put(id_priv->id.context);
17541898
17551899 kfree(id_priv->id.route.path_rec);
17561900
1757
- if (id_priv->id.route.addr.dev_addr.sgid_attr)
1758
- rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
1759
-
17601901 put_net(id_priv->id.route.addr.dev_addr.net);
17611902 kfree(id_priv);
1903
+}
1904
+
1905
+/*
1906
+ * destroy an ID from within the handler_mutex. This ensures that no other
1907
+ * handlers can start running concurrently.
1908
+ */
1909
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
1910
+ __releases(&idprv->handler_mutex)
1911
+{
1912
+ enum rdma_cm_state state;
1913
+ unsigned long flags;
1914
+
1915
+ trace_cm_id_destroy(id_priv);
1916
+
1917
+ /*
1918
+ * Setting the state to destroyed under the handler mutex provides a
1919
+ * fence against calling handler callbacks. If this is invoked due to
1920
+ * the failure of a handler callback then it guarentees that no future
1921
+ * handlers will be called.
1922
+ */
1923
+ lockdep_assert_held(&id_priv->handler_mutex);
1924
+ spin_lock_irqsave(&id_priv->lock, flags);
1925
+ state = id_priv->state;
1926
+ id_priv->state = RDMA_CM_DESTROYING;
1927
+ spin_unlock_irqrestore(&id_priv->lock, flags);
1928
+ mutex_unlock(&id_priv->handler_mutex);
1929
+ _destroy_id(id_priv, state);
1930
+}
1931
+
1932
+void rdma_destroy_id(struct rdma_cm_id *id)
1933
+{
1934
+ struct rdma_id_private *id_priv =
1935
+ container_of(id, struct rdma_id_private, id);
1936
+
1937
+ mutex_lock(&id_priv->handler_mutex);
1938
+ destroy_id_handler_unlock(id_priv);
17621939 }
17631940 EXPORT_SYMBOL(rdma_destroy_id);
17641941
....@@ -1774,6 +1951,7 @@
17741951 if (ret)
17751952 goto reject;
17761953
1954
+ trace_cm_send_rtu(id_priv);
17771955 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
17781956 if (ret)
17791957 goto reject;
....@@ -1782,6 +1960,7 @@
17821960 reject:
17831961 pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
17841962 cma_modify_qp_err(id_priv);
1963
+ trace_cm_send_rej(id_priv);
17851964 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
17861965 NULL, 0, NULL, 0);
17871966 return ret;
....@@ -1799,6 +1978,22 @@
17991978 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
18001979 event->param.conn.srq = rep_data->srq;
18011980 event->param.conn.qp_num = rep_data->remote_qpn;
1981
+
1982
+ event->ece.vendor_id = rep_data->ece.vendor_id;
1983
+ event->ece.attr_mod = rep_data->ece.attr_mod;
1984
+}
1985
+
1986
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
1987
+ struct rdma_cm_event *event)
1988
+{
1989
+ int ret;
1990
+
1991
+ lockdep_assert_held(&id_priv->handler_mutex);
1992
+
1993
+ trace_cm_event_handler(id_priv, event);
1994
+ ret = id_priv->id.event_handler(&id_priv->id, event);
1995
+ trace_cm_event_done(id_priv, event, ret);
1996
+ return ret;
18021997 }
18031998
18041999 static int cma_ib_handler(struct ib_cm_id *cm_id,
....@@ -1806,13 +2001,15 @@
18062001 {
18072002 struct rdma_id_private *id_priv = cm_id->context;
18082003 struct rdma_cm_event event = {};
1809
- int ret = 0;
2004
+ enum rdma_cm_state state;
2005
+ int ret;
18102006
18112007 mutex_lock(&id_priv->handler_mutex);
2008
+ state = READ_ONCE(id_priv->state);
18122009 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1813
- id_priv->state != RDMA_CM_CONNECT) ||
2010
+ state != RDMA_CM_CONNECT) ||
18142011 (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1815
- id_priv->state != RDMA_CM_DISCONNECT))
2012
+ state != RDMA_CM_DISCONNECT))
18162013 goto out;
18172014
18182015 switch (ib_event->event) {
....@@ -1822,9 +2019,11 @@
18222019 event.status = -ETIMEDOUT;
18232020 break;
18242021 case IB_CM_REP_RECEIVED:
1825
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
1826
- (id_priv->id.qp_type != IB_QPT_UD))
2022
+ if (state == RDMA_CM_CONNECT &&
2023
+ (id_priv->id.qp_type != IB_QPT_UD)) {
2024
+ trace_cm_send_mra(id_priv);
18272025 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2026
+ }
18282027 if (id_priv->id.qp) {
18292028 event.status = cma_rep_recv(id_priv);
18302029 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
....@@ -1840,7 +2039,8 @@
18402039 event.event = RDMA_CM_EVENT_ESTABLISHED;
18412040 break;
18422041 case IB_CM_DREQ_ERROR:
1843
- event.status = -ETIMEDOUT; /* fall through */
2042
+ event.status = -ETIMEDOUT;
2043
+ fallthrough;
18442044 case IB_CM_DREQ_RECEIVED:
18452045 case IB_CM_DREP_RECEIVED:
18462046 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
....@@ -1869,18 +2069,16 @@
18692069 goto out;
18702070 }
18712071
1872
- ret = id_priv->id.event_handler(&id_priv->id, &event);
2072
+ ret = cma_cm_event_handler(id_priv, &event);
18732073 if (ret) {
18742074 /* Destroy the CM ID by returning a non-zero value. */
18752075 id_priv->cm_id.ib = NULL;
1876
- cma_exch(id_priv, RDMA_CM_DESTROYING);
1877
- mutex_unlock(&id_priv->handler_mutex);
1878
- rdma_destroy_id(&id_priv->id);
2076
+ destroy_id_handler_unlock(id_priv);
18792077 return ret;
18802078 }
18812079 out:
18822080 mutex_unlock(&id_priv->handler_mutex);
1883
- return ret;
2081
+ return 0;
18842082 }
18852083
18862084 static struct rdma_id_private *
....@@ -1899,14 +2097,15 @@
18992097 int ret;
19002098
19012099 listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
1902
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
1903
- listen_id->event_handler, listen_id->context,
1904
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
1905
- listen_id_priv->res.kern_name);
1906
- if (IS_ERR(id))
2100
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
2101
+ listen_id->event_handler, listen_id->context,
2102
+ listen_id->ps,
2103
+ ib_event->param.req_rcvd.qp_type,
2104
+ listen_id_priv);
2105
+ if (IS_ERR(id_priv))
19072106 return NULL;
19082107
1909
- id_priv = container_of(id, struct rdma_id_private, id);
2108
+ id = &id_priv->id;
19102109 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
19112110 (struct sockaddr *)&id->route.addr.dst_addr,
19122111 listen_id, ib_event, ss_family, service_id))
....@@ -1924,7 +2123,7 @@
19242123 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
19252124
19262125 if (net_dev) {
1927
- rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
2126
+ rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
19282127 } else {
19292128 if (!cma_protocol_roce(listen_id) &&
19302129 cma_any_addr(cma_src_addr(id_priv))) {
....@@ -1960,13 +2159,13 @@
19602159 int ret;
19612160
19622161 listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
1963
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
1964
- listen_id->ps, IB_QPT_UD,
1965
- listen_id_priv->res.kern_name);
1966
- if (IS_ERR(id))
2162
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
2163
+ listen_id->context, listen_id->ps, IB_QPT_UD,
2164
+ listen_id_priv);
2165
+ if (IS_ERR(id_priv))
19672166 return NULL;
19682167
1969
- id_priv = container_of(id, struct rdma_id_private, id);
2168
+ id = &id_priv->id;
19702169 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
19712170 (struct sockaddr *)&id->route.addr.dst_addr,
19722171 listen_id, ib_event, ss_family,
....@@ -1974,7 +2173,7 @@
19742173 goto err;
19752174
19762175 if (net_dev) {
1977
- rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
2176
+ rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
19782177 } else {
19792178 if (!cma_any_addr(cma_src_addr(id_priv))) {
19802179 ret = cma_translate_addr(cma_src_addr(id_priv),
....@@ -2004,6 +2203,9 @@
20042203 event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
20052204 event->param.conn.srq = req_data->srq;
20062205 event->param.conn.qp_num = req_data->remote_qpn;
2206
+
2207
+ event->ece.vendor_id = req_data->ece.vendor_id;
2208
+ event->ece.attr_mod = req_data->ece.attr_mod;
20072209 }
20082210
20092211 static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
....@@ -2021,23 +2223,25 @@
20212223 {
20222224 struct rdma_id_private *listen_id, *conn_id = NULL;
20232225 struct rdma_cm_event event = {};
2226
+ struct cma_req_info req = {};
20242227 struct net_device *net_dev;
20252228 u8 offset;
20262229 int ret;
20272230
2028
- listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
2231
+ listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
20292232 if (IS_ERR(listen_id))
20302233 return PTR_ERR(listen_id);
20312234
2235
+ trace_cm_req_handler(listen_id, ib_event->event);
20322236 if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
20332237 ret = -EINVAL;
20342238 goto net_dev_put;
20352239 }
20362240
20372241 mutex_lock(&listen_id->handler_mutex);
2038
- if (listen_id->state != RDMA_CM_LISTEN) {
2242
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
20392243 ret = -ECONNABORTED;
2040
- goto err1;
2244
+ goto err_unlock;
20412245 }
20422246
20432247 offset = cma_user_data_offset(listen_id);
....@@ -2054,53 +2258,38 @@
20542258 }
20552259 if (!conn_id) {
20562260 ret = -ENOMEM;
2057
- goto err1;
2261
+ goto err_unlock;
20582262 }
20592263
20602264 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2061
- ret = cma_acquire_dev(conn_id, listen_id);
2062
- if (ret)
2063
- goto err2;
2265
+ ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
2266
+ if (ret) {
2267
+ destroy_id_handler_unlock(conn_id);
2268
+ goto err_unlock;
2269
+ }
20642270
20652271 conn_id->cm_id.ib = cm_id;
20662272 cm_id->context = conn_id;
20672273 cm_id->cm_handler = cma_ib_handler;
20682274
2069
- /*
2070
- * Protect against the user destroying conn_id from another thread
2071
- * until we're done accessing it.
2072
- */
2073
- atomic_inc(&conn_id->refcount);
2074
- ret = conn_id->id.event_handler(&conn_id->id, &event);
2075
- if (ret)
2076
- goto err3;
2077
- /*
2078
- * Acquire mutex to prevent user executing rdma_destroy_id()
2079
- * while we're accessing the cm_id.
2080
- */
2081
- mutex_lock(&lock);
2082
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
2083
- (conn_id->id.qp_type != IB_QPT_UD))
2084
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2085
- mutex_unlock(&lock);
2086
- mutex_unlock(&conn_id->handler_mutex);
2087
- mutex_unlock(&listen_id->handler_mutex);
2088
- cma_deref_id(conn_id);
2089
- if (net_dev)
2090
- dev_put(net_dev);
2091
- return 0;
2275
+ ret = cma_cm_event_handler(conn_id, &event);
2276
+ if (ret) {
2277
+ /* Destroy the CM ID by returning a non-zero value. */
2278
+ conn_id->cm_id.ib = NULL;
2279
+ mutex_unlock(&listen_id->handler_mutex);
2280
+ destroy_id_handler_unlock(conn_id);
2281
+ goto net_dev_put;
2282
+ }
20922283
2093
-err3:
2094
- cma_deref_id(conn_id);
2095
- /* Destroy the CM ID by returning a non-zero value. */
2096
- conn_id->cm_id.ib = NULL;
2097
-err2:
2098
- cma_exch(conn_id, RDMA_CM_DESTROYING);
2284
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
2285
+ conn_id->id.qp_type != IB_QPT_UD) {
2286
+ trace_cm_send_mra(cm_id->context);
2287
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2288
+ }
20992289 mutex_unlock(&conn_id->handler_mutex);
2100
-err1:
2290
+
2291
+err_unlock:
21012292 mutex_unlock(&listen_id->handler_mutex);
2102
- if (conn_id)
2103
- rdma_destroy_id(&conn_id->id);
21042293
21052294 net_dev_put:
21062295 if (net_dev)
....@@ -2154,7 +2343,7 @@
21542343 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
21552344
21562345 mutex_lock(&id_priv->handler_mutex);
2157
- if (id_priv->state != RDMA_CM_CONNECT)
2346
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
21582347 goto out;
21592348
21602349 switch (iw_event->event) {
....@@ -2196,13 +2385,11 @@
21962385 event.status = iw_event->status;
21972386 event.param.conn.private_data = iw_event->private_data;
21982387 event.param.conn.private_data_len = iw_event->private_data_len;
2199
- ret = id_priv->id.event_handler(&id_priv->id, &event);
2388
+ ret = cma_cm_event_handler(id_priv, &event);
22002389 if (ret) {
22012390 /* Destroy the CM ID by returning a non-zero value. */
22022391 id_priv->cm_id.iw = NULL;
2203
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2204
- mutex_unlock(&id_priv->handler_mutex);
2205
- rdma_destroy_id(&id_priv->id);
2392
+ destroy_id_handler_unlock(id_priv);
22062393 return ret;
22072394 }
22082395
....@@ -2214,7 +2401,6 @@
22142401 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
22152402 struct iw_cm_event *iw_event)
22162403 {
2217
- struct rdma_cm_id *new_cm_id;
22182404 struct rdma_id_private *listen_id, *conn_id;
22192405 struct rdma_cm_event event = {};
22202406 int ret = -ECONNABORTED;
....@@ -2230,35 +2416,33 @@
22302416 listen_id = cm_id->context;
22312417
22322418 mutex_lock(&listen_id->handler_mutex);
2233
- if (listen_id->state != RDMA_CM_LISTEN)
2419
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
22342420 goto out;
22352421
22362422 /* Create a new RDMA id for the new IW CM ID */
2237
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2238
- listen_id->id.event_handler,
2239
- listen_id->id.context,
2240
- RDMA_PS_TCP, IB_QPT_RC,
2241
- listen_id->res.kern_name);
2242
- if (IS_ERR(new_cm_id)) {
2423
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2424
+ listen_id->id.event_handler,
2425
+ listen_id->id.context, RDMA_PS_TCP,
2426
+ IB_QPT_RC, listen_id);
2427
+ if (IS_ERR(conn_id)) {
22432428 ret = -ENOMEM;
22442429 goto out;
22452430 }
2246
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
22472431 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
22482432 conn_id->state = RDMA_CM_CONNECT;
22492433
22502434 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
22512435 if (ret) {
2252
- mutex_unlock(&conn_id->handler_mutex);
2253
- rdma_destroy_id(new_cm_id);
2254
- goto out;
2436
+ mutex_unlock(&listen_id->handler_mutex);
2437
+ destroy_id_handler_unlock(conn_id);
2438
+ return ret;
22552439 }
22562440
2257
- ret = cma_acquire_dev(conn_id, listen_id);
2441
+ ret = cma_iw_acquire_dev(conn_id, listen_id);
22582442 if (ret) {
2259
- mutex_unlock(&conn_id->handler_mutex);
2260
- rdma_destroy_id(new_cm_id);
2261
- goto out;
2443
+ mutex_unlock(&listen_id->handler_mutex);
2444
+ destroy_id_handler_unlock(conn_id);
2445
+ return ret;
22622446 }
22632447
22642448 conn_id->cm_id.iw = cm_id;
....@@ -2268,25 +2452,16 @@
22682452 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
22692453 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
22702454
2271
- /*
2272
- * Protect against the user destroying conn_id from another thread
2273
- * until we're done accessing it.
2274
- */
2275
- atomic_inc(&conn_id->refcount);
2276
- ret = conn_id->id.event_handler(&conn_id->id, &event);
2455
+ ret = cma_cm_event_handler(conn_id, &event);
22772456 if (ret) {
22782457 /* User wants to destroy the CM ID */
22792458 conn_id->cm_id.iw = NULL;
2280
- cma_exch(conn_id, RDMA_CM_DESTROYING);
2281
- mutex_unlock(&conn_id->handler_mutex);
22822459 mutex_unlock(&listen_id->handler_mutex);
2283
- cma_deref_id(conn_id);
2284
- rdma_destroy_id(&conn_id->id);
2460
+ destroy_id_handler_unlock(conn_id);
22852461 return ret;
22862462 }
22872463
22882464 mutex_unlock(&conn_id->handler_mutex);
2289
- cma_deref_id(conn_id);
22902465
22912466 out:
22922467 mutex_unlock(&listen_id->handler_mutex);
....@@ -2321,7 +2496,10 @@
23212496 if (IS_ERR(id))
23222497 return PTR_ERR(id);
23232498
2499
+ mutex_lock(&id_priv->qp_mutex);
23242500 id->tos = id_priv->tos;
2501
+ id->tos_set = id_priv->tos_set;
2502
+ mutex_unlock(&id_priv->qp_mutex);
23252503 id_priv->cm_id.iw = id;
23262504
23272505 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
....@@ -2342,56 +2520,88 @@
23422520 {
23432521 struct rdma_id_private *id_priv = id->context;
23442522
2523
+ /* Listening IDs are always destroyed on removal */
2524
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
2525
+ return -1;
2526
+
23452527 id->context = id_priv->id.context;
23462528 id->event_handler = id_priv->id.event_handler;
2529
+ trace_cm_event_handler(id_priv, event);
23472530 return id_priv->id.event_handler(id, event);
23482531 }
23492532
2350
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2351
- struct cma_device *cma_dev)
2533
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
2534
+ struct cma_device *cma_dev,
2535
+ struct rdma_id_private **to_destroy)
23522536 {
23532537 struct rdma_id_private *dev_id_priv;
2354
- struct rdma_cm_id *id;
23552538 struct net *net = id_priv->id.route.addr.dev_addr.net;
23562539 int ret;
23572540
23582541 lockdep_assert_held(&lock);
23592542
2543
+ *to_destroy = NULL;
23602544 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2361
- return;
2545
+ return 0;
23622546
2363
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2364
- id_priv->id.qp_type, id_priv->res.kern_name);
2365
- if (IS_ERR(id))
2366
- return;
2367
-
2368
- dev_id_priv = container_of(id, struct rdma_id_private, id);
2547
+ dev_id_priv =
2548
+ __rdma_create_id(net, cma_listen_handler, id_priv,
2549
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
2550
+ if (IS_ERR(dev_id_priv))
2551
+ return PTR_ERR(dev_id_priv);
23692552
23702553 dev_id_priv->state = RDMA_CM_ADDR_BOUND;
23712554 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
23722555 rdma_addr_size(cma_src_addr(id_priv)));
23732556
23742557 _cma_attach_to_dev(dev_id_priv, cma_dev);
2375
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2376
- atomic_inc(&id_priv->refcount);
2558
+ rdma_restrack_add(&dev_id_priv->res);
2559
+ cma_id_get(id_priv);
23772560 dev_id_priv->internal_id = 1;
23782561 dev_id_priv->afonly = id_priv->afonly;
2562
+ mutex_lock(&id_priv->qp_mutex);
2563
+ dev_id_priv->tos_set = id_priv->tos_set;
2564
+ dev_id_priv->tos = id_priv->tos;
2565
+ mutex_unlock(&id_priv->qp_mutex);
23792566
2380
- ret = rdma_listen(id, id_priv->backlog);
2567
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
23812568 if (ret)
2382
- pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2383
- ret, cma_dev->device->name);
2569
+ goto err_listen;
2570
+ list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2571
+ return 0;
2572
+err_listen:
2573
+ /* Caller must destroy this after releasing lock */
2574
+ *to_destroy = dev_id_priv;
2575
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
2576
+ return ret;
23842577 }
23852578
2386
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
2579
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
23872580 {
2581
+ struct rdma_id_private *to_destroy;
23882582 struct cma_device *cma_dev;
2583
+ int ret;
23892584
23902585 mutex_lock(&lock);
23912586 list_add_tail(&id_priv->list, &listen_any_list);
2392
- list_for_each_entry(cma_dev, &dev_list, list)
2393
- cma_listen_on_dev(id_priv, cma_dev);
2587
+ list_for_each_entry(cma_dev, &dev_list, list) {
2588
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
2589
+ if (ret) {
2590
+ /* Prevent racing with cma_process_remove() */
2591
+ if (to_destroy)
2592
+ list_del_init(&to_destroy->list);
2593
+ goto err_listen;
2594
+ }
2595
+ }
23942596 mutex_unlock(&lock);
2597
+ return 0;
2598
+
2599
+err_listen:
2600
+ _cma_cancel_listens(id_priv);
2601
+ mutex_unlock(&lock);
2602
+ if (to_destroy)
2603
+ rdma_destroy_id(&to_destroy->id);
2604
+ return ret;
23952605 }
23962606
23972607 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
....@@ -2399,10 +2609,44 @@
23992609 struct rdma_id_private *id_priv;
24002610
24012611 id_priv = container_of(id, struct rdma_id_private, id);
2612
+ mutex_lock(&id_priv->qp_mutex);
24022613 id_priv->tos = (u8) tos;
24032614 id_priv->tos_set = true;
2615
+ mutex_unlock(&id_priv->qp_mutex);
24042616 }
24052617 EXPORT_SYMBOL(rdma_set_service_type);
2618
+
2619
+/**
2620
+ * rdma_set_ack_timeout() - Set the ack timeout of QP associated
2621
+ * with a connection identifier.
2622
+ * @id: Communication identifier to associated with service type.
2623
+ * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
2624
+ *
2625
+ * This function should be called before rdma_connect() on active side,
2626
+ * and on passive side before rdma_accept(). It is applicable to primary
2627
+ * path only. The timeout will affect the local side of the QP, it is not
2628
+ * negotiated with remote side and zero disables the timer. In case it is
2629
+ * set before rdma_resolve_route, the value will also be used to determine
2630
+ * PacketLifeTime for RoCE.
2631
+ *
2632
+ * Return: 0 for success
2633
+ */
2634
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
2635
+{
2636
+ struct rdma_id_private *id_priv;
2637
+
2638
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
2639
+ return -EINVAL;
2640
+
2641
+ id_priv = container_of(id, struct rdma_id_private, id);
2642
+ mutex_lock(&id_priv->qp_mutex);
2643
+ id_priv->timeout = timeout;
2644
+ id_priv->timeout_set = true;
2645
+ mutex_unlock(&id_priv->qp_mutex);
2646
+
2647
+ return 0;
2648
+}
2649
+EXPORT_SYMBOL(rdma_set_ack_timeout);
24062650
24072651 static void cma_query_handler(int status, struct sa_path_rec *path_rec,
24082652 void *context)
....@@ -2427,8 +2671,8 @@
24272671 queue_work(cma_wq, &work->work);
24282672 }
24292673
2430
-static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
2431
- struct cma_work *work)
2674
+static int cma_query_ib_route(struct rdma_id_private *id_priv,
2675
+ unsigned long timeout_ms, struct cma_work *work)
24322676 {
24332677 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
24342678 struct sa_path_rec path_rec;
....@@ -2480,49 +2724,54 @@
24802724 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
24812725 }
24822726
2727
+static void cma_iboe_join_work_handler(struct work_struct *work)
2728
+{
2729
+ struct cma_multicast *mc =
2730
+ container_of(work, struct cma_multicast, iboe_join.work);
2731
+ struct rdma_cm_event *event = &mc->iboe_join.event;
2732
+ struct rdma_id_private *id_priv = mc->id_priv;
2733
+ int ret;
2734
+
2735
+ mutex_lock(&id_priv->handler_mutex);
2736
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
2737
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
2738
+ goto out_unlock;
2739
+
2740
+ ret = cma_cm_event_handler(id_priv, event);
2741
+ WARN_ON(ret);
2742
+
2743
+out_unlock:
2744
+ mutex_unlock(&id_priv->handler_mutex);
2745
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
2746
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
2747
+}
2748
+
24832749 static void cma_work_handler(struct work_struct *_work)
24842750 {
24852751 struct cma_work *work = container_of(_work, struct cma_work, work);
24862752 struct rdma_id_private *id_priv = work->id;
2487
- int destroy = 0;
24882753
24892754 mutex_lock(&id_priv->handler_mutex);
2490
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2491
- goto out;
2492
-
2493
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2494
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2495
- destroy = 1;
2496
- }
2497
-out:
2498
- mutex_unlock(&id_priv->handler_mutex);
2499
- cma_deref_id(id_priv);
2500
- if (destroy)
2501
- rdma_destroy_id(&id_priv->id);
2502
- kfree(work);
2503
-}
2504
-
2505
-static void cma_ndev_work_handler(struct work_struct *_work)
2506
-{
2507
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
2508
- struct rdma_id_private *id_priv = work->id;
2509
- int destroy = 0;
2510
-
2511
- mutex_lock(&id_priv->handler_mutex);
2512
- if (id_priv->state == RDMA_CM_DESTROYING ||
2513
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
2514
- goto out;
2515
-
2516
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2517
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2518
- destroy = 1;
2755
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
2756
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
2757
+ goto out_unlock;
2758
+ if (work->old_state != 0 || work->new_state != 0) {
2759
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2760
+ goto out_unlock;
25192761 }
25202762
2521
-out:
2763
+ if (cma_cm_event_handler(id_priv, &work->event)) {
2764
+ cma_id_put(id_priv);
2765
+ destroy_id_handler_unlock(id_priv);
2766
+ goto out_free;
2767
+ }
2768
+
2769
+out_unlock:
25222770 mutex_unlock(&id_priv->handler_mutex);
2523
- cma_deref_id(id_priv);
2524
- if (destroy)
2525
- rdma_destroy_id(&id_priv->id);
2771
+ cma_id_put(id_priv);
2772
+out_free:
2773
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
2774
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
25262775 kfree(work);
25272776 }
25282777
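The reworked cma_work_handler() no longer flips state itself on failure: when the ULP's event handler returns non-zero, the CM core destroys the ID via destroy_id_handler_unlock(), so the ULP must not also call rdma_destroy_id() for that ID. A hedged sketch of a handler that relies on this contract (names are hypothetical):

#include <linux/errno.h>
#include <rdma/rdma_cm.h>

static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		/* non-zero on failure: the CM core tears the id down */
		return rdma_resolve_route(id, 2000);
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
		return -EHOSTUNREACH;
	default:
		return 0;
	}
}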
....@@ -2536,17 +2785,23 @@
25362785 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
25372786 }
25382787
2539
-static void cma_init_resolve_addr_work(struct cma_work *work,
2540
- struct rdma_id_private *id_priv)
2788
+static void enqueue_resolve_addr_work(struct cma_work *work,
2789
+ struct rdma_id_private *id_priv)
25412790 {
2791
+ /* Balances with cma_id_put() in cma_work_handler */
2792
+ cma_id_get(id_priv);
2793
+
25422794 work->id = id_priv;
25432795 INIT_WORK(&work->work, cma_work_handler);
25442796 work->old_state = RDMA_CM_ADDR_QUERY;
25452797 work->new_state = RDMA_CM_ADDR_RESOLVED;
25462798 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2799
+
2800
+ queue_work(cma_wq, &work->work);
25472801 }
25482802
2549
-static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2803
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
2804
+ unsigned long timeout_ms)
25502805 {
25512806 struct rdma_route *route = &id_priv->id.route;
25522807 struct cma_work *work;
....@@ -2669,7 +2924,7 @@
26692924 }
26702925 EXPORT_SYMBOL(rdma_set_ib_path);
26712926
2672
-static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2927
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
26732928 {
26742929 struct cma_work *work;
26752930
....@@ -2682,22 +2937,86 @@
26822937 return 0;
26832938 }
26842939
2685
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2940
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
26862941 {
2687
- int prio;
26882942 struct net_device *dev;
26892943
2690
- prio = rt_tos2priority(tos);
2691
- dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
2944
+ dev = vlan_dev_real_dev(vlan_ndev);
26922945 if (dev->num_tc)
26932946 return netdev_get_prio_tc_map(dev, prio);
26942947
2695
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
2948
+ return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
2949
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
2950
+}
2951
+
2952
+struct iboe_prio_tc_map {
2953
+ int input_prio;
2954
+ int output_tc;
2955
+ bool found;
2956
+};
2957
+
2958
+static int get_lower_vlan_dev_tc(struct net_device *dev,
2959
+ struct netdev_nested_priv *priv)
2960
+{
2961
+ struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
2962
+
2963
+ if (is_vlan_dev(dev))
2964
+ map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
2965
+ else if (dev->num_tc)
2966
+ map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
2967
+ else
2968
+ map->output_tc = 0;
2969
+ /* We are interested only in first level VLAN device, so always
2970
+ * return 1 to stop iterating over next level devices.
2971
+ */
2972
+ map->found = true;
2973
+ return 1;
2974
+}
2975
+
2976
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2977
+{
2978
+ struct iboe_prio_tc_map prio_tc_map = {};
2979
+ int prio = rt_tos2priority(tos);
2980
+ struct netdev_nested_priv priv;
2981
+
2982
+ /* If VLAN device, get it directly from the VLAN netdev */
26962983 if (is_vlan_dev(ndev))
2697
- return (vlan_dev_get_egress_qos_mask(ndev, prio) &
2698
- VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
2699
-#endif
2700
- return 0;
2984
+ return get_vlan_ndev_tc(ndev, prio);
2985
+
2986
+ prio_tc_map.input_prio = prio;
2987
+ priv.data = (void *)&prio_tc_map;
2988
+ rcu_read_lock();
2989
+ netdev_walk_all_lower_dev_rcu(ndev,
2990
+ get_lower_vlan_dev_tc,
2991
+ &priv);
2992
+ rcu_read_unlock();
2993
+ /* If map is found from lower device, use it; Otherwise
2994
+ * continue with the current netdevice to get priority to tc map.
2995
+ */
2996
+ if (prio_tc_map.found)
2997
+ return prio_tc_map.output_tc;
2998
+ else if (ndev->num_tc)
2999
+ return netdev_get_prio_tc_map(ndev, prio);
3000
+ else
3001
+ return 0;
3002
+}
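The priority fed into iboe_tos_to_sl() ultimately comes from the ToS the ULP set on the ID (or the port's default RoCE ToS). A minimal sketch, assuming the existing rdma_set_service_type() helper; the DSCP value is only an example:

#include <rdma/rdma_cm.h>

/* Hypothetical: mark traffic as DSCP CS3 (ToS 0x60) before resolving
 * the route, so the egress netdev's prio->tc map picks the SL/TC. */
static void my_set_tos(struct rdma_cm_id *id)
{
	rdma_set_service_type(id, 0x60);
}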
3003
+
3004
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
3005
+{
3006
+ struct sockaddr_in6 *addr6;
3007
+ u16 dport, sport;
3008
+ u32 hash, fl;
3009
+
3010
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
3011
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
3012
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
3013
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
3014
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
3015
+ hash = (u32)sport * 31 + dport;
3016
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
3017
+ }
3018
+
3019
+ return cpu_to_be32(fl);
27013020 }
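When the ULP did not supply an IPv6 flow label, the helper above derives a stable 20-bit label from the port pair so that ECMP-style hashing stays consistent per connection. A stand-alone restatement of that hash for illustration (the mask value mirrors IB_GRH_FLOWLABEL_MASK and is an assumption of this sketch):

#include <linux/types.h>

#define MY_FLOWLABEL_MASK 0x000fffffu	/* assumed 20-bit GRH flow label */

static u32 my_ports_to_flow_label(u16 sport, u16 dport)
{
	u32 hash = (u32)sport * 31 + dport;

	return hash & MY_FLOWLABEL_MASK;
}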
27023021
27033022 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
....@@ -2710,8 +3029,11 @@
27103029
27113030 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
27123031 rdma_start_port(id_priv->cma_dev->device)];
2713
- u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
3032
+ u8 tos;
27143033
3034
+ mutex_lock(&id_priv->qp_mutex);
3035
+ tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
3036
+ mutex_unlock(&id_priv->qp_mutex);
27153037
27163038 work = kzalloc(sizeof *work, GFP_KERNEL);
27173039 if (!work)
....@@ -2751,11 +3073,29 @@
27513073 route->path_rec->rate = iboe_get_rate(ndev);
27523074 dev_put(ndev);
27533075 route->path_rec->packet_life_time_selector = IB_SA_EQ;
2754
- route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
3076
+ /* In case ACK timeout is set, use this value to calculate
3077
+ * PacketLifeTime. As per IBTA 12.7.34,
3078
+ * local ACK timeout = (2 * PacketLifeTime + Local CA's ACK delay).
3079
+ * Assuming a negligible local ACK delay, we can use
3080
+ * PacketLifeTime = local ACK timeout/2
3081
+ * as a reasonable approximation for RoCE networks.
3082
+ */
3083
+ mutex_lock(&id_priv->qp_mutex);
3084
+ if (id_priv->timeout_set && id_priv->timeout)
3085
+ route->path_rec->packet_life_time = id_priv->timeout - 1;
3086
+ else
3087
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
3088
+ mutex_unlock(&id_priv->qp_mutex);
3089
+
27553090 if (!route->path_rec->mtu) {
27563091 ret = -EINVAL;
27573092 goto err2;
27583093 }
3094
+
3095
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
3096
+ id_priv->id.port_num))
3097
+ route->path_rec->flow_label =
3098
+ cma_get_roce_udp_flow_label(id_priv);
27593099
27603100 cma_init_resolve_route_work(work, id_priv);
27613101 queue_work(cma_wq, &work->work);
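To make the PacketLifeTime comment above concrete: both values are 5-bit exponents of 4.096 us, so rdma_set_ack_timeout(id, 14) yields a local ACK timeout of roughly 4.096 us * 2^14, about 67 ms, while the stored packet_life_time of 14 - 1 = 13 corresponds to 4.096 us * 2^13, about 33.5 ms, i.e. half the ACK timeout. That is exactly the approximation the comment derives from IBTA 12.7.34 once the local CA ACK delay is neglected.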
....@@ -2771,7 +3111,7 @@
27713111 return ret;
27723112 }
27733113
2774
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
3114
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
27753115 {
27763116 struct rdma_id_private *id_priv;
27773117 int ret;
....@@ -2780,13 +3120,13 @@
27803120 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
27813121 return -EINVAL;
27823122
2783
- atomic_inc(&id_priv->refcount);
3123
+ cma_id_get(id_priv);
27843124 if (rdma_cap_ib_sa(id->device, id->port_num))
27853125 ret = cma_resolve_ib_route(id_priv, timeout_ms);
27863126 else if (rdma_protocol_roce(id->device, id->port_num))
27873127 ret = cma_resolve_iboe_route(id_priv);
27883128 else if (rdma_protocol_iwarp(id->device, id->port_num))
2789
- ret = cma_resolve_iw_route(id_priv, timeout_ms);
3129
+ ret = cma_resolve_iw_route(id_priv);
27903130 else
27913131 ret = -ENOSYS;
27923132
....@@ -2796,7 +3136,7 @@
27963136 return 0;
27973137 err:
27983138 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2799
- cma_deref_id(id_priv);
3139
+ cma_id_put(id_priv);
28003140 return ret;
28013141 }
28023142 EXPORT_SYMBOL(rdma_resolve_route);
....@@ -2823,9 +3163,9 @@
28233163 struct cma_device *cma_dev, *cur_dev;
28243164 union ib_gid gid;
28253165 enum ib_port_state port_state;
3166
+ unsigned int p;
28263167 u16 pkey;
28273168 int ret;
2828
- u8 p;
28293169
28303170 cma_dev = NULL;
28313171 mutex_lock(&lock);
....@@ -2837,7 +3177,7 @@
28373177 if (!cma_dev)
28383178 cma_dev = cur_dev;
28393179
2840
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
3180
+ rdma_for_each_port (cur_dev->device, p) {
28413181 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
28423182 port_state == IB_PORT_ACTIVE) {
28433183 cma_dev = cur_dev;
....@@ -2870,6 +3210,7 @@
28703210 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
28713211 id_priv->id.port_num = p;
28723212 cma_attach_to_dev(id_priv, cma_dev);
3213
+ rdma_restrack_add(&id_priv->res);
28733214 cma_set_loopback(cma_src_addr(id_priv));
28743215 out:
28753216 mutex_unlock(&lock);
....@@ -2898,10 +3239,11 @@
28983239 memcpy(&old_addr, addr, rdma_addr_size(addr));
28993240 memcpy(addr, src_addr, rdma_addr_size(src_addr));
29003241 if (!status && !id_priv->cma_dev) {
2901
- status = cma_acquire_dev(id_priv, NULL);
3242
+ status = cma_acquire_dev_by_src_ip(id_priv);
29023243 if (status)
29033244 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
29043245 status);
3246
+ rdma_restrack_add(&id_priv->res);
29053247 } else if (status) {
29063248 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
29073249 }
....@@ -2917,16 +3259,12 @@
29173259 } else
29183260 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
29193261
2920
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
2921
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2922
- mutex_unlock(&id_priv->handler_mutex);
2923
- cma_deref_id(id_priv);
2924
- rdma_destroy_id(&id_priv->id);
3262
+ if (cma_cm_event_handler(id_priv, &event)) {
3263
+ destroy_id_handler_unlock(id_priv);
29253264 return;
29263265 }
29273266 out:
29283267 mutex_unlock(&id_priv->handler_mutex);
2929
- cma_deref_id(id_priv);
29303268 }
29313269
29323270 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
....@@ -2948,8 +3286,7 @@
29483286 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
29493287 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
29503288
2951
- cma_init_resolve_addr_work(work, id_priv);
2952
- queue_work(cma_wq, &work->work);
3289
+ enqueue_resolve_addr_work(work, id_priv);
29533290 return 0;
29543291 err:
29553292 kfree(work);
....@@ -2974,8 +3311,7 @@
29743311 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
29753312 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
29763313
2977
- cma_init_resolve_addr_work(work, id_priv);
2978
- queue_work(cma_wq, &work->work);
3314
+ enqueue_resolve_addr_work(work, id_priv);
29793315 return 0;
29803316 err:
29813317 kfree(work);
....@@ -2985,54 +3321,91 @@
29853321 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
29863322 const struct sockaddr *dst_addr)
29873323 {
2988
- if (!src_addr || !src_addr->sa_family) {
2989
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2990
- src_addr->sa_family = dst_addr->sa_family;
2991
- if (IS_ENABLED(CONFIG_IPV6) &&
2992
- dst_addr->sa_family == AF_INET6) {
2993
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
2994
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
2995
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
2996
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
2997
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
2998
- } else if (dst_addr->sa_family == AF_IB) {
2999
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
3000
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
3324
+ struct sockaddr_storage zero_sock = {};
3325
+
3326
+ if (src_addr && src_addr->sa_family)
3327
+ return rdma_bind_addr(id, src_addr);
3328
+
3329
+ /*
3330
+ * When the src_addr is not specified, automatically supply an any addr
3331
+ */
3332
+ zero_sock.ss_family = dst_addr->sa_family;
3333
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
3334
+ struct sockaddr_in6 *src_addr6 =
3335
+ (struct sockaddr_in6 *)&zero_sock;
3336
+ struct sockaddr_in6 *dst_addr6 =
3337
+ (struct sockaddr_in6 *)dst_addr;
3338
+
3339
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
3340
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
3341
+ id->route.addr.dev_addr.bound_dev_if =
3342
+ dst_addr6->sin6_scope_id;
3343
+ } else if (dst_addr->sa_family == AF_IB) {
3344
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
3345
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
3346
+ }
3347
+ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
3348
+}
3349
+
3350
+/*
3351
+ * If required, resolve the source address for bind and leave the id_priv in
3352
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
3353
+ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
3354
+ * ignored.
3355
+ */
3356
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
3357
+ struct sockaddr *src_addr,
3358
+ const struct sockaddr *dst_addr)
3359
+{
3360
+ int ret;
3361
+
3362
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3363
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
3364
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
3365
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
3366
+ if (ret)
3367
+ goto err_dst;
3368
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
3369
+ RDMA_CM_ADDR_QUERY))) {
3370
+ ret = -EINVAL;
3371
+ goto err_dst;
30013372 }
30023373 }
3003
- return rdma_bind_addr(id, src_addr);
3374
+
3375
+ if (cma_family(id_priv) != dst_addr->sa_family) {
3376
+ ret = -EINVAL;
3377
+ goto err_state;
3378
+ }
3379
+ return 0;
3380
+
3381
+err_state:
3382
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3383
+err_dst:
3384
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
3385
+ return ret;
30043386 }
30053387
30063388 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3007
- const struct sockaddr *dst_addr, int timeout_ms)
3389
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
30083390 {
3009
- struct rdma_id_private *id_priv;
3391
+ struct rdma_id_private *id_priv =
3392
+ container_of(id, struct rdma_id_private, id);
30103393 int ret;
30113394
3012
- id_priv = container_of(id, struct rdma_id_private, id);
3013
- if (id_priv->state == RDMA_CM_IDLE) {
3014
- ret = cma_bind_addr(id, src_addr, dst_addr);
3015
- if (ret)
3016
- return ret;
3017
- }
3395
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
3396
+ if (ret)
3397
+ return ret;
30183398
3019
- if (cma_family(id_priv) != dst_addr->sa_family)
3020
- return -EINVAL;
3021
-
3022
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
3023
- return -EINVAL;
3024
-
3025
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3026
- atomic_inc(&id_priv->refcount);
30273399 if (cma_any_addr(dst_addr)) {
30283400 ret = cma_resolve_loopback(id_priv);
30293401 } else {
30303402 if (dst_addr->sa_family == AF_IB) {
30313403 ret = cma_resolve_ib_addr(id_priv);
30323404 } else {
3033
- ret = rdma_resolve_ip(cma_src_addr(id_priv),
3034
- dst_addr, &id->route.addr.dev_addr,
3035
- timeout_ms, addr_handler, id_priv);
3405
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
3406
+ &id->route.addr.dev_addr,
3407
+ timeout_ms, addr_handler,
3408
+ false, id_priv);
30363409 }
30373410 }
30383411 if (ret)
....@@ -3041,7 +3414,6 @@
30413414 return 0;
30423415 err:
30433416 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3044
- cma_deref_id(id_priv);
30453417 return ret;
30463418 }
30473419 EXPORT_SYMBOL(rdma_resolve_addr);
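A hedged active-side sketch tying the updated entry points together (helper and handler names are hypothetical; the timeout argument is now plain milliseconds as an unsigned long):

#include <linux/err.h>
#include <net/net_namespace.h>
#include <rdma/rdma_cm.h>

static struct rdma_cm_id *my_start_client(struct sockaddr *dst)
{
	struct rdma_cm_id *id;
	int ret;

	id = rdma_create_id(&init_net, my_cm_handler, NULL, RDMA_PS_TCP,
			    IB_QPT_RC);
	if (IS_ERR(id))
		return id;

	/* NULL src: resolve_prepare_src() binds to the any-address of
	 * dst's family before kicking off address resolution. */
	ret = rdma_resolve_addr(id, NULL, dst, 2000);
	if (ret) {
		rdma_destroy_id(id);
		return ERR_PTR(ret);
	}
	return id;
}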
....@@ -3054,7 +3426,8 @@
30543426
30553427 id_priv = container_of(id, struct rdma_id_private, id);
30563428 spin_lock_irqsave(&id_priv->lock, flags);
3057
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
3429
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
3430
+ id_priv->state == RDMA_CM_IDLE) {
30583431 id_priv->reuseaddr = reuse;
30593432 ret = 0;
30603433 } else {
....@@ -3135,7 +3508,7 @@
31353508 goto err;
31363509
31373510 bind_list->ps = ps;
3138
- bind_list->port = (unsigned short)ret;
3511
+ bind_list->port = snum;
31393512 cma_bind_port(bind_list, id_priv);
31403513 return 0;
31413514 err:
....@@ -3248,8 +3621,7 @@
32483621 if (id_priv == cur_id)
32493622 continue;
32503623
3251
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
3252
- cur_id->reuseaddr)
3624
+ if (reuseaddr && cur_id->reuseaddr)
32533625 continue;
32543626
32553627 cur_addr = cma_src_addr(cur_id);
....@@ -3287,18 +3659,6 @@
32873659 if (!ret)
32883660 cma_bind_port(bind_list, id_priv);
32893661 }
3290
- return ret;
3291
-}
3292
-
3293
-static int cma_bind_listen(struct rdma_id_private *id_priv)
3294
-{
3295
- struct rdma_bind_list *bind_list = id_priv->bind_list;
3296
- int ret = 0;
3297
-
3298
- mutex_lock(&lock);
3299
- if (bind_list->owners.first->next)
3300
- ret = cma_check_port(bind_list, id_priv, 0);
3301
- mutex_unlock(&lock);
33023662 return ret;
33033663 }
33043664
....@@ -3395,28 +3755,41 @@
33953755
33963756 int rdma_listen(struct rdma_cm_id *id, int backlog)
33973757 {
3398
- struct rdma_id_private *id_priv;
3758
+ struct rdma_id_private *id_priv =
3759
+ container_of(id, struct rdma_id_private, id);
33993760 int ret;
34003761
3401
- id_priv = container_of(id, struct rdma_id_private, id);
3402
- if (id_priv->state == RDMA_CM_IDLE) {
3403
- id->route.addr.src_addr.ss_family = AF_INET;
3404
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3762
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
3763
+ struct sockaddr_in any_in = {
3764
+ .sin_family = AF_INET,
3765
+ .sin_addr.s_addr = htonl(INADDR_ANY),
3766
+ };
3767
+
3768
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
3769
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
34053770 if (ret)
34063771 return ret;
3772
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
3773
+ RDMA_CM_LISTEN)))
3774
+ return -EINVAL;
34073775 }
34083776
3409
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3410
- return -EINVAL;
3411
-
3777
+ /*
3778
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
3779
+ * any more, and has to be unique in the bind list.
3780
+ */
34123781 if (id_priv->reuseaddr) {
3413
- ret = cma_bind_listen(id_priv);
3782
+ mutex_lock(&lock);
3783
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
3784
+ if (!ret)
3785
+ id_priv->reuseaddr = 0;
3786
+ mutex_unlock(&lock);
34143787 if (ret)
34153788 goto err;
34163789 }
34173790
34183791 id_priv->backlog = backlog;
3419
- if (id->device) {
3792
+ if (id_priv->cma_dev) {
34203793 if (rdma_cap_ib_cm(id->device, 1)) {
34213794 ret = cma_ib_listen(id_priv);
34223795 if (ret)
....@@ -3429,12 +3802,19 @@
34293802 ret = -ENOSYS;
34303803 goto err;
34313804 }
3432
- } else
3433
- cma_listen_on_all(id_priv);
3805
+ } else {
3806
+ ret = cma_listen_on_all(id_priv);
3807
+ if (ret)
3808
+ goto err;
3809
+ }
34343810
34353811 return 0;
34363812 err:
34373813 id_priv->backlog = 0;
3814
+ /*
3815
+ * All the failure paths that lead here will not allow the req_handler's
3816
+ * to have run.
3817
+ */
34383818 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
34393819 return ret;
34403820 }
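And the matching passive side, as a sketch (handler name hypothetical). rdma_listen() now performs the implicit IPv4 any-address bind itself when the ID is still idle, so the explicit rdma_bind_addr() below is only needed to pick a specific port or family:

#include <linux/err.h>
#include <linux/in.h>
#include <net/net_namespace.h>
#include <rdma/rdma_cm.h>

static struct rdma_cm_id *my_start_listener(__be16 port)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = port,
	};
	struct rdma_cm_id *id;
	int ret;

	id = rdma_create_id(&init_net, my_server_handler, NULL, RDMA_PS_TCP,
			    IB_QPT_RC);
	if (IS_ERR(id))
		return id;

	ret = rdma_bind_addr(id, (struct sockaddr *)&sin);
	if (!ret)
		ret = rdma_listen(id, 16);
	if (ret) {
		rdma_destroy_id(id);
		return ERR_PTR(ret);
	}
	return id;
}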
....@@ -3464,7 +3844,7 @@
34643844 if (ret)
34653845 goto err1;
34663846
3467
- ret = cma_acquire_dev(id_priv, NULL);
3847
+ ret = cma_acquire_dev_by_src_ip(id_priv);
34683848 if (ret)
34693849 goto err1;
34703850 }
....@@ -3487,9 +3867,10 @@
34873867 if (ret)
34883868 goto err2;
34893869
3870
+ if (!cma_any_addr(addr))
3871
+ rdma_restrack_add(&id_priv->res);
34903872 return 0;
34913873 err2:
3492
- rdma_restrack_del(&id_priv->res);
34933874 if (id_priv->cma_dev)
34943875 cma_release_dev(id_priv);
34953876 err1:
....@@ -3535,10 +3916,10 @@
35353916 struct rdma_cm_event event = {};
35363917 const struct ib_cm_sidr_rep_event_param *rep =
35373918 &ib_event->param.sidr_rep_rcvd;
3538
- int ret = 0;
3919
+ int ret;
35393920
35403921 mutex_lock(&id_priv->handler_mutex);
3541
- if (id_priv->state != RDMA_CM_CONNECT)
3922
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
35423923 goto out;
35433924
35443925 switch (ib_event->event) {
....@@ -3579,20 +3960,18 @@
35793960 goto out;
35803961 }
35813962
3582
- ret = id_priv->id.event_handler(&id_priv->id, &event);
3963
+ ret = cma_cm_event_handler(id_priv, &event);
35833964
35843965 rdma_destroy_ah_attr(&event.param.ud.ah_attr);
35853966 if (ret) {
35863967 /* Destroy the CM ID by returning a non-zero value. */
35873968 id_priv->cm_id.ib = NULL;
3588
- cma_exch(id_priv, RDMA_CM_DESTROYING);
3589
- mutex_unlock(&id_priv->handler_mutex);
3590
- rdma_destroy_id(&id_priv->id);
3969
+ destroy_id_handler_unlock(id_priv);
35913970 return ret;
35923971 }
35933972 out:
35943973 mutex_unlock(&id_priv->handler_mutex);
3595
- return ret;
3974
+ return 0;
35963975 }
35973976
35983977 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
....@@ -3643,6 +4022,7 @@
36434022 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
36444023 req.max_cm_retries = CMA_MAX_CM_RETRIES;
36454024
4025
+ trace_cm_send_sidr_req(id_priv);
36464026 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
36474027 if (ret) {
36484028 ib_destroy_cm_id(id_priv->cm_id.ib);
....@@ -3715,7 +4095,10 @@
37154095 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
37164096 req.max_cm_retries = CMA_MAX_CM_RETRIES;
37174097 req.srq = id_priv->srq ? 1 : 0;
4098
+ req.ece.vendor_id = id_priv->ece.vendor_id;
4099
+ req.ece.attr_mod = id_priv->ece.attr_mod;
37184100
4101
+ trace_cm_send_req(id_priv);
37194102 ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
37204103 out:
37214104 if (ret && !IS_ERR(id)) {
....@@ -3738,7 +4121,11 @@
37384121 if (IS_ERR(cm_id))
37394122 return PTR_ERR(cm_id);
37404123
4124
+ mutex_lock(&id_priv->qp_mutex);
37414125 cm_id->tos = id_priv->tos;
4126
+ cm_id->tos_set = id_priv->tos_set;
4127
+ mutex_unlock(&id_priv->qp_mutex);
4128
+
37424129 id_priv->cm_id.iw = cm_id;
37434130
37444131 memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
....@@ -3769,12 +4156,21 @@
37694156 return ret;
37704157 }
37714158
3772
-int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
4159
+/**
4160
+ * rdma_connect_locked - Initiate an active connection request.
4161
+ * @id: Connection identifier to connect.
4162
+ * @conn_param: Connection information used for connected QPs.
4163
+ *
4164
+ * Same as rdma_connect() but can only be called from the
4165
+ * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback.
4166
+ */
4167
+int rdma_connect_locked(struct rdma_cm_id *id,
4168
+ struct rdma_conn_param *conn_param)
37734169 {
3774
- struct rdma_id_private *id_priv;
4170
+ struct rdma_id_private *id_priv =
4171
+ container_of(id, struct rdma_id_private, id);
37754172 int ret;
37764173
3777
- id_priv = container_of(id, struct rdma_id_private, id);
37784174 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
37794175 return -EINVAL;
37804176
....@@ -3793,14 +4189,59 @@
37934189 else
37944190 ret = -ENOSYS;
37954191 if (ret)
3796
- goto err;
3797
-
4192
+ goto err_state;
37984193 return 0;
3799
-err:
4194
+err_state:
38004195 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
38014196 return ret;
38024197 }
4198
+EXPORT_SYMBOL(rdma_connect_locked);
4199
+
4200
+/**
4201
+ * rdma_connect - Initiate an active connection request.
4202
+ * @id: Connection identifier to connect.
4203
+ * @conn_param: Connection information used for connected QPs.
4204
+ *
4205
+ * Users must have resolved a route for the rdma_cm_id to connect with by having
4206
+ * called rdma_resolve_route before calling this routine.
4207
+ *
4208
+ * This call will either connect to a remote QP or obtain remote QP information
4209
+ * for unconnected rdma_cm_id's. The actual operation is based on the
4210
+ * rdma_cm_id's port space.
4211
+ */
4212
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
4213
+{
4214
+ struct rdma_id_private *id_priv =
4215
+ container_of(id, struct rdma_id_private, id);
4216
+ int ret;
4217
+
4218
+ mutex_lock(&id_priv->handler_mutex);
4219
+ ret = rdma_connect_locked(id, conn_param);
4220
+ mutex_unlock(&id_priv->handler_mutex);
4221
+ return ret;
4222
+}
38034223 EXPORT_SYMBOL(rdma_connect);
4224
+
4225
+/**
4226
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
4227
+ * @id: Connection identifier to connect.
4228
+ * @conn_param: Connection information used for connected QPs.
4229
+ * @ece: ECE parameters
4230
+ *
4231
+ * See rdma_connect() explanation.
4232
+ */
4233
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
4234
+ struct rdma_ucm_ece *ece)
4235
+{
4236
+ struct rdma_id_private *id_priv =
4237
+ container_of(id, struct rdma_id_private, id);
4238
+
4239
+ id_priv->ece.vendor_id = ece->vendor_id;
4240
+ id_priv->ece.attr_mod = ece->attr_mod;
4241
+
4242
+ return rdma_connect(id, conn_param);
4243
+}
4244
+EXPORT_SYMBOL(rdma_connect_ece);
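A fragment showing where each variant applies (handler name hypothetical): on RDMA_CM_EVENT_ROUTE_RESOLVED the handler_mutex is already held, so the _locked variant is the one to call; plain rdma_connect() is for process context outside the callback.

#include <rdma/rdma_cm.h>

static int my_on_route_resolved(struct rdma_cm_id *id)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth = 1,
		.retry_count = 7,
		.rnr_retry_count = 7,
	};

	return rdma_connect_locked(id, &param);
}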
38044245
38054246 static int cma_accept_ib(struct rdma_id_private *id_priv,
38064247 struct rdma_conn_param *conn_param)
....@@ -3827,7 +4268,10 @@
38274268 rep.flow_control = conn_param->flow_control;
38284269 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
38294270 rep.srq = id_priv->srq ? 1 : 0;
4271
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
4272
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
38304273
4274
+ trace_cm_send_rep(id_priv);
38314275 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
38324276 out:
38334277 return ret;
....@@ -3873,27 +4317,45 @@
38734317 return ret;
38744318 rep.qp_num = id_priv->qp_num;
38754319 rep.qkey = id_priv->qkey;
4320
+
4321
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
4322
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
38764323 }
4324
+
38774325 rep.private_data = private_data;
38784326 rep.private_data_len = private_data_len;
38794327
4328
+ trace_cm_send_sidr_rep(id_priv);
38804329 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
38814330 }
38824331
3883
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
3884
- const char *caller)
4332
+/**
4333
+ * rdma_accept - Called to accept a connection request or response.
4334
+ * @id: Connection identifier associated with the request.
4335
+ * @conn_param: Information needed to establish the connection. This must be
4336
+ * provided if accepting a connection request. If accepting a connection
4337
+ * response, this parameter must be NULL.
4338
+ *
4339
+ * Typically, this routine is only called by the listener to accept a connection
4340
+ * request. It must also be called on the active side of a connection if the
4341
+ * user is performing their own QP transitions.
4342
+ *
4343
+ * In the case of error, a reject message is sent to the remote side and the
4344
+ * state of the qp associated with the id is modified to error, such that any
4345
+ * previously posted receive buffers would be flushed.
4346
+ *
4347
+ * This function is for use by kernel ULPs and must be called from under the
4348
+ * handler callback.
4349
+ */
4350
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
38854351 {
3886
- struct rdma_id_private *id_priv;
4352
+ struct rdma_id_private *id_priv =
4353
+ container_of(id, struct rdma_id_private, id);
38874354 int ret;
38884355
3889
- id_priv = container_of(id, struct rdma_id_private, id);
4356
+ lockdep_assert_held(&id_priv->handler_mutex);
38904357
3891
- if (caller)
3892
- id_priv->res.kern_name = caller;
3893
- else
3894
- rdma_restrack_set_task(&id_priv->res, current);
3895
-
3896
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
4358
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
38974359 return -EINVAL;
38984360
38994361 if (!id->qp && conn_param) {
....@@ -3928,10 +4390,41 @@
39284390 return 0;
39294391 reject:
39304392 cma_modify_qp_err(id_priv);
3931
- rdma_reject(id, NULL, 0);
4393
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
39324394 return ret;
39334395 }
3934
-EXPORT_SYMBOL(__rdma_accept);
4396
+EXPORT_SYMBOL(rdma_accept);
4397
+
4398
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
4399
+ struct rdma_ucm_ece *ece)
4400
+{
4401
+ struct rdma_id_private *id_priv =
4402
+ container_of(id, struct rdma_id_private, id);
4403
+
4404
+ id_priv->ece.vendor_id = ece->vendor_id;
4405
+ id_priv->ece.attr_mod = ece->attr_mod;
4406
+
4407
+ return rdma_accept(id, conn_param);
4408
+}
4409
+EXPORT_SYMBOL(rdma_accept_ece);
4410
+
4411
+void rdma_lock_handler(struct rdma_cm_id *id)
4412
+{
4413
+ struct rdma_id_private *id_priv =
4414
+ container_of(id, struct rdma_id_private, id);
4415
+
4416
+ mutex_lock(&id_priv->handler_mutex);
4417
+}
4418
+EXPORT_SYMBOL(rdma_lock_handler);
4419
+
4420
+void rdma_unlock_handler(struct rdma_cm_id *id)
4421
+{
4422
+ struct rdma_id_private *id_priv =
4423
+ container_of(id, struct rdma_id_private, id);
4424
+
4425
+ mutex_unlock(&id_priv->handler_mutex);
4426
+}
4427
+EXPORT_SYMBOL(rdma_unlock_handler);
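rdma_accept() now asserts that the handler_mutex is held, so a kernel ULP that accepts outside of the CONNECT_REQUEST callback brackets the call with the new helpers. A minimal sketch (helper name hypothetical):

#include <rdma/rdma_cm.h>

static int my_deferred_accept(struct rdma_cm_id *id,
			      struct rdma_conn_param *param)
{
	int ret;

	rdma_lock_handler(id);
	ret = rdma_accept(id, param);
	rdma_unlock_handler(id);
	return ret;
}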
39354428
39364429 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
39374430 {
....@@ -3955,7 +4448,7 @@
39554448 EXPORT_SYMBOL(rdma_notify);
39564449
39574450 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
3958
- u8 private_data_len)
4451
+ u8 private_data_len, u8 reason)
39594452 {
39604453 struct rdma_id_private *id_priv;
39614454 int ret;
....@@ -3965,13 +4458,14 @@
39654458 return -EINVAL;
39664459
39674460 if (rdma_cap_ib_cm(id->device, id->port_num)) {
3968
- if (id->qp_type == IB_QPT_UD)
4461
+ if (id->qp_type == IB_QPT_UD) {
39694462 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
39704463 private_data, private_data_len);
3971
- else
3972
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
3973
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
3974
- 0, private_data, private_data_len);
4464
+ } else {
4465
+ trace_cm_send_rej(id_priv);
4466
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
4467
+ private_data, private_data_len);
4468
+ }
39754469 } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
39764470 ret = iw_cm_reject(id_priv->cm_id.iw,
39774471 private_data, private_data_len);
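With the extra reason argument the caller now chooses the IB CM reject code; passing IB_CM_REJ_CONSUMER_DEFINED preserves the old behaviour. A sketch of a listener refusing a request with a short consumer payload (names hypothetical):

#include <rdma/ib_cm.h>
#include <rdma/rdma_cm.h>

static int my_reject_request(struct rdma_cm_id *req_id)
{
	static const char busy_msg[] = "busy";

	return rdma_reject(req_id, busy_msg, sizeof(busy_msg),
			   IB_CM_REJ_CONSUMER_DEFINED);
}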
....@@ -3996,8 +4490,13 @@
39964490 if (ret)
39974491 goto out;
39984492 /* Initiate or respond to a disconnect. */
3999
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
4000
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
4493
+ trace_cm_disconnect(id_priv);
4494
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
4495
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
4496
+ trace_cm_sent_drep(id_priv);
4497
+ } else {
4498
+ trace_cm_sent_dreq(id_priv);
4499
+ }
40014500 } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
40024501 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
40034502 } else
....@@ -4008,60 +4507,68 @@
40084507 }
40094508 EXPORT_SYMBOL(rdma_disconnect);
40104509
4011
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
4510
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
4511
+ struct ib_sa_multicast *multicast,
4512
+ struct rdma_cm_event *event,
4513
+ struct cma_multicast *mc)
40124514 {
4013
- struct rdma_id_private *id_priv;
4014
- struct cma_multicast *mc = multicast->context;
4015
- struct rdma_cm_event event = {};
4016
- int ret = 0;
4017
-
4018
- id_priv = mc->id_priv;
4019
- mutex_lock(&id_priv->handler_mutex);
4020
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
4021
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
4022
- goto out;
4515
+ struct rdma_dev_addr *dev_addr;
4516
+ enum ib_gid_type gid_type;
4517
+ struct net_device *ndev;
40234518
40244519 if (!status)
40254520 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
40264521 else
40274522 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
40284523 status);
4029
- event.status = status;
4030
- event.param.ud.private_data = mc->context;
4031
- if (!status) {
4032
- struct rdma_dev_addr *dev_addr =
4033
- &id_priv->id.route.addr.dev_addr;
4034
- struct net_device *ndev =
4035
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4036
- enum ib_gid_type gid_type =
4037
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4038
- rdma_start_port(id_priv->cma_dev->device)];
40394524
4040
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
4041
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
4042
- id_priv->id.port_num,
4043
- &multicast->rec,
4044
- ndev, gid_type,
4045
- &event.param.ud.ah_attr);
4046
- if (ret)
4047
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4048
-
4049
- event.param.ud.qp_num = 0xFFFFFF;
4050
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
4051
- if (ndev)
4052
- dev_put(ndev);
4053
- } else
4054
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4055
-
4056
- ret = id_priv->id.event_handler(&id_priv->id, &event);
4057
-
4058
- rdma_destroy_ah_attr(&event.param.ud.ah_attr);
4059
- if (ret) {
4060
- cma_exch(id_priv, RDMA_CM_DESTROYING);
4061
- mutex_unlock(&id_priv->handler_mutex);
4062
- rdma_destroy_id(&id_priv->id);
4063
- return 0;
4525
+ event->status = status;
4526
+ event->param.ud.private_data = mc->context;
4527
+ if (status) {
4528
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
4529
+ return;
40644530 }
4531
+
4532
+ dev_addr = &id_priv->id.route.addr.dev_addr;
4533
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4534
+ gid_type =
4535
+ id_priv->cma_dev
4536
+ ->default_gid_type[id_priv->id.port_num -
4537
+ rdma_start_port(
4538
+ id_priv->cma_dev->device)];
4539
+
4540
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
4541
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
4542
+ &multicast->rec, ndev, gid_type,
4543
+ &event->param.ud.ah_attr)) {
4544
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
4545
+ goto out;
4546
+ }
4547
+
4548
+ event->param.ud.qp_num = 0xFFFFFF;
4549
+ event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
4550
+
4551
+out:
4552
+ if (ndev)
4553
+ dev_put(ndev);
4554
+}
4555
+
4556
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
4557
+{
4558
+ struct cma_multicast *mc = multicast->context;
4559
+ struct rdma_id_private *id_priv = mc->id_priv;
4560
+ struct rdma_cm_event event = {};
4561
+ int ret = 0;
4562
+
4563
+ mutex_lock(&id_priv->handler_mutex);
4564
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
4565
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
4566
+ goto out;
4567
+
4568
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
4569
+ ret = cma_cm_event_handler(id_priv, &event);
4570
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
4571
+ WARN_ON(ret);
40654572
40664573 out:
40674574 mutex_unlock(&id_priv->handler_mutex);
....@@ -4126,9 +4633,10 @@
41264633 (!ib_sa_sendonly_fullmem_support(&sa_client,
41274634 id_priv->id.device,
41284635 id_priv->id.port_num))) {
4129
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
4130
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
4131
- id_priv->id.device->name, id_priv->id.port_num);
4636
+ dev_warn(
4637
+ &id_priv->id.device->dev,
4638
+ "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
4639
+ id_priv->id.port_num);
41324640 return -EOPNOTSUPP;
41334641 }
41344642
....@@ -4145,23 +4653,10 @@
41454653 IB_SA_MCMEMBER_REC_MTU |
41464654 IB_SA_MCMEMBER_REC_HOP_LIMIT;
41474655
4148
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4149
- id_priv->id.port_num, &rec,
4150
- comp_mask, GFP_KERNEL,
4151
- cma_ib_mc_handler, mc);
4152
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
4153
-}
4154
-
4155
-static void iboe_mcast_work_handler(struct work_struct *work)
4156
-{
4157
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
4158
- struct cma_multicast *mc = mw->mc;
4159
- struct ib_sa_multicast *m = mc->multicast.ib;
4160
-
4161
- mc->multicast.ib->context = mc;
4162
- cma_ib_mc_handler(0, m);
4163
- kref_put(&mc->mcref, release_mc);
4164
- kfree(mw);
4656
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4657
+ id_priv->id.port_num, &rec, comp_mask,
4658
+ GFP_KERNEL, cma_ib_mc_handler, mc);
4659
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
41654660 }
41664661
41674662 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
....@@ -4196,52 +4691,41 @@
41964691 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
41974692 struct cma_multicast *mc)
41984693 {
4199
- struct iboe_mcast_work *work;
42004694 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
42014695 int err = 0;
42024696 struct sockaddr *addr = (struct sockaddr *)&mc->addr;
42034697 struct net_device *ndev = NULL;
4698
+ struct ib_sa_multicast ib;
42044699 enum ib_gid_type gid_type;
42054700 bool send_only;
42064701
42074702 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
42084703
4209
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
4704
+ if (cma_zero_addr(addr))
42104705 return -EINVAL;
4211
-
4212
- work = kzalloc(sizeof *work, GFP_KERNEL);
4213
- if (!work)
4214
- return -ENOMEM;
4215
-
4216
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
4217
- if (!mc->multicast.ib) {
4218
- err = -ENOMEM;
4219
- goto out1;
4220
- }
42214706
42224707 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
42234708 rdma_start_port(id_priv->cma_dev->device)];
4224
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
4709
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
42254710
4226
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
4711
+ ib.rec.pkey = cpu_to_be16(0xffff);
42274712 if (id_priv->id.ps == RDMA_PS_UDP)
4228
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
4713
+ ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
42294714
42304715 if (dev_addr->bound_dev_if)
42314716 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4232
- if (!ndev) {
4233
- err = -ENODEV;
4234
- goto out2;
4235
- }
4236
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
4237
- mc->multicast.ib->rec.hop_limit = 1;
4238
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
4717
+ if (!ndev)
4718
+ return -ENODEV;
4719
+
4720
+ ib.rec.rate = iboe_get_rate(ndev);
4721
+ ib.rec.hop_limit = 1;
4722
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
42394723
42404724 if (addr->sa_family == AF_INET) {
42414725 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
4242
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
4726
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
42434727 if (!send_only) {
4244
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
4728
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
42454729 true);
42464730 }
42474731 }
....@@ -4250,32 +4734,22 @@
42504734 err = -ENOTSUPP;
42514735 }
42524736 dev_put(ndev);
4253
- if (err || !mc->multicast.ib->rec.mtu) {
4254
- if (!err)
4255
- err = -EINVAL;
4256
- goto out2;
4257
- }
4737
+ if (err || !ib.rec.mtu)
4738
+ return err ?: -EINVAL;
4739
+
42584740 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
4259
- &mc->multicast.ib->rec.port_gid);
4260
- work->id = id_priv;
4261
- work->mc = mc;
4262
- INIT_WORK(&work->work, iboe_mcast_work_handler);
4263
- kref_get(&mc->mcref);
4264
- queue_work(cma_wq, &work->work);
4265
-
4741
+ &ib.rec.port_gid);
4742
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
4743
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
4744
+ queue_work(cma_wq, &mc->iboe_join.work);
42664745 return 0;
4267
-
4268
-out2:
4269
- kfree(mc->multicast.ib);
4270
-out1:
4271
- kfree(work);
4272
- return err;
42734746 }
42744747
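For reference, the caller-facing entry point that funnels into cma_iboe_join_multicast() on RoCE is rdma_join_multicast(), shown next. A hedged sketch of a plain full-member join on a UD ID (helper name hypothetical; the join-state enum is assumed to come from rdma/ib_sa.h):

#include <linux/bits.h>
#include <rdma/ib_sa.h>
#include <rdma/rdma_cm.h>

static int my_join_group(struct rdma_cm_id *ud_id, struct sockaddr *mgrp,
			 void *ctx)
{
	return rdma_join_multicast(ud_id, mgrp, BIT(FULLMEMBER_JOIN), ctx);
}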
42754748 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
42764749 u8 join_state, void *context)
42774750 {
4278
- struct rdma_id_private *id_priv;
4751
+ struct rdma_id_private *id_priv =
4752
+ container_of(id, struct rdma_id_private, id);
42794753 struct cma_multicast *mc;
42804754 int ret;
42814755
....@@ -4283,15 +4757,12 @@
42834757 if (WARN_ON(id->qp))
42844758 return -EINVAL;
42854759
4286
- if (!id->device)
4760
+ /* ULP is calling this wrong. */
4761
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
4762
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
42874763 return -EINVAL;
42884764
4289
- id_priv = container_of(id, struct rdma_id_private, id);
4290
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
4291
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
4292
- return -EINVAL;
4293
-
4294
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
4765
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
42954766 if (!mc)
42964767 return -ENOMEM;
42974768
....@@ -4301,7 +4772,6 @@
43014772 mc->join_state = join_state;
43024773
43034774 if (rdma_protocol_roce(id->device, id->port_num)) {
4304
- kref_init(&mc->mcref);
43054775 ret = cma_iboe_join_multicast(id_priv, mc);
43064776 if (ret)
43074777 goto out_err;
....@@ -4349,7 +4819,7 @@
43494819 static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
43504820 {
43514821 struct rdma_dev_addr *dev_addr;
4352
- struct cma_ndev_work *work;
4822
+ struct cma_work *work;
43534823
43544824 dev_addr = &id_priv->id.route.addr.dev_addr;
43554825
....@@ -4362,10 +4832,10 @@
43624832 if (!work)
43634833 return -ENOMEM;
43644834
4365
- INIT_WORK(&work->work, cma_ndev_work_handler);
4835
+ INIT_WORK(&work->work, cma_work_handler);
43664836 work->id = id_priv;
43674837 work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
4368
- atomic_inc(&id_priv->refcount);
4838
+ cma_id_get(id_priv);
43694839 queue_work(cma_wq, &work->work);
43704840 }
43714841
....@@ -4403,31 +4873,99 @@
44034873 .notifier_call = cma_netdev_callback
44044874 };
44054875
4406
-static void cma_add_one(struct ib_device *device)
4876
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
44074877 {
4878
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
4879
+ enum rdma_cm_state state;
4880
+ unsigned long flags;
4881
+
4882
+ mutex_lock(&id_priv->handler_mutex);
4883
+ /* Record that we want to remove the device */
4884
+ spin_lock_irqsave(&id_priv->lock, flags);
4885
+ state = id_priv->state;
4886
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
4887
+ spin_unlock_irqrestore(&id_priv->lock, flags);
4888
+ mutex_unlock(&id_priv->handler_mutex);
4889
+ cma_id_put(id_priv);
4890
+ return;
4891
+ }
4892
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
4893
+ spin_unlock_irqrestore(&id_priv->lock, flags);
4894
+
4895
+ if (cma_cm_event_handler(id_priv, &event)) {
4896
+ /*
4897
+ * At this point the ULP promises it won't call
4898
+ * rdma_destroy_id() concurrently
4899
+ */
4900
+ cma_id_put(id_priv);
4901
+ mutex_unlock(&id_priv->handler_mutex);
4902
+ trace_cm_id_destroy(id_priv);
4903
+ _destroy_id(id_priv, state);
4904
+ return;
4905
+ }
4906
+ mutex_unlock(&id_priv->handler_mutex);
4907
+
4908
+ /*
4909
+ * If this races with destroy then the thread that first assigns state
4910
+ * to a destroying does the cancel.
4911
+ */
4912
+ cma_cancel_operation(id_priv, state);
4913
+ cma_id_put(id_priv);
4914
+}
4915
+
4916
+static void cma_process_remove(struct cma_device *cma_dev)
4917
+{
4918
+ mutex_lock(&lock);
4919
+ while (!list_empty(&cma_dev->id_list)) {
4920
+ struct rdma_id_private *id_priv = list_first_entry(
4921
+ &cma_dev->id_list, struct rdma_id_private, list);
4922
+
4923
+ list_del(&id_priv->listen_list);
4924
+ list_del_init(&id_priv->list);
4925
+ cma_id_get(id_priv);
4926
+ mutex_unlock(&lock);
4927
+
4928
+ cma_send_device_removal_put(id_priv);
4929
+
4930
+ mutex_lock(&lock);
4931
+ }
4932
+ mutex_unlock(&lock);
4933
+
4934
+ cma_dev_put(cma_dev);
4935
+ wait_for_completion(&cma_dev->comp);
4936
+}
4937
+
4938
+static int cma_add_one(struct ib_device *device)
4939
+{
4940
+ struct rdma_id_private *to_destroy;
44084941 struct cma_device *cma_dev;
44094942 struct rdma_id_private *id_priv;
44104943 unsigned int i;
44114944 unsigned long supported_gids = 0;
4945
+ int ret;
44124946
4413
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
4947
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
44144948 if (!cma_dev)
4415
- return;
4949
+ return -ENOMEM;
44164950
44174951 cma_dev->device = device;
44184952 cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
44194953 sizeof(*cma_dev->default_gid_type),
44204954 GFP_KERNEL);
4421
- if (!cma_dev->default_gid_type)
4955
+ if (!cma_dev->default_gid_type) {
4956
+ ret = -ENOMEM;
44224957 goto free_cma_dev;
4958
+ }
44234959
44244960 cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
44254961 sizeof(*cma_dev->default_roce_tos),
44264962 GFP_KERNEL);
4427
- if (!cma_dev->default_roce_tos)
4963
+ if (!cma_dev->default_roce_tos) {
4964
+ ret = -ENOMEM;
44284965 goto free_gid_type;
4966
+ }
44294967
4430
- for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4968
+ rdma_for_each_port (device, i) {
44314969 supported_gids = roce_gid_type_mask_support(device, i);
44324970 WARN_ON(!supported_gids);
44334971 if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
....@@ -4440,86 +4978,42 @@
44404978 }
44414979
44424980 init_completion(&cma_dev->comp);
4443
- atomic_set(&cma_dev->refcount, 1);
4981
+ refcount_set(&cma_dev->refcount, 1);
44444982 INIT_LIST_HEAD(&cma_dev->id_list);
44454983 ib_set_client_data(device, &cma_client, cma_dev);
44464984
44474985 mutex_lock(&lock);
44484986 list_add_tail(&cma_dev->list, &dev_list);
4449
- list_for_each_entry(id_priv, &listen_any_list, list)
4450
- cma_listen_on_dev(id_priv, cma_dev);
4987
+ list_for_each_entry(id_priv, &listen_any_list, list) {
4988
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
4989
+ if (ret)
4990
+ goto free_listen;
4991
+ }
44514992 mutex_unlock(&lock);
44524993
4453
- return;
4994
+ trace_cm_add_one(device);
4995
+ return 0;
44544996
4997
+free_listen:
4998
+ list_del(&cma_dev->list);
4999
+ mutex_unlock(&lock);
5000
+
5001
+ /* cma_process_remove() will delete to_destroy */
5002
+ cma_process_remove(cma_dev);
5003
+ kfree(cma_dev->default_roce_tos);
44555004 free_gid_type:
44565005 kfree(cma_dev->default_gid_type);
44575006
44585007 free_cma_dev:
44595008 kfree(cma_dev);
4460
-
4461
- return;
4462
-}
4463
-
4464
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4465
-{
4466
- struct rdma_cm_event event = {};
4467
- enum rdma_cm_state state;
4468
- int ret = 0;
4469
-
4470
- /* Record that we want to remove the device */
4471
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4472
- if (state == RDMA_CM_DESTROYING)
4473
- return 0;
4474
-
4475
- cma_cancel_operation(id_priv, state);
4476
- mutex_lock(&id_priv->handler_mutex);
4477
-
4478
- /* Check for destruction from another callback. */
4479
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4480
- goto out;
4481
-
4482
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4483
- ret = id_priv->id.event_handler(&id_priv->id, &event);
4484
-out:
4485
- mutex_unlock(&id_priv->handler_mutex);
44865009 return ret;
4487
-}
4488
-
4489
-static void cma_process_remove(struct cma_device *cma_dev)
4490
-{
4491
- struct rdma_id_private *id_priv;
4492
- int ret;
4493
-
4494
- mutex_lock(&lock);
4495
- while (!list_empty(&cma_dev->id_list)) {
4496
- id_priv = list_entry(cma_dev->id_list.next,
4497
- struct rdma_id_private, list);
4498
-
4499
- list_del(&id_priv->listen_list);
4500
- list_del_init(&id_priv->list);
4501
- atomic_inc(&id_priv->refcount);
4502
- mutex_unlock(&lock);
4503
-
4504
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
4505
- cma_deref_id(id_priv);
4506
- if (ret)
4507
- rdma_destroy_id(&id_priv->id);
4508
-
4509
- mutex_lock(&lock);
4510
- }
4511
- mutex_unlock(&lock);
4512
-
4513
- cma_deref_dev(cma_dev);
4514
- wait_for_completion(&cma_dev->comp);
45155010 }
45165011
45175012 static void cma_remove_one(struct ib_device *device, void *client_data)
45185013 {
45195014 struct cma_device *cma_dev = client_data;
45205015
4521
- if (!cma_dev)
4522
- return;
5016
+ trace_cm_remove_one(device);
45235017
45245018 mutex_lock(&lock);
45255019 list_del(&cma_dev->list);
....@@ -4531,93 +5025,14 @@
45315025 kfree(cma_dev);
45325026 }
45335027
4534
-static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
4535
-{
4536
- struct nlmsghdr *nlh;
4537
- struct rdma_cm_id_stats *id_stats;
4538
- struct rdma_id_private *id_priv;
4539
- struct rdma_cm_id *id = NULL;
4540
- struct cma_device *cma_dev;
4541
- int i_dev = 0, i_id = 0;
4542
-
4543
- /*
4544
- * We export all of the IDs as a sequence of messages. Each
4545
- * ID gets its own netlink message.
4546
- */
4547
- mutex_lock(&lock);
4548
-
4549
- list_for_each_entry(cma_dev, &dev_list, list) {
4550
- if (i_dev < cb->args[0]) {
4551
- i_dev++;
4552
- continue;
4553
- }
4554
-
4555
- i_id = 0;
4556
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
4557
- if (i_id < cb->args[1]) {
4558
- i_id++;
4559
- continue;
4560
- }
4561
-
4562
- id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
4563
- sizeof *id_stats, RDMA_NL_RDMA_CM,
4564
- RDMA_NL_RDMA_CM_ID_STATS,
4565
- NLM_F_MULTI);
4566
- if (!id_stats)
4567
- goto out;
4568
-
4569
- memset(id_stats, 0, sizeof *id_stats);
4570
- id = &id_priv->id;
4571
- id_stats->node_type = id->route.addr.dev_addr.dev_type;
4572
- id_stats->port_num = id->port_num;
4573
- id_stats->bound_dev_if =
4574
- id->route.addr.dev_addr.bound_dev_if;
4575
-
4576
- if (ibnl_put_attr(skb, nlh,
4577
- rdma_addr_size(cma_src_addr(id_priv)),
4578
- cma_src_addr(id_priv),
4579
- RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
4580
- goto out;
4581
- if (ibnl_put_attr(skb, nlh,
4582
- rdma_addr_size(cma_dst_addr(id_priv)),
4583
- cma_dst_addr(id_priv),
4584
- RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
4585
- goto out;
4586
-
4587
- id_stats->pid = task_pid_vnr(id_priv->res.task);
4588
- id_stats->port_space = id->ps;
4589
- id_stats->cm_state = id_priv->state;
4590
- id_stats->qp_num = id_priv->qp_num;
4591
- id_stats->qp_type = id->qp_type;
4592
-
4593
- i_id++;
4594
- nlmsg_end(skb, nlh);
4595
- }
4596
-
4597
- cb->args[1] = 0;
4598
- i_dev++;
4599
- }
4600
-
4601
-out:
4602
- mutex_unlock(&lock);
4603
- cb->args[0] = i_dev;
4604
- cb->args[1] = i_id;
4605
-
4606
- return skb->len;
4607
-}
4608
-
4609
-static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
4610
- [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
4611
-};
4612
-
46135028 static int cma_init_net(struct net *net)
46145029 {
46155030 struct cma_pernet *pernet = cma_pernet(net);
46165031
4617
- idr_init(&pernet->tcp_ps);
4618
- idr_init(&pernet->udp_ps);
4619
- idr_init(&pernet->ipoib_ps);
4620
- idr_init(&pernet->ib_ps);
5032
+ xa_init(&pernet->tcp_ps);
5033
+ xa_init(&pernet->udp_ps);
5034
+ xa_init(&pernet->ipoib_ps);
5035
+ xa_init(&pernet->ib_ps);
46215036
46225037 return 0;
46235038 }
....@@ -4626,10 +5041,10 @@
46265041 {
46275042 struct cma_pernet *pernet = cma_pernet(net);
46285043
4629
- idr_destroy(&pernet->tcp_ps);
4630
- idr_destroy(&pernet->udp_ps);
4631
- idr_destroy(&pernet->ipoib_ps);
4632
- idr_destroy(&pernet->ib_ps);
5044
+ WARN_ON(!xa_empty(&pernet->tcp_ps));
5045
+ WARN_ON(!xa_empty(&pernet->udp_ps));
5046
+ WARN_ON(!xa_empty(&pernet->ipoib_ps));
5047
+ WARN_ON(!xa_empty(&pernet->ib_ps));
46335048 }
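The per-net port-space tables are now xarrays keyed directly by port number, which removes the idr preallocation round-trip. A stand-alone illustration of the pattern rather than the file's own helpers:

#include <linux/xarray.h>

static int my_ps_reserve(struct xarray *ps, unsigned short port, void *entry)
{
	/* returns -EBUSY if the port is already taken */
	return xa_insert(ps, port, entry, GFP_KERNEL);
}

static void *my_ps_lookup(struct xarray *ps, unsigned short port)
{
	return xa_load(ps, port);
}

static void my_ps_release(struct xarray *ps, unsigned short port)
{
	xa_erase(ps, port);
}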
46345049
46355050 static struct pernet_operations cma_pernet_operations = {
....@@ -4671,11 +5086,14 @@
46715086 if (ret)
46725087 goto err;
46735088
4674
- rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
4675
- cma_configfs_init();
5089
+ ret = cma_configfs_init();
5090
+ if (ret)
5091
+ goto err_ib;
46765092
46775093 return 0;
46785094
5095
+err_ib:
5096
+ ib_unregister_client(&cma_client);
46795097 err:
46805098 unregister_netdevice_notifier(&cma_nb);
46815099 ib_sa_unregister_client(&sa_client);
....@@ -4688,15 +5106,12 @@
46885106 static void __exit cma_cleanup(void)
46895107 {
46905108 cma_configfs_exit();
4691
- rdma_nl_unregister(RDMA_NL_RDMA_CM);
46925109 ib_unregister_client(&cma_client);
46935110 unregister_netdevice_notifier(&cma_nb);
46945111 ib_sa_unregister_client(&sa_client);
46955112 unregister_pernet_subsys(&cma_pernet_operations);
46965113 destroy_workqueue(cma_wq);
46975114 }
4698
-
4699
-MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);
47005115
47015116 module_init(cma_init);
47025117 module_exit(cma_cleanup);