forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-04 1543e317f1da31b75942316931e8f491a8920811
kernel/drivers/infiniband/core/cma.c
@@ -1,36 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
 */

 #include <linux/completion.h>
@@ -39,7 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/igmp.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
 #include <linux/inetdevice.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -63,6 +36,7 @@

 #include "core_priv.h"
 #include "cma_priv.h"
+#include "cma_trace.h"

 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -94,6 +68,9 @@
 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
 };

+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
+
 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 {
 size_t index = event;
@@ -117,7 +94,13 @@
 }
 EXPORT_SYMBOL(rdma_reject_msg);

-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
 {
 if (rdma_ib_or_roce(id->device, id->port_num))
 return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -128,7 +111,6 @@
 WARN_ON_ONCE(1);
 return false;
 }
-EXPORT_SYMBOL(rdma_is_consumer_reject);

 const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
 struct rdma_cm_event *ev, u8 *data_len)
@@ -174,7 +156,7 @@
 }
 EXPORT_SYMBOL(rdma_res_to_id);

-static void cma_add_one(struct ib_device *device);
+static int cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device, void *client_data);

 static struct ib_client cma_client = {
@@ -191,10 +173,10 @@
 static unsigned int cma_pernet_id;

 struct cma_pernet {
- struct idr tcp_ps;
- struct idr udp_ps;
- struct idr ipoib_ps;
- struct idr ib_ps;
+ struct xarray tcp_ps;
+ struct xarray udp_ps;
+ struct xarray ipoib_ps;
+ struct xarray ib_ps;
 };

 static struct cma_pernet *cma_pernet(struct net *net)
@@ -202,7 +184,8 @@
 return net_generic(net, cma_pernet_id);
 }

-static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
+static
+struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
 {
 struct cma_pernet *pernet = cma_pernet(net);

@@ -224,7 +207,7 @@
 struct list_head list;
 struct ib_device *device;
 struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
 struct list_head id_list;
 enum ib_gid_type *default_gid_type;
 u8 *default_roce_tos;
@@ -247,34 +230,40 @@
 static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
 struct rdma_bind_list *bind_list, int snum)
 {
- struct idr *idr = cma_pernet_idr(net, ps);
+ struct xarray *xa = cma_pernet_xa(net, ps);

- return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
+ return xa_insert(xa, snum, bind_list, GFP_KERNEL);
 }

 static struct rdma_bind_list *cma_ps_find(struct net *net,
 enum rdma_ucm_port_space ps, int snum)
 {
- struct idr *idr = cma_pernet_idr(net, ps);
+ struct xarray *xa = cma_pernet_xa(net, ps);

- return idr_find(idr, snum);
+ return xa_load(xa, snum);
 }

 static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
 int snum)
 {
- struct idr *idr = cma_pernet_idr(net, ps);
+ struct xarray *xa = cma_pernet_xa(net, ps);

- idr_remove(idr, snum);
+ xa_erase(xa, snum);
 }

 enum {
 CMA_OPTION_AFONLY,
 };

-void cma_ref_dev(struct cma_device *cma_dev)
+void cma_dev_get(struct cma_device *cma_dev)
 {
- atomic_inc(&cma_dev->refcount);
+ refcount_inc(&cma_dev->refcount);
+}
+
+void cma_dev_put(struct cma_device *cma_dev)
+{
+ if (refcount_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
 }

 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
@@ -292,7 +281,7 @@
 }

 if (found_cma_dev)
- cma_ref_dev(found_cma_dev);
+ cma_dev_get(found_cma_dev);
 mutex_unlock(&lock);
 return found_cma_dev;
 }
@@ -314,6 +303,10 @@

 if (!rdma_is_port_valid(cma_dev->device, port))
 return -EINVAL;
+
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;

 supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

@@ -360,12 +353,15 @@
 struct cma_multicast {
 struct rdma_id_private *id_priv;
 union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
+ struct {
+ struct work_struct work;
+ struct rdma_cm_event event;
+ } iboe_join;
+ };
 struct list_head list;
 void *context;
 struct sockaddr_storage addr;
- struct kref mcref;
 u8 join_state;
 };

@@ -375,18 +371,6 @@
 enum rdma_cm_state old_state;
 enum rdma_cm_state new_state;
 struct rdma_cm_event event;
-};
-
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
 };

 union cma_ip_addr {
@@ -418,41 +402,26 @@
 u16 pkey;
 };

-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
 static int cma_comp_exch(struct rdma_id_private *id_priv,
 enum rdma_cm_state comp, enum rdma_cm_state exch)
 {
 unsigned long flags;
 int ret;

+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * to/from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
 spin_lock_irqsave(&id_priv->lock, flags);
 if ((ret = (id_priv->state == comp)))
 id_priv->state = exch;
 spin_unlock_irqrestore(&id_priv->lock, flags);
 return ret;
-}
-
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
- enum rdma_cm_state exch)
-{
- unsigned long flags;
- enum rdma_cm_state old;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- old = id_priv->state;
- id_priv->state = exch;
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return old;
 }

 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
@@ -488,13 +457,14 @@
 static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 struct cma_device *cma_dev)
 {
- cma_ref_dev(cma_dev);
+ cma_dev_get(cma_dev);
 id_priv->cma_dev = cma_dev;
 id_priv->id.device = cma_dev->device;
 id_priv->id.route.addr.dev_addr.transport =
 rdma_node_get_transport(cma_dev->device->node_type);
 list_add_tail(&id_priv->list, &cma_dev->id_list);
- rdma_restrack_add(&id_priv->res);
+
+ trace_cm_id_attach(id_priv, cma_dev->device);
 }

 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -506,26 +476,17 @@
 rdma_start_port(cma_dev->device)];
 }

-void cma_deref_dev(struct cma_device *cma_dev)
-{
- if (atomic_dec_and_test(&cma_dev->refcount))
- complete(&cma_dev->comp);
-}
-
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
 static void cma_release_dev(struct rdma_id_private *id_priv)
 {
 mutex_lock(&lock);
 list_del(&id_priv->list);
- cma_deref_dev(id_priv->cma_dev);
+ cma_dev_put(id_priv->cma_dev);
 id_priv->cma_dev = NULL;
+ id_priv->id.device = NULL;
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
+ }
 mutex_unlock(&lock);
 }

@@ -544,21 +505,10 @@
 return id_priv->id.route.addr.src_addr.ss_family;
 }

-static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+static int cma_set_default_qkey(struct rdma_id_private *id_priv)
 {
 struct ib_sa_mcmember_rec rec;
 int ret = 0;
-
- if (id_priv->qkey) {
- if (qkey && id_priv->qkey != qkey)
- return -EINVAL;
- return 0;
- }
-
- if (qkey) {
- id_priv->qkey = qkey;
- return 0;
- }

 switch (id_priv->id.ps) {
 case RDMA_PS_UDP:
@@ -577,6 +527,16 @@
 break;
 }
 return ret;
+}
+
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+{
+ if (!qkey ||
+ (id_priv->qkey && (id_priv->qkey != qkey)))
+ return -EINVAL;
+
+ id_priv->qkey = qkey;
+ return 0;
 }

 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
@@ -612,6 +572,9 @@
 int dev_type = dev_addr->dev_type;
 struct net_device *ndev = NULL;

+ if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
+ return ERR_PTR(-ENODEV);
+
 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
 return ERR_PTR(-ENODEV);

@@ -639,56 +602,148 @@
 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
 }

-static int cma_acquire_dev(struct rdma_id_private *id_priv,
- const struct rdma_id_private *listen_id_priv)
+/**
+ * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
+ * based on source ip address.
+ * @id_priv: cm_id which should be bound to cma device
+ *
+ * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
+ * based on source IP address. It returns 0 on success or error code otherwise.
+ * It is applicable to active and passive side cm_id.
+ */
+static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
 {
 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 const struct ib_gid_attr *sgid_attr;
- struct cma_device *cma_dev;
 union ib_gid gid, iboe_gid, *gidp;
+ struct cma_device *cma_dev;
 enum ib_gid_type gid_type;
 int ret = -ENODEV;
- u8 port;
+ unsigned int port;

 if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
 id_priv->id.ps == RDMA_PS_IPOIB)
 return -EINVAL;

- mutex_lock(&lock);
 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 &iboe_gid);

 memcpy(&gid, dev_addr->src_dev_addr +
- rdma_addr_gid_offset(dev_addr), sizeof gid);
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));

- if (listen_id_priv) {
- cma_dev = listen_id_priv->cma_dev;
- port = listen_id_priv->id.port_num;
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
- &iboe_gid : &gid;
- gid_type = listen_id_priv->gid_type;
- sgid_attr = cma_validate_port(cma_dev->device, port,
- gid_type, gidp, id_priv);
- if (!IS_ERR(sgid_attr)) {
- id_priv->id.port_num = port;
- cma_bind_sgid_attr(id_priv, sgid_attr);
- ret = 0;
- goto out;
- }
- }
-
+ mutex_lock(&lock);
 list_for_each_entry(cma_dev, &dev_list, list) {
- for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
- if (listen_id_priv &&
- listen_id_priv->cma_dev == cma_dev &&
- listen_id_priv->id.port_num == port)
- continue;
-
+ rdma_for_each_port (cma_dev->device, port) {
 gidp = rdma_protocol_roce(cma_dev->device, port) ?
 &iboe_gid : &gid;
 gid_type = cma_dev->default_gid_type[port - 1];
 sgid_attr = cma_validate_port(cma_dev->device, port,
 gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
+ id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ cma_attach_to_dev(id_priv, cma_dev);
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+out:
+ mutex_unlock(&lock);
+ return ret;
+}
+
+/**
+ * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
+ * @id_priv: cm id to bind to cma device
+ * @listen_id_priv: listener cm id to match against
+ * @req: Pointer to req structure containaining incoming
+ * request information
+ * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
+ * rdma device matches for listen_id and incoming request. It also verifies
+ * that a GID table entry is present for the source address.
+ * Returns 0 on success, or returns error code otherwise.
+ */
+static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
+ const struct rdma_id_private *listen_id_priv,
+ struct cma_req_info *req)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
+ enum ib_gid_type gid_type;
+ union ib_gid gid;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ if (rdma_protocol_roce(req->device, req->port))
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &gid);
+ else
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
+ sgid_attr = cma_validate_port(req->device, req->port,
+ gid_type, &gid, id_priv);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ id_priv->id.port_num = req->port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ /* Need to acquire lock to protect against reader
+ * of cma_dev->id_list such as cma_netdev_callback() and
+ * cma_process_remove().
+ */
+ mutex_lock(&lock);
+ cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
+ mutex_unlock(&lock);
+ rdma_restrack_add(&id_priv->res);
+ return 0;
+}
+
+static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
+ const struct rdma_id_private *listen_id_priv)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
+ struct cma_device *cma_dev;
+ enum ib_gid_type gid_type;
+ int ret = -ENODEV;
+ unsigned int port;
+ union ib_gid gid;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ mutex_lock(&lock);
+
+ cma_dev = listen_id_priv->cma_dev;
+ port = listen_id_priv->id.port_num;
+ gid_type = listen_id_priv->gid_type;
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, &gid, id_priv);
+ if (!IS_ERR(sgid_attr)) {
+ id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
+ goto out;
+ }
+
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ rdma_for_each_port (cma_dev->device, port) {
+ if (listen_id_priv->cma_dev == cma_dev &&
+ listen_id_priv->id.port_num == port)
+ continue;
+
+ gid_type = cma_dev->default_gid_type[port - 1];
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, &gid, id_priv);
 if (!IS_ERR(sgid_attr)) {
 id_priv->id.port_num = port;
 cma_bind_sgid_attr(id_priv, sgid_attr);
....@@ -699,8 +754,10 @@
699754 }
700755
701756 out:
702
- if (!ret)
757
+ if (!ret) {
703758 cma_attach_to_dev(id_priv, cma_dev);
759
+ rdma_restrack_add(&id_priv->res);
760
+ }
704761
705762 mutex_unlock(&lock);
706763 return ret;
....@@ -714,9 +771,10 @@
714771 struct cma_device *cma_dev, *cur_dev;
715772 struct sockaddr_ib *addr;
716773 union ib_gid gid, sgid, *dgid;
774
+ unsigned int p;
717775 u16 pkey, index;
718
- u8 p;
719776 enum ib_port_state port_state;
777
+ int ret;
720778 int i;
721779
722780 cma_dev = NULL;
....@@ -726,7 +784,7 @@
726784
727785 mutex_lock(&lock);
728786 list_for_each_entry(cur_dev, &dev_list, list) {
729
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
787
+ rdma_for_each_port (cur_dev->device, p) {
730788 if (!rdma_cap_af_ib(cur_dev->device, p))
731789 continue;
732790
....@@ -735,9 +793,14 @@
735793
736794 if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
737795 continue;
738
- for (i = 0; !rdma_query_gid(cur_dev->device,
739
- p, i, &gid);
740
- i++) {
796
+
797
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
798
+ ++i) {
799
+ ret = rdma_query_gid(cur_dev->device, p, i,
800
+ &gid);
801
+ if (ret)
802
+ continue;
803
+
741804 if (!memcmp(&gid, dgid, sizeof(gid))) {
742805 cma_dev = cur_dev;
743806 sgid = gid;
....@@ -761,6 +824,7 @@
761824
762825 found:
763826 cma_attach_to_dev(id_priv, cma_dev);
827
+ rdma_restrack_add(&id_priv->res);
764828 mutex_unlock(&lock);
765829 addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
766830 memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
....@@ -768,16 +832,21 @@
768832 return 0;
769833 }
770834
771
-static void cma_deref_id(struct rdma_id_private *id_priv)
835
+static void cma_id_get(struct rdma_id_private *id_priv)
772836 {
773
- if (atomic_dec_and_test(&id_priv->refcount))
837
+ refcount_inc(&id_priv->refcount);
838
+}
839
+
840
+static void cma_id_put(struct rdma_id_private *id_priv)
841
+{
842
+ if (refcount_dec_and_test(&id_priv->refcount))
774843 complete(&id_priv->comp);
775844 }
776845
777
-struct rdma_cm_id *__rdma_create_id(struct net *net,
778
- rdma_cm_event_handler event_handler,
779
- void *context, enum rdma_ucm_port_space ps,
780
- enum ib_qp_type qp_type, const char *caller)
846
+static struct rdma_id_private *
847
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
848
+ void *context, enum rdma_ucm_port_space ps,
849
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
781850 {
782851 struct rdma_id_private *id_priv;
783852
....@@ -785,22 +854,18 @@
785854 if (!id_priv)
786855 return ERR_PTR(-ENOMEM);
787856
788
- if (caller)
789
- id_priv->res.kern_name = caller;
790
- else
791
- rdma_restrack_set_task(&id_priv->res, current);
792
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
793857 id_priv->state = RDMA_CM_IDLE;
794858 id_priv->id.context = context;
795859 id_priv->id.event_handler = event_handler;
796860 id_priv->id.ps = ps;
797861 id_priv->id.qp_type = qp_type;
798862 id_priv->tos_set = false;
863
+ id_priv->timeout_set = false;
799864 id_priv->gid_type = IB_GID_TYPE_IB;
800865 spin_lock_init(&id_priv->lock);
801866 mutex_init(&id_priv->qp_mutex);
802867 init_completion(&id_priv->comp);
803
- atomic_set(&id_priv->refcount, 1);
868
+ refcount_set(&id_priv->refcount, 1);
804869 mutex_init(&id_priv->handler_mutex);
805870 INIT_LIST_HEAD(&id_priv->listen_list);
806871 INIT_LIST_HEAD(&id_priv->mc_list);
....@@ -808,9 +873,45 @@
808873 id_priv->id.route.addr.dev_addr.net = get_net(net);
809874 id_priv->seq_num &= 0x00ffffff;
810875
811
- return &id_priv->id;
876
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
877
+ if (parent)
878
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
879
+
880
+ return id_priv;
812881 }
813
-EXPORT_SYMBOL(__rdma_create_id);
882
+
883
+struct rdma_cm_id *
884
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
885
+ void *context, enum rdma_ucm_port_space ps,
886
+ enum ib_qp_type qp_type, const char *caller)
887
+{
888
+ struct rdma_id_private *ret;
889
+
890
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
891
+ if (IS_ERR(ret))
892
+ return ERR_CAST(ret);
893
+
894
+ rdma_restrack_set_name(&ret->res, caller);
895
+ return &ret->id;
896
+}
897
+EXPORT_SYMBOL(__rdma_create_kernel_id);
898
+
899
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
900
+ void *context,
901
+ enum rdma_ucm_port_space ps,
902
+ enum ib_qp_type qp_type)
903
+{
904
+ struct rdma_id_private *ret;
905
+
906
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
907
+ ps, qp_type, NULL);
908
+ if (IS_ERR(ret))
909
+ return ERR_CAST(ret);
910
+
911
+ rdma_restrack_set_name(&ret->res, NULL);
912
+ return &ret->id;
913
+}
914
+EXPORT_SYMBOL(rdma_create_user_id);
814915
815916 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
816917 {
....@@ -859,27 +960,34 @@
859960 int ret;
860961
861962 id_priv = container_of(id, struct rdma_id_private, id);
862
- if (id->device != pd->device)
863
- return -EINVAL;
963
+ if (id->device != pd->device) {
964
+ ret = -EINVAL;
965
+ goto out_err;
966
+ }
864967
865968 qp_init_attr->port_num = id->port_num;
866969 qp = ib_create_qp(pd, qp_init_attr);
867
- if (IS_ERR(qp))
868
- return PTR_ERR(qp);
970
+ if (IS_ERR(qp)) {
971
+ ret = PTR_ERR(qp);
972
+ goto out_err;
973
+ }
869974
870975 if (id->qp_type == IB_QPT_UD)
871976 ret = cma_init_ud_qp(id_priv, qp);
872977 else
873978 ret = cma_init_conn_qp(id_priv, qp);
874979 if (ret)
875
- goto err;
980
+ goto out_destroy;
876981
877982 id->qp = qp;
878983 id_priv->qp_num = qp->qp_num;
879984 id_priv->srq = (qp->srq != NULL);
985
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
880986 return 0;
881
-err:
987
+out_destroy:
882988 ib_destroy_qp(qp);
989
+out_err:
990
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
883991 return ret;
884992 }
885993 EXPORT_SYMBOL(rdma_create_qp);
....@@ -889,6 +997,7 @@
889997 struct rdma_id_private *id_priv;
890998
891999 id_priv = container_of(id, struct rdma_id_private, id);
1000
+ trace_cm_qp_destroy(id_priv);
8921001 mutex_lock(&id_priv->qp_mutex);
8931002 ib_destroy_qp(id_priv->id.qp);
8941003 id_priv->id.qp = NULL;
....@@ -997,7 +1106,7 @@
9971106 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
9981107
9991108 if (id_priv->id.qp_type == IB_QPT_UD) {
1000
- ret = cma_set_qkey(id_priv, 0);
1109
+ ret = cma_set_default_qkey(id_priv);
10011110 if (ret)
10021111 return ret;
10031112
....@@ -1037,6 +1146,9 @@
10371146 *qp_attr_mask |= IB_QP_PORT;
10381147 } else
10391148 ret = -ENOSYS;
1149
+
1150
+ if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
1151
+ qp_attr->timeout = id_priv->timeout;
10401152
10411153 return ret;
10421154 }
....@@ -1324,7 +1436,7 @@
13241436 return false;
13251437
13261438 memset(&fl4, 0, sizeof(fl4));
1327
- fl4.flowi4_iif = net_dev->ifindex;
1439
+ fl4.flowi4_oif = net_dev->ifindex;
13281440 fl4.daddr = daddr;
13291441 fl4.saddr = saddr;
13301442
....@@ -1387,6 +1499,7 @@
13871499 roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
13881500 {
13891501 const struct ib_gid_attr *sgid_attr = NULL;
1502
+ struct net_device *ndev;
13901503
13911504 if (ib_event->event == IB_CM_REQ_RECEIVED)
13921505 sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
....@@ -1395,8 +1508,15 @@
13951508
13961509 if (!sgid_attr)
13971510 return NULL;
1398
- dev_hold(sgid_attr->ndev);
1399
- return sgid_attr->ndev;
1511
+
1512
+ rcu_read_lock();
1513
+ ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
1514
+ if (IS_ERR(ndev))
1515
+ ndev = NULL;
1516
+ else
1517
+ dev_hold(ndev);
1518
+ rcu_read_unlock();
1519
+ return ndev;
14001520 }
14011521
14021522 static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
....@@ -1475,17 +1595,34 @@
14751595 return rdma_protocol_roce(device, port_num);
14761596 }
14771597
1598
+static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
1599
+{
1600
+ const struct sockaddr *daddr =
1601
+ (const struct sockaddr *)&req->listen_addr_storage;
1602
+ const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1603
+
1604
+ /* Returns true if the req is for IPv6 link local */
1605
+ return (daddr->sa_family == AF_INET6 &&
1606
+ (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
1607
+}
1608
+
14781609 static bool cma_match_net_dev(const struct rdma_cm_id *id,
14791610 const struct net_device *net_dev,
1480
- u8 port_num)
1611
+ const struct cma_req_info *req)
14811612 {
14821613 const struct rdma_addr *addr = &id->route.addr;
14831614
14841615 if (!net_dev)
14851616 /* This request is an AF_IB request */
1486
- return (!id->port_num || id->port_num == port_num) &&
1617
+ return (!id->port_num || id->port_num == req->port) &&
14871618 (addr->src_addr.ss_family == AF_IB);
14881619
1620
+ /*
1621
+ * If the request is not for IPv6 link local, allow matching
1622
+ * request to any netdevice of the one or multiport rdma device.
1623
+ */
1624
+ if (!cma_is_req_ipv6_ll(req))
1625
+ return true;
14891626 /*
14901627 * Net namespaces must match, and if the listner is listening
14911628 * on a specific netdevice than netdevice must match as well.
....@@ -1515,13 +1652,14 @@
15151652 hlist_for_each_entry(id_priv, &bind_list->owners, node) {
15161653 if (cma_match_private_data(id_priv, ib_event->private_data)) {
15171654 if (id_priv->id.device == cm_id->device &&
1518
- cma_match_net_dev(&id_priv->id, net_dev, req->port))
1655
+ cma_match_net_dev(&id_priv->id, net_dev, req))
15191656 return id_priv;
15201657 list_for_each_entry(id_priv_dev,
15211658 &id_priv->listen_list,
15221659 listen_list) {
15231660 if (id_priv_dev->id.device == cm_id->device &&
1524
- cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
1661
+ cma_match_net_dev(&id_priv_dev->id,
1662
+ net_dev, req))
15251663 return id_priv_dev;
15261664 }
15271665 }
....@@ -1533,18 +1671,18 @@
15331671 static struct rdma_id_private *
15341672 cma_ib_id_from_event(struct ib_cm_id *cm_id,
15351673 const struct ib_cm_event *ib_event,
1674
+ struct cma_req_info *req,
15361675 struct net_device **net_dev)
15371676 {
1538
- struct cma_req_info req;
15391677 struct rdma_bind_list *bind_list;
15401678 struct rdma_id_private *id_priv;
15411679 int err;
15421680
1543
- err = cma_save_req_info(ib_event, &req);
1681
+ err = cma_save_req_info(ib_event, req);
15441682 if (err)
15451683 return ERR_PTR(err);
15461684
1547
- *net_dev = cma_get_net_dev(ib_event, &req);
1685
+ *net_dev = cma_get_net_dev(ib_event, req);
15481686 if (IS_ERR(*net_dev)) {
15491687 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
15501688 /* Assuming the protocol is AF_IB */
....@@ -1583,17 +1721,17 @@
15831721 }
15841722
15851723 if (!validate_net_dev(*net_dev,
1586
- (struct sockaddr *)&req.listen_addr_storage,
1587
- (struct sockaddr *)&req.src_addr_storage)) {
1724
+ (struct sockaddr *)&req->src_addr_storage,
1725
+ (struct sockaddr *)&req->listen_addr_storage)) {
15881726 id_priv = ERR_PTR(-EHOSTUNREACH);
15891727 goto err;
15901728 }
15911729 }
15921730
15931731 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1594
- rdma_ps_from_service_id(req.service_id),
1595
- cma_port_from_service_id(req.service_id));
1596
- id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1732
+ rdma_ps_from_service_id(req->service_id),
1733
+ cma_port_from_service_id(req->service_id));
1734
+ id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
15971735 err:
15981736 rcu_read_unlock();
15991737 mutex_unlock(&lock);
....@@ -1617,15 +1755,16 @@
16171755 }
16181756 }
16191757
1620
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
1758
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
16211759 {
16221760 struct rdma_id_private *dev_id_priv;
1761
+
1762
+ lockdep_assert_held(&lock);
16231763
16241764 /*
16251765 * Remove from listen_any_list to prevent added devices from spawning
16261766 * additional listen requests.
16271767 */
1628
- mutex_lock(&lock);
16291768 list_del(&id_priv->list);
16301769
16311770 while (!list_empty(&id_priv->listen_list)) {
....@@ -1639,6 +1778,12 @@
16391778 rdma_destroy_id(&dev_id_priv->id);
16401779 mutex_lock(&lock);
16411780 }
1781
+}
1782
+
1783
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
1784
+{
1785
+ mutex_lock(&lock);
1786
+ _cma_cancel_listens(id_priv);
16421787 mutex_unlock(&lock);
16431788 }
16441789
....@@ -1647,6 +1792,14 @@
16471792 {
16481793 switch (state) {
16491794 case RDMA_CM_ADDR_QUERY:
1795
+ /*
1796
+ * We can avoid doing the rdma_addr_cancel() based on state,
1797
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
1798
+ * Notice that the addr_handler work could still be exiting
1799
+ * outside this state, however due to the interaction with the
1800
+ * handler_mutex the work is guaranteed not to touch id_priv
1801
+ * during exit.
1802
+ */
16501803 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
16511804 break;
16521805 case RDMA_CM_ROUTE_QUERY:
....@@ -1681,14 +1834,12 @@
16811834 static void destroy_mc(struct rdma_id_private *id_priv,
16821835 struct cma_multicast *mc)
16831836 {
1684
- if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) {
1685
- ib_sa_free_multicast(mc->multicast.ib);
1686
- kfree(mc);
1687
- return;
1688
- }
1837
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
16891838
1690
- if (rdma_protocol_roce(id_priv->id.device,
1691
- id_priv->id.port_num)) {
1839
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
1840
+ ib_sa_free_multicast(mc->sa_mc);
1841
+
1842
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
16921843 struct rdma_dev_addr *dev_addr =
16931844 &id_priv->id.route.addr.dev_addr;
16941845 struct net_device *ndev = NULL;
....@@ -1696,12 +1847,23 @@
16961847 if (dev_addr->bound_dev_if)
16971848 ndev = dev_get_by_index(dev_addr->net,
16981849 dev_addr->bound_dev_if);
1699
- if (ndev) {
1700
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
1701
- dev_put(ndev);
1850
+ if (ndev && !send_only) {
1851
+ enum ib_gid_type gid_type;
1852
+ union ib_gid mgid;
1853
+
1854
+ gid_type = id_priv->cma_dev->default_gid_type
1855
+ [id_priv->id.port_num -
1856
+ rdma_start_port(
1857
+ id_priv->cma_dev->device)];
1858
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
1859
+ gid_type);
1860
+ cma_igmp_send(ndev, &mgid, false);
17021861 }
1703
- kref_put(&mc->mcref, release_mc);
1862
+ dev_put(ndev);
1863
+
1864
+ cancel_work_sync(&mc->iboe_join.work);
17041865 }
1866
+ kfree(mc);
17051867 }
17061868
17071869 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
....@@ -1716,21 +1878,10 @@
17161878 }
17171879 }
17181880
1719
-void rdma_destroy_id(struct rdma_cm_id *id)
1881
+static void _destroy_id(struct rdma_id_private *id_priv,
1882
+ enum rdma_cm_state state)
17201883 {
1721
- struct rdma_id_private *id_priv;
1722
- enum rdma_cm_state state;
1723
-
1724
- id_priv = container_of(id, struct rdma_id_private, id);
1725
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
17261884 cma_cancel_operation(id_priv, state);
1727
-
1728
- /*
1729
- * Wait for any active callback to finish. New callbacks will find
1730
- * the id_priv state set to destroying and abort.
1731
- */
1732
- mutex_lock(&id_priv->handler_mutex);
1733
- mutex_unlock(&id_priv->handler_mutex);
17341885
17351886 rdma_restrack_del(&id_priv->res);
17361887 if (id_priv->cma_dev) {
....@@ -1746,19 +1897,52 @@
17461897 }
17471898
17481899 cma_release_port(id_priv);
1749
- cma_deref_id(id_priv);
1900
+ cma_id_put(id_priv);
17501901 wait_for_completion(&id_priv->comp);
17511902
17521903 if (id_priv->internal_id)
1753
- cma_deref_id(id_priv->id.context);
1904
+ cma_id_put(id_priv->id.context);
17541905
17551906 kfree(id_priv->id.route.path_rec);
17561907
1757
- if (id_priv->id.route.addr.dev_addr.sgid_attr)
1758
- rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
1759
-
17601908 put_net(id_priv->id.route.addr.dev_addr.net);
17611909 kfree(id_priv);
1910
+}
1911
+
1912
+/*
1913
+ * destroy an ID from within the handler_mutex. This ensures that no other
1914
+ * handlers can start running concurrently.
1915
+ */
1916
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
1917
+ __releases(&idprv->handler_mutex)
1918
+{
1919
+ enum rdma_cm_state state;
1920
+ unsigned long flags;
1921
+
1922
+ trace_cm_id_destroy(id_priv);
1923
+
1924
+ /*
1925
+ * Setting the state to destroyed under the handler mutex provides a
1926
+ * fence against calling handler callbacks. If this is invoked due to
1927
+ * the failure of a handler callback then it guarentees that no future
1928
+ * handlers will be called.
1929
+ */
1930
+ lockdep_assert_held(&id_priv->handler_mutex);
1931
+ spin_lock_irqsave(&id_priv->lock, flags);
1932
+ state = id_priv->state;
1933
+ id_priv->state = RDMA_CM_DESTROYING;
1934
+ spin_unlock_irqrestore(&id_priv->lock, flags);
1935
+ mutex_unlock(&id_priv->handler_mutex);
1936
+ _destroy_id(id_priv, state);
1937
+}
1938
+
1939
+void rdma_destroy_id(struct rdma_cm_id *id)
1940
+{
1941
+ struct rdma_id_private *id_priv =
1942
+ container_of(id, struct rdma_id_private, id);
1943
+
1944
+ mutex_lock(&id_priv->handler_mutex);
1945
+ destroy_id_handler_unlock(id_priv);
17621946 }
17631947 EXPORT_SYMBOL(rdma_destroy_id);
17641948
....@@ -1774,6 +1958,7 @@
17741958 if (ret)
17751959 goto reject;
17761960
1961
+ trace_cm_send_rtu(id_priv);
17771962 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
17781963 if (ret)
17791964 goto reject;
....@@ -1782,6 +1967,7 @@
17821967 reject:
17831968 pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
17841969 cma_modify_qp_err(id_priv);
1970
+ trace_cm_send_rej(id_priv);
17851971 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
17861972 NULL, 0, NULL, 0);
17871973 return ret;
....@@ -1799,6 +1985,22 @@
17991985 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
18001986 event->param.conn.srq = rep_data->srq;
18011987 event->param.conn.qp_num = rep_data->remote_qpn;
1988
+
1989
+ event->ece.vendor_id = rep_data->ece.vendor_id;
1990
+ event->ece.attr_mod = rep_data->ece.attr_mod;
1991
+}
1992
+
1993
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
1994
+ struct rdma_cm_event *event)
1995
+{
1996
+ int ret;
1997
+
1998
+ lockdep_assert_held(&id_priv->handler_mutex);
1999
+
2000
+ trace_cm_event_handler(id_priv, event);
2001
+ ret = id_priv->id.event_handler(&id_priv->id, event);
2002
+ trace_cm_event_done(id_priv, event, ret);
2003
+ return ret;
18022004 }
18032005
18042006 static int cma_ib_handler(struct ib_cm_id *cm_id,
....@@ -1806,13 +2008,15 @@
18062008 {
18072009 struct rdma_id_private *id_priv = cm_id->context;
18082010 struct rdma_cm_event event = {};
1809
- int ret = 0;
2011
+ enum rdma_cm_state state;
2012
+ int ret;
18102013
18112014 mutex_lock(&id_priv->handler_mutex);
2015
+ state = READ_ONCE(id_priv->state);
18122016 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1813
- id_priv->state != RDMA_CM_CONNECT) ||
2017
+ state != RDMA_CM_CONNECT) ||
18142018 (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1815
- id_priv->state != RDMA_CM_DISCONNECT))
2019
+ state != RDMA_CM_DISCONNECT))
18162020 goto out;
18172021
18182022 switch (ib_event->event) {
....@@ -1822,9 +2026,11 @@
18222026 event.status = -ETIMEDOUT;
18232027 break;
18242028 case IB_CM_REP_RECEIVED:
1825
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
1826
- (id_priv->id.qp_type != IB_QPT_UD))
2029
+ if (state == RDMA_CM_CONNECT &&
2030
+ (id_priv->id.qp_type != IB_QPT_UD)) {
2031
+ trace_cm_send_mra(id_priv);
18272032 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2033
+ }
18282034 if (id_priv->id.qp) {
18292035 event.status = cma_rep_recv(id_priv);
18302036 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
....@@ -1840,7 +2046,8 @@
18402046 event.event = RDMA_CM_EVENT_ESTABLISHED;
18412047 break;
18422048 case IB_CM_DREQ_ERROR:
1843
- event.status = -ETIMEDOUT; /* fall through */
2049
+ event.status = -ETIMEDOUT;
2050
+ fallthrough;
18442051 case IB_CM_DREQ_RECEIVED:
18452052 case IB_CM_DREP_RECEIVED:
18462053 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
....@@ -1869,18 +2076,16 @@
18692076 goto out;
18702077 }
18712078
1872
- ret = id_priv->id.event_handler(&id_priv->id, &event);
2079
+ ret = cma_cm_event_handler(id_priv, &event);
18732080 if (ret) {
18742081 /* Destroy the CM ID by returning a non-zero value. */
18752082 id_priv->cm_id.ib = NULL;
1876
- cma_exch(id_priv, RDMA_CM_DESTROYING);
1877
- mutex_unlock(&id_priv->handler_mutex);
1878
- rdma_destroy_id(&id_priv->id);
2083
+ destroy_id_handler_unlock(id_priv);
18792084 return ret;
18802085 }
18812086 out:
18822087 mutex_unlock(&id_priv->handler_mutex);
1883
- return ret;
2088
+ return 0;
18842089 }
18852090
18862091 static struct rdma_id_private *
....@@ -1899,14 +2104,15 @@
18992104 int ret;
19002105
19012106 listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
1902
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
1903
- listen_id->event_handler, listen_id->context,
1904
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
1905
- listen_id_priv->res.kern_name);
1906
- if (IS_ERR(id))
2107
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
2108
+ listen_id->event_handler, listen_id->context,
2109
+ listen_id->ps,
2110
+ ib_event->param.req_rcvd.qp_type,
2111
+ listen_id_priv);
2112
+ if (IS_ERR(id_priv))
19072113 return NULL;
19082114
1909
- id_priv = container_of(id, struct rdma_id_private, id);
2115
+ id = &id_priv->id;
19102116 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
19112117 (struct sockaddr *)&id->route.addr.dst_addr,
19122118 listen_id, ib_event, ss_family, service_id))
....@@ -1924,7 +2130,7 @@
19242130 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
19252131
19262132 if (net_dev) {
1927
- rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
2133
+ rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
19282134 } else {
19292135 if (!cma_protocol_roce(listen_id) &&
19302136 cma_any_addr(cma_src_addr(id_priv))) {
....@@ -1960,13 +2166,13 @@
19602166 int ret;
19612167
19622168 listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
1963
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
1964
- listen_id->ps, IB_QPT_UD,
1965
- listen_id_priv->res.kern_name);
1966
- if (IS_ERR(id))
2169
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
2170
+ listen_id->context, listen_id->ps, IB_QPT_UD,
2171
+ listen_id_priv);
2172
+ if (IS_ERR(id_priv))
19672173 return NULL;
19682174
1969
- id_priv = container_of(id, struct rdma_id_private, id);
2175
+ id = &id_priv->id;
19702176 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
19712177 (struct sockaddr *)&id->route.addr.dst_addr,
19722178 listen_id, ib_event, ss_family,
....@@ -1974,7 +2180,7 @@
19742180 goto err;
19752181
19762182 if (net_dev) {
1977
- rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
2183
+ rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
19782184 } else {
19792185 if (!cma_any_addr(cma_src_addr(id_priv))) {
19802186 ret = cma_translate_addr(cma_src_addr(id_priv),
....@@ -2004,6 +2210,9 @@
20042210 event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
20052211 event->param.conn.srq = req_data->srq;
20062212 event->param.conn.qp_num = req_data->remote_qpn;
2213
+
2214
+ event->ece.vendor_id = req_data->ece.vendor_id;
2215
+ event->ece.attr_mod = req_data->ece.attr_mod;
20072216 }
20082217
20092218 static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
....@@ -2021,23 +2230,25 @@
20212230 {
20222231 struct rdma_id_private *listen_id, *conn_id = NULL;
20232232 struct rdma_cm_event event = {};
2233
+ struct cma_req_info req = {};
20242234 struct net_device *net_dev;
20252235 u8 offset;
20262236 int ret;
20272237
2028
- listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
2238
+ listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
20292239 if (IS_ERR(listen_id))
20302240 return PTR_ERR(listen_id);
20312241
2242
+ trace_cm_req_handler(listen_id, ib_event->event);
20322243 if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
20332244 ret = -EINVAL;
20342245 goto net_dev_put;
20352246 }
20362247
20372248 mutex_lock(&listen_id->handler_mutex);
2038
- if (listen_id->state != RDMA_CM_LISTEN) {
2249
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
20392250 ret = -ECONNABORTED;
2040
- goto err1;
2251
+ goto err_unlock;
20412252 }
20422253
20432254 offset = cma_user_data_offset(listen_id);
....@@ -2054,53 +2265,38 @@
20542265 }
20552266 if (!conn_id) {
20562267 ret = -ENOMEM;
2057
- goto err1;
2268
+ goto err_unlock;
20582269 }
20592270
20602271 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2061
- ret = cma_acquire_dev(conn_id, listen_id);
2062
- if (ret)
2063
- goto err2;
2272
+ ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
2273
+ if (ret) {
2274
+ destroy_id_handler_unlock(conn_id);
2275
+ goto err_unlock;
2276
+ }
20642277
20652278 conn_id->cm_id.ib = cm_id;
20662279 cm_id->context = conn_id;
20672280 cm_id->cm_handler = cma_ib_handler;
20682281
2069
- /*
2070
- * Protect against the user destroying conn_id from another thread
2071
- * until we're done accessing it.
2072
- */
2073
- atomic_inc(&conn_id->refcount);
2074
- ret = conn_id->id.event_handler(&conn_id->id, &event);
2075
- if (ret)
2076
- goto err3;
2077
- /*
2078
- * Acquire mutex to prevent user executing rdma_destroy_id()
2079
- * while we're accessing the cm_id.
2080
- */
2081
- mutex_lock(&lock);
2082
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
2083
- (conn_id->id.qp_type != IB_QPT_UD))
2084
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2085
- mutex_unlock(&lock);
2086
- mutex_unlock(&conn_id->handler_mutex);
2087
- mutex_unlock(&listen_id->handler_mutex);
2088
- cma_deref_id(conn_id);
2089
- if (net_dev)
2090
- dev_put(net_dev);
2091
- return 0;
2282
+ ret = cma_cm_event_handler(conn_id, &event);
2283
+ if (ret) {
2284
+ /* Destroy the CM ID by returning a non-zero value. */
2285
+ conn_id->cm_id.ib = NULL;
2286
+ mutex_unlock(&listen_id->handler_mutex);
2287
+ destroy_id_handler_unlock(conn_id);
2288
+ goto net_dev_put;
2289
+ }
20922290
2093
-err3:
2094
- cma_deref_id(conn_id);
2095
- /* Destroy the CM ID by returning a non-zero value. */
2096
- conn_id->cm_id.ib = NULL;
2097
-err2:
2098
- cma_exch(conn_id, RDMA_CM_DESTROYING);
2291
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
2292
+ conn_id->id.qp_type != IB_QPT_UD) {
2293
+ trace_cm_send_mra(cm_id->context);
2294
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2295
+ }
20992296 mutex_unlock(&conn_id->handler_mutex);
2100
-err1:
2297
+
2298
+err_unlock:
21012299 mutex_unlock(&listen_id->handler_mutex);
2102
- if (conn_id)
2103
- rdma_destroy_id(&conn_id->id);
21042300
21052301 net_dev_put:
21062302 if (net_dev)
....@@ -2154,7 +2350,7 @@
21542350 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
21552351
21562352 mutex_lock(&id_priv->handler_mutex);
2157
- if (id_priv->state != RDMA_CM_CONNECT)
2353
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
21582354 goto out;
21592355
21602356 switch (iw_event->event) {
....@@ -2196,13 +2392,11 @@
21962392 event.status = iw_event->status;
21972393 event.param.conn.private_data = iw_event->private_data;
21982394 event.param.conn.private_data_len = iw_event->private_data_len;
2199
- ret = id_priv->id.event_handler(&id_priv->id, &event);
2395
+ ret = cma_cm_event_handler(id_priv, &event);
22002396 if (ret) {
22012397 /* Destroy the CM ID by returning a non-zero value. */
22022398 id_priv->cm_id.iw = NULL;
2203
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2204
- mutex_unlock(&id_priv->handler_mutex);
2205
- rdma_destroy_id(&id_priv->id);
2399
+ destroy_id_handler_unlock(id_priv);
22062400 return ret;
22072401 }
22082402
....@@ -2214,7 +2408,6 @@
22142408 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
22152409 struct iw_cm_event *iw_event)
22162410 {
2217
- struct rdma_cm_id *new_cm_id;
22182411 struct rdma_id_private *listen_id, *conn_id;
22192412 struct rdma_cm_event event = {};
22202413 int ret = -ECONNABORTED;
....@@ -2230,35 +2423,33 @@
22302423 listen_id = cm_id->context;
22312424
22322425 mutex_lock(&listen_id->handler_mutex);
2233
- if (listen_id->state != RDMA_CM_LISTEN)
2426
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
22342427 goto out;
22352428
22362429 /* Create a new RDMA id for the new IW CM ID */
2237
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2238
- listen_id->id.event_handler,
2239
- listen_id->id.context,
2240
- RDMA_PS_TCP, IB_QPT_RC,
2241
- listen_id->res.kern_name);
2242
- if (IS_ERR(new_cm_id)) {
2430
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2431
+ listen_id->id.event_handler,
2432
+ listen_id->id.context, RDMA_PS_TCP,
2433
+ IB_QPT_RC, listen_id);
2434
+ if (IS_ERR(conn_id)) {
22432435 ret = -ENOMEM;
22442436 goto out;
22452437 }
2246
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
22472438 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
22482439 conn_id->state = RDMA_CM_CONNECT;
22492440
22502441 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
22512442 if (ret) {
2252
- mutex_unlock(&conn_id->handler_mutex);
2253
- rdma_destroy_id(new_cm_id);
2254
- goto out;
2443
+ mutex_unlock(&listen_id->handler_mutex);
2444
+ destroy_id_handler_unlock(conn_id);
2445
+ return ret;
22552446 }
22562447
2257
- ret = cma_acquire_dev(conn_id, listen_id);
2448
+ ret = cma_iw_acquire_dev(conn_id, listen_id);
22582449 if (ret) {
2259
- mutex_unlock(&conn_id->handler_mutex);
2260
- rdma_destroy_id(new_cm_id);
2261
- goto out;
2450
+ mutex_unlock(&listen_id->handler_mutex);
2451
+ destroy_id_handler_unlock(conn_id);
2452
+ return ret;
22622453 }
22632454
22642455 conn_id->cm_id.iw = cm_id;
....@@ -2268,25 +2459,16 @@
22682459 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
22692460 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
22702461
2271
- /*
2272
- * Protect against the user destroying conn_id from another thread
2273
- * until we're done accessing it.
2274
- */
2275
- atomic_inc(&conn_id->refcount);
2276
- ret = conn_id->id.event_handler(&conn_id->id, &event);
2462
+ ret = cma_cm_event_handler(conn_id, &event);
22772463 if (ret) {
22782464 /* User wants to destroy the CM ID */
22792465 conn_id->cm_id.iw = NULL;
2280
- cma_exch(conn_id, RDMA_CM_DESTROYING);
2281
- mutex_unlock(&conn_id->handler_mutex);
22822466 mutex_unlock(&listen_id->handler_mutex);
2283
- cma_deref_id(conn_id);
2284
- rdma_destroy_id(&conn_id->id);
2467
+ destroy_id_handler_unlock(conn_id);
22852468 return ret;
22862469 }
22872470
22882471 mutex_unlock(&conn_id->handler_mutex);
2289
- cma_deref_id(conn_id);
22902472
22912473 out:
22922474 mutex_unlock(&listen_id->handler_mutex);
....@@ -2321,7 +2503,10 @@
23212503 if (IS_ERR(id))
23222504 return PTR_ERR(id);
23232505
2506
+ mutex_lock(&id_priv->qp_mutex);
23242507 id->tos = id_priv->tos;
2508
+ id->tos_set = id_priv->tos_set;
2509
+ mutex_unlock(&id_priv->qp_mutex);
23252510 id_priv->cm_id.iw = id;
23262511
23272512 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
....@@ -2342,56 +2527,88 @@
23422527 {
23432528 struct rdma_id_private *id_priv = id->context;
23442529
2530
+ /* Listening IDs are always destroyed on removal */
2531
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
2532
+ return -1;
2533
+
23452534 id->context = id_priv->id.context;
23462535 id->event_handler = id_priv->id.event_handler;
2536
+ trace_cm_event_handler(id_priv, event);
23472537 return id_priv->id.event_handler(id, event);
23482538 }
23492539
2350
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2351
- struct cma_device *cma_dev)
2540
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
2541
+ struct cma_device *cma_dev,
2542
+ struct rdma_id_private **to_destroy)
23522543 {
23532544 struct rdma_id_private *dev_id_priv;
2354
- struct rdma_cm_id *id;
23552545 struct net *net = id_priv->id.route.addr.dev_addr.net;
23562546 int ret;
23572547
23582548 lockdep_assert_held(&lock);
23592549
2550
+ *to_destroy = NULL;
23602551 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2361
- return;
2552
+ return 0;
23622553
2363
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2364
- id_priv->id.qp_type, id_priv->res.kern_name);
2365
- if (IS_ERR(id))
2366
- return;
2367
-
2368
- dev_id_priv = container_of(id, struct rdma_id_private, id);
2554
+ dev_id_priv =
2555
+ __rdma_create_id(net, cma_listen_handler, id_priv,
2556
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
2557
+ if (IS_ERR(dev_id_priv))
2558
+ return PTR_ERR(dev_id_priv);
23692559
23702560 dev_id_priv->state = RDMA_CM_ADDR_BOUND;
23712561 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
23722562 rdma_addr_size(cma_src_addr(id_priv)));
23732563
23742564 _cma_attach_to_dev(dev_id_priv, cma_dev);
2375
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2376
- atomic_inc(&id_priv->refcount);
2565
+ rdma_restrack_add(&dev_id_priv->res);
2566
+ cma_id_get(id_priv);
23772567 dev_id_priv->internal_id = 1;
23782568 dev_id_priv->afonly = id_priv->afonly;
2569
+ mutex_lock(&id_priv->qp_mutex);
2570
+ dev_id_priv->tos_set = id_priv->tos_set;
2571
+ dev_id_priv->tos = id_priv->tos;
2572
+ mutex_unlock(&id_priv->qp_mutex);
23792573
2380
- ret = rdma_listen(id, id_priv->backlog);
2574
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
23812575 if (ret)
2382
- pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2383
- ret, cma_dev->device->name);
2576
+ goto err_listen;
2577
+ list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2578
+ return 0;
2579
+err_listen:
2580
+ /* Caller must destroy this after releasing lock */
2581
+ *to_destroy = dev_id_priv;
2582
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
2583
+ return ret;
23842584 }
23852585
2386
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
2586
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
23872587 {
2588
+ struct rdma_id_private *to_destroy;
23882589 struct cma_device *cma_dev;
2590
+ int ret;
23892591
23902592 mutex_lock(&lock);
23912593 list_add_tail(&id_priv->list, &listen_any_list);
2392
- list_for_each_entry(cma_dev, &dev_list, list)
2393
- cma_listen_on_dev(id_priv, cma_dev);
2594
+ list_for_each_entry(cma_dev, &dev_list, list) {
2595
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
2596
+ if (ret) {
2597
+ /* Prevent racing with cma_process_remove() */
2598
+ if (to_destroy)
2599
+ list_del_init(&to_destroy->list);
2600
+ goto err_listen;
2601
+ }
2602
+ }
23942603 mutex_unlock(&lock);
2604
+ return 0;
2605
+
2606
+err_listen:
2607
+ _cma_cancel_listens(id_priv);
2608
+ mutex_unlock(&lock);
2609
+ if (to_destroy)
2610
+ rdma_destroy_id(&to_destroy->id);
2611
+ return ret;
23952612 }
23962613
23972614 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
....@@ -2399,10 +2616,44 @@
23992616 struct rdma_id_private *id_priv;
24002617
24012618 id_priv = container_of(id, struct rdma_id_private, id);
2619
+ mutex_lock(&id_priv->qp_mutex);
24022620 id_priv->tos = (u8) tos;
24032621 id_priv->tos_set = true;
2622
+ mutex_unlock(&id_priv->qp_mutex);
24042623 }
24052624 EXPORT_SYMBOL(rdma_set_service_type);
2625
+
2626
+/**
2627
+ * rdma_set_ack_timeout() - Set the ack timeout of QP associated
2628
+ * with a connection identifier.
2629
+ * @id: Communication identifier to associated with service type.
2630
+ * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
2631
+ *
2632
+ * This function should be called before rdma_connect() on active side,
2633
+ * and on passive side before rdma_accept(). It is applicable to primary
2634
+ * path only. The timeout will affect the local side of the QP, it is not
2635
+ * negotiated with remote side and zero disables the timer. In case it is
2636
+ * set before rdma_resolve_route, the value will also be used to determine
2637
+ * PacketLifeTime for RoCE.
2638
+ *
2639
+ * Return: 0 for success
2640
+ */
2641
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
2642
+{
2643
+ struct rdma_id_private *id_priv;
2644
+
2645
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
2646
+ return -EINVAL;
2647
+
2648
+ id_priv = container_of(id, struct rdma_id_private, id);
2649
+ mutex_lock(&id_priv->qp_mutex);
2650
+ id_priv->timeout = timeout;
2651
+ id_priv->timeout_set = true;
2652
+ mutex_unlock(&id_priv->qp_mutex);
2653
+
2654
+ return 0;
2655
+}
2656
+EXPORT_SYMBOL(rdma_set_ack_timeout);
24062657
24072658 static void cma_query_handler(int status, struct sa_path_rec *path_rec,
24082659 void *context)
....@@ -2427,8 +2678,8 @@
24272678 queue_work(cma_wq, &work->work);
24282679 }
24292680
2430
-static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
2431
- struct cma_work *work)
2681
+static int cma_query_ib_route(struct rdma_id_private *id_priv,
2682
+ unsigned long timeout_ms, struct cma_work *work)
24322683 {
24332684 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
24342685 struct sa_path_rec path_rec;
....@@ -2480,49 +2731,54 @@
24802731 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
24812732 }
24822733
2734
+static void cma_iboe_join_work_handler(struct work_struct *work)
2735
+{
2736
+ struct cma_multicast *mc =
2737
+ container_of(work, struct cma_multicast, iboe_join.work);
2738
+ struct rdma_cm_event *event = &mc->iboe_join.event;
2739
+ struct rdma_id_private *id_priv = mc->id_priv;
2740
+ int ret;
2741
+
2742
+ mutex_lock(&id_priv->handler_mutex);
2743
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
2744
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
2745
+ goto out_unlock;
2746
+
2747
+ ret = cma_cm_event_handler(id_priv, event);
2748
+ WARN_ON(ret);
2749
+
2750
+out_unlock:
2751
+ mutex_unlock(&id_priv->handler_mutex);
2752
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
2753
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
2754
+}
2755
+
24832756 static void cma_work_handler(struct work_struct *_work)
24842757 {
24852758 struct cma_work *work = container_of(_work, struct cma_work, work);
24862759 struct rdma_id_private *id_priv = work->id;
2487
- int destroy = 0;
24882760
24892761 mutex_lock(&id_priv->handler_mutex);
2490
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2491
- goto out;
2492
-
2493
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2494
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2495
- destroy = 1;
2496
- }
2497
-out:
2498
- mutex_unlock(&id_priv->handler_mutex);
2499
- cma_deref_id(id_priv);
2500
- if (destroy)
2501
- rdma_destroy_id(&id_priv->id);
2502
- kfree(work);
2503
-}
2504
-
2505
-static void cma_ndev_work_handler(struct work_struct *_work)
2506
-{
2507
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
2508
- struct rdma_id_private *id_priv = work->id;
2509
- int destroy = 0;
2510
-
2511
- mutex_lock(&id_priv->handler_mutex);
2512
- if (id_priv->state == RDMA_CM_DESTROYING ||
2513
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
2514
- goto out;
2515
-
2516
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2517
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2518
- destroy = 1;
2762
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
2763
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
2764
+ goto out_unlock;
2765
+ if (work->old_state != 0 || work->new_state != 0) {
2766
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2767
+ goto out_unlock;
25192768 }
25202769
2521
-out:
2770
+ if (cma_cm_event_handler(id_priv, &work->event)) {
2771
+ cma_id_put(id_priv);
2772
+ destroy_id_handler_unlock(id_priv);
2773
+ goto out_free;
2774
+ }
2775
+
2776
+out_unlock:
25222777 mutex_unlock(&id_priv->handler_mutex);
2523
- cma_deref_id(id_priv);
2524
- if (destroy)
2525
- rdma_destroy_id(&id_priv->id);
2778
+ cma_id_put(id_priv);
2779
+out_free:
2780
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
2781
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
25262782 kfree(work);
25272783 }
25282784
....@@ -2536,17 +2792,23 @@
25362792 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
25372793 }
25382794
2539
-static void cma_init_resolve_addr_work(struct cma_work *work,
2540
- struct rdma_id_private *id_priv)
2795
+static void enqueue_resolve_addr_work(struct cma_work *work,
2796
+ struct rdma_id_private *id_priv)
25412797 {
2798
+ /* Balances with cma_id_put() in cma_work_handler */
2799
+ cma_id_get(id_priv);
2800
+
25422801 work->id = id_priv;
25432802 INIT_WORK(&work->work, cma_work_handler);
25442803 work->old_state = RDMA_CM_ADDR_QUERY;
25452804 work->new_state = RDMA_CM_ADDR_RESOLVED;
25462805 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2806
+
2807
+ queue_work(cma_wq, &work->work);
25472808 }
25482809
2549
-static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2810
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
2811
+ unsigned long timeout_ms)
25502812 {
25512813 struct rdma_route *route = &id_priv->id.route;
25522814 struct cma_work *work;
....@@ -2669,7 +2931,7 @@
26692931 }
26702932 EXPORT_SYMBOL(rdma_set_ib_path);
26712933
2672
-static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2934
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
26732935 {
26742936 struct cma_work *work;
26752937
....@@ -2682,22 +2944,86 @@
26822944 return 0;
26832945 }
26842946
2685
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2947
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
26862948 {
2687
- int prio;
26882949 struct net_device *dev;
26892950
2690
- prio = rt_tos2priority(tos);
2691
- dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
2951
+ dev = vlan_dev_real_dev(vlan_ndev);
26922952 if (dev->num_tc)
26932953 return netdev_get_prio_tc_map(dev, prio);
26942954
2695
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
2955
+ return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
2956
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
2957
+}
2958
+
2959
+struct iboe_prio_tc_map {
2960
+ int input_prio;
2961
+ int output_tc;
2962
+ bool found;
2963
+};
2964
+
2965
+static int get_lower_vlan_dev_tc(struct net_device *dev,
2966
+ struct netdev_nested_priv *priv)
2967
+{
2968
+ struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
2969
+
2970
+ if (is_vlan_dev(dev))
2971
+ map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
2972
+ else if (dev->num_tc)
2973
+ map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
2974
+ else
2975
+ map->output_tc = 0;
2976
+ /* We are only interested in the first-level VLAN device, so always
2977
+ * return 1 to stop iterating over lower-level devices.
2978
+ */
2979
+ map->found = true;
2980
+ return 1;
2981
+}
2982
+
2983
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2984
+{
2985
+ struct iboe_prio_tc_map prio_tc_map = {};
2986
+ int prio = rt_tos2priority(tos);
2987
+ struct netdev_nested_priv priv;
2988
+
2989
+ /* If VLAN device, get it directly from the VLAN netdev */
26962990 if (is_vlan_dev(ndev))
2697
- return (vlan_dev_get_egress_qos_mask(ndev, prio) &
2698
- VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
2699
-#endif
2700
- return 0;
2991
+ return get_vlan_ndev_tc(ndev, prio);
2992
+
2993
+ prio_tc_map.input_prio = prio;
2994
+ priv.data = (void *)&prio_tc_map;
2995
+ rcu_read_lock();
2996
+ netdev_walk_all_lower_dev_rcu(ndev,
2997
+ get_lower_vlan_dev_tc,
2998
+ &priv);
2999
+ rcu_read_unlock();
3000
+ /* If a map is found on a lower device, use it; otherwise fall back
3001
+ * to the current netdevice's priority-to-TC map.
3002
+ */
3003
+ if (prio_tc_map.found)
3004
+ return prio_tc_map.output_tc;
3005
+ else if (ndev->num_tc)
3006
+ return netdev_get_prio_tc_map(ndev, prio);
3007
+ else
3008
+ return 0;
3009
+}
3010
+
3011
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
3012
+{
3013
+ struct sockaddr_in6 *addr6;
3014
+ u16 dport, sport;
3015
+ u32 hash, fl;
3016
+
3017
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
3018
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
3019
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
3020
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
3021
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
3022
+ hash = (u32)sport * 31 + dport;
3023
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
3024
+ }
3025
+
3026
+ return cpu_to_be32(fl);
27013027 }
27023028
27033029 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
....@@ -2710,8 +3036,11 @@
27103036
27113037 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
27123038 rdma_start_port(id_priv->cma_dev->device)];
2713
- u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
3039
+ u8 tos;
27143040
3041
+ mutex_lock(&id_priv->qp_mutex);
3042
+ tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
3043
+ mutex_unlock(&id_priv->qp_mutex);
27153044
27163045 work = kzalloc(sizeof *work, GFP_KERNEL);
27173046 if (!work)
....@@ -2748,14 +3077,32 @@
27483077 route->path_rec->traffic_class = tos;
27493078 route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
27503079 route->path_rec->rate_selector = IB_SA_EQ;
2751
- route->path_rec->rate = iboe_get_rate(ndev);
3080
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
27523081 dev_put(ndev);
27533082 route->path_rec->packet_life_time_selector = IB_SA_EQ;
2754
- route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
3083
+ /* In case ACK timeout is set, use this value to calculate
3084
+ * PacketLifeTime. As per IBTA 12.7.34,
3085
+ * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay).
3086
+ * Assuming a negligible local ACK delay, we can use
3087
+ * PacketLifeTime = local ACK timeout/2
3088
+ * as a reasonable approximation for RoCE networks.
3089
+ */
3090
+ mutex_lock(&id_priv->qp_mutex);
3091
+ if (id_priv->timeout_set && id_priv->timeout)
3092
+ route->path_rec->packet_life_time = id_priv->timeout - 1;
3093
+ else
3094
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
3095
+ mutex_unlock(&id_priv->qp_mutex);
3096
+
27553097 if (!route->path_rec->mtu) {
27563098 ret = -EINVAL;
27573099 goto err2;
27583100 }
3101
+
3102
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
3103
+ id_priv->id.port_num))
3104
+ route->path_rec->flow_label =
3105
+ cma_get_roce_udp_flow_label(id_priv);
27593106
27603107 cma_init_resolve_route_work(work, id_priv);
27613108 queue_work(cma_wq, &work->work);
....@@ -2771,7 +3118,7 @@
27713118 return ret;
27723119 }
27733120
2774
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
3121
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
27753122 {
27763123 struct rdma_id_private *id_priv;
27773124 int ret;
....@@ -2780,13 +3127,13 @@
27803127 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
27813128 return -EINVAL;
27823129
2783
- atomic_inc(&id_priv->refcount);
3130
+ cma_id_get(id_priv);
27843131 if (rdma_cap_ib_sa(id->device, id->port_num))
27853132 ret = cma_resolve_ib_route(id_priv, timeout_ms);
27863133 else if (rdma_protocol_roce(id->device, id->port_num))
27873134 ret = cma_resolve_iboe_route(id_priv);
27883135 else if (rdma_protocol_iwarp(id->device, id->port_num))
2789
- ret = cma_resolve_iw_route(id_priv, timeout_ms);
3136
+ ret = cma_resolve_iw_route(id_priv);
27903137 else
27913138 ret = -ENOSYS;
27923139
....@@ -2796,7 +3143,7 @@
27963143 return 0;
27973144 err:
27983145 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2799
- cma_deref_id(id_priv);
3146
+ cma_id_put(id_priv);
28003147 return ret;
28013148 }
28023149 EXPORT_SYMBOL(rdma_resolve_route);
....@@ -2823,9 +3170,9 @@
28233170 struct cma_device *cma_dev, *cur_dev;
28243171 union ib_gid gid;
28253172 enum ib_port_state port_state;
3173
+ unsigned int p;
28263174 u16 pkey;
28273175 int ret;
2828
- u8 p;
28293176
28303177 cma_dev = NULL;
28313178 mutex_lock(&lock);
....@@ -2837,7 +3184,7 @@
28373184 if (!cma_dev)
28383185 cma_dev = cur_dev;
28393186
2840
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
3187
+ rdma_for_each_port (cur_dev->device, p) {
28413188 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
28423189 port_state == IB_PORT_ACTIVE) {
28433190 cma_dev = cur_dev;
....@@ -2870,6 +3217,7 @@
28703217 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
28713218 id_priv->id.port_num = p;
28723219 cma_attach_to_dev(id_priv, cma_dev);
3220
+ rdma_restrack_add(&id_priv->res);
28733221 cma_set_loopback(cma_src_addr(id_priv));
28743222 out:
28753223 mutex_unlock(&lock);
....@@ -2898,10 +3246,11 @@
28983246 memcpy(&old_addr, addr, rdma_addr_size(addr));
28993247 memcpy(addr, src_addr, rdma_addr_size(src_addr));
29003248 if (!status && !id_priv->cma_dev) {
2901
- status = cma_acquire_dev(id_priv, NULL);
3249
+ status = cma_acquire_dev_by_src_ip(id_priv);
29023250 if (status)
29033251 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
29043252 status);
3253
+ rdma_restrack_add(&id_priv->res);
29053254 } else if (status) {
29063255 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
29073256 }
....@@ -2917,16 +3266,12 @@
29173266 } else
29183267 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
29193268
2920
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
2921
- cma_exch(id_priv, RDMA_CM_DESTROYING);
2922
- mutex_unlock(&id_priv->handler_mutex);
2923
- cma_deref_id(id_priv);
2924
- rdma_destroy_id(&id_priv->id);
3269
+ if (cma_cm_event_handler(id_priv, &event)) {
3270
+ destroy_id_handler_unlock(id_priv);
29253271 return;
29263272 }
29273273 out:
29283274 mutex_unlock(&id_priv->handler_mutex);
2929
- cma_deref_id(id_priv);
29303275 }
29313276
29323277 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
....@@ -2948,8 +3293,7 @@
29483293 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
29493294 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
29503295
2951
- cma_init_resolve_addr_work(work, id_priv);
2952
- queue_work(cma_wq, &work->work);
3296
+ enqueue_resolve_addr_work(work, id_priv);
29533297 return 0;
29543298 err:
29553299 kfree(work);
....@@ -2974,8 +3318,7 @@
29743318 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
29753319 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
29763320
2977
- cma_init_resolve_addr_work(work, id_priv);
2978
- queue_work(cma_wq, &work->work);
3321
+ enqueue_resolve_addr_work(work, id_priv);
29793322 return 0;
29803323 err:
29813324 kfree(work);
....@@ -2985,54 +3328,106 @@
29853328 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
29863329 const struct sockaddr *dst_addr)
29873330 {
2988
- if (!src_addr || !src_addr->sa_family) {
2989
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2990
- src_addr->sa_family = dst_addr->sa_family;
2991
- if (IS_ENABLED(CONFIG_IPV6) &&
2992
- dst_addr->sa_family == AF_INET6) {
2993
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
2994
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
2995
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
2996
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
2997
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
2998
- } else if (dst_addr->sa_family == AF_IB) {
2999
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
3000
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
3331
+ struct sockaddr_storage zero_sock = {};
3332
+
3333
+ if (src_addr && src_addr->sa_family)
3334
+ return rdma_bind_addr(id, src_addr);
3335
+
3336
+ /*
3337
+ * When the src_addr is not specified, automatically supply an any addr
3338
+ */
3339
+ zero_sock.ss_family = dst_addr->sa_family;
3340
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
3341
+ struct sockaddr_in6 *src_addr6 =
3342
+ (struct sockaddr_in6 *)&zero_sock;
3343
+ struct sockaddr_in6 *dst_addr6 =
3344
+ (struct sockaddr_in6 *)dst_addr;
3345
+
3346
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
3347
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
3348
+ id->route.addr.dev_addr.bound_dev_if =
3349
+ dst_addr6->sin6_scope_id;
3350
+ } else if (dst_addr->sa_family == AF_IB) {
3351
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
3352
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
3353
+ }
3354
+ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
3355
+}
3356
+
3357
+/*
3358
+ * If required, resolve the source address for bind and leave the id_priv in
3359
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
3360
+ * calls made by the ULP: a previously bound ID will not be re-bound, and src_addr
3361
+ * is ignored.
3362
+ */
3363
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
3364
+ struct sockaddr *src_addr,
3365
+ const struct sockaddr *dst_addr)
3366
+{
3367
+ int ret;
3368
+
3369
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3370
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
3371
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
3372
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
3373
+ if (ret)
3374
+ goto err_dst;
3375
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
3376
+ RDMA_CM_ADDR_QUERY))) {
3377
+ ret = -EINVAL;
3378
+ goto err_dst;
30013379 }
30023380 }
3003
- return rdma_bind_addr(id, src_addr);
3381
+
3382
+ if (cma_family(id_priv) != dst_addr->sa_family) {
3383
+ ret = -EINVAL;
3384
+ goto err_state;
3385
+ }
3386
+ return 0;
3387
+
3388
+err_state:
3389
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3390
+err_dst:
3391
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
3392
+ return ret;
30043393 }
30053394
30063395 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3007
- const struct sockaddr *dst_addr, int timeout_ms)
3396
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
30083397 {
3009
- struct rdma_id_private *id_priv;
3398
+ struct rdma_id_private *id_priv =
3399
+ container_of(id, struct rdma_id_private, id);
30103400 int ret;
30113401
3012
- id_priv = container_of(id, struct rdma_id_private, id);
3013
- if (id_priv->state == RDMA_CM_IDLE) {
3014
- ret = cma_bind_addr(id, src_addr, dst_addr);
3015
- if (ret)
3016
- return ret;
3017
- }
3402
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
3403
+ if (ret)
3404
+ return ret;
30183405
3019
- if (cma_family(id_priv) != dst_addr->sa_family)
3020
- return -EINVAL;
3021
-
3022
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
3023
- return -EINVAL;
3024
-
3025
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3026
- atomic_inc(&id_priv->refcount);
30273406 if (cma_any_addr(dst_addr)) {
30283407 ret = cma_resolve_loopback(id_priv);
30293408 } else {
30303409 if (dst_addr->sa_family == AF_IB) {
30313410 ret = cma_resolve_ib_addr(id_priv);
30323411 } else {
3033
- ret = rdma_resolve_ip(cma_src_addr(id_priv),
3034
- dst_addr, &id->route.addr.dev_addr,
3035
- timeout_ms, addr_handler, id_priv);
3412
+ /*
3413
+ * The FSM can return to RDMA_CM_ADDR_BOUND after
3414
+ * rdma_resolve_ip() is called, e.g. through the error
3415
+ * path in addr_handler(). If this happens, the existing
3416
+ * request must be canceled before issuing a new one.
3417
+ * Since canceling a request is a bit slow and this
3418
+ * oddball path is rare, keep track of whether a request
3419
+ * has been issued. The flag is effectively permanent,
3420
+ * since this is the only cancel and it happens
3421
+ * immediately before rdma_resolve_ip().
3422
+ */
3423
+ if (id_priv->used_resolve_ip)
3424
+ rdma_addr_cancel(&id->route.addr.dev_addr);
3425
+ else
3426
+ id_priv->used_resolve_ip = 1;
3427
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
3428
+ &id->route.addr.dev_addr,
3429
+ timeout_ms, addr_handler,
3430
+ false, id_priv);
30363431 }
30373432 }
30383433 if (ret)
....@@ -3041,7 +3436,6 @@
30413436 return 0;
30423437 err:
30433438 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3044
- cma_deref_id(id_priv);
30453439 return ret;
30463440 }
30473441 EXPORT_SYMBOL(rdma_resolve_addr);
....@@ -3054,7 +3448,8 @@
30543448
30553449 id_priv = container_of(id, struct rdma_id_private, id);
30563450 spin_lock_irqsave(&id_priv->lock, flags);
3057
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
3451
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
3452
+ id_priv->state == RDMA_CM_IDLE) {
30583453 id_priv->reuseaddr = reuse;
30593454 ret = 0;
30603455 } else {
....@@ -3135,7 +3530,7 @@
31353530 goto err;
31363531
31373532 bind_list->ps = ps;
3138
- bind_list->port = (unsigned short)ret;
3533
+ bind_list->port = snum;
31393534 cma_bind_port(bind_list, id_priv);
31403535 return 0;
31413536 err:
....@@ -3248,8 +3643,7 @@
32483643 if (id_priv == cur_id)
32493644 continue;
32503645
3251
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
3252
- cur_id->reuseaddr)
3646
+ if (reuseaddr && cur_id->reuseaddr)
32533647 continue;
32543648
32553649 cur_addr = cma_src_addr(cur_id);
....@@ -3287,18 +3681,6 @@
32873681 if (!ret)
32883682 cma_bind_port(bind_list, id_priv);
32893683 }
3290
- return ret;
3291
-}
3292
-
3293
-static int cma_bind_listen(struct rdma_id_private *id_priv)
3294
-{
3295
- struct rdma_bind_list *bind_list = id_priv->bind_list;
3296
- int ret = 0;
3297
-
3298
- mutex_lock(&lock);
3299
- if (bind_list->owners.first->next)
3300
- ret = cma_check_port(bind_list, id_priv, 0);
3301
- mutex_unlock(&lock);
33023684 return ret;
33033685 }
33043686
....@@ -3395,28 +3777,41 @@
33953777
33963778 int rdma_listen(struct rdma_cm_id *id, int backlog)
33973779 {
3398
- struct rdma_id_private *id_priv;
3780
+ struct rdma_id_private *id_priv =
3781
+ container_of(id, struct rdma_id_private, id);
33993782 int ret;
34003783
3401
- id_priv = container_of(id, struct rdma_id_private, id);
3402
- if (id_priv->state == RDMA_CM_IDLE) {
3403
- id->route.addr.src_addr.ss_family = AF_INET;
3404
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3784
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
3785
+ struct sockaddr_in any_in = {
3786
+ .sin_family = AF_INET,
3787
+ .sin_addr.s_addr = htonl(INADDR_ANY),
3788
+ };
3789
+
3790
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
3791
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
34053792 if (ret)
34063793 return ret;
3794
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
3795
+ RDMA_CM_LISTEN)))
3796
+ return -EINVAL;
34073797 }
34083798
3409
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3410
- return -EINVAL;
3411
-
3799
+ /*
3800
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
3801
+ * any more, and has to be unique in the bind list.
3802
+ */
34123803 if (id_priv->reuseaddr) {
3413
- ret = cma_bind_listen(id_priv);
3804
+ mutex_lock(&lock);
3805
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
3806
+ if (!ret)
3807
+ id_priv->reuseaddr = 0;
3808
+ mutex_unlock(&lock);
34143809 if (ret)
34153810 goto err;
34163811 }
34173812
34183813 id_priv->backlog = backlog;
3419
- if (id->device) {
3814
+ if (id_priv->cma_dev) {
34203815 if (rdma_cap_ib_cm(id->device, 1)) {
34213816 ret = cma_ib_listen(id_priv);
34223817 if (ret)
....@@ -3429,12 +3824,19 @@
34293824 ret = -ENOSYS;
34303825 goto err;
34313826 }
3432
- } else
3433
- cma_listen_on_all(id_priv);
3827
+ } else {
3828
+ ret = cma_listen_on_all(id_priv);
3829
+ if (ret)
3830
+ goto err;
3831
+ }
34343832
34353833 return 0;
34363834 err:
34373835 id_priv->backlog = 0;
3836
+ /*
3837
+ * All the failure paths that lead here will not allow the req_handler's
3838
+ * to have run.
3839
+ */
34383840 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
34393841 return ret;
34403842 }
....@@ -3464,7 +3866,7 @@
34643866 if (ret)
34653867 goto err1;
34663868
3467
- ret = cma_acquire_dev(id_priv, NULL);
3869
+ ret = cma_acquire_dev_by_src_ip(id_priv);
34683870 if (ret)
34693871 goto err1;
34703872 }
....@@ -3487,9 +3889,10 @@
34873889 if (ret)
34883890 goto err2;
34893891
3892
+ if (!cma_any_addr(addr))
3893
+ rdma_restrack_add(&id_priv->res);
34903894 return 0;
34913895 err2:
3492
- rdma_restrack_del(&id_priv->res);
34933896 if (id_priv->cma_dev)
34943897 cma_release_dev(id_priv);
34953898 err1:
....@@ -3535,10 +3938,10 @@
35353938 struct rdma_cm_event event = {};
35363939 const struct ib_cm_sidr_rep_event_param *rep =
35373940 &ib_event->param.sidr_rep_rcvd;
3538
- int ret = 0;
3941
+ int ret;
35393942
35403943 mutex_lock(&id_priv->handler_mutex);
3541
- if (id_priv->state != RDMA_CM_CONNECT)
3944
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
35423945 goto out;
35433946
35443947 switch (ib_event->event) {
....@@ -3579,20 +3982,18 @@
35793982 goto out;
35803983 }
35813984
3582
- ret = id_priv->id.event_handler(&id_priv->id, &event);
3985
+ ret = cma_cm_event_handler(id_priv, &event);
35833986
35843987 rdma_destroy_ah_attr(&event.param.ud.ah_attr);
35853988 if (ret) {
35863989 /* Destroy the CM ID by returning a non-zero value. */
35873990 id_priv->cm_id.ib = NULL;
3588
- cma_exch(id_priv, RDMA_CM_DESTROYING);
3589
- mutex_unlock(&id_priv->handler_mutex);
3590
- rdma_destroy_id(&id_priv->id);
3991
+ destroy_id_handler_unlock(id_priv);
35913992 return ret;
35923993 }
35933994 out:
35943995 mutex_unlock(&id_priv->handler_mutex);
3595
- return ret;
3996
+ return 0;
35963997 }
35973998
35983999 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
....@@ -3643,6 +4044,7 @@
36434044 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
36444045 req.max_cm_retries = CMA_MAX_CM_RETRIES;
36454046
4047
+ trace_cm_send_sidr_req(id_priv);
36464048 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
36474049 if (ret) {
36484050 ib_destroy_cm_id(id_priv->cm_id.ib);
....@@ -3715,7 +4117,10 @@
37154117 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
37164118 req.max_cm_retries = CMA_MAX_CM_RETRIES;
37174119 req.srq = id_priv->srq ? 1 : 0;
4120
+ req.ece.vendor_id = id_priv->ece.vendor_id;
4121
+ req.ece.attr_mod = id_priv->ece.attr_mod;
37184122
4123
+ trace_cm_send_req(id_priv);
37194124 ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
37204125 out:
37214126 if (ret && !IS_ERR(id)) {
....@@ -3738,7 +4143,11 @@
37384143 if (IS_ERR(cm_id))
37394144 return PTR_ERR(cm_id);
37404145
4146
+ mutex_lock(&id_priv->qp_mutex);
37414147 cm_id->tos = id_priv->tos;
4148
+ cm_id->tos_set = id_priv->tos_set;
4149
+ mutex_unlock(&id_priv->qp_mutex);
4150
+
37424151 id_priv->cm_id.iw = cm_id;
37434152
37444153 memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
....@@ -3769,12 +4178,21 @@
37694178 return ret;
37704179 }
37714180
3772
-int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
4181
+/**
4182
+ * rdma_connect_locked - Initiate an active connection request.
4183
+ * @id: Connection identifier to connect.
4184
+ * @conn_param: Connection information used for connected QPs.
4185
+ *
4186
+ * Same as rdma_connect() but can only be called from the
4187
+ * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback.
4188
+ */
4189
+int rdma_connect_locked(struct rdma_cm_id *id,
4190
+ struct rdma_conn_param *conn_param)
37734191 {
3774
- struct rdma_id_private *id_priv;
4192
+ struct rdma_id_private *id_priv =
4193
+ container_of(id, struct rdma_id_private, id);
37754194 int ret;
37764195
3777
- id_priv = container_of(id, struct rdma_id_private, id);
37784196 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
37794197 return -EINVAL;
37804198
....@@ -3793,14 +4211,59 @@
37934211 else
37944212 ret = -ENOSYS;
37954213 if (ret)
3796
- goto err;
3797
-
4214
+ goto err_state;
37984215 return 0;
3799
-err:
4216
+err_state:
38004217 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
38014218 return ret;
38024219 }
4220
+EXPORT_SYMBOL(rdma_connect_locked);
4221
+
4222
+/**
4223
+ * rdma_connect - Initiate an active connection request.
4224
+ * @id: Connection identifier to connect.
4225
+ * @conn_param: Connection information used for connected QPs.
4226
+ *
4227
+ * Users must have resolved a route for the rdma_cm_id to connect with by having
4228
+ * called rdma_resolve_route before calling this routine.
4229
+ *
4230
+ * This call will either connect to a remote QP or obtain remote QP information
4231
+ * for unconnected rdma_cm_id's. The actual operation is based on the
4232
+ * rdma_cm_id's port space.
4233
+ */
4234
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
4235
+{
4236
+ struct rdma_id_private *id_priv =
4237
+ container_of(id, struct rdma_id_private, id);
4238
+ int ret;
4239
+
4240
+ mutex_lock(&id_priv->handler_mutex);
4241
+ ret = rdma_connect_locked(id, conn_param);
4242
+ mutex_unlock(&id_priv->handler_mutex);
4243
+ return ret;
4244
+}
38034245 EXPORT_SYMBOL(rdma_connect);
4246
+
4247
+/**
4248
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
4249
+ * @id: Connection identifier to connect.
4250
+ * @conn_param: Connection information used for connected QPs.
4251
+ * @ece: ECE parameters
4252
+ *
4253
+ * See rdma_connect() explanation.
4254
+ */
4255
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
4256
+ struct rdma_ucm_ece *ece)
4257
+{
4258
+ struct rdma_id_private *id_priv =
4259
+ container_of(id, struct rdma_id_private, id);
4260
+
4261
+ id_priv->ece.vendor_id = ece->vendor_id;
4262
+ id_priv->ece.attr_mod = ece->attr_mod;
4263
+
4264
+ return rdma_connect(id, conn_param);
4265
+}
4266
+EXPORT_SYMBOL(rdma_connect_ece);
38044267
38054268 static int cma_accept_ib(struct rdma_id_private *id_priv,
38064269 struct rdma_conn_param *conn_param)
....@@ -3827,7 +4290,10 @@
38274290 rep.flow_control = conn_param->flow_control;
38284291 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
38294292 rep.srq = id_priv->srq ? 1 : 0;
4293
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
4294
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
38304295
4296
+ trace_cm_send_rep(id_priv);
38314297 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
38324298 out:
38334299 return ret;
....@@ -3868,32 +4334,53 @@
38684334 memset(&rep, 0, sizeof rep);
38694335 rep.status = status;
38704336 if (status == IB_SIDR_SUCCESS) {
3871
- ret = cma_set_qkey(id_priv, qkey);
4337
+ if (qkey)
4338
+ ret = cma_set_qkey(id_priv, qkey);
4339
+ else
4340
+ ret = cma_set_default_qkey(id_priv);
38724341 if (ret)
38734342 return ret;
38744343 rep.qp_num = id_priv->qp_num;
38754344 rep.qkey = id_priv->qkey;
4345
+
4346
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
4347
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
38764348 }
4349
+
38774350 rep.private_data = private_data;
38784351 rep.private_data_len = private_data_len;
38794352
4353
+ trace_cm_send_sidr_rep(id_priv);
38804354 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
38814355 }
38824356
3883
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
3884
- const char *caller)
4357
+/**
4358
+ * rdma_accept - Called to accept a connection request or response.
4359
+ * @id: Connection identifier associated with the request.
4360
+ * @conn_param: Information needed to establish the connection. This must be
4361
+ * provided if accepting a connection request. If accepting a connection
4362
+ * response, this parameter must be NULL.
4363
+ *
4364
+ * Typically, this routine is only called by the listener to accept a connection
4365
+ * request. It must also be called on the active side of a connection if the
4366
+ * user is performing their own QP transitions.
4367
+ *
4368
+ * In the case of error, a reject message is sent to the remote side and the
4369
+ * state of the qp associated with the id is modified to error, such that any
4370
+ * previously posted receive buffers would be flushed.
4371
+ *
4372
+ * This function is for use by kernel ULPs and must be called from under the
4373
+ * handler callback.
4374
+ */
4375
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
38854376 {
3886
- struct rdma_id_private *id_priv;
4377
+ struct rdma_id_private *id_priv =
4378
+ container_of(id, struct rdma_id_private, id);
38874379 int ret;
38884380
3889
- id_priv = container_of(id, struct rdma_id_private, id);
4381
+ lockdep_assert_held(&id_priv->handler_mutex);
38904382
3891
- if (caller)
3892
- id_priv->res.kern_name = caller;
3893
- else
3894
- rdma_restrack_set_task(&id_priv->res, current);
3895
-
3896
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
4383
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
38974384 return -EINVAL;
38984385
38994386 if (!id->qp && conn_param) {
....@@ -3928,10 +4415,41 @@
39284415 return 0;
39294416 reject:
39304417 cma_modify_qp_err(id_priv);
3931
- rdma_reject(id, NULL, 0);
4418
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
39324419 return ret;
39334420 }
3934
-EXPORT_SYMBOL(__rdma_accept);
4421
+EXPORT_SYMBOL(rdma_accept);
4422
+
4423
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
4424
+ struct rdma_ucm_ece *ece)
4425
+{
4426
+ struct rdma_id_private *id_priv =
4427
+ container_of(id, struct rdma_id_private, id);
4428
+
4429
+ id_priv->ece.vendor_id = ece->vendor_id;
4430
+ id_priv->ece.attr_mod = ece->attr_mod;
4431
+
4432
+ return rdma_accept(id, conn_param);
4433
+}
4434
+EXPORT_SYMBOL(rdma_accept_ece);
4435
+
4436
+void rdma_lock_handler(struct rdma_cm_id *id)
4437
+{
4438
+ struct rdma_id_private *id_priv =
4439
+ container_of(id, struct rdma_id_private, id);
4440
+
4441
+ mutex_lock(&id_priv->handler_mutex);
4442
+}
4443
+EXPORT_SYMBOL(rdma_lock_handler);
4444
+
4445
+void rdma_unlock_handler(struct rdma_cm_id *id)
4446
+{
4447
+ struct rdma_id_private *id_priv =
4448
+ container_of(id, struct rdma_id_private, id);
4449
+
4450
+ mutex_unlock(&id_priv->handler_mutex);
4451
+}
4452
+EXPORT_SYMBOL(rdma_unlock_handler);
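The accept path above requires the handler_mutex to be held; a listener normally satisfies this by calling rdma_accept() directly from its RDMA_CM_EVENT_CONNECT_REQUEST callback. A ULP that defers the accept to another context can use the lock/unlock helpers, roughly as in this hypothetical sketch (the function name is the editor's; the ULP must still guarantee the id stays valid until the call).

#include <rdma/rdma_cm.h>

/* Illustrative only: accept a previously received connect request
 * from outside the event handler callback.
 */
static int example_deferred_accept(struct rdma_cm_id *id,
				   struct rdma_conn_param *param)
{
	int ret;

	rdma_lock_handler(id);
	ret = rdma_accept(id, param);
	rdma_unlock_handler(id);

	return ret;
}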
39354453
39364454 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
39374455 {
....@@ -3955,7 +4473,7 @@
39554473 EXPORT_SYMBOL(rdma_notify);
39564474
39574475 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
3958
- u8 private_data_len)
4476
+ u8 private_data_len, u8 reason)
39594477 {
39604478 struct rdma_id_private *id_priv;
39614479 int ret;
....@@ -3965,13 +4483,14 @@
39654483 return -EINVAL;
39664484
39674485 if (rdma_cap_ib_cm(id->device, id->port_num)) {
3968
- if (id->qp_type == IB_QPT_UD)
4486
+ if (id->qp_type == IB_QPT_UD) {
39694487 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
39704488 private_data, private_data_len);
3971
- else
3972
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
3973
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
3974
- 0, private_data, private_data_len);
4489
+ } else {
4490
+ trace_cm_send_rej(id_priv);
4491
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
4492
+ private_data, private_data_len);
4493
+ }
39754494 } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
39764495 ret = iw_cm_reject(id_priv->cm_id.iw,
39774496 private_data, private_data_len);
....@@ -3996,8 +4515,13 @@
39964515 if (ret)
39974516 goto out;
39984517 /* Initiate or respond to a disconnect. */
3999
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
4000
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
4518
+ trace_cm_disconnect(id_priv);
4519
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
4520
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
4521
+ trace_cm_sent_drep(id_priv);
4522
+ } else {
4523
+ trace_cm_sent_dreq(id_priv);
4524
+ }
40014525 } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
40024526 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
40034527 } else
....@@ -4008,60 +4532,69 @@
40084532 }
40094533 EXPORT_SYMBOL(rdma_disconnect);
40104534
4535
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
4536
+ struct ib_sa_multicast *multicast,
4537
+ struct rdma_cm_event *event,
4538
+ struct cma_multicast *mc)
4539
+{
4540
+ struct rdma_dev_addr *dev_addr;
4541
+ enum ib_gid_type gid_type;
4542
+ struct net_device *ndev;
4543
+
4544
+ if (status)
4545
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
4546
+ status);
4547
+
4548
+ event->status = status;
4549
+ event->param.ud.private_data = mc->context;
4550
+ if (status) {
4551
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
4552
+ return;
4553
+ }
4554
+
4555
+ dev_addr = &id_priv->id.route.addr.dev_addr;
4556
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4557
+ gid_type =
4558
+ id_priv->cma_dev
4559
+ ->default_gid_type[id_priv->id.port_num -
4560
+ rdma_start_port(
4561
+ id_priv->cma_dev->device)];
4562
+
4563
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
4564
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
4565
+ &multicast->rec, ndev, gid_type,
4566
+ &event->param.ud.ah_attr)) {
4567
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
4568
+ goto out;
4569
+ }
4570
+
4571
+ event->param.ud.qp_num = 0xFFFFFF;
4572
+ event->param.ud.qkey = id_priv->qkey;
4573
+
4574
+out:
4575
+ if (ndev)
4576
+ dev_put(ndev);
4577
+}
4578
+
40114579 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
40124580 {
4013
- struct rdma_id_private *id_priv;
40144581 struct cma_multicast *mc = multicast->context;
4582
+ struct rdma_id_private *id_priv = mc->id_priv;
40154583 struct rdma_cm_event event = {};
40164584 int ret = 0;
40174585
4018
- id_priv = mc->id_priv;
40194586 mutex_lock(&id_priv->handler_mutex);
4020
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
4021
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
4587
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
4588
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
40224589 goto out;
40234590
4024
- if (!status)
4025
- status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
4026
- else
4027
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
4028
- status);
4029
- event.status = status;
4030
- event.param.ud.private_data = mc->context;
4031
- if (!status) {
4032
- struct rdma_dev_addr *dev_addr =
4033
- &id_priv->id.route.addr.dev_addr;
4034
- struct net_device *ndev =
4035
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4036
- enum ib_gid_type gid_type =
4037
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4038
- rdma_start_port(id_priv->cma_dev->device)];
4039
-
4040
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
4041
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
4042
- id_priv->id.port_num,
4043
- &multicast->rec,
4044
- ndev, gid_type,
4045
- &event.param.ud.ah_attr);
4046
- if (ret)
4047
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4048
-
4049
- event.param.ud.qp_num = 0xFFFFFF;
4050
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
4051
- if (ndev)
4052
- dev_put(ndev);
4053
- } else
4054
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4055
-
4056
- ret = id_priv->id.event_handler(&id_priv->id, &event);
4057
-
4058
- rdma_destroy_ah_attr(&event.param.ud.ah_attr);
4059
- if (ret) {
4060
- cma_exch(id_priv, RDMA_CM_DESTROYING);
4061
- mutex_unlock(&id_priv->handler_mutex);
4062
- rdma_destroy_id(&id_priv->id);
4063
- return 0;
4591
+ ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
4592
+ if (!ret) {
4593
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
4594
+ ret = cma_cm_event_handler(id_priv, &event);
40644595 }
4596
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
4597
+ WARN_ON(ret);
40654598
40664599 out:
40674600 mutex_unlock(&id_priv->handler_mutex);
....@@ -4112,9 +4645,11 @@
41124645 if (ret)
41134646 return ret;
41144647
4115
- ret = cma_set_qkey(id_priv, 0);
4116
- if (ret)
4117
- return ret;
4648
+ if (!id_priv->qkey) {
4649
+ ret = cma_set_default_qkey(id_priv);
4650
+ if (ret)
4651
+ return ret;
4652
+ }
41184653
41194654 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
41204655 rec.qkey = cpu_to_be32(id_priv->qkey);
....@@ -4126,9 +4661,10 @@
41264661 (!ib_sa_sendonly_fullmem_support(&sa_client,
41274662 id_priv->id.device,
41284663 id_priv->id.port_num))) {
4129
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
4130
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
4131
- id_priv->id.device->name, id_priv->id.port_num);
4664
+ dev_warn(
4665
+ &id_priv->id.device->dev,
4666
+ "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
4667
+ id_priv->id.port_num);
41324668 return -EOPNOTSUPP;
41334669 }
41344670
....@@ -4145,23 +4681,10 @@
41454681 IB_SA_MCMEMBER_REC_MTU |
41464682 IB_SA_MCMEMBER_REC_HOP_LIMIT;
41474683
4148
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4149
- id_priv->id.port_num, &rec,
4150
- comp_mask, GFP_KERNEL,
4151
- cma_ib_mc_handler, mc);
4152
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
4153
-}
4154
-
4155
-static void iboe_mcast_work_handler(struct work_struct *work)
4156
-{
4157
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
4158
- struct cma_multicast *mc = mw->mc;
4159
- struct ib_sa_multicast *m = mc->multicast.ib;
4160
-
4161
- mc->multicast.ib->context = mc;
4162
- cma_ib_mc_handler(0, m);
4163
- kref_put(&mc->mcref, release_mc);
4164
- kfree(mw);
4684
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4685
+ id_priv->id.port_num, &rec, comp_mask,
4686
+ GFP_KERNEL, cma_ib_mc_handler, mc);
4687
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
41654688 }
41664689
41674690 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
....@@ -4196,52 +4719,38 @@
41964719 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
41974720 struct cma_multicast *mc)
41984721 {
4199
- struct iboe_mcast_work *work;
42004722 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
42014723 int err = 0;
42024724 struct sockaddr *addr = (struct sockaddr *)&mc->addr;
42034725 struct net_device *ndev = NULL;
4726
+ struct ib_sa_multicast ib = {};
42044727 enum ib_gid_type gid_type;
42054728 bool send_only;
42064729
42074730 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
42084731
4209
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
4732
+ if (cma_zero_addr(addr))
42104733 return -EINVAL;
4211
-
4212
- work = kzalloc(sizeof *work, GFP_KERNEL);
4213
- if (!work)
4214
- return -ENOMEM;
4215
-
4216
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
4217
- if (!mc->multicast.ib) {
4218
- err = -ENOMEM;
4219
- goto out1;
4220
- }
42214734
42224735 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
42234736 rdma_start_port(id_priv->cma_dev->device)];
4224
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
4737
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
42254738
4226
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
4227
- if (id_priv->id.ps == RDMA_PS_UDP)
4228
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
4229
-
4739
+ ib.rec.pkey = cpu_to_be16(0xffff);
42304740 if (dev_addr->bound_dev_if)
42314741 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4232
- if (!ndev) {
4233
- err = -ENODEV;
4234
- goto out2;
4235
- }
4236
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
4237
- mc->multicast.ib->rec.hop_limit = 1;
4238
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
4742
+ if (!ndev)
4743
+ return -ENODEV;
4744
+
4745
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
4746
+ ib.rec.hop_limit = 1;
4747
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
42394748
42404749 if (addr->sa_family == AF_INET) {
42414750 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
4242
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
4751
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
42434752 if (!send_only) {
4244
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
4753
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
42454754 true);
42464755 }
42474756 }
....@@ -4250,32 +4759,25 @@
42504759 err = -ENOTSUPP;
42514760 }
42524761 dev_put(ndev);
4253
- if (err || !mc->multicast.ib->rec.mtu) {
4254
- if (!err)
4255
- err = -EINVAL;
4256
- goto out2;
4257
- }
4762
+ if (err || !ib.rec.mtu)
4763
+ return err ?: -EINVAL;
4764
+
4765
+ if (!id_priv->qkey)
4766
+ cma_set_default_qkey(id_priv);
4767
+
42584768 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
4259
- &mc->multicast.ib->rec.port_gid);
4260
- work->id = id_priv;
4261
- work->mc = mc;
4262
- INIT_WORK(&work->work, iboe_mcast_work_handler);
4263
- kref_get(&mc->mcref);
4264
- queue_work(cma_wq, &work->work);
4265
-
4769
+ &ib.rec.port_gid);
4770
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
4771
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
4772
+ queue_work(cma_wq, &mc->iboe_join.work);
42664773 return 0;
4267
-
4268
-out2:
4269
- kfree(mc->multicast.ib);
4270
-out1:
4271
- kfree(work);
4272
- return err;
42734774 }
42744775
42754776 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
42764777 u8 join_state, void *context)
42774778 {
4278
- struct rdma_id_private *id_priv;
4779
+ struct rdma_id_private *id_priv =
4780
+ container_of(id, struct rdma_id_private, id);
42794781 struct cma_multicast *mc;
42804782 int ret;
42814783
....@@ -4283,15 +4785,15 @@
42834785 if (WARN_ON(id->qp))
42844786 return -EINVAL;
42854787
4286
- if (!id->device)
4788
+ /* ULP is calling this wrong. */
4789
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
4790
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
42874791 return -EINVAL;
42884792
4289
- id_priv = container_of(id, struct rdma_id_private, id);
4290
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
4291
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
4793
+ if (id_priv->id.qp_type != IB_QPT_UD)
42924794 return -EINVAL;
42934795
4294
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
4796
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
42954797 if (!mc)
42964798 return -ENOMEM;
42974799
....@@ -4301,7 +4803,6 @@
43014803 mc->join_state = join_state;
43024804
43034805 if (rdma_protocol_roce(id->device, id->port_num)) {
4304
- kref_init(&mc->mcref);
43054806 ret = cma_iboe_join_multicast(id_priv, mc);
43064807 if (ret)
43074808 goto out_err;
....@@ -4349,7 +4850,7 @@
43494850 static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
43504851 {
43514852 struct rdma_dev_addr *dev_addr;
4352
- struct cma_ndev_work *work;
4853
+ struct cma_work *work;
43534854
43544855 dev_addr = &id_priv->id.route.addr.dev_addr;
43554856
....@@ -4362,10 +4863,10 @@
43624863 if (!work)
43634864 return -ENOMEM;
43644865
4365
- INIT_WORK(&work->work, cma_ndev_work_handler);
4866
+ INIT_WORK(&work->work, cma_work_handler);
43664867 work->id = id_priv;
43674868 work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
4368
- atomic_inc(&id_priv->refcount);
4869
+ cma_id_get(id_priv);
43694870 queue_work(cma_wq, &work->work);
43704871 }
43714872
....@@ -4403,31 +4904,99 @@
44034904 .notifier_call = cma_netdev_callback
44044905 };
44054906
4406
-static void cma_add_one(struct ib_device *device)
4907
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
44074908 {
4909
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
4910
+ enum rdma_cm_state state;
4911
+ unsigned long flags;
4912
+
4913
+ mutex_lock(&id_priv->handler_mutex);
4914
+ /* Record that we want to remove the device */
4915
+ spin_lock_irqsave(&id_priv->lock, flags);
4916
+ state = id_priv->state;
4917
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
4918
+ spin_unlock_irqrestore(&id_priv->lock, flags);
4919
+ mutex_unlock(&id_priv->handler_mutex);
4920
+ cma_id_put(id_priv);
4921
+ return;
4922
+ }
4923
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
4924
+ spin_unlock_irqrestore(&id_priv->lock, flags);
4925
+
4926
+ if (cma_cm_event_handler(id_priv, &event)) {
4927
+ /*
4928
+ * At this point the ULP promises it won't call
4929
+ * rdma_destroy_id() concurrently
4930
+ */
4931
+ cma_id_put(id_priv);
4932
+ mutex_unlock(&id_priv->handler_mutex);
4933
+ trace_cm_id_destroy(id_priv);
4934
+ _destroy_id(id_priv, state);
4935
+ return;
4936
+ }
4937
+ mutex_unlock(&id_priv->handler_mutex);
4938
+
4939
+ /*
4940
+ * If this races with destroy then the thread that first assigns state
4941
+ * to a destroying does the cancel.
4942
+ */
4943
+ cma_cancel_operation(id_priv, state);
4944
+ cma_id_put(id_priv);
4945
+}
4946
+
4947
+static void cma_process_remove(struct cma_device *cma_dev)
4948
+{
4949
+ mutex_lock(&lock);
4950
+ while (!list_empty(&cma_dev->id_list)) {
4951
+ struct rdma_id_private *id_priv = list_first_entry(
4952
+ &cma_dev->id_list, struct rdma_id_private, list);
4953
+
4954
+ list_del(&id_priv->listen_list);
4955
+ list_del_init(&id_priv->list);
4956
+ cma_id_get(id_priv);
4957
+ mutex_unlock(&lock);
4958
+
4959
+ cma_send_device_removal_put(id_priv);
4960
+
4961
+ mutex_lock(&lock);
4962
+ }
4963
+ mutex_unlock(&lock);
4964
+
4965
+ cma_dev_put(cma_dev);
4966
+ wait_for_completion(&cma_dev->comp);
4967
+}
4968
+
4969
+static int cma_add_one(struct ib_device *device)
4970
+{
4971
+ struct rdma_id_private *to_destroy;
44084972 struct cma_device *cma_dev;
44094973 struct rdma_id_private *id_priv;
44104974 unsigned int i;
44114975 unsigned long supported_gids = 0;
4976
+ int ret;
44124977
4413
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
4978
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
44144979 if (!cma_dev)
4415
- return;
4980
+ return -ENOMEM;
44164981
44174982 cma_dev->device = device;
44184983 cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
44194984 sizeof(*cma_dev->default_gid_type),
44204985 GFP_KERNEL);
4421
- if (!cma_dev->default_gid_type)
4986
+ if (!cma_dev->default_gid_type) {
4987
+ ret = -ENOMEM;
44224988 goto free_cma_dev;
4989
+ }
44234990
44244991 cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
44254992 sizeof(*cma_dev->default_roce_tos),
44264993 GFP_KERNEL);
4427
- if (!cma_dev->default_roce_tos)
4994
+ if (!cma_dev->default_roce_tos) {
4995
+ ret = -ENOMEM;
44284996 goto free_gid_type;
4997
+ }
44294998
4430
- for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4999
+ rdma_for_each_port (device, i) {
44315000 supported_gids = roce_gid_type_mask_support(device, i);
44325001 WARN_ON(!supported_gids);
44335002 if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
....@@ -4440,86 +5009,42 @@
44405009 }
44415010
44425011 init_completion(&cma_dev->comp);
4443
- atomic_set(&cma_dev->refcount, 1);
5012
+ refcount_set(&cma_dev->refcount, 1);
44445013 INIT_LIST_HEAD(&cma_dev->id_list);
44455014 ib_set_client_data(device, &cma_client, cma_dev);
44465015
44475016 mutex_lock(&lock);
44485017 list_add_tail(&cma_dev->list, &dev_list);
4449
- list_for_each_entry(id_priv, &listen_any_list, list)
4450
- cma_listen_on_dev(id_priv, cma_dev);
5018
+ list_for_each_entry(id_priv, &listen_any_list, list) {
5019
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
5020
+ if (ret)
5021
+ goto free_listen;
5022
+ }
44515023 mutex_unlock(&lock);
44525024
4453
- return;
5025
+ trace_cm_add_one(device);
5026
+ return 0;
44545027
5028
+free_listen:
5029
+ list_del(&cma_dev->list);
5030
+ mutex_unlock(&lock);
5031
+
5032
+ /* cma_process_remove() will delete to_destroy */
5033
+ cma_process_remove(cma_dev);
5034
+ kfree(cma_dev->default_roce_tos);
44555035 free_gid_type:
44565036 kfree(cma_dev->default_gid_type);
44575037
44585038 free_cma_dev:
44595039 kfree(cma_dev);
4460
-
4461
- return;
4462
-}
4463
-
4464
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4465
-{
4466
- struct rdma_cm_event event = {};
4467
- enum rdma_cm_state state;
4468
- int ret = 0;
4469
-
4470
- /* Record that we want to remove the device */
4471
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4472
- if (state == RDMA_CM_DESTROYING)
4473
- return 0;
4474
-
4475
- cma_cancel_operation(id_priv, state);
4476
- mutex_lock(&id_priv->handler_mutex);
4477
-
4478
- /* Check for destruction from another callback. */
4479
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4480
- goto out;
4481
-
4482
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4483
- ret = id_priv->id.event_handler(&id_priv->id, &event);
4484
-out:
4485
- mutex_unlock(&id_priv->handler_mutex);
44865040 return ret;
4487
-}
4488
-
4489
-static void cma_process_remove(struct cma_device *cma_dev)
4490
-{
4491
- struct rdma_id_private *id_priv;
4492
- int ret;
4493
-
4494
- mutex_lock(&lock);
4495
- while (!list_empty(&cma_dev->id_list)) {
4496
- id_priv = list_entry(cma_dev->id_list.next,
4497
- struct rdma_id_private, list);
4498
-
4499
- list_del(&id_priv->listen_list);
4500
- list_del_init(&id_priv->list);
4501
- atomic_inc(&id_priv->refcount);
4502
- mutex_unlock(&lock);
4503
-
4504
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
4505
- cma_deref_id(id_priv);
4506
- if (ret)
4507
- rdma_destroy_id(&id_priv->id);
4508
-
4509
- mutex_lock(&lock);
4510
- }
4511
- mutex_unlock(&lock);
4512
-
4513
- cma_deref_dev(cma_dev);
4514
- wait_for_completion(&cma_dev->comp);
45155041 }
45165042
45175043 static void cma_remove_one(struct ib_device *device, void *client_data)
45185044 {
45195045 struct cma_device *cma_dev = client_data;
45205046
4521
- if (!cma_dev)
4522
- return;
5047
+ trace_cm_remove_one(device);
45235048
45245049 mutex_lock(&lock);
45255050 list_del(&cma_dev->list);
....@@ -4531,93 +5056,14 @@
45315056 kfree(cma_dev);
45325057 }
45335058
4534
-static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
4535
-{
4536
- struct nlmsghdr *nlh;
4537
- struct rdma_cm_id_stats *id_stats;
4538
- struct rdma_id_private *id_priv;
4539
- struct rdma_cm_id *id = NULL;
4540
- struct cma_device *cma_dev;
4541
- int i_dev = 0, i_id = 0;
4542
-
4543
- /*
4544
- * We export all of the IDs as a sequence of messages. Each
4545
- * ID gets its own netlink message.
4546
- */
4547
- mutex_lock(&lock);
4548
-
4549
- list_for_each_entry(cma_dev, &dev_list, list) {
4550
- if (i_dev < cb->args[0]) {
4551
- i_dev++;
4552
- continue;
4553
- }
4554
-
4555
- i_id = 0;
4556
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
4557
- if (i_id < cb->args[1]) {
4558
- i_id++;
4559
- continue;
4560
- }
4561
-
4562
- id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
4563
- sizeof *id_stats, RDMA_NL_RDMA_CM,
4564
- RDMA_NL_RDMA_CM_ID_STATS,
4565
- NLM_F_MULTI);
4566
- if (!id_stats)
4567
- goto out;
4568
-
4569
- memset(id_stats, 0, sizeof *id_stats);
4570
- id = &id_priv->id;
4571
- id_stats->node_type = id->route.addr.dev_addr.dev_type;
4572
- id_stats->port_num = id->port_num;
4573
- id_stats->bound_dev_if =
4574
- id->route.addr.dev_addr.bound_dev_if;
4575
-
4576
- if (ibnl_put_attr(skb, nlh,
4577
- rdma_addr_size(cma_src_addr(id_priv)),
4578
- cma_src_addr(id_priv),
4579
- RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
4580
- goto out;
4581
- if (ibnl_put_attr(skb, nlh,
4582
- rdma_addr_size(cma_dst_addr(id_priv)),
4583
- cma_dst_addr(id_priv),
4584
- RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
4585
- goto out;
4586
-
4587
- id_stats->pid = task_pid_vnr(id_priv->res.task);
4588
- id_stats->port_space = id->ps;
4589
- id_stats->cm_state = id_priv->state;
4590
- id_stats->qp_num = id_priv->qp_num;
4591
- id_stats->qp_type = id->qp_type;
4592
-
4593
- i_id++;
4594
- nlmsg_end(skb, nlh);
4595
- }
4596
-
4597
- cb->args[1] = 0;
4598
- i_dev++;
4599
- }
4600
-
4601
-out:
4602
- mutex_unlock(&lock);
4603
- cb->args[0] = i_dev;
4604
- cb->args[1] = i_id;
4605
-
4606
- return skb->len;
4607
-}
4608
-
4609
-static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
4610
- [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
4611
-};
4612
-
46135059 static int cma_init_net(struct net *net)
46145060 {
46155061 struct cma_pernet *pernet = cma_pernet(net);
46165062
4617
- idr_init(&pernet->tcp_ps);
4618
- idr_init(&pernet->udp_ps);
4619
- idr_init(&pernet->ipoib_ps);
4620
- idr_init(&pernet->ib_ps);
5063
+ xa_init(&pernet->tcp_ps);
5064
+ xa_init(&pernet->udp_ps);
5065
+ xa_init(&pernet->ipoib_ps);
5066
+ xa_init(&pernet->ib_ps);
46215067
46225068 return 0;
46235069 }
....@@ -4626,10 +5072,10 @@
46265072 {
46275073 struct cma_pernet *pernet = cma_pernet(net);
46285074
4629
- idr_destroy(&pernet->tcp_ps);
4630
- idr_destroy(&pernet->udp_ps);
4631
- idr_destroy(&pernet->ipoib_ps);
4632
- idr_destroy(&pernet->ib_ps);
5075
+ WARN_ON(!xa_empty(&pernet->tcp_ps));
5076
+ WARN_ON(!xa_empty(&pernet->udp_ps));
5077
+ WARN_ON(!xa_empty(&pernet->ipoib_ps));
5078
+ WARN_ON(!xa_empty(&pernet->ib_ps));
46335079 }
46345080
46355081 static struct pernet_operations cma_pernet_operations = {
....@@ -4671,11 +5117,14 @@
46715117 if (ret)
46725118 goto err;
46735119
4674
- rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
4675
- cma_configfs_init();
5120
+ ret = cma_configfs_init();
5121
+ if (ret)
5122
+ goto err_ib;
46765123
46775124 return 0;
46785125
5126
+err_ib:
5127
+ ib_unregister_client(&cma_client);
46795128 err:
46805129 unregister_netdevice_notifier(&cma_nb);
46815130 ib_sa_unregister_client(&sa_client);
....@@ -4688,15 +5137,12 @@
46885137 static void __exit cma_cleanup(void)
46895138 {
46905139 cma_configfs_exit();
4691
- rdma_nl_unregister(RDMA_NL_RDMA_CM);
46925140 ib_unregister_client(&cma_client);
46935141 unregister_netdevice_notifier(&cma_nb);
46945142 ib_sa_unregister_client(&sa_client);
46955143 unregister_pernet_subsys(&cma_pernet_operations);
46965144 destroy_workqueue(cma_wq);
46975145 }
4698
-
4699
-MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);
47005146
47015147 module_init(cma_init);
47025148 module_exit(cma_cleanup);