2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/drivers/infiniband/sw/rxe/rxe_net.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- *	- Redistributions of source code must retain the above
- *	  copyright notice, this list of conditions and the following
- *	  disclaimer.
- *
- *	- Redistributions in binary form must reproduce the above
- *	  copyright notice, this list of conditions and the following
- *	  disclaimer in the documentation and/or other materials
- *	  provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/skbuff.h>
@@ -45,56 +18,7 @@
 #include "rxe_net.h"
 #include "rxe_loc.h"
 
-static LIST_HEAD(rxe_dev_list);
-static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
-
-struct rxe_dev *net_to_rxe(struct net_device *ndev)
-{
-	struct rxe_dev *rxe;
-	struct rxe_dev *found = NULL;
-
-	spin_lock_bh(&dev_list_lock);
-	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (rxe->ndev == ndev) {
-			found = rxe;
-			break;
-		}
-	}
-	spin_unlock_bh(&dev_list_lock);
-
-	return found;
-}
-
-struct rxe_dev *get_rxe_by_name(const char *name)
-{
-	struct rxe_dev *rxe;
-	struct rxe_dev *found = NULL;
-
-	spin_lock_bh(&dev_list_lock);
-	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (!strcmp(name, rxe->ib_dev.name)) {
-			found = rxe;
-			break;
-		}
-	}
-	spin_unlock_bh(&dev_list_lock);
-	return found;
-}
-
-
 static struct rxe_recv_sockets recv_sockets;
-
-struct device *rxe_dma_device(struct rxe_dev *rxe)
-{
-	struct net_device *ndev;
-
-	ndev = rxe->ndev;
-
-	if (is_vlan_dev(ndev))
-		ndev = vlan_dev_real_dev(ndev);
-
-	return ndev->dev.parent;
-}
 
 int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
 {
@@ -157,7 +81,7 @@
 	ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
 					       recv_sockets.sk6->sk, &fl6,
 					       NULL);
-	if (unlikely(IS_ERR(ndst))) {
+	if (IS_ERR(ndst)) {
 		pr_err_ratelimited("no route to %pI6\n", daddr);
 		return NULL;
 	}
@@ -184,19 +108,11 @@
 
 #endif
 
-static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+static struct dst_entry *rxe_find_route(struct net_device *ndev,
 					struct rxe_qp *qp,
 					struct rxe_av *av)
 {
-	const struct ib_gid_attr *attr;
 	struct dst_entry *dst = NULL;
-	struct net_device *ndev;
-
-	attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
-				 av->grh.sgid_index);
-	if (IS_ERR(attr))
-		return NULL;
-	ndev = attr->ndev;
 
 	if (qp_type(qp) == IB_QPT_RC)
 		dst = sk_dst_get(qp->sk->sk);
@@ -205,14 +121,14 @@
 	if (dst)
 		dst_release(dst);
 
-	if (av->network_type == RDMA_NETWORK_IPV4) {
+	if (av->network_type == RXE_NETWORK_TYPE_IPV4) {
 		struct in_addr *saddr;
 		struct in_addr *daddr;
 
 		saddr = &av->sgid_addr._sockaddr_in.sin_addr;
 		daddr = &av->dgid_addr._sockaddr_in.sin_addr;
 		dst = rxe_find_route4(ndev, saddr, daddr);
-	} else if (av->network_type == RDMA_NETWORK_IPV6) {
+	} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
 		struct in6_addr *saddr6;
 		struct in6_addr *daddr6;
 
@@ -231,7 +147,6 @@
 			sk_dst_set(qp->sk->sk, dst);
 		}
 	}
-	rdma_put_gid_attr(attr);
 	return dst;
 }
 
@@ -240,18 +155,19 @@
 	struct udphdr *udph;
 	struct net_device *ndev = skb->dev;
 	struct net_device *rdev = ndev;
-	struct rxe_dev *rxe = net_to_rxe(ndev);
+	struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 
 	if (!rxe && is_vlan_dev(rdev)) {
 		rdev = vlan_dev_real_dev(ndev);
-		rxe = net_to_rxe(rdev);
+		rxe = rxe_get_dev_from_net(rdev);
 	}
 	if (!rxe)
 		goto drop;
 
 	if (skb_linearize(skb)) {
 		pr_err("skb_linearize failed\n");
+		ib_device_put(&rxe->ib_dev);
 		goto drop;
 	}
 
@@ -263,6 +179,12 @@
 	pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
 
 	rxe_rcv(skb);
+
+	/*
+	 * FIXME: this is in the wrong place, it needs to be done when pkt is
+	 * destroyed
+	 */
+	ib_device_put(&rxe->ib_dev);
 
 	return 0;
 drop:
@@ -377,27 +299,24 @@
 	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
 }
 
-static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		    struct sk_buff *skb, struct rxe_av *av)
+static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
 	struct rxe_qp *qp = pkt->qp;
 	struct dst_entry *dst;
 	bool xnet = false;
 	__be16 df = htons(IP_DF);
+	struct rxe_av *av = rxe_get_av(pkt);
 	struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
 	struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
 
-	dst = rxe_find_route(rxe, qp, av);
+	dst = rxe_find_route(skb->dev, qp, av);
 	if (!dst) {
 		pr_err("Host not reachable\n");
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
-	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
-			htons(ROCE_V2_UDP_DPORT));
+	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
 	prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
 			 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
@@ -406,25 +325,22 @@
 	return 0;
 }
 
-static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		    struct sk_buff *skb, struct rxe_av *av)
+static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
 	struct rxe_qp *qp = pkt->qp;
 	struct dst_entry *dst;
+	struct rxe_av *av = rxe_get_av(pkt);
 	struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
 	struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
 
-	dst = rxe_find_route(rxe, qp, av);
+	dst = rxe_find_route(skb->dev, qp, av);
 	if (!dst) {
 		pr_err("Host not reachable\n");
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
-	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
-			htons(ROCE_V2_UDP_DPORT));
+	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
 	prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
 			 av->grh.traffic_class,
@@ -434,18 +350,19 @@
 	return 0;
 }
 
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
 {
 	int err = 0;
-	struct rxe_av *av = rxe_get_av(pkt);
 
-	if (av->network_type == RDMA_NETWORK_IPV4)
-		err = prepare4(rxe, pkt, skb, av);
-	else if (av->network_type == RDMA_NETWORK_IPV6)
-		err = prepare6(rxe, pkt, skb, av);
+	if (skb->protocol == htons(ETH_P_IP))
+		err = prepare4(pkt, skb);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		err = prepare6(pkt, skb);
 
 	*crc = rxe_icrc_hdr(pkt, skb);
+
+	if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac))
+		pkt->mask |= RXE_LOOPBACK_MASK;
 
 	return err;
 }
@@ -465,10 +382,7 @@
 
 int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
-	struct rxe_av *av;
 	int err;
-
-	av = rxe_get_av(pkt);
 
 	skb->destructor = rxe_skb_tx_dtor;
 	skb->sk = pkt->qp->sk->sk;
@@ -476,12 +390,12 @@
 	rxe_add_ref(pkt->qp);
 	atomic_inc(&pkt->qp->skb_out);
 
-	if (av->network_type == RDMA_NETWORK_IPV4) {
+	if (skb->protocol == htons(ETH_P_IP)) {
 		err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
-	} else if (av->network_type == RDMA_NETWORK_IPV6) {
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 		err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 	} else {
-		pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
+		pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
 		atomic_dec(&pkt->qp->skb_out);
 		rxe_drop_ref(pkt->qp);
 		kfree_skb(skb);
@@ -506,16 +420,11 @@
 	rxe_rcv(skb);
 }
 
-static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
-{
-	return rxe->port.port_guid == av->grh.dgid.global.interface_id;
-}
-
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 				int paylen, struct rxe_pkt_info *pkt)
 {
 	unsigned int hdr_len;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct net_device *ndev;
 	const struct ib_gid_attr *attr;
 	const int port_num = 1;
@@ -523,26 +432,35 @@
 	attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
 	if (IS_ERR(attr))
 		return NULL;
-	ndev = attr->ndev;
 
-	if (av->network_type == RDMA_NETWORK_IPV4)
+	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
 		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
 			sizeof(struct iphdr);
 	else
 		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
 			sizeof(struct ipv6hdr);
 
+	rcu_read_lock();
+	ndev = rdma_read_gid_attr_ndev_rcu(attr);
+	if (IS_ERR(ndev)) {
+		rcu_read_unlock();
+		goto out;
+	}
 	skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
 			GFP_ATOMIC);
 
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		rcu_read_unlock();
 		goto out;
+	}
 
 	skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev));
 
 	/* FIXME: hold reference to this netdev until life of this skb. */
 	skb->dev = ndev;
-	if (av->network_type == RDMA_NETWORK_IPV4)
+	rcu_read_unlock();
+
+	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
 		skb->protocol = htons(ETH_P_IP);
 	else
 		skb->protocol = htons(ETH_P_IPV6);
@@ -566,47 +484,24 @@
 	return rxe->ndev->name;
 }
 
-enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
-{
-	return IB_LINK_LAYER_ETHERNET;
-}
-
-struct rxe_dev *rxe_net_add(struct net_device *ndev)
+int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
 {
 	int err;
 	struct rxe_dev *rxe = NULL;
 
-	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
+	rxe = ib_alloc_device(rxe_dev, ib_dev);
 	if (!rxe)
-		return NULL;
+		return -ENOMEM;
 
 	rxe->ndev = ndev;
 
-	err = rxe_add(rxe, ndev->mtu);
+	err = rxe_add(rxe, ndev->mtu, ibdev_name);
 	if (err) {
 		ib_dealloc_device(&rxe->ib_dev);
-		return NULL;
+		return err;
 	}
 
-	spin_lock_bh(&dev_list_lock);
-	list_add_tail(&rxe->list, &rxe_dev_list);
-	spin_unlock_bh(&dev_list_lock);
-	return rxe;
-}
-
-void rxe_remove_all(void)
-{
-	spin_lock_bh(&dev_list_lock);
-	while (!list_empty(&rxe_dev_list)) {
-		struct rxe_dev *rxe =
-			list_first_entry(&rxe_dev_list, struct rxe_dev, list);
-
-		list_del(&rxe->list);
-		spin_unlock_bh(&dev_list_lock);
-		rxe_remove(rxe);
-		spin_lock_bh(&dev_list_lock);
-	}
-	spin_unlock_bh(&dev_list_lock);
+	return 0;
 }
 
 static void rxe_port_event(struct rxe_dev *rxe,
@@ -628,10 +523,9 @@
 
 	port = &rxe->port;
 	port->attr.state = IB_PORT_ACTIVE;
-	port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
 
 	rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
-	pr_info("set %s active\n", rxe->ib_dev.name);
+	dev_info(&rxe->ib_dev.dev, "set active\n");
 }
 
 /* Caller must hold net_info_lock */
@@ -641,10 +535,18 @@
 
 	port = &rxe->port;
 	port->attr.state = IB_PORT_DOWN;
-	port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
 
 	rxe_port_event(rxe, IB_EVENT_PORT_ERR);
-	pr_info("set %s down\n", rxe->ib_dev.name);
+	rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
+	dev_info(&rxe->ib_dev.dev, "set down\n");
+}
+
+void rxe_set_port_state(struct rxe_dev *rxe)
+{
+	if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev))
+		rxe_port_up(rxe);
+	else
+		rxe_port_down(rxe);
 }
 
 static int rxe_notify(struct notifier_block *not_blk,
@@ -652,15 +554,14 @@
 		      void *arg)
 {
 	struct net_device *ndev = netdev_notifier_info_to_dev(arg);
-	struct rxe_dev *rxe = net_to_rxe(ndev);
+	struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
 
 	if (!rxe)
-		goto out;
+		return NOTIFY_OK;
 
 	switch (event) {
 	case NETDEV_UNREGISTER:
-		list_del(&rxe->list);
-		rxe_remove(rxe);
+		ib_unregister_device_queued(&rxe->ib_dev);
 		break;
 	case NETDEV_UP:
 		rxe_port_up(rxe);
673574 rxe_set_mtu(rxe, ndev->mtu);
674575 break;
675576 case NETDEV_CHANGE:
676
- if (netif_running(ndev) && netif_carrier_ok(ndev))
677
- rxe_port_up(rxe);
678
- else
679
- rxe_port_down(rxe);
577
+ rxe_set_port_state(rxe);
680578 break;
681579 case NETDEV_REBOOT:
682580 case NETDEV_GOING_DOWN:
....@@ -688,7 +586,8 @@
688586 event, ndev->name);
689587 break;
690588 }
691
-out:
589
+
590
+ ib_device_put(&rxe->ib_dev);
692591 return NOTIFY_OK;
693592 }
694593