hc
2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c
....@@ -52,10 +52,6 @@
5252 #include <linux/inetdevice.h>
5353 #include <rdma/ib_cache.h>
5454
55
-#define DRV_VERSION "1.0.0"
56
-
57
-const char ipoib_driver_version[] = DRV_VERSION;
58
-
5955 MODULE_AUTHOR("Roland Dreier");
6056 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
6157 MODULE_LICENSE("Dual BSD/GPL");
....@@ -90,7 +86,7 @@
9086
9187 struct ib_sa_client ipoib_sa_client;
9288
93
-static void ipoib_add_one(struct ib_device *device);
89
+static int ipoib_add_one(struct ib_device *device);
9490 static void ipoib_remove_one(struct ib_device *device, void *client_data);
9591 static void ipoib_neigh_reclaim(struct rcu_head *rp);
9692 static struct net_device *ipoib_get_net_dev_by_params(
....@@ -167,7 +163,7 @@
167163 if (flags & IFF_UP)
168164 continue;
169165
170
- dev_change_flags(cpriv->dev, flags | IFF_UP);
166
+ dev_change_flags(cpriv->dev, flags | IFF_UP, NULL);
171167 }
172168 up_read(&priv->vlan_rwsem);
173169 }
....@@ -207,7 +203,7 @@
207203 if (!(flags & IFF_UP))
208204 continue;
209205
210
- dev_change_flags(cpriv->dev, flags & ~IFF_UP);
206
+ dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL);
211207 }
212208 up_read(&priv->vlan_rwsem);
213209 }
....@@ -346,9 +342,10 @@
346342 struct net_device *result;
347343 };
348344
349
-static int ipoib_upper_walk(struct net_device *upper, void *_data)
345
+static int ipoib_upper_walk(struct net_device *upper,
346
+ struct netdev_nested_priv *priv)
350347 {
351
- struct ipoib_walk_data *data = _data;
348
+ struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data;
352349 int ret = 0;
353350
354351 if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) {
....@@ -372,10 +369,12 @@
372369 static struct net_device *ipoib_get_net_dev_match_addr(
373370 const struct sockaddr *addr, struct net_device *dev)
374371 {
372
+ struct netdev_nested_priv priv;
375373 struct ipoib_walk_data data = {
376374 .addr = addr,
377375 };
378376
377
+ priv.data = (void *)&data;
379378 rcu_read_lock();
380379 if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
381380 dev_hold(dev);
....@@ -383,7 +382,7 @@
383382 goto out;
384383 }
385384
386
- netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data);
385
+ netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv);
387386 out:
388387 rcu_read_unlock();
389388 return data.result;
....@@ -483,9 +482,6 @@
483482 if (ret)
484483 return NULL;
485484
486
- if (!dev_list)
487
- return NULL;
488
-
489485 /* See if we can find a unique device matching the L2 parameters */
490486 matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
491487 gid, NULL, &net_dev);
....@@ -509,7 +505,7 @@
509505 default:
510506 dev_warn_ratelimited(&dev->dev,
511507 "duplicate IP address detected\n");
512
- /* Fall through */
508
+ fallthrough;
513509 case 1:
514510 return net_dev;
515511 }
....@@ -533,6 +529,7 @@
533529 "will cause multicast packet drops\n");
534530 netdev_update_features(dev);
535531 dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
532
+ netif_set_real_num_tx_queues(dev, 1);
536533 rtnl_unlock();
537534 priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
538535
....@@ -544,6 +541,7 @@
544541 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
545542 netdev_update_features(dev);
546543 dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
544
+ netif_set_real_num_tx_queues(dev, dev->num_tx_queues);
547545 rtnl_unlock();
548546 ipoib_flush_paths(dev);
549547 return (!rtnl_trylock()) ? -EBUSY : 0;
....@@ -613,7 +611,7 @@
613611 while ((skb = __skb_dequeue(&path->queue)))
614612 dev_kfree_skb_irq(skb);
615613
616
- ipoib_dbg(ipoib_priv(dev), "path_free\n");
614
+ ipoib_dbg(ipoib_priv(dev), "%s\n", __func__);
617615
618616 /* remove all neigh connected to this path */
619617 ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
....@@ -1182,7 +1180,7 @@
11821180 return NETDEV_TX_OK;
11831181 }
11841182
1185
-static void ipoib_timeout(struct net_device *dev)
1183
+static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
11861184 {
11871185 struct ipoib_dev_priv *priv = ipoib_priv(dev);
11881186
....@@ -1643,7 +1641,7 @@
16431641 {
16441642 struct ipoib_dev_priv *priv = ipoib_priv(dev);
16451643
1646
- ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
1644
+ ipoib_dbg(priv, "%s\n", __func__);
16471645 init_completion(&priv->ntbl.deleted);
16481646
16491647 cancel_delayed_work_sync(&priv->neigh_reap_task);
....@@ -1825,7 +1823,7 @@
18251823 * running ensures that it will not add more work.
18261824 */
18271825 rtnl_lock();
1828
- dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
1826
+ dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP, NULL);
18291827 rtnl_unlock();
18301828
18311829 /* ipoib_event() cannot be running once this returns */
....@@ -1864,7 +1862,7 @@
18641862 priv->port);
18651863 return result;
18661864 }
1867
- priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
1865
+ priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr);
18681866
18691867 result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
18701868 if (result) {
....@@ -1897,14 +1895,22 @@
18971895
18981896 priv->max_ib_mtu = ppriv->max_ib_mtu;
18991897 set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
1900
- memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
1901
- memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
1898
+ if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN))
1899
+ memcpy(&priv->local_gid, priv->dev->dev_addr + 4,
1900
+ sizeof(priv->local_gid));
1901
+ else {
1902
+ memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr,
1903
+ INFINIBAND_ALEN);
1904
+ memcpy(&priv->local_gid, &ppriv->local_gid,
1905
+ sizeof(priv->local_gid));
1906
+ }
19021907 }
19031908
19041909 static int ipoib_ndo_init(struct net_device *ndev)
19051910 {
19061911 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
19071912 int rc;
1913
+ struct rdma_netdev *rn = netdev_priv(ndev);
19081914
19091915 if (priv->parent) {
19101916 ipoib_child_init(ndev);
....@@ -1917,6 +1923,7 @@
19171923 /* MTU will be reset when mcast join happens */
19181924 ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
19191925 priv->mcast_mtu = priv->admin_mtu = ndev->mtu;
1926
+ rn->mtu = priv->mcast_mtu;
19201927 ndev->max_mtu = IPOIB_CM_MTU;
19211928
19221929 ndev->neigh_priv_len = sizeof(struct ipoib_neigh);
....@@ -2023,6 +2030,15 @@
20232030 return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
20242031 }
20252032
2033
+static int ipoib_get_vf_guid(struct net_device *dev, int vf,
2034
+ struct ifla_vf_guid *node_guid,
2035
+ struct ifla_vf_guid *port_guid)
2036
+{
2037
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
2038
+
2039
+ return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid);
2040
+}
2041
+
20262042 static int ipoib_get_vf_stats(struct net_device *dev, int vf,
20272043 struct ifla_vf_stats *vf_stats)
20282044 {
....@@ -2049,6 +2065,7 @@
20492065 .ndo_set_vf_link_state = ipoib_set_vf_link_state,
20502066 .ndo_get_vf_config = ipoib_get_vf_config,
20512067 .ndo_get_vf_stats = ipoib_get_vf_stats,
2068
+ .ndo_get_vf_guid = ipoib_get_vf_guid,
20522069 .ndo_set_vf_guid = ipoib_set_vf_guid,
20532070 .ndo_set_mac_address = ipoib_set_mac,
20542071 .ndo_get_stats64 = ipoib_get_stats,
....@@ -2070,9 +2087,17 @@
20702087 .ndo_do_ioctl = ipoib_ioctl,
20712088 };
20722089
2090
+static const struct net_device_ops ipoib_netdev_default_pf = {
2091
+ .ndo_init = ipoib_dev_init_default,
2092
+ .ndo_uninit = ipoib_dev_uninit_default,
2093
+ .ndo_open = ipoib_ib_dev_open_default,
2094
+ .ndo_stop = ipoib_ib_dev_stop_default,
2095
+};
2096
+
20732097 void ipoib_setup_common(struct net_device *dev)
20742098 {
20752099 dev->header_ops = &ipoib_header_ops;
2100
+ dev->netdev_ops = &ipoib_netdev_default_pf;
20762101
20772102 ipoib_set_ethtool_ops(dev);
20782103
....@@ -2122,89 +2147,65 @@
21222147 INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
21232148 }
21242149
2125
-static const struct net_device_ops ipoib_netdev_default_pf = {
2126
- .ndo_init = ipoib_dev_init_default,
2127
- .ndo_uninit = ipoib_dev_uninit_default,
2128
- .ndo_open = ipoib_ib_dev_open_default,
2129
- .ndo_stop = ipoib_ib_dev_stop_default,
2130
-};
2131
-
2132
-static struct net_device
2133
-*ipoib_create_netdev_default(struct ib_device *hca,
2134
- const char *name,
2135
- unsigned char name_assign_type,
2136
- void (*setup)(struct net_device *))
2150
+static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
2151
+ const char *name)
21372152 {
21382153 struct net_device *dev;
2139
- struct rdma_netdev *rn;
21402154
2141
- dev = alloc_netdev((int)sizeof(struct rdma_netdev),
2142
- name,
2143
- name_assign_type, setup);
2155
+ dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
2156
+ NET_NAME_UNKNOWN, ipoib_setup_common);
2157
+ if (!IS_ERR(dev) || PTR_ERR(dev) != -EOPNOTSUPP)
2158
+ return dev;
2159
+
2160
+ dev = alloc_netdev(sizeof(struct rdma_netdev), name, NET_NAME_UNKNOWN,
2161
+ ipoib_setup_common);
21442162 if (!dev)
2145
- return NULL;
2146
-
2147
- rn = netdev_priv(dev);
2148
-
2149
- rn->send = ipoib_send;
2150
- rn->attach_mcast = ipoib_mcast_attach;
2151
- rn->detach_mcast = ipoib_mcast_detach;
2152
- rn->hca = hca;
2153
- dev->netdev_ops = &ipoib_netdev_default_pf;
2154
-
2163
+ return ERR_PTR(-ENOMEM);
21552164 return dev;
21562165 }
21572166
2158
-static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
2159
- const char *name)
2167
+int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
2168
+ struct net_device *dev)
21602169 {
2161
- struct net_device *dev;
2162
-
2163
- if (hca->alloc_rdma_netdev) {
2164
- dev = hca->alloc_rdma_netdev(hca, port,
2165
- RDMA_NETDEV_IPOIB, name,
2166
- NET_NAME_UNKNOWN,
2167
- ipoib_setup_common);
2168
- if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
2169
- return NULL;
2170
- }
2171
-
2172
- if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
2173
- dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
2174
- ipoib_setup_common);
2175
-
2176
- return dev;
2177
-}
2178
-
2179
-struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
2180
- const char *name)
2181
-{
2182
- struct net_device *dev;
2170
+ struct rdma_netdev *rn = netdev_priv(dev);
21832171 struct ipoib_dev_priv *priv;
2184
- struct rdma_netdev *rn;
2172
+ int rc;
21852173
21862174 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
21872175 if (!priv)
2188
- return NULL;
2176
+ return -ENOMEM;
21892177
21902178 priv->ca = hca;
21912179 priv->port = port;
21922180
2193
- dev = ipoib_get_netdev(hca, port, name);
2194
- if (!dev)
2195
- goto free_priv;
2181
+ rc = rdma_init_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
2182
+ NET_NAME_UNKNOWN, ipoib_setup_common, dev);
2183
+ if (rc) {
2184
+ if (rc != -EOPNOTSUPP)
2185
+ goto out;
2186
+
2187
+ rn->send = ipoib_send;
2188
+ rn->attach_mcast = ipoib_mcast_attach;
2189
+ rn->detach_mcast = ipoib_mcast_detach;
2190
+ rn->hca = hca;
2191
+
2192
+ rc = netif_set_real_num_tx_queues(dev, 1);
2193
+ if (rc)
2194
+ goto out;
2195
+
2196
+ rc = netif_set_real_num_rx_queues(dev, 1);
2197
+ if (rc)
2198
+ goto out;
2199
+ }
21962200
21972201 priv->rn_ops = dev->netdev_ops;
21982202
2199
- /* fixme : should be after the query_cap */
2200
- if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
2203
+ if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
22012204 dev->netdev_ops = &ipoib_netdev_ops_vf;
22022205 else
22032206 dev->netdev_ops = &ipoib_netdev_ops_pf;
22042207
2205
- rn = netdev_priv(dev);
22062208 rn->clnt_priv = priv;
2207
-
22082209 /*
22092210 * Only the child register_netdev flows can handle priv_destructor
22102211 * being set, so we force it to NULL here and handle manually until it
....@@ -2215,10 +2216,35 @@
22152216
22162217 ipoib_build_priv(dev);
22172218
2218
- return priv;
2219
-free_priv:
2219
+ return 0;
2220
+
2221
+out:
22202222 kfree(priv);
2221
- return NULL;
2223
+ return rc;
2224
+}
2225
+
2226
+struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
2227
+ const char *name)
2228
+{
2229
+ struct net_device *dev;
2230
+ int rc;
2231
+
2232
+ dev = ipoib_alloc_netdev(hca, port, name);
2233
+ if (IS_ERR(dev))
2234
+ return dev;
2235
+
2236
+ rc = ipoib_intf_init(hca, port, name, dev);
2237
+ if (rc) {
2238
+ free_netdev(dev);
2239
+ return ERR_PTR(rc);
2240
+ }
2241
+
2242
+ /*
2243
+ * Upon success the caller must ensure ipoib_intf_free is called or
2244
+ * register_netdevice succeeded and priv_destructor is set to
2245
+ * ipoib_intf_free.
2246
+ */
2247
+ return dev;
22222248 }
22232249
22242250 void ipoib_intf_free(struct net_device *dev)
....@@ -2398,19 +2424,62 @@
23982424 return device_create_file(&dev->dev, &dev_attr_pkey);
23992425 }
24002426
2427
+/*
2428
+ * We erroneously exposed the iface's port number in the dev_id
2429
+ * sysfs field long after dev_port was introduced for that purpose[1],
2430
+ * and we need to stop everyone from relying on that.
2431
+ * Let's overload the shower routine for the dev_id file here
2432
+ * to gently bring the issue up.
2433
+ *
2434
+ * [1] https://www.spinics.net/lists/netdev/msg272123.html
2435
+ */
2436
+static ssize_t dev_id_show(struct device *dev,
2437
+ struct device_attribute *attr, char *buf)
2438
+{
2439
+ struct net_device *ndev = to_net_dev(dev);
2440
+
2441
+ /*
2442
+ * ndev->dev_port will be equal to 0 in old kernel prior to commit
2443
+ * 9b8b2a323008 ("IB/ipoib: Use dev_port to expose network interface
2444
+ * port numbers") Zero was chosen as special case for user space
2445
+ * applications to fallback and query dev_id to check if it has
2446
+ * different value or not.
2447
+ *
2448
+ * Don't print warning in such scenario.
2449
+ *
2450
+ * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358
2451
+ */
2452
+ if (ndev->dev_port && ndev->dev_id == ndev->dev_port)
2453
+ netdev_info_once(ndev,
2454
+ "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
2455
+ current->comm);
2456
+
2457
+ return sprintf(buf, "%#x\n", ndev->dev_id);
2458
+}
2459
+static DEVICE_ATTR_RO(dev_id);
2460
+
2461
+static int ipoib_intercept_dev_id_attr(struct net_device *dev)
2462
+{
2463
+ device_remove_file(&dev->dev, &dev_attr_dev_id);
2464
+ return device_create_file(&dev->dev, &dev_attr_dev_id);
2465
+}
2466
+
24012467 static struct net_device *ipoib_add_port(const char *format,
24022468 struct ib_device *hca, u8 port)
24032469 {
2470
+ struct rtnl_link_ops *ops = ipoib_get_link_ops();
2471
+ struct rdma_netdev_alloc_params params;
24042472 struct ipoib_dev_priv *priv;
24052473 struct net_device *ndev;
24062474 int result;
24072475
2408
- priv = ipoib_intf_alloc(hca, port, format);
2409
- if (!priv) {
2410
- pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
2411
- return ERR_PTR(-ENOMEM);
2476
+ ndev = ipoib_intf_alloc(hca, port, format);
2477
+ if (IS_ERR(ndev)) {
2478
+ pr_warn("%s, %d: ipoib_intf_alloc failed %ld\n", hca->name, port,
2479
+ PTR_ERR(ndev));
2480
+ return ndev;
24122481 }
2413
- ndev = priv->dev;
2482
+ priv = ipoib_priv(ndev);
24142483
24152484 INIT_IB_EVENT_HANDLER(&priv->event_handler,
24162485 priv->ca, ipoib_event);
....@@ -2418,6 +2487,8 @@
24182487
24192488 /* call event handler to ensure pkey in sync */
24202489 queue_work(ipoib_workqueue, &priv->flush_heavy);
2490
+
2491
+ ndev->rtnl_link_ops = ipoib_get_link_ops();
24212492
24222493 result = register_netdev(ndev);
24232494 if (result) {
....@@ -2431,6 +2502,14 @@
24312502 return ERR_PTR(result);
24322503 }
24332504
2505
+ if (hca->ops.rdma_netdev_get_params) {
2506
+ int rc = hca->ops.rdma_netdev_get_params(hca, port,
2507
+ RDMA_NETDEV_IPOIB,
2508
+ &params);
2509
+
2510
+ if (!rc && ops->priv_size < params.sizeof_priv)
2511
+ ops->priv_size = params.sizeof_priv;
2512
+ }
24342513 /*
24352514 * We cannot set priv_destructor before register_netdev because we
24362515 * need priv to be always valid during the error flow to execute
....@@ -2439,6 +2518,8 @@
24392518 */
24402519 ndev->priv_destructor = ipoib_intf_free;
24412520
2521
+ if (ipoib_intercept_dev_id_attr(ndev))
2522
+ goto sysfs_failed;
24422523 if (ipoib_cm_add_mode_attr(ndev))
24432524 goto sysfs_failed;
24442525 if (ipoib_add_pkey_attr(ndev))
....@@ -2458,21 +2539,21 @@
24582539 return ERR_PTR(-ENOMEM);
24592540 }
24602541
2461
-static void ipoib_add_one(struct ib_device *device)
2542
+static int ipoib_add_one(struct ib_device *device)
24622543 {
24632544 struct list_head *dev_list;
24642545 struct net_device *dev;
24652546 struct ipoib_dev_priv *priv;
2466
- int p;
2547
+ unsigned int p;
24672548 int count = 0;
24682549
24692550 dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
24702551 if (!dev_list)
2471
- return;
2552
+ return -ENOMEM;
24722553
24732554 INIT_LIST_HEAD(dev_list);
24742555
2475
- for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
2556
+ rdma_for_each_port (device, p) {
24762557 if (!rdma_protocol_ib(device, p))
24772558 continue;
24782559 dev = ipoib_add_port("ib%d", device, p);
....@@ -2485,19 +2566,17 @@
24852566
24862567 if (!count) {
24872568 kfree(dev_list);
2488
- return;
2569
+ return -EOPNOTSUPP;
24892570 }
24902571
24912572 ib_set_client_data(device, &ipoib_client, dev_list);
2573
+ return 0;
24922574 }
24932575
24942576 static void ipoib_remove_one(struct ib_device *device, void *client_data)
24952577 {
24962578 struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
24972579 struct list_head *dev_list = client_data;
2498
-
2499
- if (!dev_list)
2500
- return;
25012580
25022581 list_for_each_entry_safe(priv, tmp, dev_list, list) {
25032582 LIST_HEAD(head);
....@@ -2545,9 +2624,7 @@
25452624 */
25462625 BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
25472626
2548
- ret = ipoib_register_debugfs();
2549
- if (ret)
2550
- return ret;
2627
+ ipoib_register_debugfs();
25512628
25522629 /*
25532630 * We create a global workqueue here that is used for all flush