.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
1 | 2 | /* |
2 | 3 | * NET3 Protocol independent device support routines. |
3 | | - * |
4 | | - * This program is free software; you can redistribute it and/or |
5 | | - * modify it under the terms of the GNU General Public License |
6 | | - * as published by the Free Software Foundation; either version |
7 | | - * 2 of the License, or (at your option) any later version. |
8 | 4 | * |
9 | 5 | * Derived from the non IP parts of dev.c 1.0.19 |
10 | 6 | * Authors: Ross Biro |
.. | .. |
---|
102 | 98 | #include <net/busy_poll.h> |
---|
103 | 99 | #include <linux/rtnetlink.h> |
---|
104 | 100 | #include <linux/stat.h> |
---|
| 101 | +#include <net/dsa.h> |
---|
105 | 102 | #include <net/dst.h> |
---|
106 | 103 | #include <net/dst_metadata.h> |
---|
107 | 104 | #include <net/pkt_sched.h> |
---|
.. | .. |
---|
132 | 129 | #include <trace/events/napi.h> |
---|
133 | 130 | #include <trace/events/net.h> |
---|
134 | 131 | #include <trace/events/skb.h> |
---|
135 | | -#include <linux/pci.h> |
---|
136 | 132 | #include <linux/inetdevice.h> |
---|
137 | 133 | #include <linux/cpu_rmap.h> |
---|
138 | 134 | #include <linux/static_key.h> |
---|
.. | .. |
---|
146 | 142 | #include <linux/sctp.h> |
---|
147 | 143 | #include <net/udp_tunnel.h> |
---|
148 | 144 | #include <linux/net_namespace.h> |
---|
| 145 | +#include <linux/indirect_call_wrapper.h> |
---|
| 146 | +#include <net/devlink.h> |
---|
| 147 | +#include <linux/pm_runtime.h> |
---|
| 148 | +#include <linux/prandom.h> |
---|
| 149 | +#include <trace/hooks/net.h> |
---|
149 | 150 | |
---|
150 | 151 | #include "net-sysfs.h" |
---|
151 | 152 | |
---|
152 | 153 | #define MAX_GRO_SKBS 8 |
---|
153 | | -#define MAX_NEST_DEV 8 |
---|
154 | 154 | |
---|
155 | 155 | /* This should be increased if a protocol with a bigger head is added. */ |
---|
156 | 156 | #define GRO_MAX_HEAD (MAX_HEADER + 128) |
---|
.. | .. |
---|
164 | 164 | static int netif_rx_internal(struct sk_buff *skb); |
---|
165 | 165 | static int call_netdevice_notifiers_info(unsigned long val, |
---|
166 | 166 | struct netdev_notifier_info *info); |
---|
| 167 | +static int call_netdevice_notifiers_extack(unsigned long val, |
---|
| 168 | + struct net_device *dev, |
---|
| 169 | + struct netlink_ext_ack *extack); |
---|
167 | 170 | static struct napi_struct *napi_by_id(unsigned int napi_id); |
---|
168 | 171 | |
---|
169 | 172 | /* |
---|
.. | .. |
---|
219 | 222 | static inline void rps_lock(struct softnet_data *sd) |
---|
220 | 223 | { |
---|
221 | 224 | #ifdef CONFIG_RPS |
---|
222 | | - raw_spin_lock(&sd->input_pkt_queue.raw_lock); |
---|
| 225 | + spin_lock(&sd->input_pkt_queue.lock); |
---|
223 | 226 | #endif |
---|
224 | 227 | } |
---|
225 | 228 | |
---|
226 | 229 | static inline void rps_unlock(struct softnet_data *sd) |
---|
227 | 230 | { |
---|
228 | 231 | #ifdef CONFIG_RPS |
---|
229 | | - raw_spin_unlock(&sd->input_pkt_queue.raw_lock); |
---|
| 232 | + spin_unlock(&sd->input_pkt_queue.lock); |
---|
230 | 233 | #endif |
---|
| 234 | +} |
---|
| 235 | + |
---|
| 236 | +static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev, |
---|
| 237 | + const char *name) |
---|
| 238 | +{ |
---|
| 239 | + struct netdev_name_node *name_node; |
---|
| 240 | + |
---|
| 241 | + name_node = kmalloc(sizeof(*name_node), GFP_KERNEL); |
---|
| 242 | + if (!name_node) |
---|
| 243 | + return NULL; |
---|
| 244 | + INIT_HLIST_NODE(&name_node->hlist); |
---|
| 245 | + name_node->dev = dev; |
---|
| 246 | + name_node->name = name; |
---|
| 247 | + return name_node; |
---|
| 248 | +} |
---|
| 249 | + |
---|
| 250 | +static struct netdev_name_node * |
---|
| 251 | +netdev_name_node_head_alloc(struct net_device *dev) |
---|
| 252 | +{ |
---|
| 253 | + struct netdev_name_node *name_node; |
---|
| 254 | + |
---|
| 255 | + name_node = netdev_name_node_alloc(dev, dev->name); |
---|
| 256 | + if (!name_node) |
---|
| 257 | + return NULL; |
---|
| 258 | + INIT_LIST_HEAD(&name_node->list); |
---|
| 259 | + return name_node; |
---|
| 260 | +} |
---|
| 261 | + |
---|
| 262 | +static void netdev_name_node_free(struct netdev_name_node *name_node) |
---|
| 263 | +{ |
---|
| 264 | + kfree(name_node); |
---|
| 265 | +} |
---|
| 266 | + |
---|
| 267 | +static void netdev_name_node_add(struct net *net, |
---|
| 268 | + struct netdev_name_node *name_node) |
---|
| 269 | +{ |
---|
| 270 | + hlist_add_head_rcu(&name_node->hlist, |
---|
| 271 | + dev_name_hash(net, name_node->name)); |
---|
| 272 | +} |
---|
| 273 | + |
---|
| 274 | +static void netdev_name_node_del(struct netdev_name_node *name_node) |
---|
| 275 | +{ |
---|
| 276 | + hlist_del_rcu(&name_node->hlist); |
---|
| 277 | +} |
---|
| 278 | + |
---|
| 279 | +static struct netdev_name_node *netdev_name_node_lookup(struct net *net, |
---|
| 280 | + const char *name) |
---|
| 281 | +{ |
---|
| 282 | + struct hlist_head *head = dev_name_hash(net, name); |
---|
| 283 | + struct netdev_name_node *name_node; |
---|
| 284 | + |
---|
| 285 | + hlist_for_each_entry(name_node, head, hlist) |
---|
| 286 | + if (!strcmp(name_node->name, name)) |
---|
| 287 | + return name_node; |
---|
| 288 | + return NULL; |
---|
| 289 | +} |
---|
| 290 | + |
---|
| 291 | +static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, |
---|
| 292 | + const char *name) |
---|
| 293 | +{ |
---|
| 294 | + struct hlist_head *head = dev_name_hash(net, name); |
---|
| 295 | + struct netdev_name_node *name_node; |
---|
| 296 | + |
---|
| 297 | + hlist_for_each_entry_rcu(name_node, head, hlist) |
---|
| 298 | + if (!strcmp(name_node->name, name)) |
---|
| 299 | + return name_node; |
---|
| 300 | + return NULL; |
---|
| 301 | +} |
---|
| 302 | + |
---|
| 303 | +int netdev_name_node_alt_create(struct net_device *dev, const char *name) |
---|
| 304 | +{ |
---|
| 305 | + struct netdev_name_node *name_node; |
---|
| 306 | + struct net *net = dev_net(dev); |
---|
| 307 | + |
---|
| 308 | + name_node = netdev_name_node_lookup(net, name); |
---|
| 309 | + if (name_node) |
---|
| 310 | + return -EEXIST; |
---|
| 311 | + name_node = netdev_name_node_alloc(dev, name); |
---|
| 312 | + if (!name_node) |
---|
| 313 | + return -ENOMEM; |
---|
| 314 | + netdev_name_node_add(net, name_node); |
---|
| 315 | + /* The node that holds dev->name acts as a head of per-device list. */ |
---|
| 316 | + list_add_tail(&name_node->list, &dev->name_node->list); |
---|
| 317 | + |
---|
| 318 | + return 0; |
---|
| 319 | +} |
---|
| 320 | +EXPORT_SYMBOL(netdev_name_node_alt_create); |
---|
| 321 | + |
---|
| 322 | +static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) |
---|
| 323 | +{ |
---|
| 324 | + list_del(&name_node->list); |
---|
| 325 | + netdev_name_node_del(name_node); |
---|
| 326 | + kfree(name_node->name); |
---|
| 327 | + netdev_name_node_free(name_node); |
---|
| 328 | +} |
---|
| 329 | + |
---|
| 330 | +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) |
---|
| 331 | +{ |
---|
| 332 | + struct netdev_name_node *name_node; |
---|
| 333 | + struct net *net = dev_net(dev); |
---|
| 334 | + |
---|
| 335 | + name_node = netdev_name_node_lookup(net, name); |
---|
| 336 | + if (!name_node) |
---|
| 337 | + return -ENOENT; |
---|
| 338 | + /* lookup might have found our primary name or a name belonging |
---|
| 339 | + * to another device. |
---|
| 340 | + */ |
---|
| 341 | + if (name_node == dev->name_node || name_node->dev != dev) |
---|
| 342 | + return -EINVAL; |
---|
| 343 | + |
---|
| 344 | + __netdev_name_node_alt_destroy(name_node); |
---|
| 345 | + |
---|
| 346 | + return 0; |
---|
| 347 | +} |
---|
| 348 | +EXPORT_SYMBOL(netdev_name_node_alt_destroy); |
---|
| 349 | + |
---|
| 350 | +static void netdev_name_node_alt_flush(struct net_device *dev) |
| 351 | +{ |
| 352 | + struct netdev_name_node *name_node, *tmp; |
| 353 | + |
| 354 | + list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) |
| 355 | + __netdev_name_node_alt_destroy(name_node); |
231 | 356 | } |
232 | 357 | |
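The block above gives each device a list of alternative names hanging off dev->name_node, all visible through the name hash. A hedged usage sketch (hypothetical caller, not part of this patch): the string handed to netdev_name_node_alt_create() must be a separate heap allocation, because __netdev_name_node_alt_destroy() kfrees it, and the helpers are assumed to run under RTNL.

```c
/* Hypothetical illustration of the alternative-name helpers added above. */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

static int demo_add_altname(struct net_device *dev)
{
	char *alt = kstrdup("uplink0", GFP_KERNEL);	/* must be kfree()-able */
	int err;

	if (!alt)
		return -ENOMEM;

	rtnl_lock();
	err = netdev_name_node_alt_create(dev, alt);	/* -EEXIST if already taken */
	rtnl_unlock();

	if (err)
		kfree(alt);	/* on success the name node owns the string */
	return err;
}
```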
233 | 358 | /* Device list insertion */ |
---|
.. | .. |
---|
239 | 364 | |
---|
240 | 365 | write_lock_bh(&dev_base_lock); |
---|
241 | 366 | list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); |
---|
242 | | - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); |
---|
| 367 | + netdev_name_node_add(net, dev->name_node); |
---|
243 | 368 | hlist_add_head_rcu(&dev->index_hlist, |
---|
244 | 369 | dev_index_hash(net, dev->ifindex)); |
---|
245 | 370 | write_unlock_bh(&dev_base_lock); |
---|
.. | .. |
---|
257 | 382 | /* Unlink dev from the device chain */ |
---|
258 | 383 | write_lock_bh(&dev_base_lock); |
---|
259 | 384 | list_del_rcu(&dev->dev_list); |
---|
260 | | - hlist_del_rcu(&dev->name_hlist); |
---|
| 385 | + netdev_name_node_del(dev->name_node); |
---|
261 | 386 | hlist_del_rcu(&dev->index_hlist); |
---|
262 | 387 | write_unlock_bh(&dev_base_lock); |
---|
263 | 388 | |
---|
.. | .. |
---|
355 | 480 | unsigned short dev_type) |
---|
356 | 481 | { |
---|
357 | 482 | } |
---|
| 483 | + |
---|
358 | 484 | static inline void netdev_set_addr_lockdep_class(struct net_device *dev) |
---|
359 | 485 | { |
---|
360 | 486 | } |
---|
.. | .. |
---|
385 | 511 | |
---|
386 | 512 | static inline struct list_head *ptype_head(const struct packet_type *pt) |
---|
387 | 513 | { |
---|
| 514 | + struct list_head vendor_pt = { .next = NULL, }; |
---|
| 515 | + |
---|
| 516 | + trace_android_vh_ptype_head(pt, &vendor_pt); |
---|
| 517 | + if (vendor_pt.next) |
---|
| 518 | + return vendor_pt.next; |
---|
| 519 | + |
---|
388 | 520 | if (pt->type == htons(ETH_P_ALL)) |
---|
389 | 521 | return pt->dev ? &pt->dev->ptype_all : &ptype_all; |
---|
390 | 522 | else |
---|
.. | .. |
---|
735 | 867 | |
---|
736 | 868 | struct net_device *__dev_get_by_name(struct net *net, const char *name) |
---|
737 | 869 | { |
---|
738 | | - struct net_device *dev; |
---|
739 | | - struct hlist_head *head = dev_name_hash(net, name); |
---|
| 870 | + struct netdev_name_node *node_name; |
---|
740 | 871 | |
---|
741 | | - hlist_for_each_entry(dev, head, name_hlist) |
---|
742 | | - if (!strncmp(dev->name, name, IFNAMSIZ)) |
---|
743 | | - return dev; |
---|
744 | | - |
---|
745 | | - return NULL; |
---|
| 872 | + node_name = netdev_name_node_lookup(net, name); |
---|
| 873 | + return node_name ? node_name->dev : NULL; |
---|
746 | 874 | } |
---|
747 | 875 | EXPORT_SYMBOL(__dev_get_by_name); |
---|
748 | 876 | |
---|
.. | .. |
760 | 888 | |
761 | 889 | struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) |
762 | 890 | { |
763 | | - struct net_device *dev; |
764 | | - struct hlist_head *head = dev_name_hash(net, name); |
| 891 | + struct netdev_name_node *node_name; |
765 | 892 | |
766 | | - hlist_for_each_entry_rcu(dev, head, name_hlist) |
767 | | - if (!strncmp(dev->name, name, IFNAMSIZ)) |
768 | | - return dev; |
769 | | - |
770 | | - return NULL; |
| 893 | + node_name = netdev_name_node_lookup_rcu(net, name); |
| 894 | + return node_name ? node_name->dev : NULL; |
771 | 895 | } |
772 | 896 | EXPORT_SYMBOL(dev_get_by_name_rcu); |
773 | 897 | |
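Both name lookups now resolve through the netdev_name_node hash, so an alternative name finds the same device as the primary one. A hedged sketch of the two calling conventions (illustrative, not from this patch): __dev_get_by_name() is for RTNL/dev_base_lock holders, dev_get_by_name_rcu() is for RCU readers; neither takes a reference on the device.

```c
/* Hypothetical illustration: RCU-side lookup by (primary or alternative) name. */
static bool demo_name_is_up(struct net *net, const char *name)
{
	struct net_device *dev;
	bool up = false;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);	/* no reference taken */
	if (dev)
		up = !!(dev->flags & IFF_UP);
	rcu_read_unlock();

	return up;
}
```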
.. | .. |
---|
1015 | 1139 | * @name: name string |
---|
1016 | 1140 | * |
---|
1017 | 1141 | * Network device names need to be valid file names to |
---|
1018 | | - * to allow sysfs to work. We also disallow any kind of |
---|
| 1142 | + * allow sysfs to work. We also disallow any kind of |
---|
1019 | 1143 | * whitespace. |
---|
1020 | 1144 | */ |
---|
1021 | 1145 | bool dev_valid_name(const char *name) |
---|
.. | .. |
---|
1078 | 1202 | return -ENOMEM; |
---|
1079 | 1203 | |
---|
1080 | 1204 | for_each_netdev(net, d) { |
| 1205 | + struct netdev_name_node *name_node; |
| 1206 | + list_for_each_entry(name_node, &d->name_node->list, list) { |
| 1207 | + if (!sscanf(name_node->name, name, &i)) |
| 1208 | + continue; |
| 1209 | + if (i < 0 || i >= max_netdevices) |
| 1210 | + continue; |
| 1211 | + |
| 1212 | + /* avoid cases where sscanf is not exact inverse of printf */ |
| 1213 | + snprintf(buf, IFNAMSIZ, name, i); |
| 1214 | + if (!strncmp(buf, name_node->name, IFNAMSIZ)) |
| 1215 | + set_bit(i, inuse); |
| 1216 | + } |
1081 | 1217 | if (!sscanf(d->name, name, &i)) |
1082 | 1218 | continue; |
1083 | 1219 | if (i < 0 || i >= max_netdevices) |
.. | .. |
1138 | 1274 | } |
1139 | 1275 | EXPORT_SYMBOL(dev_alloc_name); |
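With this change __dev_alloc_name() also treats unit numbers claimed by alternative names as busy when expanding a template. A hedged usage sketch (hypothetical driver code, not from this patch):

```c
/* Hypothetical illustration: ask the core for the lowest free unit number
 * matching a printf-style template.
 */
static int demo_pick_name(struct net_device *dev)
{
	int unit = dev_alloc_name(dev, "demo%d");	/* e.g. "demo0", "demo1", ... */

	return unit < 0 ? unit : 0;	/* negative errno on failure */
}
```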
1140 | 1276 | |
---|
1141 | | -int dev_get_valid_name(struct net *net, struct net_device *dev, |
---|
1142 | | - const char *name) |
---|
| 1277 | +static int dev_get_valid_name(struct net *net, struct net_device *dev, |
---|
| 1278 | + const char *name) |
---|
1143 | 1279 | { |
---|
1144 | 1280 | BUG_ON(!net); |
---|
1145 | 1281 | |
---|
.. | .. |
---|
1155 | 1291 | |
---|
1156 | 1292 | return 0; |
---|
1157 | 1293 | } |
---|
1158 | | -EXPORT_SYMBOL(dev_get_valid_name); |
---|
1159 | 1294 | |
---|
1160 | 1295 | /** |
---|
1161 | 1296 | * dev_change_name - change name of a device |
---|
.. | .. |
---|
1229 | 1364 | netdev_adjacent_rename_links(dev, oldname); |
---|
1230 | 1365 | |
---|
1231 | 1366 | write_lock_bh(&dev_base_lock); |
---|
1232 | | - hlist_del_rcu(&dev->name_hlist); |
---|
| 1367 | + netdev_name_node_del(dev->name_node); |
---|
1233 | 1368 | write_unlock_bh(&dev_base_lock); |
---|
1234 | 1369 | |
---|
1235 | 1370 | synchronize_rcu(); |
---|
1236 | 1371 | |
---|
1237 | 1372 | write_lock_bh(&dev_base_lock); |
---|
1238 | | - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); |
---|
| 1373 | + netdev_name_node_add(net, dev->name_node); |
---|
1239 | 1374 | write_unlock_bh(&dev_base_lock); |
---|
1240 | 1375 | |
---|
1241 | 1376 | ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); |
---|
.. | .. |
---|
1285 | 1420 | } |
---|
1286 | 1421 | |
---|
1287 | 1422 | mutex_lock(&ifalias_mutex); |
---|
1288 | | - rcu_swap_protected(dev->ifalias, new_alias, |
---|
1289 | | - mutex_is_locked(&ifalias_mutex)); |
---|
| 1423 | + new_alias = rcu_replace_pointer(dev->ifalias, new_alias, |
---|
| 1424 | + mutex_is_locked(&ifalias_mutex)); |
---|
1290 | 1425 | mutex_unlock(&ifalias_mutex); |
---|
1291 | 1426 | |
---|
1292 | 1427 | if (new_alias) |
---|
.. | .. |
---|
1372 | 1507 | } |
---|
1373 | 1508 | EXPORT_SYMBOL(netdev_notify_peers); |
---|
1374 | 1509 | |
---|
1375 | | -static int __dev_open(struct net_device *dev) |
---|
| 1510 | +static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) |
---|
1376 | 1511 | { |
---|
1377 | 1512 | const struct net_device_ops *ops = dev->netdev_ops; |
---|
1378 | 1513 | int ret; |
---|
1379 | 1514 | |
---|
1380 | 1515 | ASSERT_RTNL(); |
---|
1381 | 1516 | |
---|
1382 | | - if (!netif_device_present(dev)) |
---|
1383 | | - return -ENODEV; |
---|
| 1517 | + if (!netif_device_present(dev)) { |
---|
| 1518 | + /* may be detached because parent is runtime-suspended */ |
---|
| 1519 | + if (dev->dev.parent) |
---|
| 1520 | + pm_runtime_resume(dev->dev.parent); |
---|
| 1521 | + if (!netif_device_present(dev)) |
---|
| 1522 | + return -ENODEV; |
---|
| 1523 | + } |
---|
1384 | 1524 | |
---|
1385 | 1525 | /* Block netpoll from trying to do any rx path servicing. |
---|
1386 | 1526 | * If we don't do this there is a chance ndo_poll_controller |
---|
.. | .. |
---|
1388 | 1528 | */ |
---|
1389 | 1529 | netpoll_poll_disable(dev); |
---|
1390 | 1530 | |
---|
1391 | | - ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); |
---|
| 1531 | + ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack); |
---|
1392 | 1532 | ret = notifier_to_errno(ret); |
---|
1393 | 1533 | if (ret) |
---|
1394 | 1534 | return ret; |
---|
.. | .. |
---|
1417 | 1557 | |
---|
1418 | 1558 | /** |
---|
1419 | 1559 | * dev_open - prepare an interface for use. |
---|
1420 | | - * @dev: device to open |
---|
| 1560 | + * @dev: device to open |
---|
| 1561 | + * @extack: netlink extended ack |
---|
1421 | 1562 | * |
---|
1422 | 1563 | * Takes a device from down to up state. The device's private open |
---|
1423 | 1564 | * function is invoked and then the multicast lists are loaded. Finally |
---|
.. | .. |
---|
1427 | 1568 | * Calling this function on an active interface is a nop. On a failure |
---|
1428 | 1569 | * a negative errno code is returned. |
---|
1429 | 1570 | */ |
---|
1430 | | -int dev_open(struct net_device *dev) |
| 1571 | +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) |
1431 | 1572 | { |
1432 | 1573 | int ret; |
1433 | 1574 | |
1434 | 1575 | if (dev->flags & IFF_UP) |
1435 | 1576 | return 0; |
1436 | 1577 | |
1437 | | - ret = __dev_open(dev); |
| 1578 | + ret = __dev_open(dev, extack); |
1438 | 1579 | if (ret < 0) |
1439 | 1580 | return ret; |
1440 | 1581 | |
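dev_open() now threads a netlink extended ack through to the NETDEV_PRE_UP notifiers (via call_netdevice_notifiers_extack() above), so a refusing notifier can report why the device cannot come up. A hedged caller sketch (hypothetical, not from this patch):

```c
/* Hypothetical illustration of the new dev_open() signature; callers with
 * no extack context simply pass NULL.
 */
static int demo_bring_up(struct net_device *dev, struct netlink_ext_ack *extack)
{
	ASSERT_RTNL();			/* dev_open() runs under RTNL */
	return dev_open(dev, extack);	/* was dev_open(dev) before this change */
}
```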
.. | .. |
---|
1596 | 1737 | N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) |
---|
1597 | 1738 | N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) |
---|
1598 | 1739 | N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) |
---|
| 1740 | + N(PRE_CHANGEADDR) |
---|
1599 | 1741 | } |
---|
1600 | 1742 | #undef N |
---|
1601 | 1743 | return "UNKNOWN_NETDEV_EVENT"; |
---|
.. | .. |
---|
1610 | 1752 | }; |
---|
1611 | 1753 | |
---|
1612 | 1754 | return nb->notifier_call(nb, val, &info); |
---|
| 1755 | +} |
---|
| 1756 | + |
---|
| 1757 | +static int call_netdevice_register_notifiers(struct notifier_block *nb, |
---|
| 1758 | + struct net_device *dev) |
---|
| 1759 | +{ |
---|
| 1760 | + int err; |
---|
| 1761 | + |
---|
| 1762 | + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); |
---|
| 1763 | + err = notifier_to_errno(err); |
---|
| 1764 | + if (err) |
---|
| 1765 | + return err; |
---|
| 1766 | + |
---|
| 1767 | + if (!(dev->flags & IFF_UP)) |
---|
| 1768 | + return 0; |
---|
| 1769 | + |
---|
| 1770 | + call_netdevice_notifier(nb, NETDEV_UP, dev); |
---|
| 1771 | + return 0; |
---|
| 1772 | +} |
---|
| 1773 | + |
---|
| 1774 | +static void call_netdevice_unregister_notifiers(struct notifier_block *nb, |
---|
| 1775 | + struct net_device *dev) |
---|
| 1776 | +{ |
---|
| 1777 | + if (dev->flags & IFF_UP) { |
---|
| 1778 | + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, |
---|
| 1779 | + dev); |
---|
| 1780 | + call_netdevice_notifier(nb, NETDEV_DOWN, dev); |
---|
| 1781 | + } |
---|
| 1782 | + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
---|
| 1783 | +} |
---|
| 1784 | + |
---|
| 1785 | +static int call_netdevice_register_net_notifiers(struct notifier_block *nb, |
---|
| 1786 | + struct net *net) |
---|
| 1787 | +{ |
---|
| 1788 | + struct net_device *dev; |
---|
| 1789 | + int err; |
---|
| 1790 | + |
---|
| 1791 | + for_each_netdev(net, dev) { |
---|
| 1792 | + err = call_netdevice_register_notifiers(nb, dev); |
---|
| 1793 | + if (err) |
---|
| 1794 | + goto rollback; |
---|
| 1795 | + } |
---|
| 1796 | + return 0; |
---|
| 1797 | + |
---|
| 1798 | +rollback: |
---|
| 1799 | + for_each_netdev_continue_reverse(net, dev) |
---|
| 1800 | + call_netdevice_unregister_notifiers(nb, dev); |
---|
| 1801 | + return err; |
---|
| 1802 | +} |
---|
| 1803 | + |
---|
| 1804 | +static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb, |
---|
| 1805 | + struct net *net) |
---|
| 1806 | +{ |
---|
| 1807 | + struct net_device *dev; |
---|
| 1808 | + |
---|
| 1809 | + for_each_netdev(net, dev) |
---|
| 1810 | + call_netdevice_unregister_notifiers(nb, dev); |
---|
1613 | 1811 | } |
---|
1614 | 1812 | |
---|
1615 | 1813 | static int dev_boot_phase = 1; |
---|
.. | .. |
---|
1630 | 1828 | |
---|
1631 | 1829 | int register_netdevice_notifier(struct notifier_block *nb) |
---|
1632 | 1830 | { |
---|
1633 | | - struct net_device *dev; |
---|
1634 | | - struct net_device *last; |
---|
1635 | 1831 | struct net *net; |
---|
1636 | 1832 | int err; |
---|
1637 | 1833 | |
---|
.. | .. |
---|
1644 | 1840 | if (dev_boot_phase) |
---|
1645 | 1841 | goto unlock; |
---|
1646 | 1842 | for_each_net(net) { |
---|
1647 | | - for_each_netdev(net, dev) { |
---|
1648 | | - err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); |
---|
1649 | | - err = notifier_to_errno(err); |
---|
1650 | | - if (err) |
---|
1651 | | - goto rollback; |
---|
1652 | | - |
---|
1653 | | - if (!(dev->flags & IFF_UP)) |
---|
1654 | | - continue; |
---|
1655 | | - |
---|
1656 | | - call_netdevice_notifier(nb, NETDEV_UP, dev); |
---|
1657 | | - } |
---|
| 1843 | + err = call_netdevice_register_net_notifiers(nb, net); |
---|
| 1844 | + if (err) |
---|
| 1845 | + goto rollback; |
---|
1658 | 1846 | } |
---|
1659 | 1847 | |
---|
1660 | 1848 | unlock: |
---|
.. | .. |
---|
1663 | 1851 | return err; |
---|
1664 | 1852 | |
---|
1665 | 1853 | rollback: |
---|
1666 | | - last = dev; |
---|
1667 | | - for_each_net(net) { |
---|
1668 | | - for_each_netdev(net, dev) { |
---|
1669 | | - if (dev == last) |
---|
1670 | | - goto outroll; |
---|
| 1854 | + for_each_net_continue_reverse(net) |
---|
| 1855 | + call_netdevice_unregister_net_notifiers(nb, net); |
---|
1671 | 1856 | |
---|
1672 | | - if (dev->flags & IFF_UP) { |
---|
1673 | | - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, |
---|
1674 | | - dev); |
---|
1675 | | - call_netdevice_notifier(nb, NETDEV_DOWN, dev); |
---|
1676 | | - } |
---|
1677 | | - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
---|
1678 | | - } |
---|
1679 | | - } |
---|
1680 | | - |
---|
1681 | | -outroll: |
---|
1682 | 1857 | raw_notifier_chain_unregister(&netdev_chain, nb); |
---|
1683 | 1858 | goto unlock; |
---|
1684 | 1859 | } |
---|
.. | .. |
---|
1700 | 1875 | |
---|
1701 | 1876 | int unregister_netdevice_notifier(struct notifier_block *nb) |
---|
1702 | 1877 | { |
---|
1703 | | - struct net_device *dev; |
---|
1704 | 1878 | struct net *net; |
---|
1705 | 1879 | int err; |
---|
1706 | 1880 | |
---|
.. | .. |
---|
1711 | 1885 | if (err) |
---|
1712 | 1886 | goto unlock; |
---|
1713 | 1887 | |
---|
1714 | | - for_each_net(net) { |
---|
1715 | | - for_each_netdev(net, dev) { |
---|
1716 | | - if (dev->flags & IFF_UP) { |
---|
1717 | | - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, |
---|
1718 | | - dev); |
---|
1719 | | - call_netdevice_notifier(nb, NETDEV_DOWN, dev); |
---|
1720 | | - } |
---|
1721 | | - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
---|
1722 | | - } |
---|
1723 | | - } |
---|
| 1888 | + for_each_net(net) |
---|
| 1889 | + call_netdevice_unregister_net_notifiers(nb, net); |
---|
| 1890 | + |
---|
1724 | 1891 | unlock: |
---|
1725 | 1892 | rtnl_unlock(); |
---|
1726 | 1893 | up_write(&pernet_ops_rwsem); |
---|
1727 | 1894 | return err; |
---|
1728 | 1895 | } |
---|
1729 | 1896 | EXPORT_SYMBOL(unregister_netdevice_notifier); |
---|
| 1897 | + |
---|
| 1898 | +static int __register_netdevice_notifier_net(struct net *net, |
---|
| 1899 | + struct notifier_block *nb, |
---|
| 1900 | + bool ignore_call_fail) |
---|
| 1901 | +{ |
---|
| 1902 | + int err; |
---|
| 1903 | + |
---|
| 1904 | + err = raw_notifier_chain_register(&net->netdev_chain, nb); |
---|
| 1905 | + if (err) |
---|
| 1906 | + return err; |
---|
| 1907 | + if (dev_boot_phase) |
---|
| 1908 | + return 0; |
---|
| 1909 | + |
---|
| 1910 | + err = call_netdevice_register_net_notifiers(nb, net); |
---|
| 1911 | + if (err && !ignore_call_fail) |
---|
| 1912 | + goto chain_unregister; |
---|
| 1913 | + |
---|
| 1914 | + return 0; |
---|
| 1915 | + |
---|
| 1916 | +chain_unregister: |
---|
| 1917 | + raw_notifier_chain_unregister(&net->netdev_chain, nb); |
---|
| 1918 | + return err; |
---|
| 1919 | +} |
---|
| 1920 | + |
---|
| 1921 | +static int __unregister_netdevice_notifier_net(struct net *net, |
---|
| 1922 | + struct notifier_block *nb) |
---|
| 1923 | +{ |
---|
| 1924 | + int err; |
---|
| 1925 | + |
---|
| 1926 | + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); |
---|
| 1927 | + if (err) |
---|
| 1928 | + return err; |
---|
| 1929 | + |
---|
| 1930 | + call_netdevice_unregister_net_notifiers(nb, net); |
---|
| 1931 | + return 0; |
---|
| 1932 | +} |
---|
| 1933 | + |
---|
| 1934 | +/** |
---|
| 1935 | + * register_netdevice_notifier_net - register a per-netns network notifier block |
---|
| 1936 | + * @net: network namespace |
---|
| 1937 | + * @nb: notifier |
---|
| 1938 | + * |
---|
| 1939 | + * Register a notifier to be called when network device events occur. |
---|
| 1940 | + * The notifier passed is linked into the kernel structures and must |
---|
| 1941 | + * not be reused until it has been unregistered. A negative errno code |
---|
| 1942 | + * is returned on a failure. |
---|
| 1943 | + * |
---|
| 1944 | + * When registered all registration and up events are replayed |
---|
| 1945 | + * to the new notifier to allow device to have a race free |
---|
| 1946 | + * view of the network device list. |
---|
| 1947 | + */ |
---|
| 1948 | + |
---|
| 1949 | +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) |
---|
| 1950 | +{ |
---|
| 1951 | + int err; |
---|
| 1952 | + |
---|
| 1953 | + rtnl_lock(); |
---|
| 1954 | + err = __register_netdevice_notifier_net(net, nb, false); |
---|
| 1955 | + rtnl_unlock(); |
---|
| 1956 | + return err; |
---|
| 1957 | +} |
---|
| 1958 | +EXPORT_SYMBOL(register_netdevice_notifier_net); |
---|
| 1959 | + |
---|
| 1960 | +/** |
---|
| 1961 | + * unregister_netdevice_notifier_net - unregister a per-netns |
---|
| 1962 | + * network notifier block |
---|
| 1963 | + * @net: network namespace |
---|
| 1964 | + * @nb: notifier |
---|
| 1965 | + * |
---|
| 1966 | + * Unregister a notifier previously registered by |
---|
| 1967 | + * register_netdevice_notifier(). The notifier is unlinked into the |
---|
| 1968 | + * kernel structures and may then be reused. A negative errno code |
---|
| 1969 | + * is returned on a failure. |
---|
| 1970 | + * |
---|
| 1971 | + * After unregistering unregister and down device events are synthesized |
---|
| 1972 | + * for all devices on the device list to the removed notifier to remove |
---|
| 1973 | + * the need for special case cleanup code. |
---|
| 1974 | + */ |
---|
| 1975 | + |
---|
| 1976 | +int unregister_netdevice_notifier_net(struct net *net, |
| 1977 | + struct notifier_block *nb) |
| 1978 | +{ |
| 1979 | + int err; |
| 1980 | + |
| 1981 | + rtnl_lock(); |
| 1982 | + err = __unregister_netdevice_notifier_net(net, nb); |
| 1983 | + rtnl_unlock(); |
| 1984 | + return err; |
| 1985 | +} |
| 1986 | +EXPORT_SYMBOL(unregister_netdevice_notifier_net); |
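A hedged usage sketch of the per-netns notifier pair added above (hypothetical module, not from this patch). Registration replays NETDEV_REGISTER/NETDEV_UP for devices already present in the namespace, and only events for that namespace are delivered:

```c
/* Hypothetical illustration of register/unregister_netdevice_notifier_net(). */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>

static int demo_netdev_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UP)
		pr_info("%s is up\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = { .notifier_call = demo_netdev_event };

static int __init demo_init(void)
{
	/* replays NETDEV_REGISTER/NETDEV_UP for devices already in init_net */
	return register_netdevice_notifier_net(&init_net, &demo_nb);
}

static void __exit demo_exit(void)
{
	unregister_netdevice_notifier_net(&init_net, &demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```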
| 1987 | + |
---|
| 1988 | +int register_netdevice_notifier_dev_net(struct net_device *dev, |
---|
| 1989 | + struct notifier_block *nb, |
---|
| 1990 | + struct netdev_net_notifier *nn) |
---|
| 1991 | +{ |
---|
| 1992 | + int err; |
---|
| 1993 | + |
---|
| 1994 | + rtnl_lock(); |
---|
| 1995 | + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); |
---|
| 1996 | + if (!err) { |
---|
| 1997 | + nn->nb = nb; |
---|
| 1998 | + list_add(&nn->list, &dev->net_notifier_list); |
---|
| 1999 | + } |
---|
| 2000 | + rtnl_unlock(); |
---|
| 2001 | + return err; |
---|
| 2002 | +} |
---|
| 2003 | +EXPORT_SYMBOL(register_netdevice_notifier_dev_net); |
---|
| 2004 | + |
---|
| 2005 | +int unregister_netdevice_notifier_dev_net(struct net_device *dev, |
---|
| 2006 | + struct notifier_block *nb, |
---|
| 2007 | + struct netdev_net_notifier *nn) |
---|
| 2008 | +{ |
---|
| 2009 | + int err; |
---|
| 2010 | + |
---|
| 2011 | + rtnl_lock(); |
---|
| 2012 | + list_del(&nn->list); |
---|
| 2013 | + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); |
---|
| 2014 | + rtnl_unlock(); |
---|
| 2015 | + return err; |
---|
| 2016 | +} |
---|
| 2017 | +EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net); |
---|
| 2018 | + |
---|
| 2019 | +static void move_netdevice_notifiers_dev_net(struct net_device *dev, |
---|
| 2020 | + struct net *net) |
---|
| 2021 | +{ |
---|
| 2022 | + struct netdev_net_notifier *nn; |
---|
| 2023 | + |
---|
| 2024 | + list_for_each_entry(nn, &dev->net_notifier_list, list) { |
---|
| 2025 | + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb); |
---|
| 2026 | + __register_netdevice_notifier_net(net, nn->nb, true); |
---|
| 2027 | + } |
---|
| 2028 | +} |
---|
1730 | 2029 | |
---|
1731 | 2030 | /** |
---|
1732 | 2031 | * call_netdevice_notifiers_info - call all network notifier blocks |
---|
.. | .. |
---|
1740 | 2039 | static int call_netdevice_notifiers_info(unsigned long val, |
---|
1741 | 2040 | struct netdev_notifier_info *info) |
---|
1742 | 2041 | { |
---|
| 2042 | + struct net *net = dev_net(info->dev); |
---|
| 2043 | + int ret; |
---|
| 2044 | + |
---|
1743 | 2045 | ASSERT_RTNL(); |
---|
| 2046 | + |
---|
| 2047 | + /* Run per-netns notifier block chain first, then run the global one. |
---|
| 2048 | + * Hopefully, one day, the global one is going to be removed after |
---|
| 2049 | + * all notifier block registrators get converted to be per-netns. |
---|
| 2050 | + */ |
---|
| 2051 | + ret = raw_notifier_call_chain(&net->netdev_chain, val, info); |
---|
| 2052 | + if (ret & NOTIFY_STOP_MASK) |
---|
| 2053 | + return ret; |
---|
1744 | 2054 | return raw_notifier_call_chain(&netdev_chain, val, info); |
---|
| 2055 | +} |
---|
| 2056 | + |
---|
| 2057 | +static int call_netdevice_notifiers_extack(unsigned long val, |
---|
| 2058 | + struct net_device *dev, |
---|
| 2059 | + struct netlink_ext_ack *extack) |
---|
| 2060 | +{ |
---|
| 2061 | + struct netdev_notifier_info info = { |
---|
| 2062 | + .dev = dev, |
---|
| 2063 | + .extack = extack, |
---|
| 2064 | + }; |
---|
| 2065 | + |
---|
| 2066 | + return call_netdevice_notifiers_info(val, &info); |
---|
1745 | 2067 | } |
---|
1746 | 2068 | |
---|
1747 | 2069 | /** |
---|
.. | .. |
---|
1755 | 2077 | |
---|
1756 | 2078 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) |
---|
1757 | 2079 | { |
---|
1758 | | - struct netdev_notifier_info info = { |
---|
1759 | | - .dev = dev, |
---|
1760 | | - }; |
---|
1761 | | - |
---|
1762 | | - return call_netdevice_notifiers_info(val, &info); |
---|
| 2080 | + return call_netdevice_notifiers_extack(val, dev, NULL); |
---|
1763 | 2081 | } |
---|
1764 | 2082 | EXPORT_SYMBOL(call_netdevice_notifiers); |
---|
1765 | 2083 | |
---|
.. | .. |
---|
1987 | 2305 | return false; |
---|
1988 | 2306 | } |
---|
1989 | 2307 | |
---|
| 2308 | +/** |
| 2309 | + * dev_nit_active - return true if any network interface taps are in use |
| 2310 | + * |
| 2311 | + * @dev: network device to check for the presence of taps |
| 2312 | + */ |
| 2313 | +bool dev_nit_active(struct net_device *dev) |
| 2314 | +{ |
| 2315 | + return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all); |
| 2316 | +} |
| 2317 | +EXPORT_SYMBOL_GPL(dev_nit_active); |
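A short sketch of how a transmit path is expected to use the helper (illustrative only; the in-tree caller is xmit_one(), visible further down in this diff):

```c
/* Hypothetical illustration: clone outgoing packets to taps only when a tap
 * is actually listening, instead of open-coding two list_empty() checks.
 */
static void demo_feed_taps(struct sk_buff *skb, struct net_device *dev)
{
	if (dev_nit_active(dev))
		dev_queue_xmit_nit(skb, dev);
}
```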
| 2318 | + |
---|
1990 | 2319 | /* |
---|
1991 | 2320 | * Support routine. Sends outgoing frames to any network |
---|
1992 | 2321 | * taps currently in use. |
---|
.. | .. |
---|
2002 | 2331 | rcu_read_lock(); |
---|
2003 | 2332 | again: |
---|
2004 | 2333 | list_for_each_entry_rcu(ptype, ptype_list, list) { |
---|
| 2334 | + if (ptype->ignore_outgoing) |
---|
| 2335 | + continue; |
---|
| 2336 | + |
---|
2005 | 2337 | /* Never send packets back to the socket |
---|
2006 | 2338 | * they originated from - MvS (miquels@drinkel.ow.org) |
---|
2007 | 2339 | */ |
---|
.. | .. |
---|
2723 | 3055 | sd->output_queue_tailp = &q->next_sched; |
---|
2724 | 3056 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
---|
2725 | 3057 | local_irq_restore(flags); |
---|
2726 | | - preempt_check_resched_rt(); |
---|
2727 | 3058 | } |
---|
2728 | 3059 | |
---|
2729 | 3060 | void __netif_schedule(struct Qdisc *q) |
---|
.. | .. |
---|
2745 | 3076 | void netif_schedule_queue(struct netdev_queue *txq) |
---|
2746 | 3077 | { |
---|
2747 | 3078 | rcu_read_lock(); |
---|
2748 | | - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { |
---|
| 3079 | + if (!netif_xmit_stopped(txq)) { |
---|
2749 | 3080 | struct Qdisc *q = rcu_dereference(txq->qdisc); |
---|
2750 | 3081 | |
---|
2751 | 3082 | __netif_schedule(q); |
---|
.. | .. |
---|
2786 | 3117 | __this_cpu_write(softnet_data.completion_queue, skb); |
---|
2787 | 3118 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
---|
2788 | 3119 | local_irq_restore(flags); |
---|
2789 | | - preempt_check_resched_rt(); |
---|
2790 | 3120 | } |
---|
2791 | 3121 | EXPORT_SYMBOL(__dev_kfree_skb_irq); |
---|
2792 | 3122 | |
---|
.. | .. |
---|
2883 | 3213 | else |
---|
2884 | 3214 | name = netdev_name(dev); |
---|
2885 | 3215 | } |
---|
2886 | | - WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " |
---|
2887 | | - "gso_type=%d ip_summed=%d\n", |
---|
| 3216 | + skb_dump(KERN_WARNING, skb, false); |
---|
| 3217 | + WARN(1, "%s: caps=(%pNF, %pNF)\n", |
---|
2888 | 3218 | name, dev ? &dev->features : &null_features, |
---|
2889 | | - skb->sk ? &skb->sk->sk_route_caps : &null_features, |
---|
2890 | | - skb->len, skb->data_len, skb_shinfo(skb)->gso_size, |
---|
2891 | | - skb_shinfo(skb)->gso_type, skb->ip_summed); |
---|
| 3219 | + skb->sk ? &skb->sk->sk_route_caps : &null_features); |
---|
2892 | 3220 | } |
---|
2893 | 3221 | |
---|
2894 | 3222 | /* |
---|
.. | .. |
---|
2918 | 3246 | } |
---|
2919 | 3247 | |
---|
2920 | 3248 | offset = skb_checksum_start_offset(skb); |
---|
2921 | | - BUG_ON(offset >= skb_headlen(skb)); |
---|
| 3249 | + ret = -EINVAL; |
---|
| 3250 | + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) |
---|
| 3251 | + goto out; |
---|
| 3252 | + |
---|
2922 | 3253 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
---|
2923 | 3254 | |
---|
2924 | 3255 | offset += skb->csum_offset; |
---|
2925 | | - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); |
---|
| 3256 | + if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) |
---|
| 3257 | + goto out; |
---|
2926 | 3258 | |
---|
2927 | | - if (skb_cloned(skb) && |
---|
2928 | | - !skb_clone_writable(skb, offset + sizeof(__sum16))) { |
---|
2929 | | - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
---|
2930 | | - if (ret) |
---|
2931 | | - goto out; |
---|
2932 | | - } |
---|
| 3259 | + ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); |
---|
| 3260 | + if (ret) |
---|
| 3261 | + goto out; |
---|
2933 | 3262 | |
---|
2934 | 3263 | *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; |
---|
2935 | 3264 | out_set_summed: |
---|
.. | .. |
---|
2964 | 3293 | ret = -EINVAL; |
---|
2965 | 3294 | goto out; |
---|
2966 | 3295 | } |
---|
2967 | | - if (skb_cloned(skb) && |
---|
2968 | | - !skb_clone_writable(skb, offset + sizeof(__le32))) { |
---|
2969 | | - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
---|
2970 | | - if (ret) |
---|
2971 | | - goto out; |
---|
2972 | | - } |
---|
| 3296 | + |
---|
| 3297 | + ret = skb_ensure_writable(skb, offset + sizeof(__le32)); |
---|
| 3298 | + if (ret) |
---|
| 3299 | + goto out; |
---|
| 3300 | + |
---|
2973 | 3301 | crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, |
---|
2974 | 3302 | skb->len - start, ~(__u32)0, |
---|
2975 | 3303 | crc32c_csum_stub)); |
---|
.. | .. |
---|
3054 | 3382 | * It may return NULL if the skb requires no segmentation. This is |
---|
3055 | 3383 | * only possible when GSO is used for verifying header integrity. |
---|
3056 | 3384 | * |
---|
3057 | | - * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. |
---|
| 3385 | + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. |
---|
3058 | 3386 | */ |
---|
3059 | 3387 | struct sk_buff *__skb_gso_segment(struct sk_buff *skb, |
---|
3060 | 3388 | netdev_features_t features, bool tx_path) |
---|
.. | .. |
---|
3083 | 3411 | features &= ~NETIF_F_GSO_PARTIAL; |
---|
3084 | 3412 | } |
---|
3085 | 3413 | |
---|
3086 | | - BUILD_BUG_ON(SKB_SGO_CB_OFFSET + |
---|
| 3414 | + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + |
---|
3087 | 3415 | sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); |
---|
3088 | 3416 | |
---|
3089 | 3417 | SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); |
---|
.. | .. |
---|
3094 | 3422 | |
---|
3095 | 3423 | segs = skb_mac_gso_segment(skb, features); |
---|
3096 | 3424 | |
---|
3097 | | - if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) |
---|
| 3425 | + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) |
---|
3098 | 3426 | skb_warn_bad_offload(skb); |
---|
3099 | 3427 | |
---|
3100 | 3428 | return segs; |
---|
.. | .. |
---|
3103 | 3431 | |
---|
3104 | 3432 | /* Take action when hardware reception checksum errors are detected. */ |
---|
3105 | 3433 | #ifdef CONFIG_BUG |
---|
3106 | | -void netdev_rx_csum_fault(struct net_device *dev) |
---|
| 3434 | +void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) |
---|
3107 | 3435 | { |
---|
3108 | 3436 | if (net_ratelimit()) { |
---|
3109 | 3437 | pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); |
---|
| 3438 | + skb_dump(KERN_ERR, skb, true); |
---|
3110 | 3439 | dump_stack(); |
---|
3111 | 3440 | } |
---|
3112 | 3441 | } |
---|
.. | .. |
---|
3156 | 3485 | static netdev_features_t harmonize_features(struct sk_buff *skb, |
---|
3157 | 3486 | netdev_features_t features) |
---|
3158 | 3487 | { |
---|
3159 | | - int tmp; |
---|
3160 | 3488 | __be16 type; |
---|
3161 | 3489 | |
---|
3162 | | - type = skb_network_protocol(skb, &tmp); |
---|
| 3490 | + type = skb_network_protocol(skb, NULL); |
---|
3163 | 3491 | features = net_mpls_features(skb, features, type); |
---|
3164 | 3492 | |
---|
3165 | 3493 | if (skb->ip_summed != CHECKSUM_NONE && |
---|
.. | .. |
---|
3256 | 3584 | unsigned int len; |
---|
3257 | 3585 | int rc; |
---|
3258 | 3586 | |
---|
3259 | | - if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) |
---|
| 3587 | + if (dev_nit_active(dev)) |
---|
3260 | 3588 | dev_queue_xmit_nit(skb, dev); |
---|
3261 | 3589 | |
---|
3262 | 3590 | len = skb->len; |
---|
| 3591 | + PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies); |
---|
3263 | 3592 | trace_net_dev_start_xmit(skb, dev); |
---|
3264 | 3593 | rc = netdev_start_xmit(skb, dev, txq, more); |
---|
3265 | 3594 | trace_net_dev_xmit(skb, rc, dev, len); |
---|
.. | .. |
---|
3276 | 3605 | while (skb) { |
---|
3277 | 3606 | struct sk_buff *next = skb->next; |
---|
3278 | 3607 | |
---|
3279 | | - skb->next = NULL; |
---|
| 3608 | + skb_mark_not_on_list(skb); |
---|
3280 | 3609 | rc = xmit_one(skb, dev, txq, next != NULL); |
---|
3281 | 3610 | if (unlikely(!dev_xmit_complete(rc))) { |
---|
3282 | 3611 | skb->next = next; |
---|
.. | .. |
---|
3376 | 3705 | |
---|
3377 | 3706 | for (; skb != NULL; skb = next) { |
---|
3378 | 3707 | next = skb->next; |
---|
3379 | | - skb->next = NULL; |
---|
| 3708 | + skb_mark_not_on_list(skb); |
---|
3380 | 3709 | |
---|
3381 | 3710 | /* in case skb wont be segmented, point to itself */ |
---|
3382 | 3711 | skb->prev = skb; |
---|
.. | .. |
---|
3407 | 3736 | /* To get more precise estimation of bytes sent on wire, |
---|
3408 | 3737 | * we add to pkt_len the headers size of all segments |
---|
3409 | 3738 | */ |
---|
3410 | | - if (shinfo->gso_size) { |
---|
| 3739 | + if (shinfo->gso_size && skb_transport_header_was_set(skb)) { |
---|
3411 | 3740 | unsigned int hdr_len; |
---|
3412 | 3741 | u16 gso_segs = shinfo->gso_segs; |
---|
3413 | 3742 | |
---|
.. | .. |
---|
3451 | 3780 | qdisc_calculate_pkt_len(skb, q); |
---|
3452 | 3781 | |
---|
3453 | 3782 | if (q->flags & TCQ_F_NOLOCK) { |
---|
3454 | | - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { |
---|
3455 | | - __qdisc_drop(skb, &to_free); |
---|
3456 | | - rc = NET_XMIT_DROP; |
---|
3457 | | - } else { |
---|
3458 | | - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; |
---|
| 3783 | + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; |
---|
| 3784 | + if (likely(!netif_xmit_frozen_or_stopped(txq))) |
---|
3459 | 3785 | qdisc_run(q); |
---|
3460 | | - } |
---|
3461 | 3786 | |
---|
3462 | 3787 | if (unlikely(to_free)) |
---|
3463 | 3788 | kfree_skb_list(to_free); |
---|
.. | .. |
---|
3470 | 3795 | * This permits qdisc->running owner to get the lock more |
---|
3471 | 3796 | * often and dequeue packets faster. |
---|
3472 | 3797 | */ |
---|
3473 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
---|
3474 | | - contended = true; |
---|
3475 | | -#else |
---|
3476 | 3798 | contended = qdisc_is_running(q); |
---|
3477 | | -#endif |
---|
3478 | 3799 | if (unlikely(contended)) |
---|
3479 | 3800 | spin_lock(&q->busylock); |
---|
3480 | 3801 | |
---|
.. | .. |
---|
3557 | 3878 | skb_reset_mac_header(skb); |
---|
3558 | 3879 | __skb_pull(skb, skb_network_offset(skb)); |
---|
3559 | 3880 | skb->pkt_type = PACKET_LOOPBACK; |
---|
3560 | | - skb->ip_summed = CHECKSUM_UNNECESSARY; |
---|
| 3881 | + if (skb->ip_summed == CHECKSUM_NONE) |
---|
| 3882 | + skb->ip_summed = CHECKSUM_UNNECESSARY; |
---|
3561 | 3883 | WARN_ON(!skb_dst(skb)); |
---|
3562 | 3884 | skb_dst_force(skb); |
---|
3563 | 3885 | netif_rx_ni(skb); |
---|
.. | .. |
---|
3576 | 3898 | return skb; |
---|
3577 | 3899 | |
---|
3578 | 3900 | /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ |
---|
| 3901 | + qdisc_skb_cb(skb)->mru = 0; |
---|
3579 | 3902 | mini_qdisc_bstats_cpu_update(miniq, skb); |
---|
3580 | 3903 | |
---|
3581 | 3904 | switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { |
---|
.. | .. |
---|
3676 | 3999 | } |
---|
3677 | 4000 | |
---|
3678 | 4001 | u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, |
---|
3679 | | - struct net_device *sb_dev, |
---|
3680 | | - select_queue_fallback_t fallback) |
---|
| 4002 | + struct net_device *sb_dev) |
---|
3681 | 4003 | { |
---|
3682 | 4004 | return 0; |
---|
3683 | 4005 | } |
---|
3684 | 4006 | EXPORT_SYMBOL(dev_pick_tx_zero); |
---|
3685 | 4007 | |
---|
3686 | 4008 | u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, |
---|
3687 | | - struct net_device *sb_dev, |
---|
3688 | | - select_queue_fallback_t fallback) |
---|
| 4009 | + struct net_device *sb_dev) |
---|
3689 | 4010 | { |
---|
3690 | 4011 | return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; |
---|
3691 | 4012 | } |
---|
3692 | 4013 | EXPORT_SYMBOL(dev_pick_tx_cpu_id); |
---|
3693 | 4014 | |
3694 | | -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, |
3695 | | - struct net_device *sb_dev) |
| 4015 | +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, |
| 4016 | + struct net_device *sb_dev) |
3696 | 4017 | { |
3697 | 4018 | struct sock *sk = skb->sk; |
3698 | 4019 | int queue_index = sk_tx_queue_get(sk); |
.. | .. |
3716 | 4037 | |
3717 | 4038 | return queue_index; |
3718 | 4039 | } |
| 4040 | +EXPORT_SYMBOL(netdev_pick_tx); |
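__netdev_pick_tx() becomes the exported netdev_pick_tx(), and, as the surrounding hunks show, ndo_select_queue loses its select_queue_fallback_t argument. A hedged sketch of a driver-side queue selector under the new signature (hypothetical driver, not from this patch):

```c
/* Hypothetical .ndo_select_queue implementation for the three-argument API. */
static u16 demo_select_queue(struct net_device *dev, struct sk_buff *skb,
			     struct net_device *sb_dev)
{
	if (skb->priority == TC_PRIO_CONTROL)
		return 0;				/* pin control traffic to queue 0 */
	return netdev_pick_tx(dev, skb, sb_dev);	/* default XPS/hash selection */
}
```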
3719 | 4041 | |
---|
3720 | | -struct netdev_queue *netdev_pick_tx(struct net_device *dev, |
---|
3721 | | - struct sk_buff *skb, |
---|
3722 | | - struct net_device *sb_dev) |
---|
| 4042 | +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, |
---|
| 4043 | + struct sk_buff *skb, |
---|
| 4044 | + struct net_device *sb_dev) |
---|
3723 | 4045 | { |
---|
3724 | 4046 | int queue_index = 0; |
---|
3725 | 4047 | |
---|
.. | .. |
---|
3734 | 4056 | const struct net_device_ops *ops = dev->netdev_ops; |
---|
3735 | 4057 | |
---|
3736 | 4058 | if (ops->ndo_select_queue) |
---|
3737 | | - queue_index = ops->ndo_select_queue(dev, skb, sb_dev, |
---|
3738 | | - __netdev_pick_tx); |
---|
| 4059 | + queue_index = ops->ndo_select_queue(dev, skb, sb_dev); |
---|
3739 | 4060 | else |
---|
3740 | | - queue_index = __netdev_pick_tx(dev, skb, sb_dev); |
---|
| 4061 | + queue_index = netdev_pick_tx(dev, skb, sb_dev); |
---|
3741 | 4062 | |
---|
3742 | 4063 | queue_index = netdev_cap_txqueue(dev, queue_index); |
---|
3743 | 4064 | } |
---|
.. | .. |
---|
3781 | 4102 | bool again = false; |
---|
3782 | 4103 | |
---|
3783 | 4104 | skb_reset_mac_header(skb); |
---|
| 4105 | + skb_assert_len(skb); |
---|
3784 | 4106 | |
---|
3785 | 4107 | if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) |
---|
3786 | 4108 | __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); |
---|
.. | .. |
---|
3811 | 4133 | else |
---|
3812 | 4134 | skb_dst_force(skb); |
---|
3813 | 4135 | |
---|
3814 | | - txq = netdev_pick_tx(dev, skb, sb_dev); |
---|
| 4136 | + txq = netdev_core_pick_tx(dev, skb, sb_dev); |
---|
3815 | 4137 | q = rcu_dereference_bh(txq->qdisc); |
---|
3816 | 4138 | |
---|
3817 | 4139 | trace_net_dev_queue(skb); |
---|
.. | .. |
---|
3835 | 4157 | if (dev->flags & IFF_UP) { |
---|
3836 | 4158 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
---|
3837 | 4159 | |
---|
3838 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
---|
3839 | | - if (READ_ONCE(txq->xmit_lock_owner) != current) { |
---|
3840 | | -#else |
---|
3841 | 4160 | /* Other cpus might concurrently change txq->xmit_lock_owner |
---|
3842 | 4161 | * to -1 or to their cpu id, but not to our id. |
---|
3843 | 4162 | */ |
---|
3844 | 4163 | if (READ_ONCE(txq->xmit_lock_owner) != cpu) { |
---|
3845 | | -#endif |
---|
3846 | 4164 | if (dev_xmit_recursion()) |
---|
3847 | 4165 | goto recursion_alert; |
---|
3848 | 4166 | |
---|
.. | .. |
---|
3850 | 4168 | if (!skb) |
---|
3851 | 4169 | goto out; |
---|
3852 | 4170 | |
---|
| 4171 | + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); |
---|
3853 | 4172 | HARD_TX_LOCK(dev, txq, cpu); |
---|
3854 | 4173 | |
---|
3855 | 4174 | if (!netif_xmit_stopped(txq)) { |
---|
.. | .. |
---|
3897 | 4216 | } |
---|
3898 | 4217 | EXPORT_SYMBOL(dev_queue_xmit_accel); |
---|
3899 | 4218 | |
---|
3900 | | -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) |
---|
| 4219 | +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) |
---|
3901 | 4220 | { |
---|
3902 | 4221 | struct net_device *dev = skb->dev; |
---|
3903 | 4222 | struct sk_buff *orig_skb = skb; |
---|
.. | .. |
---|
3915 | 4234 | |
---|
3916 | 4235 | skb_set_queue_mapping(skb, queue_id); |
---|
3917 | 4236 | txq = skb_get_tx_queue(dev, skb); |
---|
| 4237 | + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); |
---|
3918 | 4238 | |
---|
3919 | 4239 | local_bh_disable(); |
---|
3920 | 4240 | |
---|
.. | .. |
---|
3926 | 4246 | dev_xmit_recursion_dec(); |
---|
3927 | 4247 | |
---|
3928 | 4248 | local_bh_enable(); |
---|
3929 | | - |
---|
3930 | | - if (!dev_xmit_complete(ret)) |
---|
3931 | | - kfree_skb(skb); |
---|
3932 | | - |
---|
3933 | 4249 | return ret; |
---|
3934 | 4250 | drop: |
---|
3935 | 4251 | atomic_long_inc(&dev->tx_dropped); |
---|
3936 | 4252 | kfree_skb_list(skb); |
---|
3937 | 4253 | return NET_XMIT_DROP; |
---|
3938 | 4254 | } |
---|
3939 | | -EXPORT_SYMBOL(dev_direct_xmit); |
---|
| 4255 | +EXPORT_SYMBOL(__dev_direct_xmit); |
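dev_direct_xmit() is renamed to __dev_direct_xmit() and no longer frees skbs the driver did not consume. A hedged sketch of the thin wrapper callers are expected to use instead (the real one is assumed to live in include/linux/netdevice.h; shown here only for illustration):

```c
/* Hypothetical illustration of the wrapper preserving the old behaviour. */
static inline int demo_dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
	int ret = __dev_direct_xmit(skb, queue_id);

	if (!dev_xmit_complete(ret))
		kfree_skb(skb);	/* free only when the driver did not consume it */
	return ret;
}
```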
---|
3940 | 4256 | |
---|
3941 | 4257 | /************************************************************************* |
---|
3942 | 4258 | * Receiver routines |
---|
.. | .. |
---|
3954 | 4270 | int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ |
---|
3955 | 4271 | int dev_rx_weight __read_mostly = 64; |
---|
3956 | 4272 | int dev_tx_weight __read_mostly = 64; |
---|
| 4273 | +/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ |
---|
| 4274 | +int gro_normal_batch __read_mostly = 8; |
---|
3957 | 4275 | |
---|
3958 | 4276 | /* Called with irq disabled */ |
---|
3959 | 4277 | static inline void ____napi_schedule(struct softnet_data *sd, |
---|
.. | .. |
---|
3971 | 4289 | u32 rps_cpu_mask __read_mostly; |
---|
3972 | 4290 | EXPORT_SYMBOL(rps_cpu_mask); |
---|
3973 | 4291 | |
---|
3974 | | -struct static_key rps_needed __read_mostly; |
---|
| 4292 | +struct static_key_false rps_needed __read_mostly; |
---|
3975 | 4293 | EXPORT_SYMBOL(rps_needed); |
---|
3976 | | -struct static_key rfs_needed __read_mostly; |
---|
| 4294 | +struct static_key_false rfs_needed __read_mostly; |
---|
3977 | 4295 | EXPORT_SYMBOL(rfs_needed); |
---|
3978 | 4296 | |
---|
3979 | 4297 | static struct rps_dev_flow * |
---|
.. | .. |
---|
4204 | 4522 | struct softnet_data *sd; |
---|
4205 | 4523 | unsigned int old_flow, new_flow; |
---|
4206 | 4524 | |
---|
4207 | | - if (qlen < (netdev_max_backlog >> 1)) |
---|
| 4525 | + if (qlen < (READ_ONCE(netdev_max_backlog) >> 1)) |
---|
4208 | 4526 | return false; |
---|
4209 | 4527 | |
---|
4210 | 4528 | sd = this_cpu_ptr(&softnet_data); |
---|
.. | .. |
---|
4252 | 4570 | if (!netif_running(skb->dev)) |
---|
4253 | 4571 | goto drop; |
---|
4254 | 4572 | qlen = skb_queue_len(&sd->input_pkt_queue); |
---|
4255 | | - if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { |
---|
| 4573 | + if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) { |
---|
4256 | 4574 | if (qlen) { |
---|
4257 | 4575 | enqueue: |
---|
4258 | 4576 | __skb_queue_tail(&sd->input_pkt_queue, skb); |
---|
.. | .. |
---|
4277 | 4595 | rps_unlock(sd); |
---|
4278 | 4596 | |
---|
4279 | 4597 | local_irq_restore(flags); |
---|
4280 | | - preempt_check_resched_rt(); |
---|
4281 | 4598 | |
---|
4282 | 4599 | atomic_long_inc(&skb->dev->rx_dropped); |
---|
4283 | 4600 | kfree_skb(skb); |
---|
.. | .. |
---|
4323 | 4640 | /* Reinjected packets coming from act_mirred or similar should |
---|
4324 | 4641 | * not get XDP generic processing. |
---|
4325 | 4642 | */ |
---|
4326 | | - if (skb_is_tc_redirected(skb)) |
---|
| 4643 | + if (skb_is_redirected(skb)) |
---|
4327 | 4644 | return XDP_PASS; |
---|
4328 | 4645 | |
---|
4329 | 4646 | /* XDP packets must be linear and must have sufficient headroom |
---|
.. | .. |
---|
4355 | 4672 | xdp->data_meta = xdp->data; |
---|
4356 | 4673 | xdp->data_end = xdp->data + hlen; |
---|
4357 | 4674 | xdp->data_hard_start = skb->data - skb_headroom(skb); |
---|
| 4675 | + |
---|
| 4676 | + /* SKB "head" area always have tailroom for skb_shared_info */ |
---|
| 4677 | + xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; |
---|
| 4678 | + xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
---|
| 4679 | + |
---|
4358 | 4680 | orig_data_end = xdp->data_end; |
---|
4359 | 4681 | orig_data = xdp->data; |
---|
4360 | 4682 | eth = (struct ethhdr *)xdp->data; |
---|
.. | .. |
---|
4378 | 4700 | skb_reset_network_header(skb); |
---|
4379 | 4701 | } |
---|
4380 | 4702 | |
---|
4381 | | - /* check if bpf_xdp_adjust_tail was used. it can only "shrink" |
---|
4382 | | - * pckt. |
---|
4383 | | - */ |
---|
4384 | | - off = orig_data_end - xdp->data_end; |
---|
| 4703 | + /* check if bpf_xdp_adjust_tail was used */ |
---|
| 4704 | + off = xdp->data_end - orig_data_end; |
---|
4385 | 4705 | if (off != 0) { |
---|
4386 | 4706 | skb_set_tail_pointer(skb, xdp->data_end - xdp->data); |
---|
4387 | | - skb->len -= off; |
---|
4388 | | - |
---|
| 4707 | + skb->len += off; /* positive on grow, negative on shrink */ |
---|
4389 | 4708 | } |
---|
4390 | 4709 | |
---|
4391 | 4710 | /* check if XDP changed eth hdr such SKB needs update */ |
---|
.. | .. |
---|
4408 | 4727 | break; |
---|
4409 | 4728 | default: |
---|
4410 | 4729 | bpf_warn_invalid_xdp_action(act); |
---|
4411 | | - /* fall through */ |
---|
| 4730 | + fallthrough; |
---|
4412 | 4731 | case XDP_ABORTED: |
---|
4413 | 4732 | trace_xdp_exception(skb->dev, xdp_prog, act); |
---|
4414 | | - /* fall through */ |
---|
| 4733 | + fallthrough; |
---|
4415 | 4734 | case XDP_DROP: |
---|
4416 | 4735 | do_drop: |
---|
4417 | 4736 | kfree_skb(skb); |
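The generic-XDP fixes above (frame_sz initialisation and the corrected data_end delta) mean bpf_xdp_adjust_tail() can both shrink and grow frames under generic XDP, with skb->len updated accordingly. A hedged BPF-side sketch (hypothetical program, assuming a libbpf-style build):

```c
/* Hypothetical XDP program: a positive delta grows the tail, negative shrinks. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int demo_grow_tail(struct xdp_md *ctx)
{
	if (bpf_xdp_adjust_tail(ctx, 4))	/* try to add 4 bytes at the tail */
		return XDP_PASS;		/* no room: leave the frame as-is */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
```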
---|
.. | .. |
---|
4431 | 4750 | bool free_skb = true; |
---|
4432 | 4751 | int cpu, rc; |
---|
4433 | 4752 | |
---|
4434 | | - txq = netdev_pick_tx(dev, skb, NULL); |
---|
| 4753 | + txq = netdev_core_pick_tx(dev, skb, NULL); |
---|
4435 | 4754 | cpu = smp_processor_id(); |
---|
4436 | 4755 | HARD_TX_LOCK(dev, txq, cpu); |
---|
4437 | 4756 | if (!netif_xmit_stopped(txq)) { |
---|
.. | .. |
---|
4445 | 4764 | kfree_skb(skb); |
---|
4446 | 4765 | } |
---|
4447 | 4766 | } |
---|
4448 | | -EXPORT_SYMBOL_GPL(generic_xdp_tx); |
---|
4449 | 4767 | |
---|
4450 | 4768 | static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); |
---|
4451 | 4769 | |
---|
.. | .. |
---|
4483 | 4801 | { |
---|
4484 | 4802 | int ret; |
---|
4485 | 4803 | |
---|
4486 | | - net_timestamp_check(netdev_tstamp_prequeue, skb); |
---|
| 4804 | + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); |
---|
4487 | 4805 | |
---|
4488 | 4806 | trace_netif_rx(skb); |
---|
4489 | 4807 | |
---|
4490 | 4808 | #ifdef CONFIG_RPS |
---|
4491 | | - if (static_key_false(&rps_needed)) { |
---|
| 4809 | + if (static_branch_unlikely(&rps_needed)) { |
---|
4492 | 4810 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
---|
4493 | 4811 | int cpu; |
---|
4494 | 4812 | |
---|
4495 | | - migrate_disable(); |
---|
| 4813 | + preempt_disable(); |
---|
4496 | 4814 | rcu_read_lock(); |
---|
4497 | 4815 | |
---|
4498 | 4816 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
---|
.. | .. |
---|
4502 | 4820 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
---|
4503 | 4821 | |
---|
4504 | 4822 | rcu_read_unlock(); |
---|
4505 | | - migrate_enable(); |
---|
| 4823 | + preempt_enable(); |
---|
4506 | 4824 | } else |
---|
4507 | 4825 | #endif |
---|
4508 | 4826 | { |
---|
4509 | 4827 | unsigned int qtail; |
---|
4510 | 4828 | |
---|
4511 | | - ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); |
---|
4512 | | - put_cpu_light(); |
---|
| 4829 | + ret = enqueue_to_backlog(skb, get_cpu(), &qtail); |
---|
| 4830 | + put_cpu(); |
---|
4513 | 4831 | } |
---|
4514 | 4832 | return ret; |
---|
4515 | 4833 | } |
---|
.. | .. |
---|
4531 | 4849 | |
---|
4532 | 4850 | int netif_rx(struct sk_buff *skb) |
---|
4533 | 4851 | { |
---|
| 4852 | + int ret; |
---|
| 4853 | + |
---|
4534 | 4854 | trace_netif_rx_entry(skb); |
---|
4535 | 4855 | |
---|
4536 | | - return netif_rx_internal(skb); |
---|
| 4856 | + ret = netif_rx_internal(skb); |
---|
| 4857 | + trace_netif_rx_exit(ret); |
---|
| 4858 | + |
---|
| 4859 | + return ret; |
---|
4537 | 4860 | } |
---|
4538 | 4861 | EXPORT_SYMBOL(netif_rx); |
---|
4539 | 4862 | |
---|
.. | .. |
---|
4543 | 4866 | |
---|
4544 | 4867 | trace_netif_rx_ni_entry(skb); |
---|
4545 | 4868 | |
---|
4546 | | - local_bh_disable(); |
---|
| 4869 | + preempt_disable(); |
---|
4547 | 4870 | err = netif_rx_internal(skb); |
---|
4548 | | - local_bh_enable(); |
---|
| 4871 | + if (local_softirq_pending()) |
---|
| 4872 | + do_softirq(); |
---|
| 4873 | + preempt_enable(); |
---|
| 4874 | + trace_netif_rx_ni_exit(err); |
---|
4549 | 4875 | |
---|
4550 | 4876 | return err; |
---|
4551 | 4877 | } |
---|
4552 | 4878 | EXPORT_SYMBOL(netif_rx_ni); |
---|
| 4879 | + |
---|
| 4880 | +int netif_rx_any_context(struct sk_buff *skb) |
---|
| 4881 | +{ |
---|
| 4882 | + /* |
---|
| 4883 | + * If invoked from contexts which do not invoke bottom half |
---|
| 4884 | + * processing either at return from interrupt or when softrqs are |
---|
| 4885 | + * reenabled, use netif_rx_ni() which invokes bottomhalf processing |
---|
| 4886 | + * directly. |
---|
| 4887 | + */ |
---|
| 4888 | + if (in_interrupt()) |
---|
| 4889 | + return netif_rx(skb); |
---|
| 4890 | + else |
---|
| 4891 | + return netif_rx_ni(skb); |
---|
| 4892 | +} |
---|
| 4893 | +EXPORT_SYMBOL(netif_rx_any_context); |
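netif_rx_any_context() wraps the in_interrupt() check shown above so drivers do not have to pick between netif_rx() and netif_rx_ni() themselves. A hedged sketch of a driver receive path using it (hypothetical, not from this patch):

```c
/* Hypothetical illustration: deliver a received buffer from any context. */
static void demo_rx(struct net_device *dev, const void *buf, unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len);

	if (!skb)
		return;
	skb_put_data(skb, buf, len);
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx_any_context(skb);	/* picks netif_rx() or netif_rx_ni() */
}
```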
---|
4553 | 4894 | |
---|
4554 | 4895 | static __latent_entropy void net_tx_action(struct softirq_action *h) |
---|
4555 | 4896 | { |
---|
.. | .. |
---|
4592 | 4933 | sd->output_queue_tailp = &sd->output_queue; |
---|
4593 | 4934 | local_irq_enable(); |
---|
4594 | 4935 | |
---|
| 4936 | + rcu_read_lock(); |
---|
| 4937 | + |
---|
4595 | 4938 | while (head) { |
---|
4596 | 4939 | struct Qdisc *q = head; |
---|
4597 | 4940 | spinlock_t *root_lock = NULL; |
---|
4598 | 4941 | |
---|
4599 | 4942 | head = head->next_sched; |
---|
4600 | 4943 | |
---|
4601 | | - if (!(q->flags & TCQ_F_NOLOCK)) { |
---|
4602 | | - root_lock = qdisc_lock(q); |
---|
4603 | | - spin_lock(root_lock); |
---|
4604 | | - } |
---|
4605 | 4944 | /* We need to make sure head->next_sched is read |
---|
4606 | 4945 | * before clearing __QDISC_STATE_SCHED |
---|
4607 | 4946 | */ |
---|
4608 | 4947 | smp_mb__before_atomic(); |
---|
| 4948 | + |
---|
| 4949 | + if (!(q->flags & TCQ_F_NOLOCK)) { |
---|
| 4950 | + root_lock = qdisc_lock(q); |
---|
| 4951 | + spin_lock(root_lock); |
---|
| 4952 | + } else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, |
---|
| 4953 | + &q->state))) { |
---|
| 4954 | + /* There is a synchronize_net() between |
---|
| 4955 | + * STATE_DEACTIVATED flag being set and |
---|
| 4956 | + * qdisc_reset()/some_qdisc_is_busy() in |
---|
| 4957 | + * dev_deactivate(), so we can safely bail out |
---|
| 4958 | + * early here to avoid data race between |
---|
| 4959 | + * qdisc_deactivate() and some_qdisc_is_busy() |
---|
| 4960 | + * for lockless qdisc. |
---|
| 4961 | + */ |
---|
| 4962 | + clear_bit(__QDISC_STATE_SCHED, &q->state); |
---|
| 4963 | + continue; |
---|
| 4964 | + } |
---|
| 4965 | + |
---|
4609 | 4966 | clear_bit(__QDISC_STATE_SCHED, &q->state); |
---|
4610 | 4967 | qdisc_run(q); |
---|
4611 | 4968 | if (root_lock) |
---|
4612 | 4969 | spin_unlock(root_lock); |
---|
4613 | 4970 | } |
---|
| 4971 | + |
---|
| 4972 | + rcu_read_unlock(); |
---|
4614 | 4973 | } |
---|
4615 | 4974 | |
---|
4616 | 4975 | xfrm_dev_backlog(sd); |
---|
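In the reworked net_tx_action() loop, a lockless qdisc that has already been marked __QDISC_STATE_DEACTIVATED is skipped: its __QDISC_STATE_SCHED bit is cleared and the loop moves on, relying on the synchronize_net() in dev_deactivate() described in the comment. Here is a hedged, standalone model of that check-flag-then-skip pattern using C11 atomics; `work_item` and its flags are invented for the sketch.

```c
#include <stdatomic.h>
#include <stdio.h>

#define FLAG_SCHEDULED   (1u << 0)
#define FLAG_DEACTIVATED (1u << 1)

struct work_item {
	atomic_uint flags;
	const char *name;
	struct work_item *next;
};

/* Walk a detached list of scheduled items: items already marked
 * deactivated only get their SCHEDULED bit cleared and are skipped,
 * everything else is processed.
 */
static void run_scheduled(struct work_item *head)
{
	while (head) {
		struct work_item *item = head;

		head = head->next;

		if (atomic_load(&item->flags) & FLAG_DEACTIVATED) {
			/* bail out early, like the lockless-qdisc case */
			atomic_fetch_and(&item->flags, ~FLAG_SCHEDULED);
			continue;
		}
		atomic_fetch_and(&item->flags, ~FLAG_SCHEDULED);
		printf("running %s\n", item->name);
	}
}

int main(void)
{
	struct work_item b = { FLAG_SCHEDULED | FLAG_DEACTIVATED, "b", NULL };
	struct work_item a = { FLAG_SCHEDULED, "a", &b };

	run_scheduled(&a);
	return 0;
}
```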
.. | .. |
---|
4625 | 4984 | |
---|
4626 | 4985 | static inline struct sk_buff * |
---|
4627 | 4986 | sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, |
---|
4628 | | - struct net_device *orig_dev) |
---|
| 4987 | + struct net_device *orig_dev, bool *another) |
---|
4629 | 4988 | { |
---|
4630 | 4989 | #ifdef CONFIG_NET_CLS_ACT |
---|
4631 | 4990 | struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); |
---|
.. | .. |
---|
4645 | 5004 | } |
---|
4646 | 5005 | |
---|
4647 | 5006 | qdisc_skb_cb(skb)->pkt_len = skb->len; |
---|
| 5007 | + qdisc_skb_cb(skb)->mru = 0; |
---|
4648 | 5008 | skb->tc_at_ingress = 1; |
---|
4649 | 5009 | mini_qdisc_bstats_cpu_update(miniq, skb); |
---|
4650 | 5010 | |
---|
4651 | | - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { |
---|
| 5011 | + switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, |
---|
| 5012 | + &cl_res, false)) { |
---|
4652 | 5013 | case TC_ACT_OK: |
---|
4653 | 5014 | case TC_ACT_RECLASSIFY: |
---|
4654 | 5015 | skb->tc_index = TC_H_MIN(cl_res.classid); |
---|
.. | .. |
---|
4668 | 5029 | * redirecting to another netdev |
---|
4669 | 5030 | */ |
---|
4670 | 5031 | __skb_push(skb, skb->mac_len); |
---|
4671 | | - skb_do_redirect(skb); |
---|
| 5032 | + if (skb_do_redirect(skb) == -EAGAIN) { |
---|
| 5033 | + __skb_pull(skb, skb->mac_len); |
---|
| 5034 | + *another = true; |
---|
| 5035 | + break; |
---|
| 5036 | + } |
---|
4672 | 5037 | return NULL; |
---|
4673 | | - case TC_ACT_REINSERT: |
---|
4674 | | - /* this does not scrub the packet, and updates stats on error */ |
---|
4675 | | - skb_tc_reinsert(skb, &cl_res); |
---|
| 5038 | + case TC_ACT_CONSUMED: |
---|
4676 | 5039 | return NULL; |
---|
4677 | 5040 | default: |
---|
4678 | 5041 | break; |
---|
.. | .. |
---|
4772 | 5135 | static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, |
---|
4773 | 5136 | int *ret, struct net_device *orig_dev) |
---|
4774 | 5137 | { |
---|
4775 | | -#ifdef CONFIG_NETFILTER_INGRESS |
---|
4776 | 5138 | if (nf_hook_ingress_active(skb)) { |
---|
4777 | 5139 | int ingress_retval; |
---|
4778 | 5140 | |
---|
.. | .. |
---|
4786 | 5148 | rcu_read_unlock(); |
---|
4787 | 5149 | return ingress_retval; |
---|
4788 | 5150 | } |
---|
4789 | | -#endif /* CONFIG_NETFILTER_INGRESS */ |
---|
4790 | 5151 | return 0; |
---|
4791 | 5152 | } |
---|
4792 | 5153 | |
---|
.. | .. |
---|
4801 | 5162 | int ret = NET_RX_DROP; |
---|
4802 | 5163 | __be16 type; |
---|
4803 | 5164 | |
---|
4804 | | - net_timestamp_check(!netdev_tstamp_prequeue, skb); |
---|
| 5165 | + net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); |
---|
4805 | 5166 | |
---|
4806 | 5167 | trace_netif_receive_skb(skb); |
---|
4807 | 5168 | |
---|
.. | .. |
---|
4861 | 5222 | skip_taps: |
---|
4862 | 5223 | #ifdef CONFIG_NET_INGRESS |
---|
4863 | 5224 | if (static_branch_unlikely(&ingress_needed_key)) { |
---|
4864 | | - skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); |
---|
| 5225 | + bool another = false; |
---|
| 5226 | + |
---|
| 5227 | + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, |
---|
| 5228 | + &another); |
---|
| 5229 | + if (another) |
---|
| 5230 | + goto another_round; |
---|
4865 | 5231 | if (!skb) |
---|
4866 | 5232 | goto out; |
---|
4867 | 5233 | |
---|
.. | .. |
---|
4869 | 5235 | goto out; |
---|
4870 | 5236 | } |
---|
4871 | 5237 | #endif |
---|
4872 | | - skb_reset_tc(skb); |
---|
| 5238 | + skb_reset_redirect(skb); |
---|
4873 | 5239 | skip_classify: |
---|
4874 | 5240 | if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) |
---|
4875 | 5241 | goto drop; |
---|
.. | .. |
---|
4906 | 5272 | } |
---|
4907 | 5273 | } |
---|
4908 | 5274 | |
---|
4909 | | - if (unlikely(skb_vlan_tag_present(skb))) { |
---|
4910 | | - if (skb_vlan_tag_get_id(skb)) |
---|
| 5275 | + if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) { |
---|
| 5276 | +check_vlan_id: |
---|
| 5277 | + if (skb_vlan_tag_get_id(skb)) { |
---|
| 5278 | + /* VLAN ID is non-zero and vlan_do_receive() above couldn't 
---|
| 5279 | + * find a VLAN device. 
---|
| 5280 | + */ |
---|
4911 | 5281 | skb->pkt_type = PACKET_OTHERHOST; |
---|
| 5282 | + } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || |
---|
| 5283 | + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { |
---|
| 5284 | + /* Outer header is 802.1P with vlan 0, inner header is |
---|
| 5285 | + * 802.1Q or 802.1AD and vlan_do_receive() above could |
---|
| 5286 | + * not find vlan dev for vlan id 0. |
---|
| 5287 | + */ |
---|
| 5288 | + __vlan_hwaccel_clear_tag(skb); |
---|
| 5289 | + skb = skb_vlan_untag(skb); |
---|
| 5290 | + if (unlikely(!skb)) |
---|
| 5291 | + goto out; |
---|
| 5292 | + if (vlan_do_receive(&skb)) |
---|
| 5293 | + /* After stripping off the 802.1P header with VLAN 0, 
---|
| 5294 | + * a VLAN device was found for the inner header. 
---|
| 5295 | + */ |
---|
| 5296 | + goto another_round; |
---|
| 5297 | + else if (unlikely(!skb)) |
---|
| 5298 | + goto out; |
---|
| 5299 | + else |
---|
| 5300 | + /* We have stripped the outer 802.1P VLAN 0 header 
---|
| 5301 | + * but could not find a VLAN device. 
---|
| 5302 | + * Check the VLAN ID again to set OTHERHOST. 
---|
| 5303 | + */ |
---|
| 5304 | + goto check_vlan_id; |
---|
| 5305 | + } |
---|
4912 | 5306 | /* Note: we might in the future use prio bits |
---|
4913 | 5307 | * and set skb->priority like in vlan_do_receive() |
---|
4914 | 5308 | * For the time being, just ignore Priority Code Point |
---|
4915 | 5309 | */ |
---|
4916 | | - skb->vlan_tci = 0; |
---|
| 5310 | + __vlan_hwaccel_clear_tag(skb); |
---|
4917 | 5311 | } |
---|
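The new VLAN branch strips an outer 802.1Q/802.1AD header whose VLAN ID is 0 and retries the lookup on the inner header before falling back to PACKET_OTHERHOST. Below is a byte-level sketch of that untag-and-retry flow over a raw Ethernet frame buffer, assuming the usual 12 bytes of MAC addresses followed by a 4-byte tag; it models the header surgery only, not the kernel's skb handling.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VLAN_HLEN    4
#define ETH_P_8021Q  0x8100
#define ETH_P_8021AD 0x88A8

static uint16_t get_be16(const uint8_t *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

/* Strip outer 802.1Q/802.1AD tags whose VLAN ID is 0 and re-check the
 * inner header; returns the first real VLAN ID found, or -1 if the frame
 * ends up untagged.
 */
static int resolve_vlan_id(uint8_t *frame, size_t *len)
{
	while (*len >= 12 + VLAN_HLEN + 2) {
		uint16_t tpid = get_be16(frame + 12);
		uint16_t vid;

		if (tpid != ETH_P_8021Q && tpid != ETH_P_8021AD)
			return -1;                    /* untagged        */
		vid = get_be16(frame + 14) & 0x0FFF;
		if (vid != 0)
			return vid;                   /* real VLAN found */
		/* VID 0: drop this tag and look at the inner header */
		memmove(frame + 12, frame + 12 + VLAN_HLEN,
			*len - (12 + VLAN_HLEN));
		*len -= VLAN_HLEN;
	}
	return -1;
}

int main(void)
{
	uint8_t frame[] = {
		/* destination and source MAC addresses */
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
		0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
		0x81, 0x00, 0x00, 0x00,   /* outer 802.1Q tag, VID 0 */
		0x81, 0x00, 0x00, 0x05,   /* inner 802.1Q tag, VID 5 */
		0x08, 0x00                /* IPv4 ethertype          */
	};
	size_t len = sizeof(frame);

	printf("resolved VLAN ID: %d\n", resolve_vlan_id(frame, &len));
	return 0;
}
```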
4918 | 5312 | |
---|
4919 | 5313 | type = skb->protocol; |
---|
.. | .. |
---|
4969 | 5363 | |
---|
4970 | 5364 | ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); |
---|
4971 | 5365 | if (pt_prev) |
---|
4972 | | - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
---|
| 5366 | + ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, |
---|
| 5367 | + skb->dev, pt_prev, orig_dev); |
---|
4973 | 5368 | return ret; |
---|
4974 | 5369 | } |
---|
4975 | 5370 | |
---|
.. | .. |
---|
4979 | 5374 | * |
---|
4980 | 5375 | * More direct receive version of netif_receive_skb(). It should |
---|
4981 | 5376 | * only be used by callers that have a need to skip RPS and Generic XDP. |
---|
4982 | | - * Caller must also take care of handling if (page_is_)pfmemalloc. |
---|
| 5377 | + * Caller must also take care of handling if ``(page_is_)pfmemalloc``. |
---|
4983 | 5378 | * |
---|
4984 | 5379 | * This function may only be called from softirq context and interrupts |
---|
4985 | 5380 | * should be enabled. |
---|
.. | .. |
---|
5011 | 5406 | if (list_empty(head)) |
---|
5012 | 5407 | return; |
---|
5013 | 5408 | if (pt_prev->list_func != NULL) |
---|
5014 | | - pt_prev->list_func(head, pt_prev, orig_dev); |
---|
| 5409 | + INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv, |
---|
| 5410 | + ip_list_rcv, head, pt_prev, orig_dev); |
---|
5015 | 5411 | else |
---|
5016 | 5412 | list_for_each_entry_safe(skb, next, head, list) { |
---|
5017 | 5413 | skb_list_del_init(skb); |
---|
.. | .. |
---|
5122 | 5518 | struct bpf_prog *new = xdp->prog; |
---|
5123 | 5519 | int ret = 0; |
---|
5124 | 5520 | |
---|
| 5521 | + if (new) { |
---|
| 5522 | + u32 i; |
---|
| 5523 | + |
---|
| 5524 | + mutex_lock(&new->aux->used_maps_mutex); |
---|
| 5525 | + |
---|
| 5526 | + /* generic XDP does not work with DEVMAPs that can |
---|
| 5527 | + * have a bpf_prog installed on an entry |
---|
| 5528 | + */ |
---|
| 5529 | + for (i = 0; i < new->aux->used_map_cnt; i++) { |
---|
| 5530 | + if (dev_map_can_have_prog(new->aux->used_maps[i]) || |
---|
| 5531 | + cpu_map_prog_allowed(new->aux->used_maps[i])) { |
---|
| 5532 | + mutex_unlock(&new->aux->used_maps_mutex); |
---|
| 5533 | + return -EINVAL; |
---|
| 5534 | + } |
---|
| 5535 | + } |
---|
| 5536 | + |
---|
| 5537 | + mutex_unlock(&new->aux->used_maps_mutex); |
---|
| 5538 | + } |
---|
| 5539 | + |
---|
5125 | 5540 | switch (xdp->command) { |
---|
5126 | 5541 | case XDP_SETUP_PROG: |
---|
5127 | 5542 | rcu_assign_pointer(dev->xdp_prog, new); |
---|
.. | .. |
---|
5137 | 5552 | } |
---|
5138 | 5553 | break; |
---|
5139 | 5554 | |
---|
5140 | | - case XDP_QUERY_PROG: |
---|
5141 | | - xdp->prog_id = old ? old->aux->id : 0; |
---|
5142 | | - break; |
---|
5143 | | - |
---|
5144 | 5555 | default: |
---|
5145 | 5556 | ret = -EINVAL; |
---|
5146 | 5557 | break; |
---|
.. | .. |
---|
5153 | 5564 | { |
---|
5154 | 5565 | int ret; |
---|
5155 | 5566 | |
---|
5156 | | - net_timestamp_check(netdev_tstamp_prequeue, skb); |
---|
| 5567 | + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); |
---|
5157 | 5568 | |
---|
5158 | 5569 | if (skb_defer_rx_timestamp(skb)) |
---|
5159 | 5570 | return NET_RX_SUCCESS; |
---|
5160 | 5571 | |
---|
5161 | 5572 | rcu_read_lock(); |
---|
5162 | 5573 | #ifdef CONFIG_RPS |
---|
5163 | | - if (static_key_false(&rps_needed)) { |
---|
| 5574 | + if (static_branch_unlikely(&rps_needed)) { |
---|
5164 | 5575 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
---|
5165 | 5576 | int cpu = get_rps_cpu(skb->dev, skb, &rflow); |
---|
5166 | 5577 | |
---|
.. | .. |
---|
5183 | 5594 | |
---|
5184 | 5595 | INIT_LIST_HEAD(&sublist); |
---|
5185 | 5596 | list_for_each_entry_safe(skb, next, head, list) { |
---|
5186 | | - net_timestamp_check(netdev_tstamp_prequeue, skb); |
---|
| 5597 | + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); |
---|
5187 | 5598 | skb_list_del_init(skb); |
---|
5188 | 5599 | if (!skb_defer_rx_timestamp(skb)) |
---|
5189 | 5600 | list_add_tail(&skb->list, &sublist); |
---|
.. | .. |
---|
5192 | 5603 | |
---|
5193 | 5604 | rcu_read_lock(); |
---|
5194 | 5605 | #ifdef CONFIG_RPS |
---|
5195 | | - if (static_key_false(&rps_needed)) { |
---|
| 5606 | + if (static_branch_unlikely(&rps_needed)) { |
---|
5196 | 5607 | list_for_each_entry_safe(skb, next, head, list) { |
---|
5197 | 5608 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
---|
5198 | 5609 | int cpu = get_rps_cpu(skb->dev, skb, &rflow); |
---|
.. | .. |
---|
5226 | 5637 | */ |
---|
5227 | 5638 | int netif_receive_skb(struct sk_buff *skb) |
---|
5228 | 5639 | { |
---|
| 5640 | + int ret; |
---|
| 5641 | + |
---|
5229 | 5642 | trace_netif_receive_skb_entry(skb); |
---|
5230 | 5643 | |
---|
5231 | | - return netif_receive_skb_internal(skb); |
---|
| 5644 | + ret = netif_receive_skb_internal(skb); |
---|
| 5645 | + trace_netif_receive_skb_exit(ret); |
---|
| 5646 | + |
---|
| 5647 | + return ret; |
---|
5232 | 5648 | } |
---|
5233 | 5649 | EXPORT_SYMBOL(netif_receive_skb); |
---|
5234 | 5650 | |
---|
.. | .. |
---|
5248 | 5664 | |
---|
5249 | 5665 | if (list_empty(head)) |
---|
5250 | 5666 | return; |
---|
5251 | | - list_for_each_entry(skb, head, list) |
---|
5252 | | - trace_netif_receive_skb_list_entry(skb); |
---|
| 5667 | + if (trace_netif_receive_skb_list_entry_enabled()) { |
---|
| 5668 | + list_for_each_entry(skb, head, list) |
---|
| 5669 | + trace_netif_receive_skb_list_entry(skb); |
---|
| 5670 | + } |
---|
5253 | 5671 | netif_receive_skb_list_internal(head); |
---|
| 5672 | + trace_netif_receive_skb_list_exit(0); |
---|
5254 | 5673 | } |
---|
5255 | 5674 | EXPORT_SYMBOL(netif_receive_skb_list); |
---|
5256 | 5675 | |
---|
5257 | | -DEFINE_PER_CPU(struct work_struct, flush_works); |
---|
| 5676 | +static DEFINE_PER_CPU(struct work_struct, flush_works); |
---|
5258 | 5677 | |
---|
5259 | 5678 | /* Network device is going away, flush any packets still pending */ |
---|
5260 | 5679 | static void flush_backlog(struct work_struct *work) |
---|
.. | .. |
---|
5287 | 5706 | local_bh_enable(); |
---|
5288 | 5707 | } |
---|
5289 | 5708 | |
---|
| 5709 | +static bool flush_required(int cpu) |
---|
| 5710 | +{ |
---|
| 5711 | +#if IS_ENABLED(CONFIG_RPS) |
---|
| 5712 | + struct softnet_data *sd = &per_cpu(softnet_data, cpu); |
---|
| 5713 | + bool do_flush; |
---|
| 5714 | + |
---|
| 5715 | + local_irq_disable(); |
---|
| 5716 | + rps_lock(sd); |
---|
| 5717 | + |
---|
| 5718 | + /* as insertion into process_queue happens with the rps lock held, |
---|
| 5719 | + * process_queue access may race only with dequeue |
---|
| 5720 | + */ |
---|
| 5721 | + do_flush = !skb_queue_empty(&sd->input_pkt_queue) || |
---|
| 5722 | + !skb_queue_empty_lockless(&sd->process_queue); |
---|
| 5723 | + rps_unlock(sd); |
---|
| 5724 | + local_irq_enable(); |
---|
| 5725 | + |
---|
| 5726 | + return do_flush; |
---|
| 5727 | +#endif |
---|
| 5728 | + /* without RPS we can't safely check input_pkt_queue: during a |
---|
| 5729 | + * concurrent remote skb_queue_splice() we can detect as empty both |
---|
| 5730 | + * input_pkt_queue and process_queue even if the latter could end up 
---|
| 5731 | + * containing a lot of packets. |
---|
| 5732 | + */ |
---|
| 5733 | + return true; |
---|
| 5734 | +} |
---|
| 5735 | + |
---|
5290 | 5736 | static void flush_all_backlogs(void) |
---|
5291 | 5737 | { |
---|
| 5738 | + static cpumask_t flush_cpus; |
---|
5292 | 5739 | unsigned int cpu; |
---|
| 5740 | + |
---|
| 5741 | + /* since we are under rtnl lock protection we can use static data |
---|
| 5742 | + * for the cpumask and avoid allocating the possibly 
---|
| 5743 | + * large mask on the stack 
---|
| 5744 | + */ |
---|
| 5745 | + ASSERT_RTNL(); |
---|
5293 | 5746 | |
---|
5294 | 5747 | get_online_cpus(); |
---|
5295 | 5748 | |
---|
5296 | | - for_each_online_cpu(cpu) |
---|
5297 | | - queue_work_on(cpu, system_highpri_wq, |
---|
5298 | | - per_cpu_ptr(&flush_works, cpu)); |
---|
| 5749 | + cpumask_clear(&flush_cpus); |
---|
| 5750 | + for_each_online_cpu(cpu) { |
---|
| 5751 | + if (flush_required(cpu)) { |
---|
| 5752 | + queue_work_on(cpu, system_highpri_wq, |
---|
| 5753 | + per_cpu_ptr(&flush_works, cpu)); |
---|
| 5754 | + cpumask_set_cpu(cpu, &flush_cpus); |
---|
| 5755 | + } |
---|
| 5756 | + } |
---|
5299 | 5757 | |
---|
5300 | | - for_each_online_cpu(cpu) |
---|
| 5758 | + /* we can have in-flight packets on the cpus we are not flushing; 
---|
| 5759 | + * synchronize_net() in unregister_netdevice_many() will take care of |
---|
| 5760 | + * them |
---|
| 5761 | + */ |
---|
| 5762 | + for_each_cpu(cpu, &flush_cpus) |
---|
5301 | 5763 | flush_work(per_cpu_ptr(&flush_works, cpu)); |
---|
5302 | 5764 | |
---|
5303 | 5765 | put_online_cpus(); |
---|
5304 | 5766 | } |
---|
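flush_all_backlogs() now schedules flush work only on CPUs whose backlog or process queue is non-empty, records them in a static cpumask (safe because RTNL is held), and then waits only for those workers. A standalone model of that collect-then-wait structure follows, with plain arrays standing in for per-CPU queues and a bitmask for the cpumask; no real workqueues are involved.

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_WORKERS 8

static int backlog[NR_WORKERS];        /* pending items per worker */

static bool flush_required(int cpu)
{
	return backlog[cpu] != 0;
}

static void queue_flush(int cpu)
{
	printf("scheduling flush on cpu %d\n", cpu);
}

static void wait_flush(int cpu)
{
	printf("waiting for cpu %d, dropping %d item(s)\n", cpu, backlog[cpu]);
	backlog[cpu] = 0;
}

static void flush_all_backlogs_model(void)
{
	static unsigned long flush_mask;   /* models the static cpumask */
	int cpu;

	flush_mask = 0;
	for (cpu = 0; cpu < NR_WORKERS; cpu++) {
		if (flush_required(cpu)) {
			queue_flush(cpu);
			flush_mask |= 1UL << cpu;
		}
	}
	/* wait only for the workers we actually scheduled */
	for (cpu = 0; cpu < NR_WORKERS; cpu++)
		if (flush_mask & (1UL << cpu))
			wait_flush(cpu);
}

int main(void)
{
	backlog[2] = 3;
	backlog[5] = 1;
	flush_all_backlogs_model();
	return 0;
}
```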
5305 | 5767 | |
---|
5306 | | -static int napi_gro_complete(struct sk_buff *skb) |
---|
| 5768 | +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ |
---|
| 5769 | +static void gro_normal_list(struct napi_struct *napi) |
---|
| 5770 | +{ |
---|
| 5771 | + if (!napi->rx_count) |
---|
| 5772 | + return; |
---|
| 5773 | + netif_receive_skb_list_internal(&napi->rx_list); |
---|
| 5774 | + INIT_LIST_HEAD(&napi->rx_list); |
---|
| 5775 | + napi->rx_count = 0; |
---|
| 5776 | +} |
---|
| 5777 | + |
---|
| 5778 | +/* Queue one GRO_NORMAL SKB up for list processing. If the batch size is exceeded, 
---|
| 5779 | + * pass the whole batch up to the stack. |
---|
| 5780 | + */ |
---|
| 5781 | +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) |
---|
| 5782 | +{ |
---|
| 5783 | + list_add_tail(&skb->list, &napi->rx_list); |
---|
| 5784 | + napi->rx_count += segs; |
---|
| 5785 | + if (napi->rx_count >= gro_normal_batch) |
---|
| 5786 | + gro_normal_list(napi); |
---|
| 5787 | +} |
---|
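gro_normal_one() appends a completed skb to napi->rx_list, accounts for the number of segments it carries, and hands the whole list to the stack once gro_normal_batch is reached; gro_normal_list() performs the flush. Below is a minimal sketch of that count-based batching, assuming an illustrative batch limit of 8 and a toy packet structure.

```c
#include <stdio.h>

#define BATCH_LIMIT 8

struct pkt {
	int id;
	int segs;           /* a coalesced packet may count as several */
	struct pkt *next;
};

struct batcher {
	struct pkt *head, **tailp;
	int count;
};

static void batch_flush(struct batcher *b)
{
	if (!b->count)
		return;
	for (struct pkt *p = b->head; p; p = p->next)
		printf("delivering packet %d (%d seg)\n", p->id, p->segs);
	b->head = NULL;
	b->tailp = &b->head;
	b->count = 0;
}

/* Queue one packet; once the segment count reaches the batch limit,
 * hand the whole list to the consumer in one go.
 */
static void batch_one(struct batcher *b, struct pkt *p)
{
	*b->tailp = p;
	b->tailp = &p->next;
	b->count += p->segs;
	if (b->count >= BATCH_LIMIT)
		batch_flush(b);
}

int main(void)
{
	struct batcher b = { .tailp = &b.head };
	struct pkt pkts[5] = { {1, 1}, {2, 3}, {3, 1}, {4, 4}, {5, 1} };

	for (int i = 0; i < 5; i++) {
		pkts[i].next = NULL;
		batch_one(&b, &pkts[i]);
	}
	batch_flush(&b);       /* final flush at end of the poll cycle */
	return 0;
}
```

Counting by segments rather than by list entries keeps the batch roughly proportional to the work the stack will do, which appears to be the motivation for adding `segs` instead of 1 in the kernel version.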
| 5788 | + |
---|
| 5789 | +INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); |
---|
| 5790 | +INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); |
---|
| 5791 | +static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) |
---|
5307 | 5792 | { |
---|
5308 | 5793 | struct packet_offload *ptype; |
---|
5309 | 5794 | __be16 type = skb->protocol; |
---|
.. | .. |
---|
5322 | 5807 | if (ptype->type != type || !ptype->callbacks.gro_complete) |
---|
5323 | 5808 | continue; |
---|
5324 | 5809 | |
---|
5325 | | - err = ptype->callbacks.gro_complete(skb, 0); |
---|
| 5810 | + err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, |
---|
| 5811 | + ipv6_gro_complete, inet_gro_complete, |
---|
| 5812 | + skb, 0); |
---|
5326 | 5813 | break; |
---|
5327 | 5814 | } |
---|
5328 | 5815 | rcu_read_unlock(); |
---|
.. | .. |
---|
5334 | 5821 | } |
---|
5335 | 5822 | |
---|
5336 | 5823 | out: |
---|
5337 | | - return netif_receive_skb_internal(skb); |
---|
| 5824 | + gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); |
---|
| 5825 | + return NET_RX_SUCCESS; |
---|
5338 | 5826 | } |
---|
5339 | 5827 | |
---|
5340 | 5828 | static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, |
---|
.. | .. |
---|
5346 | 5834 | list_for_each_entry_safe_reverse(skb, p, head, list) { |
---|
5347 | 5835 | if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) |
---|
5348 | 5836 | return; |
---|
5349 | | - list_del(&skb->list); |
---|
5350 | | - skb->next = NULL; |
---|
5351 | | - napi_gro_complete(skb); |
---|
| 5837 | + skb_list_del_init(skb); |
---|
| 5838 | + napi_gro_complete(napi, skb); |
---|
5352 | 5839 | napi->gro_hash[index].count--; |
---|
5353 | 5840 | } |
---|
5354 | 5841 | |
---|
.. | .. |
---|
5362 | 5849 | */ |
---|
5363 | 5850 | void napi_gro_flush(struct napi_struct *napi, bool flush_old) |
---|
5364 | 5851 | { |
---|
5365 | | - u32 i; |
---|
| 5852 | + unsigned long bitmask = napi->gro_bitmask; |
---|
| 5853 | + unsigned int i, base = ~0U; |
---|
5366 | 5854 | |
---|
5367 | | - for (i = 0; i < GRO_HASH_BUCKETS; i++) { |
---|
5368 | | - if (test_bit(i, &napi->gro_bitmask)) |
---|
5369 | | - __napi_gro_flush_chain(napi, i, flush_old); |
---|
| 5855 | + while ((i = ffs(bitmask)) != 0) { |
---|
| 5856 | + bitmask >>= i; |
---|
| 5857 | + base += i; |
---|
| 5858 | + __napi_gro_flush_chain(napi, base, flush_old); |
---|
5370 | 5859 | } |
---|
5371 | 5860 | } |
---|
5372 | 5861 | EXPORT_SYMBOL(napi_gro_flush); |
---|
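napi_gro_flush() now walks only the set bits of gro_bitmask instead of scanning every GRO_HASH_BUCKETS slot: ffs() locates the next set bit, the mask is shifted past it, and `base` accumulates the absolute bucket index (starting at ~0U so the first addition yields a zero-based index). The same walk in plain userspace C, assuming the mask's top bit is clear, as it always is for the eight GRO buckets:

```c
#include <stdio.h>
#include <strings.h>   /* ffs() */

/* Visit the zero-based index of every set bit in @mask, lowest first,
 * using the same shift-and-accumulate trick as the GRO flush loop.
 * Assumes the topmost bit of @mask is clear, so the shift count never
 * reaches the word size.
 */
static void for_each_set_bit_ffs(unsigned int mask)
{
	unsigned int base = ~0U;  /* becomes the absolute bit index */
	int i;

	while ((i = ffs((int)mask)) != 0) {
		mask >>= i;       /* skip past the bit just found      */
		base += i;        /* ffs() is 1-based, so this lands on
				   * the zero-based position           */
		printf("bucket %u is non-empty\n", base);
	}
}

int main(void)
{
	for_each_set_bit_ffs(0x000000A5u);  /* bits 0, 2, 5, 7 */
	return 0;
}
```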
.. | .. |
---|
5391 | 5880 | } |
---|
5392 | 5881 | |
---|
5393 | 5882 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; |
---|
5394 | | - diffs |= p->vlan_tci ^ skb->vlan_tci; |
---|
| 5883 | + diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb); |
---|
| 5884 | + if (skb_vlan_tag_present(p)) |
---|
| 5885 | + diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb); |
---|
5395 | 5886 | diffs |= skb_metadata_dst_cmp(p, skb); |
---|
5396 | 5887 | diffs |= skb_metadata_differs(p, skb); |
---|
5397 | 5888 | if (maclen == ETH_HLEN) |
---|
.. | .. |
---|
5401 | 5892 | diffs = memcmp(skb_mac_header(p), |
---|
5402 | 5893 | skb_mac_header(skb), |
---|
5403 | 5894 | maclen); |
---|
| 5895 | + |
---|
| 5896 | + diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); |
---|
| 5897 | +#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) |
---|
| 5898 | + if (!diffs) { |
---|
| 5899 | + struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT); |
---|
| 5900 | + struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT); |
---|
| 5901 | + |
---|
| 5902 | + diffs |= (!!p_ext) ^ (!!skb_ext); |
---|
| 5903 | + if (!diffs && unlikely(skb_ext)) |
---|
| 5904 | + diffs |= p_ext->chain ^ skb_ext->chain; |
---|
| 5905 | + } |
---|
| 5906 | +#endif |
---|
| 5907 | + |
---|
5404 | 5908 | NAPI_GRO_CB(p)->same_flow = !diffs; |
---|
5405 | 5909 | } |
---|
5406 | 5910 | |
---|
5407 | 5911 | return head; |
---|
5408 | 5912 | } |
---|
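The flow-matching loop above decides `same_flow` by OR-ing together XORs of every compared field into a single `diffs` word, so one final zero test covers them all; the VLAN fields are compared only when a tag is actually present. A small standalone illustration of that accumulate-then-test idiom over an arbitrary flow key:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct flow_key {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	uint16_t vlan_present;   /* 0 or 1 */
	uint16_t vlan_id;
};

/* Accumulate field differences with XOR/OR: any mismatching field leaves
 * at least one bit set, so a single zero test decides "same flow".
 */
static bool same_flow(const struct flow_key *a, const struct flow_key *b)
{
	unsigned long diffs = 0;

	diffs |= a->saddr ^ b->saddr;
	diffs |= a->daddr ^ b->daddr;
	diffs |= a->sport ^ b->sport;
	diffs |= a->dport ^ b->dport;
	diffs |= a->vlan_present ^ b->vlan_present;
	if (a->vlan_present)
		diffs |= a->vlan_id ^ b->vlan_id;

	return !diffs;
}

int main(void)
{
	struct flow_key a = { 0x0a000001, 0x0a000002, 80, 4242, 1, 100 };
	struct flow_key b = a;
	struct flow_key c = a;

	c.vlan_id = 200;
	printf("a vs b: %s\n", same_flow(&a, &b) ? "same" : "different");
	printf("a vs c: %s\n", same_flow(&a, &c) ? "same" : "different");
	return 0;
}
```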
5409 | 5913 | |
---|
5410 | | -static void skb_gro_reset_offset(struct sk_buff *skb) |
---|
| 5914 | +static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) |
---|
5411 | 5915 | { |
---|
5412 | 5916 | const struct skb_shared_info *pinfo = skb_shinfo(skb); |
---|
5413 | 5917 | const skb_frag_t *frag0 = &pinfo->frags[0]; |
---|
.. | .. |
---|
5416 | 5920 | NAPI_GRO_CB(skb)->frag0 = NULL; |
---|
5417 | 5921 | NAPI_GRO_CB(skb)->frag0_len = 0; |
---|
5418 | 5922 | |
---|
5419 | | - if (skb_mac_header(skb) == skb_tail_pointer(skb) && |
---|
5420 | | - pinfo->nr_frags && |
---|
| 5923 | + if (!skb_headlen(skb) && pinfo->nr_frags && |
---|
5421 | 5924 | !PageHighMem(skb_frag_page(frag0)) && |
---|
5422 | | - (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { |
---|
| 5925 | + (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { |
---|
5423 | 5926 | NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); |
---|
5424 | 5927 | NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, |
---|
5425 | 5928 | skb_frag_size(frag0), |
---|
.. | .. |
---|
5438 | 5941 | skb->data_len -= grow; |
---|
5439 | 5942 | skb->tail += grow; |
---|
5440 | 5943 | |
---|
5441 | | - pinfo->frags[0].page_offset += grow; |
---|
| 5944 | + skb_frag_off_add(&pinfo->frags[0], grow); |
---|
5442 | 5945 | skb_frag_size_sub(&pinfo->frags[0], grow); |
---|
5443 | 5946 | |
---|
5444 | 5947 | if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { |
---|
.. | .. |
---|
5448 | 5951 | } |
---|
5449 | 5952 | } |
---|
5450 | 5953 | |
---|
5451 | | -static void gro_flush_oldest(struct list_head *head) |
---|
| 5954 | +static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) |
---|
5452 | 5955 | { |
---|
5453 | 5956 | struct sk_buff *oldest; |
---|
5454 | 5957 | |
---|
.. | .. |
---|
5463 | 5966 | /* Do not adjust napi->gro_hash[].count, caller is adding a new |
---|
5464 | 5967 | * SKB to the chain. |
---|
5465 | 5968 | */ |
---|
5466 | | - list_del(&oldest->list); |
---|
5467 | | - oldest->next = NULL; |
---|
5468 | | - napi_gro_complete(oldest); |
---|
| 5969 | + skb_list_del_init(oldest); |
---|
| 5970 | + napi_gro_complete(napi, oldest); |
---|
5469 | 5971 | } |
---|
5470 | 5972 | |
---|
| 5973 | +INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, |
---|
| 5974 | + struct sk_buff *)); |
---|
| 5975 | +INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, |
---|
| 5976 | + struct sk_buff *)); |
---|
5471 | 5977 | static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
---|
5472 | 5978 | { |
---|
5473 | 5979 | u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); |
---|
.. | .. |
---|
5517 | 6023 | NAPI_GRO_CB(skb)->csum_valid = 0; |
---|
5518 | 6024 | } |
---|
5519 | 6025 | |
---|
5520 | | - pp = ptype->callbacks.gro_receive(gro_head, skb); |
---|
| 6026 | + pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, |
---|
| 6027 | + ipv6_gro_receive, inet_gro_receive, |
---|
| 6028 | + gro_head, skb); |
---|
5521 | 6029 | break; |
---|
5522 | 6030 | } |
---|
5523 | 6031 | rcu_read_unlock(); |
---|
.. | .. |
---|
5525 | 6033 | if (&ptype->list == head) |
---|
5526 | 6034 | goto normal; |
---|
5527 | 6035 | |
---|
5528 | | - if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { |
---|
| 6036 | + if (PTR_ERR(pp) == -EINPROGRESS) { |
---|
5529 | 6037 | ret = GRO_CONSUMED; |
---|
5530 | 6038 | goto ok; |
---|
5531 | 6039 | } |
---|
.. | .. |
---|
5534 | 6042 | ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; |
---|
5535 | 6043 | |
---|
5536 | 6044 | if (pp) { |
---|
5537 | | - list_del(&pp->list); |
---|
5538 | | - pp->next = NULL; |
---|
5539 | | - napi_gro_complete(pp); |
---|
| 6045 | + skb_list_del_init(pp); |
---|
| 6046 | + napi_gro_complete(napi, pp); |
---|
5540 | 6047 | napi->gro_hash[hash].count--; |
---|
5541 | 6048 | } |
---|
5542 | 6049 | |
---|
.. | .. |
---|
5547 | 6054 | goto normal; |
---|
5548 | 6055 | |
---|
5549 | 6056 | if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { |
---|
5550 | | - gro_flush_oldest(gro_head); |
---|
| 6057 | + gro_flush_oldest(napi, gro_head); |
---|
5551 | 6058 | } else { |
---|
5552 | 6059 | napi->gro_hash[hash].count++; |
---|
5553 | 6060 | } |
---|
.. | .. |
---|
5608 | 6115 | static void napi_skb_free_stolen_head(struct sk_buff *skb) |
---|
5609 | 6116 | { |
---|
5610 | 6117 | skb_dst_drop(skb); |
---|
5611 | | - secpath_reset(skb); |
---|
| 6118 | + skb_ext_put(skb); |
---|
5612 | 6119 | kmem_cache_free(skbuff_head_cache, skb); |
---|
5613 | 6120 | } |
---|
5614 | 6121 | |
---|
5615 | | -static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) |
---|
| 6122 | +static gro_result_t napi_skb_finish(struct napi_struct *napi, |
---|
| 6123 | + struct sk_buff *skb, |
---|
| 6124 | + gro_result_t ret) |
---|
5616 | 6125 | { |
---|
5617 | 6126 | switch (ret) { |
---|
5618 | 6127 | case GRO_NORMAL: |
---|
5619 | | - if (netif_receive_skb_internal(skb)) |
---|
5620 | | - ret = GRO_DROP; |
---|
| 6128 | + gro_normal_one(napi, skb, 1); |
---|
5621 | 6129 | break; |
---|
5622 | 6130 | |
---|
5623 | 6131 | case GRO_DROP: |
---|
.. | .. |
---|
5642 | 6150 | |
---|
5643 | 6151 | gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
---|
5644 | 6152 | { |
---|
| 6153 | + gro_result_t ret; |
---|
| 6154 | + |
---|
5645 | 6155 | skb_mark_napi_id(skb, napi); |
---|
5646 | 6156 | trace_napi_gro_receive_entry(skb); |
---|
5647 | 6157 | |
---|
5648 | | - skb_gro_reset_offset(skb); |
---|
| 6158 | + skb_gro_reset_offset(skb, 0); |
---|
5649 | 6159 | |
---|
5650 | | - return napi_skb_finish(dev_gro_receive(napi, skb), skb); |
---|
| 6160 | + ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); |
---|
| 6161 | + trace_napi_gro_receive_exit(ret); |
---|
| 6162 | + |
---|
| 6163 | + return ret; |
---|
5651 | 6164 | } |
---|
5652 | 6165 | EXPORT_SYMBOL(napi_gro_receive); |
---|
5653 | 6166 | |
---|
.. | .. |
---|
5660 | 6173 | __skb_pull(skb, skb_headlen(skb)); |
---|
5661 | 6174 | /* restore the reserve we had after netdev_alloc_skb_ip_align() */ |
---|
5662 | 6175 | skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); |
---|
5663 | | - skb->vlan_tci = 0; |
---|
| 6176 | + __vlan_hwaccel_clear_tag(skb); |
---|
5664 | 6177 | skb->dev = napi->dev; |
---|
5665 | 6178 | skb->skb_iif = 0; |
---|
5666 | 6179 | |
---|
.. | .. |
---|
5670 | 6183 | skb->encapsulation = 0; |
---|
5671 | 6184 | skb_shinfo(skb)->gso_type = 0; |
---|
5672 | 6185 | skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); |
---|
5673 | | - secpath_reset(skb); |
---|
| 6186 | + skb_ext_reset(skb); |
---|
| 6187 | + nf_reset_ct(skb); |
---|
5674 | 6188 | |
---|
5675 | 6189 | napi->skb = skb; |
---|
5676 | 6190 | } |
---|
.. | .. |
---|
5699 | 6213 | case GRO_HELD: |
---|
5700 | 6214 | __skb_push(skb, ETH_HLEN); |
---|
5701 | 6215 | skb->protocol = eth_type_trans(skb, skb->dev); |
---|
5702 | | - if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) |
---|
5703 | | - ret = GRO_DROP; |
---|
| 6216 | + if (ret == GRO_NORMAL) |
---|
| 6217 | + gro_normal_one(napi, skb, 1); |
---|
5704 | 6218 | break; |
---|
5705 | 6219 | |
---|
5706 | 6220 | case GRO_DROP: |
---|
.. | .. |
---|
5735 | 6249 | napi->skb = NULL; |
---|
5736 | 6250 | |
---|
5737 | 6251 | skb_reset_mac_header(skb); |
---|
5738 | | - skb_gro_reset_offset(skb); |
---|
| 6252 | + skb_gro_reset_offset(skb, hlen); |
---|
5739 | 6253 | |
---|
5740 | 6254 | if (unlikely(skb_gro_header_hard(skb, hlen))) { |
---|
5741 | 6255 | eth = skb_gro_header_slow(skb, hlen, 0); |
---|
.. | .. |
---|
5765 | 6279 | |
---|
5766 | 6280 | gro_result_t napi_gro_frags(struct napi_struct *napi) |
---|
5767 | 6281 | { |
---|
| 6282 | + gro_result_t ret; |
---|
5768 | 6283 | struct sk_buff *skb = napi_frags_skb(napi); |
---|
5769 | 6284 | |
---|
5770 | 6285 | if (!skb) |
---|
.. | .. |
---|
5772 | 6287 | |
---|
5773 | 6288 | trace_napi_gro_frags_entry(skb); |
---|
5774 | 6289 | |
---|
5775 | | - return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); |
---|
| 6290 | + ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); |
---|
| 6291 | + trace_napi_gro_frags_exit(ret); |
---|
| 6292 | + |
---|
| 6293 | + return ret; |
---|
5776 | 6294 | } |
---|
5777 | 6295 | EXPORT_SYMBOL(napi_gro_frags); |
---|
5778 | 6296 | |
---|
.. | .. |
---|
5788 | 6306 | |
---|
5789 | 6307 | /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ |
---|
5790 | 6308 | sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); |
---|
| 6309 | + /* See comments in __skb_checksum_complete(). */ |
---|
5791 | 6310 | if (likely(!sum)) { |
---|
5792 | 6311 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && |
---|
5793 | 6312 | !skb->csum_complete_sw) |
---|
5794 | | - netdev_rx_csum_fault(skb->dev); |
---|
| 6313 | + netdev_rx_csum_fault(skb->dev, skb); |
---|
5795 | 6314 | } |
---|
5796 | 6315 | |
---|
5797 | 6316 | NAPI_GRO_CB(skb)->csum = wsum; |
---|
.. | .. |
---|
5827 | 6346 | sd->rps_ipi_list = NULL; |
---|
5828 | 6347 | |
---|
5829 | 6348 | local_irq_enable(); |
---|
5830 | | - preempt_check_resched_rt(); |
---|
5831 | 6349 | |
---|
5832 | 6350 | /* Send pending IPI's to kick RPS processing on remote cpus. */ |
---|
5833 | 6351 | net_rps_send_ipi(remsd); |
---|
5834 | 6352 | } else |
---|
5835 | 6353 | #endif |
---|
5836 | 6354 | local_irq_enable(); |
---|
5837 | | - preempt_check_resched_rt(); |
---|
5838 | 6355 | } |
---|
5839 | 6356 | |
---|
5840 | 6357 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) |
---|
.. | .. |
---|
5860 | 6377 | net_rps_action_and_irq_enable(sd); |
---|
5861 | 6378 | } |
---|
5862 | 6379 | |
---|
5863 | | - napi->weight = dev_rx_weight; |
---|
| 6380 | + napi->weight = READ_ONCE(dev_rx_weight); |
---|
5864 | 6381 | while (again) { |
---|
5865 | 6382 | struct sk_buff *skb; |
---|
5866 | 6383 | |
---|
5867 | | - local_irq_disable(); |
---|
5868 | 6384 | while ((skb = __skb_dequeue(&sd->process_queue))) { |
---|
5869 | | - local_irq_enable(); |
---|
5870 | 6385 | rcu_read_lock(); |
---|
5871 | 6386 | __netif_receive_skb(skb); |
---|
5872 | 6387 | rcu_read_unlock(); |
---|
5873 | 6388 | input_queue_head_incr(sd); |
---|
5874 | 6389 | if (++work >= quota) |
---|
5875 | | - goto state_changed; |
---|
5876 | | - local_irq_disable(); |
---|
| 6390 | + return work; |
---|
| 6391 | + |
---|
5877 | 6392 | } |
---|
5878 | 6393 | |
---|
| 6394 | + local_irq_disable(); |
---|
5879 | 6395 | rps_lock(sd); |
---|
5880 | 6396 | if (skb_queue_empty(&sd->input_pkt_queue)) { |
---|
5881 | 6397 | /* |
---|
.. | .. |
---|
5896 | 6412 | local_irq_enable(); |
---|
5897 | 6413 | } |
---|
5898 | 6414 | |
---|
5899 | | -state_changed: |
---|
5900 | | - napi_gro_flush(napi, false); |
---|
5901 | | - sd->current_napi = NULL; |
---|
5902 | | - |
---|
5903 | 6415 | return work; |
---|
5904 | 6416 | } |
---|
5905 | 6417 | |
---|
.. | .. |
---|
5917 | 6429 | local_irq_save(flags); |
---|
5918 | 6430 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); |
---|
5919 | 6431 | local_irq_restore(flags); |
---|
5920 | | - preempt_check_resched_rt(); |
---|
5921 | 6432 | } |
---|
5922 | 6433 | EXPORT_SYMBOL(__napi_schedule); |
---|
5923 | 6434 | |
---|
.. | .. |
---|
5926 | 6437 | * @n: napi context |
---|
5927 | 6438 | * |
---|
5928 | 6439 | * Test if NAPI routine is already running, and if not mark |
---|
5929 | | - * it as running. This is used as a condition variable |
---|
| 6440 | + * it as running. This is used as a condition variable to |
---|
5930 | 6441 | * ensure only one NAPI poll instance runs. We also make 
---|
5931 | 6442 | * sure there is no pending NAPI disable. |
---|
5932 | 6443 | */ |
---|
.. | .. |
---|
5954 | 6465 | } |
---|
5955 | 6466 | EXPORT_SYMBOL(napi_schedule_prep); |
---|
5956 | 6467 | |
---|
5957 | | -#ifndef CONFIG_PREEMPT_RT_FULL |
---|
5958 | 6468 | /** |
---|
5959 | 6469 | * __napi_schedule_irqoff - schedule for receive |
---|
5960 | 6470 | * @n: entry to schedule |
---|
.. | .. |
---|
5973 | 6483 | __napi_schedule(n); |
---|
5974 | 6484 | } |
---|
5975 | 6485 | EXPORT_SYMBOL(__napi_schedule_irqoff); |
---|
5976 | | -#endif |
---|
5977 | 6486 | |
---|
5978 | 6487 | bool napi_complete_done(struct napi_struct *n, int work_done) |
---|
5979 | 6488 | { |
---|
5980 | | - unsigned long flags, val, new; |
---|
| 6489 | + unsigned long flags, val, new, timeout = 0; |
---|
| 6490 | + bool ret = true; |
---|
5981 | 6491 | |
---|
5982 | 6492 | /* |
---|
5983 | 6493 | * 1) Don't let napi dequeue from the cpu poll list |
---|
.. | .. |
---|
5989 | 6499 | NAPIF_STATE_IN_BUSY_POLL))) |
---|
5990 | 6500 | return false; |
---|
5991 | 6501 | |
---|
| 6502 | + if (work_done) { |
---|
| 6503 | + if (n->gro_bitmask) |
---|
| 6504 | + timeout = READ_ONCE(n->dev->gro_flush_timeout); |
---|
| 6505 | + n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); |
---|
| 6506 | + } |
---|
| 6507 | + if (n->defer_hard_irqs_count > 0) { |
---|
| 6508 | + n->defer_hard_irqs_count--; |
---|
| 6509 | + timeout = READ_ONCE(n->dev->gro_flush_timeout); |
---|
| 6510 | + if (timeout) |
---|
| 6511 | + ret = false; |
---|
| 6512 | + } |
---|
5992 | 6513 | if (n->gro_bitmask) { |
---|
5993 | | - unsigned long timeout = 0; |
---|
5994 | | - |
---|
5995 | | - if (work_done) |
---|
5996 | | - timeout = n->dev->gro_flush_timeout; |
---|
5997 | | - |
---|
5998 | 6514 | /* When the NAPI instance uses a timeout and keeps postponing |
---|
5999 | 6515 | * it, we need to bound somehow the time packets are kept in |
---|
6000 | 6516 | * the GRO layer |
---|
6001 | 6517 | */ |
---|
6002 | 6518 | napi_gro_flush(n, !!timeout); |
---|
6003 | | - if (timeout) |
---|
6004 | | - hrtimer_start(&n->timer, ns_to_ktime(timeout), |
---|
6005 | | - HRTIMER_MODE_REL_PINNED); |
---|
6006 | 6519 | } |
---|
6007 | | - if (unlikely(!list_empty(&n->poll_list))) { |
---|
6008 | | - struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
---|
6009 | 6520 | |
---|
| 6521 | + gro_normal_list(n); |
---|
| 6522 | + |
---|
| 6523 | + if (unlikely(!list_empty(&n->poll_list))) { |
---|
6010 | 6524 | /* If n->poll_list is not empty, we need to mask irqs */ |
---|
6011 | 6525 | local_irq_save(flags); |
---|
6012 | 6526 | list_del_init(&n->poll_list); |
---|
6013 | | - sd->current_napi = NULL; |
---|
6014 | 6527 | local_irq_restore(flags); |
---|
6015 | 6528 | } |
---|
6016 | 6529 | |
---|
.. | .. |
---|
6034 | 6547 | return false; |
---|
6035 | 6548 | } |
---|
6036 | 6549 | |
---|
6037 | | - return true; |
---|
| 6550 | + if (timeout) |
---|
| 6551 | + hrtimer_start(&n->timer, ns_to_ktime(timeout), |
---|
| 6552 | + HRTIMER_MODE_REL_PINNED); |
---|
| 6553 | + return ret; |
---|
6038 | 6554 | } |
---|
6039 | 6555 | EXPORT_SYMBOL(napi_complete_done); |
---|
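napi_complete_done() now consults two per-device knobs: when the poll did work it reloads gro_flush_timeout and the napi_defer_hard_irqs budget, and while that budget is non-zero it decrements it, reports "not done" and lets the hrtimer drive the next poll instead of re-enabling the device interrupt. A hedged sketch of just that decision, with the knobs passed as plain parameters and printf standing in for arming the timer:

```c
#include <stdbool.h>
#include <stdio.h>

struct napi_model {
	unsigned int defer_count;       /* remaining polls to run off the timer */
	unsigned long long timeout_ns;  /* timer period to arm, 0 = none        */
};

/* Returns true if the driver may re-enable its interrupt now, false if the
 * caller should keep polling from the timer instead.  @work_done reloads
 * the deferral budget from the configured knobs.
 */
static bool complete_done(struct napi_model *n, int work_done,
			  unsigned int cfg_defer_irqs,
			  unsigned long long cfg_timeout_ns)
{
	bool reenable_irq = true;

	n->timeout_ns = 0;
	if (work_done) {
		n->timeout_ns = cfg_timeout_ns;
		n->defer_count = cfg_defer_irqs;
	}
	if (n->defer_count > 0) {
		n->defer_count--;
		n->timeout_ns = cfg_timeout_ns;
		if (n->timeout_ns)
			reenable_irq = false;   /* stay in timer-driven mode */
	}
	if (n->timeout_ns)
		printf("arming flush timer for %llu ns\n", n->timeout_ns);
	return reenable_irq;
}

int main(void)
{
	struct napi_model n = { 0, 0 };

	/* work done on the first poll; the following polls stay timer-driven
	 * until the deferral budget runs out.
	 */
	printf("reenable=%d\n", complete_done(&n, 64, 2, 20000));
	printf("reenable=%d\n", complete_done(&n, 0, 2, 20000));
	printf("reenable=%d\n", complete_done(&n, 0, 2, 20000));
	return 0;
}
```

Under steady traffic this keeps the NIC interrupt masked for a configurable number of polling rounds, trading a little latency for far fewer hard IRQs.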
6040 | 6556 | |
---|
.. | .. |
---|
6077 | 6593 | * Ideally, a new ndo_busy_poll_stop() could avoid another round. |
---|
6078 | 6594 | */ |
---|
6079 | 6595 | rc = napi->poll(napi, BUSY_POLL_BUDGET); |
---|
| 6596 | + /* We can't gro_normal_list() here, because napi->poll() might have |
---|
| 6597 | + * rearmed the napi (napi_complete_done()) in which case it could |
---|
| 6598 | + * already be running on another CPU. |
---|
| 6599 | + */ |
---|
6080 | 6600 | trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); |
---|
6081 | 6601 | netpoll_poll_unlock(have_poll_lock); |
---|
6082 | | - if (rc == BUSY_POLL_BUDGET) |
---|
| 6602 | + if (rc == BUSY_POLL_BUDGET) { |
---|
| 6603 | + /* As the whole budget was spent, we still own the napi so we can 
---|
| 6604 | + * safely handle the rx_list. |
---|
| 6605 | + */ |
---|
| 6606 | + gro_normal_list(napi); |
---|
6083 | 6607 | __napi_schedule(napi); |
---|
| 6608 | + } |
---|
6084 | 6609 | local_bh_enable(); |
---|
6085 | 6610 | } |
---|
6086 | 6611 | |
---|
.. | .. |
---|
6125 | 6650 | } |
---|
6126 | 6651 | work = napi_poll(napi, BUSY_POLL_BUDGET); |
---|
6127 | 6652 | trace_napi_poll(napi, work, BUSY_POLL_BUDGET); |
---|
| 6653 | + gro_normal_list(napi); |
---|
6128 | 6654 | count: |
---|
6129 | 6655 | if (work > 0) |
---|
6130 | 6656 | __NET_ADD_STATS(dev_net(napi->dev), |
---|
.. | .. |
---|
6158 | 6684 | |
---|
6159 | 6685 | static void napi_hash_add(struct napi_struct *napi) |
---|
6160 | 6686 | { |
---|
6161 | | - if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || |
---|
6162 | | - test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) |
---|
| 6687 | + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) |
---|
6163 | 6688 | return; |
---|
6164 | 6689 | |
---|
6165 | 6690 | spin_lock(&napi_hash_lock); |
---|
.. | .. |
---|
6180 | 6705 | /* Warning : caller is responsible to make sure rcu grace period |
---|
6181 | 6706 | * is respected before freeing memory containing @napi |
---|
6182 | 6707 | */ |
---|
6183 | | -bool napi_hash_del(struct napi_struct *napi) |
---|
| 6708 | +static void napi_hash_del(struct napi_struct *napi) |
---|
6184 | 6709 | { |
---|
6185 | | - bool rcu_sync_needed = false; |
---|
6186 | | - |
---|
6187 | 6710 | spin_lock(&napi_hash_lock); |
---|
6188 | 6711 | |
---|
6189 | | - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { |
---|
6190 | | - rcu_sync_needed = true; |
---|
6191 | | - hlist_del_rcu(&napi->napi_hash_node); |
---|
6192 | | - } |
---|
| 6712 | + hlist_del_init_rcu(&napi->napi_hash_node); |
---|
| 6713 | + |
---|
6193 | 6714 | spin_unlock(&napi_hash_lock); |
---|
6194 | | - return rcu_sync_needed; |
---|
6195 | 6715 | } |
---|
6196 | | -EXPORT_SYMBOL_GPL(napi_hash_del); |
---|
6197 | 6716 | |
---|
6198 | 6717 | static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) |
---|
6199 | 6718 | { |
---|
.. | .. |
---|
6204 | 6723 | /* Note : we use a relaxed variant of napi_schedule_prep() not setting |
---|
6205 | 6724 | * NAPI_STATE_MISSED, since we do not react to a device IRQ. |
---|
6206 | 6725 | */ |
---|
6207 | | - if (napi->gro_bitmask && !napi_disable_pending(napi) && |
---|
| 6726 | + if (!napi_disable_pending(napi) && |
---|
6208 | 6727 | !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) |
---|
6209 | 6728 | __napi_schedule_irqoff(napi); |
---|
6210 | 6729 | |
---|
.. | .. |
---|
6225 | 6744 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, |
---|
6226 | 6745 | int (*poll)(struct napi_struct *, int), int weight) |
---|
6227 | 6746 | { |
---|
| 6747 | + if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) |
---|
| 6748 | + return; |
---|
| 6749 | + |
---|
6228 | 6750 | INIT_LIST_HEAD(&napi->poll_list); |
---|
| 6751 | + INIT_HLIST_NODE(&napi->napi_hash_node); |
---|
6229 | 6752 | hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); |
---|
6230 | 6753 | napi->timer.function = napi_watchdog; |
---|
6231 | 6754 | init_gro_hash(napi); |
---|
6232 | 6755 | napi->skb = NULL; |
---|
| 6756 | + INIT_LIST_HEAD(&napi->rx_list); |
---|
| 6757 | + napi->rx_count = 0; |
---|
6233 | 6758 | napi->poll = poll; |
---|
6234 | 6759 | if (weight > NAPI_POLL_WEIGHT) |
---|
6235 | | - pr_err_once("netif_napi_add() called with weight %d on device %s\n", |
---|
6236 | | - weight, dev->name); |
---|
| 6760 | + netdev_err_once(dev, "%s() called with weight %d\n", __func__, |
---|
| 6761 | + weight); |
---|
6237 | 6762 | napi->weight = weight; |
---|
6238 | 6763 | napi->dev = dev; |
---|
6239 | 6764 | #ifdef CONFIG_NETPOLL |
---|
.. | .. |
---|
6276 | 6801 | } |
---|
6277 | 6802 | |
---|
6278 | 6803 | /* Must be called in process context */ |
---|
6279 | | -void netif_napi_del(struct napi_struct *napi) |
---|
| 6804 | +void __netif_napi_del(struct napi_struct *napi) |
---|
6280 | 6805 | { |
---|
6281 | | - might_sleep(); |
---|
6282 | | - if (napi_hash_del(napi)) |
---|
6283 | | - synchronize_net(); |
---|
6284 | | - list_del_init(&napi->dev_list); |
---|
| 6806 | + if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) |
---|
| 6807 | + return; |
---|
| 6808 | + |
---|
| 6809 | + napi_hash_del(napi); |
---|
| 6810 | + list_del_rcu(&napi->dev_list); |
---|
6285 | 6811 | napi_free_frags(napi); |
---|
6286 | 6812 | |
---|
6287 | 6813 | flush_gro_hash(napi); |
---|
6288 | 6814 | napi->gro_bitmask = 0; |
---|
6289 | 6815 | } |
---|
6290 | | -EXPORT_SYMBOL(netif_napi_del); |
---|
6291 | | - |
---|
6292 | | -struct napi_struct *get_current_napi_context(void) |
---|
6293 | | -{ |
---|
6294 | | - struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
---|
6295 | | - |
---|
6296 | | - return sd->current_napi; |
---|
6297 | | -} |
---|
6298 | | -EXPORT_SYMBOL(get_current_napi_context); |
---|
| 6816 | +EXPORT_SYMBOL(__netif_napi_del); |
---|
6299 | 6817 | |
---|
6300 | 6818 | static int napi_poll(struct napi_struct *n, struct list_head *repoll) |
---|
6301 | 6819 | { |
---|
.. | .. |
---|
6316 | 6834 | */ |
---|
6317 | 6835 | work = 0; |
---|
6318 | 6836 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
---|
6319 | | - struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
---|
6320 | | - |
---|
6321 | | - sd->current_napi = n; |
---|
6322 | 6837 | work = n->poll(n, weight); |
---|
6323 | 6838 | trace_napi_poll(n, work, weight); |
---|
6324 | 6839 | } |
---|
6325 | 6840 | |
---|
6326 | | - WARN_ON_ONCE(work > weight); |
---|
| 6841 | + if (unlikely(work > weight)) |
---|
| 6842 | + pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", |
---|
| 6843 | + n->poll, work, weight); |
---|
6327 | 6844 | |
---|
6328 | 6845 | if (likely(work < weight)) |
---|
6329 | 6846 | goto out_unlock; |
---|
.. | .. |
---|
6344 | 6861 | */ |
---|
6345 | 6862 | napi_gro_flush(n, HZ >= 1000); |
---|
6346 | 6863 | } |
---|
| 6864 | + |
---|
| 6865 | + gro_normal_list(n); |
---|
6347 | 6866 | |
---|
6348 | 6867 | /* Some drivers may have called napi_schedule |
---|
6349 | 6868 | * prior to exhausting their budget. |
---|
.. | .. |
---|
6366 | 6885 | { |
---|
6367 | 6886 | struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
---|
6368 | 6887 | unsigned long time_limit = jiffies + |
---|
6369 | | - usecs_to_jiffies(netdev_budget_usecs); |
---|
6370 | | - int budget = netdev_budget; |
---|
6371 | | - struct sk_buff_head tofree_q; |
---|
6372 | | - struct sk_buff *skb; |
---|
| 6888 | + usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); |
---|
| 6889 | + int budget = READ_ONCE(netdev_budget); |
---|
6373 | 6890 | LIST_HEAD(list); |
---|
6374 | 6891 | LIST_HEAD(repoll); |
---|
6375 | 6892 | |
---|
6376 | | - __skb_queue_head_init(&tofree_q); |
---|
6377 | | - |
---|
6378 | 6893 | local_irq_disable(); |
---|
6379 | | - skb_queue_splice_init(&sd->tofree_queue, &tofree_q); |
---|
6380 | 6894 | list_splice_init(&sd->poll_list, &list); |
---|
6381 | 6895 | local_irq_enable(); |
---|
6382 | | - |
---|
6383 | | - while ((skb = __skb_dequeue(&tofree_q))) |
---|
6384 | | - kfree_skb(skb); |
---|
6385 | 6896 | |
---|
6386 | 6897 | for (;;) { |
---|
6387 | 6898 | struct napi_struct *n; |
---|
.. | .. |
---|
6412 | 6923 | list_splice_tail(&repoll, &list); |
---|
6413 | 6924 | list_splice(&list, &sd->poll_list); |
---|
6414 | 6925 | if (!list_empty(&sd->poll_list)) |
---|
6415 | | - __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); |
---|
| 6926 | + __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
---|
6416 | 6927 | |
---|
6417 | 6928 | net_rps_action_and_irq_enable(sd); |
---|
6418 | 6929 | out: |
---|
.. | .. |
---|
6424 | 6935 | |
---|
6425 | 6936 | /* upper master flag, there can only be one master device per list */ |
---|
6426 | 6937 | bool master; |
---|
| 6938 | + |
---|
| 6939 | + /* lookup ignore flag */ |
---|
| 6940 | + bool ignore; |
---|
6427 | 6941 | |
---|
6428 | 6942 | /* counter for the number of times this device was added to us */ |
---|
6429 | 6943 | u16 ref_nr; |
---|
.. | .. |
---|
6447 | 6961 | return NULL; |
---|
6448 | 6962 | } |
---|
6449 | 6963 | |
---|
6450 | | -static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) |
---|
| 6964 | +static int ____netdev_has_upper_dev(struct net_device *upper_dev, |
---|
| 6965 | + struct netdev_nested_priv *priv) |
---|
6451 | 6966 | { |
---|
6452 | | - struct net_device *dev = data; |
---|
| 6967 | + struct net_device *dev = (struct net_device *)priv->data; |
---|
6453 | 6968 | |
---|
6454 | 6969 | return upper_dev == dev; |
---|
6455 | 6970 | } |
---|
.. | .. |
---|
6466 | 6981 | bool netdev_has_upper_dev(struct net_device *dev, |
---|
6467 | 6982 | struct net_device *upper_dev) |
---|
6468 | 6983 | { |
---|
| 6984 | + struct netdev_nested_priv priv = { |
---|
| 6985 | + .data = (void *)upper_dev, |
---|
| 6986 | + }; |
---|
| 6987 | + |
---|
6469 | 6988 | ASSERT_RTNL(); |
---|
6470 | 6989 | |
---|
6471 | | - return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, |
---|
6472 | | - upper_dev); |
---|
| 6990 | + return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, |
---|
| 6991 | + &priv); |
---|
6473 | 6992 | } |
---|
6474 | 6993 | EXPORT_SYMBOL(netdev_has_upper_dev); |
---|
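The walker callbacks now receive a struct netdev_nested_priv (flags plus a data pointer) instead of a bare void *, so helpers such as ____netdev_has_upper_dev() unpack a typed context. Below is a minimal sketch of that callback-plus-context shape; `walk_ctx`, `node` and the single-parent chain are invented for the example.

```c
#include <stdio.h>

struct node {
	const char *name;
	struct node *next_upper;      /* simplified single-parent chain */
};

/* Typed context handed to every callback instead of a raw void pointer;
 * flags travel alongside the payload.
 */
struct walk_ctx {
	unsigned int flags;
	void *data;
};

static int matches_target(struct node *n, struct walk_ctx *ctx)
{
	return n == (struct node *)ctx->data;
}

/* Walk all uppers of @n, stopping as soon as a callback returns non-zero. */
static int walk_uppers(struct node *n,
		       int (*fn)(struct node *, struct walk_ctx *),
		       struct walk_ctx *ctx)
{
	for (n = n->next_upper; n; n = n->next_upper) {
		int ret = fn(n, ctx);

		if (ret)
			return ret;
	}
	return 0;
}

int main(void)
{
	struct node bond = { "bond0", NULL };
	struct node eth  = { "eth0", &bond };
	struct walk_ctx ctx = { .flags = 0, .data = &bond };

	printf("eth0 has upper bond0: %d\n",
	       walk_uppers(&eth, matches_target, &ctx));
	return 0;
}
```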
6475 | 6994 | |
---|
.. | .. |
---|
6486 | 7005 | bool netdev_has_upper_dev_all_rcu(struct net_device *dev, |
---|
6487 | 7006 | struct net_device *upper_dev) |
---|
6488 | 7007 | { |
---|
6489 | | - return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, |
---|
6490 | | - upper_dev); |
---|
| 7008 | + struct netdev_nested_priv priv = { |
---|
| 7009 | + .data = (void *)upper_dev, |
---|
| 7010 | + }; |
---|
| 7011 | + |
---|
| 7012 | + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, |
---|
| 7013 | + &priv); |
---|
6491 | 7014 | } |
---|
6492 | 7015 | EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); |
---|
6493 | 7016 | |
---|
.. | .. |
---|
6529 | 7052 | return NULL; |
---|
6530 | 7053 | } |
---|
6531 | 7054 | EXPORT_SYMBOL(netdev_master_upper_dev_get); |
---|
| 7055 | + |
---|
| 7056 | +static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev) |
---|
| 7057 | +{ |
---|
| 7058 | + struct netdev_adjacent *upper; |
---|
| 7059 | + |
---|
| 7060 | + ASSERT_RTNL(); |
---|
| 7061 | + |
---|
| 7062 | + if (list_empty(&dev->adj_list.upper)) |
---|
| 7063 | + return NULL; |
---|
| 7064 | + |
---|
| 7065 | + upper = list_first_entry(&dev->adj_list.upper, |
---|
| 7066 | + struct netdev_adjacent, list); |
---|
| 7067 | + if (likely(upper->master) && !upper->ignore) |
---|
| 7068 | + return upper->dev; |
---|
| 7069 | + return NULL; |
---|
| 7070 | +} |
---|
6532 | 7071 | |
---|
6533 | 7072 | /** |
---|
6534 | 7073 | * netdev_has_any_lower_dev - Check if device is linked to some device |
---|
.. | .. |
---|
6580 | 7119 | } |
---|
6581 | 7120 | EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); |
---|
6582 | 7121 | |
---|
6583 | | -static struct net_device *netdev_next_upper_dev(struct net_device *dev, |
---|
6584 | | - struct list_head **iter) |
---|
| 7122 | +static struct net_device *__netdev_next_upper_dev(struct net_device *dev, |
---|
| 7123 | + struct list_head **iter, |
---|
| 7124 | + bool *ignore) |
---|
6585 | 7125 | { |
---|
6586 | 7126 | struct netdev_adjacent *upper; |
---|
6587 | 7127 | |
---|
.. | .. |
---|
6591 | 7131 | return NULL; |
---|
6592 | 7132 | |
---|
6593 | 7133 | *iter = &upper->list; |
---|
| 7134 | + *ignore = upper->ignore; |
---|
6594 | 7135 | |
---|
6595 | 7136 | return upper->dev; |
---|
6596 | 7137 | } |
---|
.. | .. |
---|
6612 | 7153 | return upper->dev; |
---|
6613 | 7154 | } |
---|
6614 | 7155 | |
---|
6615 | | -static int netdev_walk_all_upper_dev(struct net_device *dev, |
---|
6616 | | - int (*fn)(struct net_device *dev, |
---|
6617 | | - void *data), |
---|
6618 | | - void *data) |
---|
| 7156 | +static int __netdev_walk_all_upper_dev(struct net_device *dev, |
---|
| 7157 | + int (*fn)(struct net_device *dev, |
---|
| 7158 | + struct netdev_nested_priv *priv), |
---|
| 7159 | + struct netdev_nested_priv *priv) |
---|
6619 | 7160 | { |
---|
6620 | 7161 | struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
---|
6621 | 7162 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
---|
6622 | 7163 | int ret, cur = 0; |
---|
| 7164 | + bool ignore; |
---|
6623 | 7165 | |
---|
6624 | 7166 | now = dev; |
---|
6625 | 7167 | iter = &dev->adj_list.upper; |
---|
6626 | 7168 | |
---|
6627 | 7169 | while (1) { |
---|
6628 | 7170 | if (now != dev) { |
---|
6629 | | - ret = fn(now, data); |
---|
| 7171 | + ret = fn(now, priv); |
---|
6630 | 7172 | if (ret) |
---|
6631 | 7173 | return ret; |
---|
6632 | 7174 | } |
---|
6633 | 7175 | |
---|
6634 | 7176 | next = NULL; |
---|
6635 | 7177 | while (1) { |
---|
6636 | | - udev = netdev_next_upper_dev(now, &iter); |
---|
| 7178 | + udev = __netdev_next_upper_dev(now, &iter, &ignore); |
---|
6637 | 7179 | if (!udev) |
---|
6638 | 7180 | break; |
---|
| 7181 | + if (ignore) |
---|
| 7182 | + continue; |
---|
6639 | 7183 | |
---|
6640 | 7184 | next = udev; |
---|
6641 | 7185 | niter = &udev->adj_list.upper; |
---|
.. | .. |
---|
6660 | 7204 | |
---|
6661 | 7205 | int netdev_walk_all_upper_dev_rcu(struct net_device *dev, |
---|
6662 | 7206 | int (*fn)(struct net_device *dev, |
---|
6663 | | - void *data), |
---|
6664 | | - void *data) |
---|
| 7207 | + struct netdev_nested_priv *priv), |
---|
| 7208 | + struct netdev_nested_priv *priv) |
---|
6665 | 7209 | { |
---|
6666 | 7210 | struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
---|
6667 | 7211 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
---|
.. | .. |
---|
6672 | 7216 | |
---|
6673 | 7217 | while (1) { |
---|
6674 | 7218 | if (now != dev) { |
---|
6675 | | - ret = fn(now, data); |
---|
| 7219 | + ret = fn(now, priv); |
---|
6676 | 7220 | if (ret) |
---|
6677 | 7221 | return ret; |
---|
6678 | 7222 | } |
---|
.. | .. |
---|
6704 | 7248 | return 0; |
---|
6705 | 7249 | } |
---|
6706 | 7250 | EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); |
---|
| 7251 | + |
---|
| 7252 | +static bool __netdev_has_upper_dev(struct net_device *dev, |
---|
| 7253 | + struct net_device *upper_dev) |
---|
| 7254 | +{ |
---|
| 7255 | + struct netdev_nested_priv priv = { |
---|
| 7256 | + .flags = 0, |
---|
| 7257 | + .data = (void *)upper_dev, |
---|
| 7258 | + }; |
---|
| 7259 | + |
---|
| 7260 | + ASSERT_RTNL(); |
---|
| 7261 | + |
---|
| 7262 | + return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, |
---|
| 7263 | + &priv); |
---|
| 7264 | +} |
---|
6707 | 7265 | |
---|
6708 | 7266 | /** |
---|
6709 | 7267 | * netdev_lower_get_next_private - Get the next ->private from the |
---|
.. | .. |
---|
6801 | 7359 | return lower->dev; |
---|
6802 | 7360 | } |
---|
6803 | 7361 | |
---|
| 7362 | +static struct net_device *__netdev_next_lower_dev(struct net_device *dev, |
---|
| 7363 | + struct list_head **iter, |
---|
| 7364 | + bool *ignore) |
---|
| 7365 | +{ |
---|
| 7366 | + struct netdev_adjacent *lower; |
---|
| 7367 | + |
---|
| 7368 | + lower = list_entry((*iter)->next, struct netdev_adjacent, list); |
---|
| 7369 | + |
---|
| 7370 | + if (&lower->list == &dev->adj_list.lower) |
---|
| 7371 | + return NULL; |
---|
| 7372 | + |
---|
| 7373 | + *iter = &lower->list; |
---|
| 7374 | + *ignore = lower->ignore; |
---|
| 7375 | + |
---|
| 7376 | + return lower->dev; |
---|
| 7377 | +} |
---|
| 7378 | + |
---|
6804 | 7379 | int netdev_walk_all_lower_dev(struct net_device *dev, |
---|
6805 | 7380 | int (*fn)(struct net_device *dev, |
---|
6806 | | - void *data), |
---|
6807 | | - void *data) |
---|
| 7381 | + struct netdev_nested_priv *priv), |
---|
| 7382 | + struct netdev_nested_priv *priv) |
---|
6808 | 7383 | { |
---|
6809 | 7384 | struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
---|
6810 | 7385 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
---|
.. | .. |
---|
6815 | 7390 | |
---|
6816 | 7391 | while (1) { |
---|
6817 | 7392 | if (now != dev) { |
---|
6818 | | - ret = fn(now, data); |
---|
| 7393 | + ret = fn(now, priv); |
---|
6819 | 7394 | if (ret) |
---|
6820 | 7395 | return ret; |
---|
6821 | 7396 | } |
---|
.. | .. |
---|
6848 | 7423 | } |
---|
6849 | 7424 | EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); |
---|
6850 | 7425 | |
---|
6851 | | -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, |
---|
6852 | | - struct list_head **iter) |
---|
| 7426 | +static int __netdev_walk_all_lower_dev(struct net_device *dev, |
---|
| 7427 | + int (*fn)(struct net_device *dev, |
---|
| 7428 | + struct netdev_nested_priv *priv), |
---|
| 7429 | + struct netdev_nested_priv *priv) |
---|
| 7430 | +{ |
---|
| 7431 | + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
---|
| 7432 | + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
---|
| 7433 | + int ret, cur = 0; |
---|
| 7434 | + bool ignore; |
---|
| 7435 | + |
---|
| 7436 | + now = dev; |
---|
| 7437 | + iter = &dev->adj_list.lower; |
---|
| 7438 | + |
---|
| 7439 | + while (1) { |
---|
| 7440 | + if (now != dev) { |
---|
| 7441 | + ret = fn(now, priv); |
---|
| 7442 | + if (ret) |
---|
| 7443 | + return ret; |
---|
| 7444 | + } |
---|
| 7445 | + |
---|
| 7446 | + next = NULL; |
---|
| 7447 | + while (1) { |
---|
| 7448 | + ldev = __netdev_next_lower_dev(now, &iter, &ignore); |
---|
| 7449 | + if (!ldev) |
---|
| 7450 | + break; |
---|
| 7451 | + if (ignore) |
---|
| 7452 | + continue; |
---|
| 7453 | + |
---|
| 7454 | + next = ldev; |
---|
| 7455 | + niter = &ldev->adj_list.lower; |
---|
| 7456 | + dev_stack[cur] = now; |
---|
| 7457 | + iter_stack[cur++] = iter; |
---|
| 7458 | + break; |
---|
| 7459 | + } |
---|
| 7460 | + |
---|
| 7461 | + if (!next) { |
---|
| 7462 | + if (!cur) |
---|
| 7463 | + return 0; |
---|
| 7464 | + next = dev_stack[--cur]; |
---|
| 7465 | + niter = iter_stack[cur]; |
---|
| 7466 | + } |
---|
| 7467 | + |
---|
| 7468 | + now = next; |
---|
| 7469 | + iter = niter; |
---|
| 7470 | + } |
---|
| 7471 | + |
---|
| 7472 | + return 0; |
---|
| 7473 | +} |
---|
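__netdev_walk_all_lower_dev() above visits every lower device reachable from @dev without recursion: it keeps an explicit stack of (device, iterator) pairs bounded by MAX_NEST_DEV, resumes the parent's iterator when a branch is exhausted, and skips adjacencies whose ignore flag is set. The following is a standalone sketch of an explicit-stack depth-first walk in the same spirit (each node is visited once, when first descended into, rather than mirroring the kernel loop line for line); the adjacency-array graph and the MAX_NEST/MAX_ADJ bounds are invented for the example.

```c
#include <stdio.h>

#define MAX_NEST 8    /* maximum stacking depth, like MAX_NEST_DEV */
#define MAX_ADJ  4    /* adjacency arrays are NULL-terminated within this */

struct node {
	const char *name;
	struct node *lower[MAX_ADJ];   /* NULL-terminated adjacency array */
	int ignore[MAX_ADJ];           /* skip this edge if set           */
};

/* Depth-first walk of every lower node reachable from @dev, using an
 * explicit bounded stack of (node, resume index) pairs instead of
 * recursion; edges flagged "ignore" are skipped, and branches deeper
 * than MAX_NEST are simply not descended into in this sketch.
 */
static int walk_all_lower(struct node *dev, int (*fn)(struct node *))
{
	struct node *node_stack[MAX_NEST + 1];
	int iter_stack[MAX_NEST + 1];
	struct node *now = dev;
	int iter = 0, cur = 0;

	while (1) {
		/* advance past ignored edges */
		while (now->lower[iter] && now->ignore[iter])
			iter++;

		if (now->lower[iter] && cur <= MAX_NEST) {
			struct node *child = now->lower[iter];
			int ret = fn(child);

			if (ret)
				return ret;
			node_stack[cur] = now;        /* remember resume point */
			iter_stack[cur++] = iter + 1;
			now = child;
			iter = 0;
			continue;
		}

		/* branch exhausted: pop back to the parent, or finish */
		if (!cur)
			return 0;
		now = node_stack[--cur];
		iter = iter_stack[cur];
	}
}

static int print_node(struct node *n)
{
	printf("visit %s\n", n->name);
	return 0;
}

int main(void)
{
	struct node d = { "d", { NULL }, { 0 } };
	struct node c = { "c", { NULL }, { 0 } };
	struct node b = { "b", { &c, &d, NULL }, { 0, 1, 0 } }; /* b->d ignored */
	struct node a = { "a", { &b, &d, NULL }, { 0, 0, 0 } };

	return walk_all_lower(&a, print_node);
}
```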
| 7474 | + |
---|
| 7475 | +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, |
---|
| 7476 | + struct list_head **iter) |
---|
6853 | 7477 | { |
---|
6854 | 7478 | struct netdev_adjacent *lower; |
---|
6855 | 7479 | |
---|
.. | .. |
---|
6861 | 7485 | |
---|
6862 | 7486 | return lower->dev; |
---|
6863 | 7487 | } |
---|
| 7488 | +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); |
---|
6864 | 7489 | |
---|
6865 | 7490 | static u8 __netdev_upper_depth(struct net_device *dev) |
---|
6866 | 7491 | { |
---|
6867 | 7492 | struct net_device *udev; |
---|
6868 | 7493 | struct list_head *iter; |
---|
6869 | 7494 | u8 max_depth = 0; |
---|
| 7495 | + bool ignore; |
---|
6870 | 7496 | |
---|
6871 | 7497 | for (iter = &dev->adj_list.upper, |
---|
6872 | | - udev = netdev_next_upper_dev(dev, &iter); |
---|
| 7498 | + udev = __netdev_next_upper_dev(dev, &iter, &ignore); |
---|
6873 | 7499 | udev; |
---|
6874 | | - udev = netdev_next_upper_dev(dev, &iter)) { |
---|
| 7500 | + udev = __netdev_next_upper_dev(dev, &iter, &ignore)) { |
---|
| 7501 | + if (ignore) |
---|
| 7502 | + continue; |
---|
6875 | 7503 | if (max_depth < udev->upper_level) |
---|
6876 | 7504 | max_depth = udev->upper_level; |
---|
6877 | 7505 | } |
---|
.. | .. |
---|
6884 | 7512 | struct net_device *ldev; |
---|
6885 | 7513 | struct list_head *iter; |
---|
6886 | 7514 | u8 max_depth = 0; |
---|
| 7515 | + bool ignore; |
---|
6887 | 7516 | |
---|
6888 | 7517 | for (iter = &dev->adj_list.lower, |
---|
6889 | | - ldev = netdev_next_lower_dev(dev, &iter); |
---|
| 7518 | + ldev = __netdev_next_lower_dev(dev, &iter, &ignore); |
---|
6890 | 7519 | ldev; |
---|
6891 | | - ldev = netdev_next_lower_dev(dev, &iter)) { |
---|
| 7520 | + ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) { |
---|
| 7521 | + if (ignore) |
---|
| 7522 | + continue; |
---|
6892 | 7523 | if (max_depth < ldev->lower_level) |
---|
6893 | 7524 | max_depth = ldev->lower_level; |
---|
6894 | 7525 | } |
---|
.. | .. |
---|
6896 | 7527 | return max_depth; |
---|
6897 | 7528 | } |
---|
6898 | 7529 | |
---|
6899 | | -static int __netdev_update_upper_level(struct net_device *dev, void *data) |
---|
| 7530 | +static int __netdev_update_upper_level(struct net_device *dev, |
---|
| 7531 | + struct netdev_nested_priv *__unused) |
---|
6900 | 7532 | { |
---|
6901 | 7533 | dev->upper_level = __netdev_upper_depth(dev) + 1; |
---|
6902 | 7534 | return 0; |
---|
6903 | 7535 | } |
---|
6904 | 7536 | |
---|
6905 | | -static int __netdev_update_lower_level(struct net_device *dev, void *data) |
---|
| 7537 | +static int __netdev_update_lower_level(struct net_device *dev, |
---|
| 7538 | + struct netdev_nested_priv *priv) |
---|
6906 | 7539 | { |
---|
6907 | 7540 | dev->lower_level = __netdev_lower_depth(dev) + 1; |
---|
| 7541 | + |
---|
| 7542 | +#ifdef CONFIG_LOCKDEP |
---|
| 7543 | + if (!priv) |
---|
| 7544 | + return 0; |
---|
| 7545 | + |
---|
| 7546 | + if (priv->flags & NESTED_SYNC_IMM) |
---|
| 7547 | + dev->nested_level = dev->lower_level - 1; |
---|
| 7548 | + if (priv->flags & NESTED_SYNC_TODO) |
---|
| 7549 | + net_unlink_todo(dev); |
---|
| 7550 | +#endif |
---|
6908 | 7551 | return 0; |
---|
6909 | 7552 | } |
---|
6910 | 7553 | |
---|
6911 | 7554 | int netdev_walk_all_lower_dev_rcu(struct net_device *dev, |
---|
6912 | 7555 | int (*fn)(struct net_device *dev, |
---|
6913 | | - void *data), |
---|
6914 | | - void *data) |
---|
| 7556 | + struct netdev_nested_priv *priv), |
---|
| 7557 | + struct netdev_nested_priv *priv) |
---|
6915 | 7558 | { |
---|
6916 | 7559 | struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
---|
6917 | 7560 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
---|
.. | .. |
---|
6922 | 7565 | |
---|
6923 | 7566 | while (1) { |
---|
6924 | 7567 | if (now != dev) { |
---|
6925 | | - ret = fn(now, data); |
---|
| 7568 | + ret = fn(now, priv); |
---|
6926 | 7569 | if (ret) |
---|
6927 | 7570 | return ret; |
---|
6928 | 7571 | } |
---|
.. | .. |
---|
7052 | 7695 | adj->master = master; |
---|
7053 | 7696 | adj->ref_nr = 1; |
---|
7054 | 7697 | adj->private = private; |
---|
| 7698 | + adj->ignore = false; |
---|
7055 | 7699 | dev_hold(adj_dev); |
---|
7056 | 7700 | |
---|
7057 | 7701 | pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", |
---|
.. | .. |
---|
7181 | 7825 | static int __netdev_upper_dev_link(struct net_device *dev, |
---|
7182 | 7826 | struct net_device *upper_dev, bool master, |
---|
7183 | 7827 | void *upper_priv, void *upper_info, |
---|
| 7828 | + struct netdev_nested_priv *priv, |
---|
7184 | 7829 | struct netlink_ext_ack *extack) |
---|
7185 | 7830 | { |
---|
7186 | 7831 | struct netdev_notifier_changeupper_info changeupper_info = { |
---|
.. | .. |
---|
7202 | 7847 | return -EBUSY; |
---|
7203 | 7848 | |
---|
7204 | 7849 | /* To prevent loops, check if dev is not upper device to upper_dev. */ |
---|
7205 | | - if (netdev_has_upper_dev(upper_dev, dev)) |
---|
| 7850 | + if (__netdev_has_upper_dev(upper_dev, dev)) |
---|
7206 | 7851 | return -EBUSY; |
---|
7207 | 7852 | |
---|
7208 | 7853 | if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) |
---|
7209 | 7854 | return -EMLINK; |
---|
7210 | 7855 | |
---|
7211 | 7856 | if (!master) { |
---|
7212 | | - if (netdev_has_upper_dev(dev, upper_dev)) |
---|
| 7857 | + if (__netdev_has_upper_dev(dev, upper_dev)) |
---|
7213 | 7858 | return -EEXIST; |
---|
7214 | 7859 | } else { |
---|
7215 | | - master_dev = netdev_master_upper_dev_get(dev); |
---|
| 7860 | + master_dev = __netdev_master_upper_dev_get(dev); |
---|
7216 | 7861 | if (master_dev) |
---|
7217 | 7862 | return master_dev == upper_dev ? -EEXIST : -EBUSY; |
---|
7218 | 7863 | } |
---|
.. | .. |
---|
7235 | 7880 | goto rollback; |
---|
7236 | 7881 | |
---|
7237 | 7882 | __netdev_update_upper_level(dev, NULL); |
---|
7238 | | - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
---|
| 7883 | + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
---|
7239 | 7884 | |
---|
7240 | | - __netdev_update_lower_level(upper_dev, NULL); |
---|
7241 | | - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); |
---|
| 7885 | + __netdev_update_lower_level(upper_dev, priv); |
---|
| 7886 | + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, |
---|
| 7887 | + priv); |
---|
7242 | 7888 | |
---|
7243 | 7889 | return 0; |
---|
7244 | 7890 | |
---|
.. | .. |
---|
7263 | 7909 | struct net_device *upper_dev, |
---|
7264 | 7910 | struct netlink_ext_ack *extack) |
---|
7265 | 7911 | { |
---|
| 7912 | + struct netdev_nested_priv priv = { |
---|
| 7913 | + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, |
---|
| 7914 | + .data = NULL, |
---|
| 7915 | + }; |
---|
| 7916 | + |
---|
7266 | 7917 | return __netdev_upper_dev_link(dev, upper_dev, false, |
---|
7267 | | - NULL, NULL, extack); |
---|
| 7918 | + NULL, NULL, &priv, extack); |
---|
7268 | 7919 | } |
---|
7269 | 7920 | EXPORT_SYMBOL(netdev_upper_dev_link); |
---|
7270 | 7921 | |
---|
.. | .. |
---|
7287 | 7938 | void *upper_priv, void *upper_info, |
---|
7288 | 7939 | struct netlink_ext_ack *extack) |
---|
7289 | 7940 | { |
---|
| 7941 | + struct netdev_nested_priv priv = { |
---|
| 7942 | + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, |
---|
| 7943 | + .data = NULL, |
---|
| 7944 | + }; |
---|
| 7945 | + |
---|
7290 | 7946 | return __netdev_upper_dev_link(dev, upper_dev, true, |
---|
7291 | | - upper_priv, upper_info, extack); |
---|
| 7947 | + upper_priv, upper_info, &priv, extack); |
---|
7292 | 7948 | } |
---|
7293 | 7949 | EXPORT_SYMBOL(netdev_master_upper_dev_link); |
---|
7294 | 7950 | |
---|
7295 | | -/** |
---|
7296 | | - * netdev_upper_dev_unlink - Removes a link to upper device |
---|
7297 | | - * @dev: device |
---|
7298 | | - * @upper_dev: new upper device |
---|
7299 | | - * |
---|
7300 | | - * Removes a link to device which is upper to this one. The caller must hold |
---|
7301 | | - * the RTNL lock. |
---|
7302 | | - */ |
---|
7303 | | -void netdev_upper_dev_unlink(struct net_device *dev, |
---|
7304 | | - struct net_device *upper_dev) |
---|
| 7951 | +static void __netdev_upper_dev_unlink(struct net_device *dev, |
---|
| 7952 | + struct net_device *upper_dev, |
---|
| 7953 | + struct netdev_nested_priv *priv) |
---|
7305 | 7954 | { |
---|
7306 | 7955 | struct netdev_notifier_changeupper_info changeupper_info = { |
---|
7307 | 7956 | .info = { |
---|
.. | .. |
---|
7324 | 7973 | &changeupper_info.info); |
---|
7325 | 7974 | |
---|
7326 | 7975 | __netdev_update_upper_level(dev, NULL); |
---|
7327 | | - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
---|
| 7976 | + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
---|
7328 | 7977 | |
---|
7329 | | - __netdev_update_lower_level(upper_dev, NULL); |
---|
7330 | | - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); |
---|
| 7978 | + __netdev_update_lower_level(upper_dev, priv); |
---|
| 7979 | + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, |
---|
| 7980 | + priv); |
---|
| 7981 | +} |
---|
| 7982 | + |
---|
| 7983 | +/** |
---|
| 7984 | + * netdev_upper_dev_unlink - Removes a link to upper device |
---|
| 7985 | + * @dev: device |
---|
| 7986 | + * @upper_dev: new upper device |
---|
| 7987 | + * |
---|
| 7988 | + * Removes a link to device which is upper to this one. The caller must hold |
---|
| 7989 | + * the RTNL lock. |
---|
| 7990 | + */ |
---|
| 7991 | +void netdev_upper_dev_unlink(struct net_device *dev, |
---|
| 7992 | + struct net_device *upper_dev) |
---|
| 7993 | +{ |
---|
| 7994 | + struct netdev_nested_priv priv = { |
---|
| 7995 | + .flags = NESTED_SYNC_TODO, |
---|
| 7996 | + .data = NULL, |
---|
| 7997 | + }; |
---|
| 7998 | + |
---|
| 7999 | + __netdev_upper_dev_unlink(dev, upper_dev, &priv); |
---|
7331 | 8000 | } |
---|
7332 | 8001 | EXPORT_SYMBOL(netdev_upper_dev_unlink); |
---|
| 8002 | + |
---|
| 8003 | +static void __netdev_adjacent_dev_set(struct net_device *upper_dev, |
---|
| 8004 | + struct net_device *lower_dev, |
---|
| 8005 | + bool val) |
---|
| 8006 | +{ |
---|
| 8007 | + struct netdev_adjacent *adj; |
---|
| 8008 | + |
---|
| 8009 | + adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower); |
---|
| 8010 | + if (adj) |
---|
| 8011 | + adj->ignore = val; |
---|
| 8012 | + |
---|
| 8013 | + adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper); |
---|
| 8014 | + if (adj) |
---|
| 8015 | + adj->ignore = val; |
---|
| 8016 | +} |
---|
| 8017 | + |
---|
| 8018 | +static void netdev_adjacent_dev_disable(struct net_device *upper_dev, |
---|
| 8019 | + struct net_device *lower_dev) |
---|
| 8020 | +{ |
---|
| 8021 | + __netdev_adjacent_dev_set(upper_dev, lower_dev, true); |
---|
| 8022 | +} |
---|
| 8023 | + |
---|
| 8024 | +static void netdev_adjacent_dev_enable(struct net_device *upper_dev, |
---|
| 8025 | + struct net_device *lower_dev) |
---|
| 8026 | +{ |
---|
| 8027 | + __netdev_adjacent_dev_set(upper_dev, lower_dev, false); |
---|
| 8028 | +} |
---|
| 8029 | + |
---|
| 8030 | +int netdev_adjacent_change_prepare(struct net_device *old_dev, |
---|
| 8031 | + struct net_device *new_dev, |
---|
| 8032 | + struct net_device *dev, |
---|
| 8033 | + struct netlink_ext_ack *extack) |
---|
| 8034 | +{ |
---|
| 8035 | + struct netdev_nested_priv priv = { |
---|
| 8036 | + .flags = 0, |
---|
| 8037 | + .data = NULL, |
---|
| 8038 | + }; |
---|
| 8039 | + int err; |
---|
| 8040 | + |
---|
| 8041 | + if (!new_dev) |
---|
| 8042 | + return 0; |
---|
| 8043 | + |
---|
| 8044 | + if (old_dev && new_dev != old_dev) |
---|
| 8045 | + netdev_adjacent_dev_disable(dev, old_dev); |
---|
| 8046 | + err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, |
---|
| 8047 | + extack); |
---|
| 8048 | + if (err) { |
---|
| 8049 | + if (old_dev && new_dev != old_dev) |
---|
| 8050 | + netdev_adjacent_dev_enable(dev, old_dev); |
---|
| 8051 | + return err; |
---|
| 8052 | + } |
---|
| 8053 | + |
---|
| 8054 | + return 0; |
---|
| 8055 | +} |
---|
| 8056 | +EXPORT_SYMBOL(netdev_adjacent_change_prepare); |
---|
| 8057 | + |
---|
| 8058 | +void netdev_adjacent_change_commit(struct net_device *old_dev, |
---|
| 8059 | + struct net_device *new_dev, |
---|
| 8060 | + struct net_device *dev) |
---|
| 8061 | +{ |
---|
| 8062 | + struct netdev_nested_priv priv = { |
---|
| 8063 | + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, |
---|
| 8064 | + .data = NULL, |
---|
| 8065 | + }; |
---|
| 8066 | + |
---|
| 8067 | + if (!new_dev || !old_dev) |
---|
| 8068 | + return; |
---|
| 8069 | + |
---|
| 8070 | + if (new_dev == old_dev) |
---|
| 8071 | + return; |
---|
| 8072 | + |
---|
| 8073 | + netdev_adjacent_dev_enable(dev, old_dev); |
---|
| 8074 | + __netdev_upper_dev_unlink(old_dev, dev, &priv); |
---|
| 8075 | +} |
---|
| 8076 | +EXPORT_SYMBOL(netdev_adjacent_change_commit); |
---|
| 8077 | + |
---|
| 8078 | +void netdev_adjacent_change_abort(struct net_device *old_dev, |
---|
| 8079 | + struct net_device *new_dev, |
---|
| 8080 | + struct net_device *dev) |
---|
| 8081 | +{ |
---|
| 8082 | + struct netdev_nested_priv priv = { |
---|
| 8083 | + .flags = 0, |
---|
| 8084 | + .data = NULL, |
---|
| 8085 | + }; |
---|
| 8086 | + |
---|
| 8087 | + if (!new_dev) |
---|
| 8088 | + return; |
---|
| 8089 | + |
---|
| 8090 | + if (old_dev && new_dev != old_dev) |
---|
| 8091 | + netdev_adjacent_dev_enable(dev, old_dev); |
---|
| 8092 | + |
---|
| 8093 | + __netdev_upper_dev_unlink(new_dev, dev, &priv); |
---|
| 8094 | +} |
---|
| 8095 | +EXPORT_SYMBOL(netdev_adjacent_change_abort); |
---|
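
The three helpers above form a prepare/commit/abort sequence for an upper device that swaps which lower device it is linked to: prepare links the new lower while marking the old adjacency as ignored, commit drops the old link, and abort drops the new one and restores the old adjacency. A minimal sketch under RTNL, assuming a hypothetical driver hook `example_activate_lower()` and that both lower devices exist and differ:

```c
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Assumed driver hook, not part of this patch. */
static int example_activate_lower(struct net_device *upper,
				  struct net_device *lower);

static int example_swap_lower(struct net_device *upper,
			      struct net_device *old_lower,
			      struct net_device *new_lower,
			      struct netlink_ext_ack *extack)
{
	int err;

	ASSERT_RTNL();

	/* Link upper over new_lower; the old adjacency is marked "ignore"
	 * so it no longer counts toward the nesting-depth checks. */
	err = netdev_adjacent_change_prepare(old_lower, new_lower, upper, extack);
	if (err)
		return err;

	err = example_activate_lower(upper, new_lower);
	if (err) {
		/* Drop the new link and re-enable the old adjacency. */
		netdev_adjacent_change_abort(old_lower, new_lower, upper);
		return err;
	}

	/* Re-enable and then unlink the old adjacency. */
	netdev_adjacent_change_commit(old_lower, new_lower, upper);
	return 0;
}
```
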
7333 | 8096 | |
---|
7334 | 8097 | /** |
---|
7335 | 8098 | * netdev_bonding_info_change - Dispatch event about slave change |
---|
.. | .. |
---|
7352 | 8115 | &info.info); |
---|
7353 | 8116 | } |
---|
7354 | 8117 | EXPORT_SYMBOL(netdev_bonding_info_change); |
---|
| 8118 | + |
---|
| 8119 | +/** |
---|
| 8120 | + * netdev_get_xmit_slave - Get the xmit slave of master device |
---|
| 8121 | + * @dev: device |
---|
| 8122 | + * @skb: The packet |
---|
| 8123 | + * @all_slaves: assume all the slaves are active |
---|
| 8124 | + * |
---|
| 8125 | + * The reference counters are not incremented so the caller must be |
---|
| 8126 | + * careful with locks. The caller must hold RCU lock. |
---|
| 8127 | + * %NULL is returned if no slave is found. |
---|
| 8128 | + */ |
---|
| 8129 | + |
---|
| 8130 | +struct net_device *netdev_get_xmit_slave(struct net_device *dev, |
---|
| 8131 | + struct sk_buff *skb, |
---|
| 8132 | + bool all_slaves) |
---|
| 8133 | +{ |
---|
| 8134 | + const struct net_device_ops *ops = dev->netdev_ops; |
---|
| 8135 | + |
---|
| 8136 | + if (!ops->ndo_get_xmit_slave) |
---|
| 8137 | + return NULL; |
---|
| 8138 | + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); |
---|
| 8139 | +} |
---|
| 8140 | +EXPORT_SYMBOL(netdev_get_xmit_slave); |
---|
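
A short usage sketch for the new helper: because no reference is taken on the returned slave, the pointer is only valid inside the RCU read-side section, so any data of interest has to be extracted before unlocking. The function name below is hypothetical.

```c
#include <linux/netdevice.h>
#include <linux/rcupdate.h>

/* Return the ifindex of the slave a bond-like master would transmit this
 * skb on, or 0 if the master provides no ndo_get_xmit_slave. */
static int example_xmit_slave_ifindex(struct net_device *master,
				      struct sk_buff *skb)
{
	struct net_device *slave;
	int ifindex = 0;

	rcu_read_lock();
	slave = netdev_get_xmit_slave(master, skb, false);
	if (slave)
		ifindex = slave->ifindex;
	rcu_read_unlock();

	return ifindex;
}
```
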
7355 | 8141 | |
---|
7356 | 8142 | static void netdev_adjacent_add_links(struct net_device *dev) |
---|
7357 | 8143 | { |
---|
.. | .. |
---|
7443 | 8229 | } |
---|
7444 | 8230 | EXPORT_SYMBOL(netdev_lower_dev_get_private); |
---|
7445 | 8231 | |
---|
7446 | | - |
---|
7447 | | -int dev_get_nest_level(struct net_device *dev) |
---|
7448 | | -{ |
---|
7449 | | - struct net_device *lower = NULL; |
---|
7450 | | - struct list_head *iter; |
---|
7451 | | - int max_nest = -1; |
---|
7452 | | - int nest; |
---|
7453 | | - |
---|
7454 | | - ASSERT_RTNL(); |
---|
7455 | | - |
---|
7456 | | - netdev_for_each_lower_dev(dev, lower, iter) { |
---|
7457 | | - nest = dev_get_nest_level(lower); |
---|
7458 | | - if (max_nest < nest) |
---|
7459 | | - max_nest = nest; |
---|
7460 | | - } |
---|
7461 | | - |
---|
7462 | | - return max_nest + 1; |
---|
7463 | | -} |
---|
7464 | | -EXPORT_SYMBOL(dev_get_nest_level); |
---|
7465 | 8232 | |
---|
7466 | 8233 | /** |
---|
7467 | 8234 | * netdev_lower_change - Dispatch event about lower device state change |
---|
.. | .. |
---|
7689 | 8456 | } |
---|
7690 | 8457 | EXPORT_SYMBOL(dev_get_flags); |
---|
7691 | 8458 | |
---|
7692 | | -int __dev_change_flags(struct net_device *dev, unsigned int flags) |
---|
| 8459 | +int __dev_change_flags(struct net_device *dev, unsigned int flags, |
---|
| 8460 | + struct netlink_ext_ack *extack) |
---|
7693 | 8461 | { |
---|
7694 | 8462 | unsigned int old_flags = dev->flags; |
---|
7695 | 8463 | int ret; |
---|
.. | .. |
---|
7726 | 8494 | if (old_flags & IFF_UP) |
---|
7727 | 8495 | __dev_close(dev); |
---|
7728 | 8496 | else |
---|
7729 | | - ret = __dev_open(dev); |
---|
| 8497 | + ret = __dev_open(dev, extack); |
---|
7730 | 8498 | } |
---|
7731 | 8499 | |
---|
7732 | 8500 | if ((flags ^ dev->gflags) & IFF_PROMISC) { |
---|
.. | .. |
---|
7786 | 8554 | * dev_change_flags - change device settings |
---|
7787 | 8555 | * @dev: device |
---|
7788 | 8556 | * @flags: device state flags |
---|
| 8557 | + * @extack: netlink extended ack |
---|
7789 | 8558 | * |
---|
7790 | 8559 | * Change settings on device based state flags. The flags are |
---|
7791 | 8560 | * in the userspace exported format. |
---|
7792 | 8561 | */ |
---|
7793 | | -int dev_change_flags(struct net_device *dev, unsigned int flags) |
---|
| 8562 | +int dev_change_flags(struct net_device *dev, unsigned int flags, |
---|
| 8563 | + struct netlink_ext_ack *extack) |
---|
7794 | 8564 | { |
---|
7795 | 8565 | int ret; |
---|
7796 | 8566 | unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; |
---|
7797 | 8567 | |
---|
7798 | | - ret = __dev_change_flags(dev, flags); |
---|
| 8568 | + ret = __dev_change_flags(dev, flags, extack); |
---|
7799 | 8569 | if (ret < 0) |
---|
7800 | 8570 | return ret; |
---|
7801 | 8571 | |
---|
.. | .. |
---|
7938 | 8708 | EXPORT_SYMBOL(dev_set_group); |
---|
7939 | 8709 | |
---|
7940 | 8710 | /** |
---|
| 8711 | + * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR. |
---|
| 8712 | + * @dev: device |
---|
| 8713 | + * @addr: new address |
---|
| 8714 | + * @extack: netlink extended ack |
---|
| 8715 | + */ |
---|
| 8716 | +int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, |
---|
| 8717 | + struct netlink_ext_ack *extack) |
---|
| 8718 | +{ |
---|
| 8719 | + struct netdev_notifier_pre_changeaddr_info info = { |
---|
| 8720 | + .info.dev = dev, |
---|
| 8721 | + .info.extack = extack, |
---|
| 8722 | + .dev_addr = addr, |
---|
| 8723 | + }; |
---|
| 8724 | + int rc; |
---|
| 8725 | + |
---|
| 8726 | + rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info); |
---|
| 8727 | + return notifier_to_errno(rc); |
---|
| 8728 | +} |
---|
| 8729 | +EXPORT_SYMBOL(dev_pre_changeaddr_notify); |
---|
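
Consumers of the new NETDEV_PRE_CHANGEADDR event can veto an address change before the driver is touched, reporting the reason through extack. A hedged sketch of such a notifier callback (registered with register_netdevice_notifier(); the validation policy is made up for illustration):

```c
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/netlink.h>
#include <linux/notifier.h>

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct netdev_notifier_pre_changeaddr_info *prei = ptr;

	if (event != NETDEV_PRE_CHANGEADDR)
		return NOTIFY_DONE;

	/* Veto addresses this (hypothetical) consumer cannot program. */
	if (!is_valid_ether_addr((const u8 *)prei->dev_addr)) {
		NL_SET_ERR_MSG(prei->info.extack, "example: unusable MAC address");
		return notifier_from_errno(-EINVAL);
	}

	return NOTIFY_DONE;
}
```
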
| 8730 | + |
---|
| 8731 | +/** |
---|
7941 | 8732 | * dev_set_mac_address - Change Media Access Control Address |
---|
7942 | 8733 | * @dev: device |
---|
7943 | 8734 | * @sa: new address |
---|
| 8735 | + * @extack: netlink extended ack |
---|
7944 | 8736 | * |
---|
7945 | 8737 | * Change the hardware (MAC) address of the device |
---|
7946 | 8738 | */ |
---|
7947 | | -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) |
---|
| 8739 | +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, |
---|
| 8740 | + struct netlink_ext_ack *extack) |
---|
7948 | 8741 | { |
---|
7949 | 8742 | const struct net_device_ops *ops = dev->netdev_ops; |
---|
7950 | 8743 | int err; |
---|
.. | .. |
---|
7955 | 8748 | return -EINVAL; |
---|
7956 | 8749 | if (!netif_device_present(dev)) |
---|
7957 | 8750 | return -ENODEV; |
---|
| 8751 | + err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack); |
---|
| 8752 | + if (err) |
---|
| 8753 | + return err; |
---|
7958 | 8754 | err = ops->ndo_set_mac_address(dev, sa); |
---|
7959 | 8755 | if (err) |
---|
7960 | 8756 | return err; |
---|
.. | .. |
---|
7964 | 8760 | return 0; |
---|
7965 | 8761 | } |
---|
7966 | 8762 | EXPORT_SYMBOL(dev_set_mac_address); |
---|
| 8763 | + |
---|
| 8764 | +static DECLARE_RWSEM(dev_addr_sem); |
---|
| 8765 | + |
---|
| 8766 | +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, |
---|
| 8767 | + struct netlink_ext_ack *extack) |
---|
| 8768 | +{ |
---|
| 8769 | + int ret; |
---|
| 8770 | + |
---|
| 8771 | + down_write(&dev_addr_sem); |
---|
| 8772 | + ret = dev_set_mac_address(dev, sa, extack); |
---|
| 8773 | + up_write(&dev_addr_sem); |
---|
| 8774 | + return ret; |
---|
| 8775 | +} |
---|
| 8776 | +EXPORT_SYMBOL(dev_set_mac_address_user); |
---|
| 8777 | + |
---|
| 8778 | +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) |
---|
| 8779 | +{ |
---|
| 8780 | + size_t size = sizeof(sa->sa_data); |
---|
| 8781 | + struct net_device *dev; |
---|
| 8782 | + int ret = 0; |
---|
| 8783 | + |
---|
| 8784 | + down_read(&dev_addr_sem); |
---|
| 8785 | + rcu_read_lock(); |
---|
| 8786 | + |
---|
| 8787 | + dev = dev_get_by_name_rcu(net, dev_name); |
---|
| 8788 | + if (!dev) { |
---|
| 8789 | + ret = -ENODEV; |
---|
| 8790 | + goto unlock; |
---|
| 8791 | + } |
---|
| 8792 | + if (!dev->addr_len) |
---|
| 8793 | + memset(sa->sa_data, 0, size); |
---|
| 8794 | + else |
---|
| 8795 | + memcpy(sa->sa_data, dev->dev_addr, |
---|
| 8796 | + min_t(size_t, size, dev->addr_len)); |
---|
| 8797 | + sa->sa_family = dev->type; |
---|
| 8798 | + |
---|
| 8799 | +unlock: |
---|
| 8800 | + rcu_read_unlock(); |
---|
| 8801 | + up_read(&dev_addr_sem); |
---|
| 8802 | + return ret; |
---|
| 8803 | +} |
---|
| 8804 | +EXPORT_SYMBOL(dev_get_mac_address); |
---|
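
A minimal sketch of the write side, assuming a kernel control path that already holds RTNL and that dev->addr_len fits in sa_data (true for Ethernet): dev_set_mac_address_user() additionally takes dev_addr_sem so that dev_get_mac_address() readers never observe a half-written address.

```c
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/socket.h>
#include <linux/string.h>

static int example_change_mac(struct net_device *dev, const u8 *mac,
			      struct netlink_ext_ack *extack)
{
	struct sockaddr sa;

	ASSERT_RTNL();

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, mac, dev->addr_len);

	return dev_set_mac_address_user(dev, &sa, extack);
}
```
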
7967 | 8805 | |
---|
7968 | 8806 | /** |
---|
7969 | 8807 | * dev_change_carrier - Change device carrier |
---|
.. | .. |
---|
8014 | 8852 | char *name, size_t len) |
---|
8015 | 8853 | { |
---|
8016 | 8854 | const struct net_device_ops *ops = dev->netdev_ops; |
---|
| 8855 | + int err; |
---|
8017 | 8856 | |
---|
8018 | | - if (!ops->ndo_get_phys_port_name) |
---|
8019 | | - return -EOPNOTSUPP; |
---|
8020 | | - return ops->ndo_get_phys_port_name(dev, name, len); |
---|
| 8857 | + if (ops->ndo_get_phys_port_name) { |
---|
| 8858 | + err = ops->ndo_get_phys_port_name(dev, name, len); |
---|
| 8859 | + if (err != -EOPNOTSUPP) |
---|
| 8860 | + return err; |
---|
| 8861 | + } |
---|
| 8862 | + return devlink_compat_phys_port_name_get(dev, name, len); |
---|
8021 | 8863 | } |
---|
8022 | 8864 | EXPORT_SYMBOL(dev_get_phys_port_name); |
---|
| 8865 | + |
---|
| 8866 | +/** |
---|
| 8867 | + * dev_get_port_parent_id - Get the device's port parent identifier |
---|
| 8868 | + * @dev: network device |
---|
| 8869 | + * @ppid: pointer to a storage for the port's parent identifier |
---|
| 8870 | + * @recurse: allow/disallow recursion to lower devices |
---|
| 8871 | + * |
---|
| 8872 | + * Get the device's port parent identifier
---|
| 8873 | + */ |
---|
| 8874 | +int dev_get_port_parent_id(struct net_device *dev, |
---|
| 8875 | + struct netdev_phys_item_id *ppid, |
---|
| 8876 | + bool recurse) |
---|
| 8877 | +{ |
---|
| 8878 | + const struct net_device_ops *ops = dev->netdev_ops; |
---|
| 8879 | + struct netdev_phys_item_id first = { }; |
---|
| 8880 | + struct net_device *lower_dev; |
---|
| 8881 | + struct list_head *iter; |
---|
| 8882 | + int err; |
---|
| 8883 | + |
---|
| 8884 | + if (ops->ndo_get_port_parent_id) { |
---|
| 8885 | + err = ops->ndo_get_port_parent_id(dev, ppid); |
---|
| 8886 | + if (err != -EOPNOTSUPP) |
---|
| 8887 | + return err; |
---|
| 8888 | + } |
---|
| 8889 | + |
---|
| 8890 | + err = devlink_compat_switch_id_get(dev, ppid); |
---|
| 8891 | + if (!err || err != -EOPNOTSUPP) |
---|
| 8892 | + return err; |
---|
| 8893 | + |
---|
| 8894 | + if (!recurse) |
---|
| 8895 | + return -EOPNOTSUPP; |
---|
| 8896 | + |
---|
| 8897 | + netdev_for_each_lower_dev(dev, lower_dev, iter) { |
---|
| 8898 | + err = dev_get_port_parent_id(lower_dev, ppid, recurse); |
---|
| 8899 | + if (err) |
---|
| 8900 | + break; |
---|
| 8901 | + if (!first.id_len) |
---|
| 8902 | + first = *ppid; |
---|
| 8903 | + else if (memcmp(&first, ppid, sizeof(*ppid))) |
---|
| 8904 | + return -EOPNOTSUPP; |
---|
| 8905 | + } |
---|
| 8906 | + |
---|
| 8907 | + return err; |
---|
| 8908 | +} |
---|
| 8909 | +EXPORT_SYMBOL(dev_get_port_parent_id); |
---|
| 8910 | + |
---|
| 8911 | +/** |
---|
| 8912 | + * netdev_port_same_parent_id - Indicate if two network devices have |
---|
| 8913 | + * the same port parent identifier |
---|
| 8914 | + * @a: first network device |
---|
| 8915 | + * @b: second network device |
---|
| 8916 | + */ |
---|
| 8917 | +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) |
---|
| 8918 | +{ |
---|
| 8919 | + struct netdev_phys_item_id a_id = { }; |
---|
| 8920 | + struct netdev_phys_item_id b_id = { }; |
---|
| 8921 | + |
---|
| 8922 | + if (dev_get_port_parent_id(a, &a_id, true) || |
---|
| 8923 | + dev_get_port_parent_id(b, &b_id, true)) |
---|
| 8924 | + return false; |
---|
| 8925 | + |
---|
| 8926 | + return netdev_phys_item_id_same(&a_id, &b_id); |
---|
| 8927 | +} |
---|
| 8928 | +EXPORT_SYMBOL(netdev_port_same_parent_id); |
---|
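
A hypothetical switchdev-style use of the two helpers above: forwarding between two ports is only worth offloading if both resolve, recursing through stacked devices such as a LAG, to the same parent switch.

```c
#include <linux/netdevice.h>

static bool example_can_offload_fwd(struct net_device *from,
				    struct net_device *to)
{
	struct netdev_phys_item_id ppid;

	/* Bail out early if 'from' is not backed by a switch port at all. */
	if (dev_get_port_parent_id(from, &ppid, true))
		return false;

	return netdev_port_same_parent_id(from, to);
}
```
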
8023 | 8929 | |
---|
8024 | 8930 | /** |
---|
8025 | 8931 | * dev_change_proto_down - update protocol port state information |
---|
.. | .. |
---|
8041 | 8947 | } |
---|
8042 | 8948 | EXPORT_SYMBOL(dev_change_proto_down); |
---|
8043 | 8949 | |
---|
8044 | | -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, |
---|
8045 | | - enum bpf_netdev_command cmd) |
---|
| 8950 | +/** |
---|
| 8951 | + * dev_change_proto_down_generic - generic implementation for |
---|
| 8952 | + * ndo_change_proto_down that sets carrier according to |
---|
| 8953 | + * proto_down. |
---|
| 8954 | + * |
---|
| 8955 | + * @dev: device |
---|
| 8956 | + * @proto_down: new value |
---|
| 8957 | + */ |
---|
| 8958 | +int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) |
---|
8046 | 8959 | { |
---|
8047 | | - struct netdev_bpf xdp; |
---|
| 8960 | + if (proto_down) |
---|
| 8961 | + netif_carrier_off(dev); |
---|
| 8962 | + else |
---|
| 8963 | + netif_carrier_on(dev); |
---|
| 8964 | + dev->proto_down = proto_down; |
---|
| 8965 | + return 0; |
---|
| 8966 | +} |
---|
| 8967 | +EXPORT_SYMBOL(dev_change_proto_down_generic); |
---|
8048 | 8968 | |
---|
8049 | | - if (!bpf_op) |
---|
8050 | | - return 0; |
---|
| 8969 | +/** |
---|
| 8970 | + * dev_change_proto_down_reason - proto down reason |
---|
| 8971 | + * |
---|
| 8972 | + * @dev: device |
---|
| 8973 | + * @mask: proto down mask |
---|
| 8974 | + * @value: proto down value |
---|
| 8975 | + */ |
---|
| 8976 | +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, |
---|
| 8977 | + u32 value) |
---|
| 8978 | +{ |
---|
| 8979 | + int b; |
---|
8051 | 8980 | |
---|
8052 | | - memset(&xdp, 0, sizeof(xdp)); |
---|
8053 | | - xdp.command = cmd; |
---|
| 8981 | + if (!mask) { |
---|
| 8982 | + dev->proto_down_reason = value; |
---|
| 8983 | + } else { |
---|
| 8984 | + for_each_set_bit(b, &mask, 32) { |
---|
| 8985 | + if (value & (1 << b)) |
---|
| 8986 | + dev->proto_down_reason |= BIT(b); |
---|
| 8987 | + else |
---|
| 8988 | + dev->proto_down_reason &= ~BIT(b); |
---|
| 8989 | + } |
---|
| 8990 | + } |
---|
| 8991 | +} |
---|
| 8992 | +EXPORT_SYMBOL(dev_change_proto_down_reason); |
---|
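
The mask/value semantics above: with a zero mask the whole reason word is replaced, otherwise only the bits set in the mask are touched, each set or cleared according to the corresponding bit in value. A small illustrative sketch (the chosen bits are arbitrary):

```c
#include <linux/netdevice.h>
#include <linux/bits.h>

/* Set reason bit 2, clear reason bit 3, leave every other bit alone. */
static void example_update_reason(struct net_device *dev)
{
	unsigned long mask = BIT(2) | BIT(3);
	u32 value = BIT(2);

	dev_change_proto_down_reason(dev, mask, value);
}
```
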
8054 | 8993 | |
---|
8055 | | - /* Query must always succeed. */ |
---|
8056 | | - WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); |
---|
| 8994 | +struct bpf_xdp_link { |
---|
| 8995 | + struct bpf_link link; |
---|
| 8996 | + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ |
---|
| 8997 | + int flags; |
---|
| 8998 | +}; |
---|
8057 | 8999 | |
---|
8058 | | - return xdp.prog_id; |
---|
| 9000 | +static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) |
---|
| 9001 | +{ |
---|
| 9002 | + if (flags & XDP_FLAGS_HW_MODE) |
---|
| 9003 | + return XDP_MODE_HW; |
---|
| 9004 | + if (flags & XDP_FLAGS_DRV_MODE) |
---|
| 9005 | + return XDP_MODE_DRV; |
---|
| 9006 | + if (flags & XDP_FLAGS_SKB_MODE) |
---|
| 9007 | + return XDP_MODE_SKB; |
---|
| 9008 | + return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; |
---|
8059 | 9009 | } |
---|
8060 | 9010 | |
---|
8061 | | -static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, |
---|
8062 | | - struct netlink_ext_ack *extack, u32 flags, |
---|
8063 | | - struct bpf_prog *prog) |
---|
| 9011 | +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) |
---|
| 9012 | +{ |
---|
| 9013 | + switch (mode) { |
---|
| 9014 | + case XDP_MODE_SKB: |
---|
| 9015 | + return generic_xdp_install; |
---|
| 9016 | + case XDP_MODE_DRV: |
---|
| 9017 | + case XDP_MODE_HW: |
---|
| 9018 | + return dev->netdev_ops->ndo_bpf; |
---|
| 9019 | + default: |
---|
| 9020 | + return NULL; |
---|
| 9021 | + }
---|
| 9022 | +} |
---|
| 9023 | + |
---|
| 9024 | +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, |
---|
| 9025 | + enum bpf_xdp_mode mode) |
---|
| 9026 | +{ |
---|
| 9027 | + return dev->xdp_state[mode].link; |
---|
| 9028 | +} |
---|
| 9029 | + |
---|
| 9030 | +static struct bpf_prog *dev_xdp_prog(struct net_device *dev, |
---|
| 9031 | + enum bpf_xdp_mode mode) |
---|
| 9032 | +{ |
---|
| 9033 | + struct bpf_xdp_link *link = dev_xdp_link(dev, mode); |
---|
| 9034 | + |
---|
| 9035 | + if (link) |
---|
| 9036 | + return link->link.prog; |
---|
| 9037 | + return dev->xdp_state[mode].prog; |
---|
| 9038 | +} |
---|
| 9039 | + |
---|
| 9040 | +static u8 dev_xdp_prog_count(struct net_device *dev) |
---|
| 9041 | +{ |
---|
| 9042 | + u8 count = 0; |
---|
| 9043 | + int i; |
---|
| 9044 | + |
---|
| 9045 | + for (i = 0; i < __MAX_XDP_MODE; i++) |
---|
| 9046 | + if (dev->xdp_state[i].prog || dev->xdp_state[i].link) |
---|
| 9047 | + count++; |
---|
| 9048 | + return count; |
---|
| 9049 | +} |
---|
| 9050 | + |
---|
| 9051 | +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) |
---|
| 9052 | +{ |
---|
| 9053 | + struct bpf_prog *prog = dev_xdp_prog(dev, mode); |
---|
| 9054 | + |
---|
| 9055 | + return prog ? prog->aux->id : 0; |
---|
| 9056 | +} |
---|
| 9057 | + |
---|
| 9058 | +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, |
---|
| 9059 | + struct bpf_xdp_link *link) |
---|
| 9060 | +{ |
---|
| 9061 | + dev->xdp_state[mode].link = link; |
---|
| 9062 | + dev->xdp_state[mode].prog = NULL; |
---|
| 9063 | +} |
---|
| 9064 | + |
---|
| 9065 | +static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, |
---|
| 9066 | + struct bpf_prog *prog) |
---|
| 9067 | +{ |
---|
| 9068 | + dev->xdp_state[mode].link = NULL; |
---|
| 9069 | + dev->xdp_state[mode].prog = prog; |
---|
| 9070 | +} |
---|
| 9071 | + |
---|
| 9072 | +static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, |
---|
| 9073 | + bpf_op_t bpf_op, struct netlink_ext_ack *extack, |
---|
| 9074 | + u32 flags, struct bpf_prog *prog) |
---|
8064 | 9075 | { |
---|
8065 | 9076 | struct netdev_bpf xdp; |
---|
| 9077 | + int err; |
---|
8066 | 9078 | |
---|
8067 | 9079 | memset(&xdp, 0, sizeof(xdp)); |
---|
8068 | | - if (flags & XDP_FLAGS_HW_MODE) |
---|
8069 | | - xdp.command = XDP_SETUP_PROG_HW; |
---|
8070 | | - else |
---|
8071 | | - xdp.command = XDP_SETUP_PROG; |
---|
| 9080 | + xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; |
---|
8072 | 9081 | xdp.extack = extack; |
---|
8073 | 9082 | xdp.flags = flags; |
---|
8074 | 9083 | xdp.prog = prog; |
---|
8075 | 9084 | |
---|
8076 | | - return bpf_op(dev, &xdp); |
---|
| 9085 | + /* Drivers assume refcnt is already incremented (i.e., prog pointer is
---|
| 9086 | + * "moved" into driver), so they don't increment it on their own, but |
---|
| 9087 | + * they do decrement refcnt when program is detached or replaced. |
---|
| 9088 | + * Given net_device also owns link/prog, we need to bump refcnt here |
---|
| 9089 | + * to prevent drivers from underflowing it. |
---|
| 9090 | + */ |
---|
| 9091 | + if (prog) |
---|
| 9092 | + bpf_prog_inc(prog); |
---|
| 9093 | + err = bpf_op(dev, &xdp); |
---|
| 9094 | + if (err) { |
---|
| 9095 | + if (prog) |
---|
| 9096 | + bpf_prog_put(prog); |
---|
| 9097 | + return err; |
---|
| 9098 | + } |
---|
| 9099 | + |
---|
| 9100 | + if (mode != XDP_MODE_HW) |
---|
| 9101 | + bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); |
---|
| 9102 | + |
---|
| 9103 | + return 0; |
---|
8077 | 9104 | } |
---|
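
For context, a hedged sketch of the driver side that dev_xdp_install() ends up calling: the bpf_op resolved by dev_xdp_bpf_op() is either generic_xdp_install() or the driver's ndo_bpf, which now only has to handle the setup commands. The private structure and swap helper are assumptions, not part of this patch.

```c
#include <linux/netdevice.h>
#include <linux/bpf.h>

struct example_priv;					/* assumed driver private data */
static int example_swap_prog(struct example_priv *priv,
			     struct bpf_prog *prog);	/* assumed driver helper */

static int example_ndo_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
	struct example_priv *priv = netdev_priv(dev);

	switch (bpf->command) {
	case XDP_SETUP_PROG:
		/* The core already holds a reference on bpf->prog and drops
		 * it again if this callback fails. */
		return example_swap_prog(priv, bpf->prog);
	case XDP_SETUP_PROG_HW:
		return -EOPNOTSUPP;	/* no offload in this sketch */
	default:
		return -EINVAL;
	}
}
```
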
8078 | 9105 | |
---|
8079 | 9106 | static void dev_xdp_uninstall(struct net_device *dev) |
---|
8080 | 9107 | { |
---|
8081 | | - struct netdev_bpf xdp; |
---|
8082 | | - bpf_op_t ndo_bpf; |
---|
| 9108 | + struct bpf_xdp_link *link; |
---|
| 9109 | + struct bpf_prog *prog; |
---|
| 9110 | + enum bpf_xdp_mode mode; |
---|
| 9111 | + bpf_op_t bpf_op; |
---|
8083 | 9112 | |
---|
8084 | | - /* Remove generic XDP */ |
---|
8085 | | - WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); |
---|
| 9113 | + ASSERT_RTNL(); |
---|
8086 | 9114 | |
---|
8087 | | - /* Remove from the driver */ |
---|
8088 | | - ndo_bpf = dev->netdev_ops->ndo_bpf; |
---|
8089 | | - if (!ndo_bpf) |
---|
8090 | | - return; |
---|
| 9115 | + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { |
---|
| 9116 | + prog = dev_xdp_prog(dev, mode); |
---|
| 9117 | + if (!prog) |
---|
| 9118 | + continue; |
---|
8091 | 9119 | |
---|
8092 | | - memset(&xdp, 0, sizeof(xdp)); |
---|
8093 | | - xdp.command = XDP_QUERY_PROG; |
---|
8094 | | - WARN_ON(ndo_bpf(dev, &xdp)); |
---|
8095 | | - if (xdp.prog_id) |
---|
8096 | | - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, |
---|
8097 | | - NULL)); |
---|
| 9120 | + bpf_op = dev_xdp_bpf_op(dev, mode); |
---|
| 9121 | + if (!bpf_op) |
---|
| 9122 | + continue; |
---|
8098 | 9123 | |
---|
8099 | | - /* Remove HW offload */ |
---|
8100 | | - memset(&xdp, 0, sizeof(xdp)); |
---|
8101 | | - xdp.command = XDP_QUERY_PROG_HW; |
---|
8102 | | - if (!ndo_bpf(dev, &xdp) && xdp.prog_id) |
---|
8103 | | - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, |
---|
8104 | | - NULL)); |
---|
| 9124 | + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); |
---|
| 9125 | + |
---|
| 9126 | + /* auto-detach link from net device */ |
---|
| 9127 | + link = dev_xdp_link(dev, mode); |
---|
| 9128 | + if (link) |
---|
| 9129 | + link->dev = NULL; |
---|
| 9130 | + else |
---|
| 9131 | + bpf_prog_put(prog); |
---|
| 9132 | + |
---|
| 9133 | + dev_xdp_set_link(dev, mode, NULL); |
---|
| 9134 | + } |
---|
| 9135 | +} |
---|
| 9136 | + |
---|
| 9137 | +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, |
---|
| 9138 | + struct bpf_xdp_link *link, struct bpf_prog *new_prog, |
---|
| 9139 | + struct bpf_prog *old_prog, u32 flags) |
---|
| 9140 | +{ |
---|
| 9141 | + unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); |
---|
| 9142 | + struct bpf_prog *cur_prog; |
---|
| 9143 | + enum bpf_xdp_mode mode; |
---|
| 9144 | + bpf_op_t bpf_op; |
---|
| 9145 | + int err; |
---|
| 9146 | + |
---|
| 9147 | + ASSERT_RTNL(); |
---|
| 9148 | + |
---|
| 9149 | + /* either link or prog attachment, never both */ |
---|
| 9150 | + if (link && (new_prog || old_prog)) |
---|
| 9151 | + return -EINVAL; |
---|
| 9152 | + /* link supports only XDP mode flags */ |
---|
| 9153 | + if (link && (flags & ~XDP_FLAGS_MODES)) { |
---|
| 9154 | + NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); |
---|
| 9155 | + return -EINVAL; |
---|
| 9156 | + } |
---|
| 9157 | + /* just one XDP mode bit should be set, zero defaults to drv/skb mode */ |
---|
| 9158 | + if (num_modes > 1) { |
---|
| 9159 | + NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); |
---|
| 9160 | + return -EINVAL; |
---|
| 9161 | + } |
---|
| 9162 | + /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ |
---|
| 9163 | + if (!num_modes && dev_xdp_prog_count(dev) > 1) { |
---|
| 9164 | + NL_SET_ERR_MSG(extack, |
---|
| 9165 | + "More than one program loaded, unset mode is ambiguous"); |
---|
| 9166 | + return -EINVAL; |
---|
| 9167 | + } |
---|
| 9168 | + /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ |
---|
| 9169 | + if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { |
---|
| 9170 | + NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); |
---|
| 9171 | + return -EINVAL; |
---|
| 9172 | + } |
---|
| 9173 | + |
---|
| 9174 | + mode = dev_xdp_mode(dev, flags); |
---|
| 9175 | + /* can't replace attached link */ |
---|
| 9176 | + if (dev_xdp_link(dev, mode)) { |
---|
| 9177 | + NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); |
---|
| 9178 | + return -EBUSY; |
---|
| 9179 | + } |
---|
| 9180 | + |
---|
| 9181 | + cur_prog = dev_xdp_prog(dev, mode); |
---|
| 9182 | + /* can't replace attached prog with link */ |
---|
| 9183 | + if (link && cur_prog) { |
---|
| 9184 | + NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); |
---|
| 9185 | + return -EBUSY; |
---|
| 9186 | + } |
---|
| 9187 | + if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { |
---|
| 9188 | + NL_SET_ERR_MSG(extack, "Active program does not match expected"); |
---|
| 9189 | + return -EEXIST; |
---|
| 9190 | + } |
---|
| 9191 | + |
---|
| 9192 | + /* put effective new program into new_prog */ |
---|
| 9193 | + if (link) |
---|
| 9194 | + new_prog = link->link.prog; |
---|
| 9195 | + |
---|
| 9196 | + if (new_prog) { |
---|
| 9197 | + bool offload = mode == XDP_MODE_HW; |
---|
| 9198 | + enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB |
---|
| 9199 | + ? XDP_MODE_DRV : XDP_MODE_SKB; |
---|
| 9200 | + |
---|
| 9201 | + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { |
---|
| 9202 | + NL_SET_ERR_MSG(extack, "XDP program already attached"); |
---|
| 9203 | + return -EBUSY; |
---|
| 9204 | + } |
---|
| 9205 | + if (!offload && dev_xdp_prog(dev, other_mode)) { |
---|
| 9206 | + NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); |
---|
| 9207 | + return -EEXIST; |
---|
| 9208 | + } |
---|
| 9209 | + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { |
---|
| 9210 | + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); |
---|
| 9211 | + return -EINVAL; |
---|
| 9212 | + } |
---|
| 9213 | + if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { |
---|
| 9214 | + NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); |
---|
| 9215 | + return -EINVAL; |
---|
| 9216 | + } |
---|
| 9217 | + if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { |
---|
| 9218 | + NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); |
---|
| 9219 | + return -EINVAL; |
---|
| 9220 | + } |
---|
| 9221 | + } |
---|
| 9222 | + |
---|
| 9223 | + /* don't call drivers if the effective program didn't change */ |
---|
| 9224 | + if (new_prog != cur_prog) { |
---|
| 9225 | + bpf_op = dev_xdp_bpf_op(dev, mode); |
---|
| 9226 | + if (!bpf_op) { |
---|
| 9227 | + NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); |
---|
| 9228 | + return -EOPNOTSUPP; |
---|
| 9229 | + } |
---|
| 9230 | + |
---|
| 9231 | + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); |
---|
| 9232 | + if (err) |
---|
| 9233 | + return err; |
---|
| 9234 | + } |
---|
| 9235 | + |
---|
| 9236 | + if (link) |
---|
| 9237 | + dev_xdp_set_link(dev, mode, link); |
---|
| 9238 | + else |
---|
| 9239 | + dev_xdp_set_prog(dev, mode, new_prog); |
---|
| 9240 | + if (cur_prog) |
---|
| 9241 | + bpf_prog_put(cur_prog); |
---|
| 9242 | + |
---|
| 9243 | + return 0; |
---|
| 9244 | +} |
---|
| 9245 | + |
---|
| 9246 | +static int dev_xdp_attach_link(struct net_device *dev, |
---|
| 9247 | + struct netlink_ext_ack *extack, |
---|
| 9248 | + struct bpf_xdp_link *link) |
---|
| 9249 | +{ |
---|
| 9250 | + return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); |
---|
| 9251 | +} |
---|
| 9252 | + |
---|
| 9253 | +static int dev_xdp_detach_link(struct net_device *dev, |
---|
| 9254 | + struct netlink_ext_ack *extack, |
---|
| 9255 | + struct bpf_xdp_link *link) |
---|
| 9256 | +{ |
---|
| 9257 | + enum bpf_xdp_mode mode; |
---|
| 9258 | + bpf_op_t bpf_op; |
---|
| 9259 | + |
---|
| 9260 | + ASSERT_RTNL(); |
---|
| 9261 | + |
---|
| 9262 | + mode = dev_xdp_mode(dev, link->flags); |
---|
| 9263 | + if (dev_xdp_link(dev, mode) != link) |
---|
| 9264 | + return -EINVAL; |
---|
| 9265 | + |
---|
| 9266 | + bpf_op = dev_xdp_bpf_op(dev, mode); |
---|
| 9267 | + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); |
---|
| 9268 | + dev_xdp_set_link(dev, mode, NULL); |
---|
| 9269 | + return 0; |
---|
| 9270 | +} |
---|
| 9271 | + |
---|
| 9272 | +static void bpf_xdp_link_release(struct bpf_link *link) |
---|
| 9273 | +{ |
---|
| 9274 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
---|
| 9275 | + |
---|
| 9276 | + rtnl_lock(); |
---|
| 9277 | + |
---|
| 9278 | + /* if racing with net_device's tear down, xdp_link->dev might be |
---|
| 9279 | + * already NULL, in which case link was already auto-detached |
---|
| 9280 | + */ |
---|
| 9281 | + if (xdp_link->dev) { |
---|
| 9282 | + WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); |
---|
| 9283 | + xdp_link->dev = NULL; |
---|
| 9284 | + } |
---|
| 9285 | + |
---|
| 9286 | + rtnl_unlock(); |
---|
| 9287 | +} |
---|
| 9288 | + |
---|
| 9289 | +static int bpf_xdp_link_detach(struct bpf_link *link) |
---|
| 9290 | +{ |
---|
| 9291 | + bpf_xdp_link_release(link); |
---|
| 9292 | + return 0; |
---|
| 9293 | +} |
---|
| 9294 | + |
---|
| 9295 | +static void bpf_xdp_link_dealloc(struct bpf_link *link) |
---|
| 9296 | +{ |
---|
| 9297 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
---|
| 9298 | + |
---|
| 9299 | + kfree(xdp_link); |
---|
| 9300 | +} |
---|
| 9301 | + |
---|
| 9302 | +static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, |
---|
| 9303 | + struct seq_file *seq) |
---|
| 9304 | +{ |
---|
| 9305 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
---|
| 9306 | + u32 ifindex = 0; |
---|
| 9307 | + |
---|
| 9308 | + rtnl_lock(); |
---|
| 9309 | + if (xdp_link->dev) |
---|
| 9310 | + ifindex = xdp_link->dev->ifindex; |
---|
| 9311 | + rtnl_unlock(); |
---|
| 9312 | + |
---|
| 9313 | + seq_printf(seq, "ifindex:\t%u\n", ifindex); |
---|
| 9314 | +} |
---|
| 9315 | + |
---|
| 9316 | +static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, |
---|
| 9317 | + struct bpf_link_info *info) |
---|
| 9318 | +{ |
---|
| 9319 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
---|
| 9320 | + u32 ifindex = 0; |
---|
| 9321 | + |
---|
| 9322 | + rtnl_lock(); |
---|
| 9323 | + if (xdp_link->dev) |
---|
| 9324 | + ifindex = xdp_link->dev->ifindex; |
---|
| 9325 | + rtnl_unlock(); |
---|
| 9326 | + |
---|
| 9327 | + info->xdp.ifindex = ifindex; |
---|
| 9328 | + return 0; |
---|
| 9329 | +} |
---|
| 9330 | + |
---|
| 9331 | +static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, |
---|
| 9332 | + struct bpf_prog *old_prog) |
---|
| 9333 | +{ |
---|
| 9334 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
---|
| 9335 | + enum bpf_xdp_mode mode; |
---|
| 9336 | + bpf_op_t bpf_op; |
---|
| 9337 | + int err = 0; |
---|
| 9338 | + |
---|
| 9339 | + rtnl_lock(); |
---|
| 9340 | + |
---|
| 9341 | + /* link might have been auto-released already, so fail */ |
---|
| 9342 | + if (!xdp_link->dev) { |
---|
| 9343 | + err = -ENOLINK; |
---|
| 9344 | + goto out_unlock; |
---|
| 9345 | + } |
---|
| 9346 | + |
---|
| 9347 | + if (old_prog && link->prog != old_prog) { |
---|
| 9348 | + err = -EPERM; |
---|
| 9349 | + goto out_unlock; |
---|
| 9350 | + } |
---|
| 9351 | + old_prog = link->prog; |
---|
| 9352 | + if (old_prog->type != new_prog->type || |
---|
| 9353 | + old_prog->expected_attach_type != new_prog->expected_attach_type) { |
---|
| 9354 | + err = -EINVAL; |
---|
| 9355 | + goto out_unlock; |
---|
| 9356 | + } |
---|
| 9357 | + |
---|
| 9358 | + if (old_prog == new_prog) { |
---|
| 9359 | + /* no-op, don't disturb drivers */ |
---|
| 9360 | + bpf_prog_put(new_prog); |
---|
| 9361 | + goto out_unlock; |
---|
| 9362 | + } |
---|
| 9363 | + |
---|
| 9364 | + mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); |
---|
| 9365 | + bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); |
---|
| 9366 | + err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, |
---|
| 9367 | + xdp_link->flags, new_prog); |
---|
| 9368 | + if (err) |
---|
| 9369 | + goto out_unlock; |
---|
| 9370 | + |
---|
| 9371 | + old_prog = xchg(&link->prog, new_prog); |
---|
| 9372 | + bpf_prog_put(old_prog); |
---|
| 9373 | + |
---|
| 9374 | +out_unlock: |
---|
| 9375 | + rtnl_unlock(); |
---|
| 9376 | + return err; |
---|
| 9377 | +} |
---|
| 9378 | + |
---|
| 9379 | +static const struct bpf_link_ops bpf_xdp_link_lops = { |
---|
| 9380 | + .release = bpf_xdp_link_release, |
---|
| 9381 | + .dealloc = bpf_xdp_link_dealloc, |
---|
| 9382 | + .detach = bpf_xdp_link_detach, |
---|
| 9383 | + .show_fdinfo = bpf_xdp_link_show_fdinfo, |
---|
| 9384 | + .fill_link_info = bpf_xdp_link_fill_link_info, |
---|
| 9385 | + .update_prog = bpf_xdp_link_update, |
---|
| 9386 | +}; |
---|
| 9387 | + |
---|
| 9388 | +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
---|
| 9389 | +{ |
---|
| 9390 | + struct net *net = current->nsproxy->net_ns; |
---|
| 9391 | + struct bpf_link_primer link_primer; |
---|
| 9392 | + struct bpf_xdp_link *link; |
---|
| 9393 | + struct net_device *dev; |
---|
| 9394 | + int err, fd; |
---|
| 9395 | + |
---|
| 9396 | + rtnl_lock(); |
---|
| 9397 | + dev = dev_get_by_index(net, attr->link_create.target_ifindex); |
---|
| 9398 | + if (!dev) { |
---|
| 9399 | + rtnl_unlock(); |
---|
| 9400 | + return -EINVAL; |
---|
| 9401 | + } |
---|
| 9402 | + |
---|
| 9403 | + link = kzalloc(sizeof(*link), GFP_USER); |
---|
| 9404 | + if (!link) { |
---|
| 9405 | + err = -ENOMEM; |
---|
| 9406 | + goto unlock; |
---|
| 9407 | + } |
---|
| 9408 | + |
---|
| 9409 | + bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); |
---|
| 9410 | + link->dev = dev; |
---|
| 9411 | + link->flags = attr->link_create.flags; |
---|
| 9412 | + |
---|
| 9413 | + err = bpf_link_prime(&link->link, &link_primer); |
---|
| 9414 | + if (err) { |
---|
| 9415 | + kfree(link); |
---|
| 9416 | + goto unlock; |
---|
| 9417 | + } |
---|
| 9418 | + |
---|
| 9419 | + err = dev_xdp_attach_link(dev, NULL, link); |
---|
| 9420 | + rtnl_unlock(); |
---|
| 9421 | + |
---|
| 9422 | + if (err) { |
---|
| 9423 | + link->dev = NULL; |
---|
| 9424 | + bpf_link_cleanup(&link_primer); |
---|
| 9425 | + goto out_put_dev; |
---|
| 9426 | + } |
---|
| 9427 | + |
---|
| 9428 | + fd = bpf_link_settle(&link_primer); |
---|
| 9429 | + /* link itself doesn't hold dev's refcnt to not complicate shutdown */ |
---|
| 9430 | + dev_put(dev); |
---|
| 9431 | + return fd; |
---|
| 9432 | + |
---|
| 9433 | +unlock: |
---|
| 9434 | + rtnl_unlock(); |
---|
| 9435 | + |
---|
| 9436 | +out_put_dev: |
---|
| 9437 | + dev_put(dev); |
---|
| 9438 | + return err; |
---|
8105 | 9439 | } |
---|
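
From userspace, the path into bpf_xdp_link_attach() is the BPF link API. A hedged libbpf sketch; "xdp_main" is an assumed program name and error-reporting details vary between libbpf versions.

```c
#include <errno.h>
#include <bpf/libbpf.h>

static int example_attach_xdp_link(struct bpf_object *obj, int ifindex)
{
	struct bpf_program *prog;
	struct bpf_link *link;

	prog = bpf_object__find_program_by_name(obj, "xdp_main");
	if (!prog)
		return -ENOENT;

	link = bpf_program__attach_xdp(prog, ifindex);
	if (libbpf_get_error(link))
		return -1;

	/* Closing the link fd (or bpf_link__destroy()) detaches the program;
	 * device teardown auto-detaches it as well (link->dev is cleared). */
	return bpf_link__fd(link);
}
```
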
8106 | 9440 | |
---|
8107 | 9441 | /** |
---|
.. | .. |
---|
8109 | 9443 | * @dev: device |
---|
8110 | 9444 | * @extack: netlink extended ack |
---|
8111 | 9445 | * @fd: new program fd or negative value to clear |
---|
| 9446 | + * @expected_fd: old program fd that userspace expects to replace or clear |
---|
8112 | 9447 | * @flags: xdp-related flags |
---|
8113 | 9448 | * |
---|
8114 | 9449 | * Set or clear a bpf program for a device |
---|
8115 | 9450 | */ |
---|
8116 | 9451 | int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, |
---|
8117 | | - int fd, u32 flags) |
---|
| 9452 | + int fd, int expected_fd, u32 flags) |
---|
8118 | 9453 | { |
---|
8119 | | - const struct net_device_ops *ops = dev->netdev_ops; |
---|
8120 | | - enum bpf_netdev_command query; |
---|
8121 | | - struct bpf_prog *prog = NULL; |
---|
8122 | | - bpf_op_t bpf_op, bpf_chk; |
---|
| 9454 | + enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); |
---|
| 9455 | + struct bpf_prog *new_prog = NULL, *old_prog = NULL; |
---|
8123 | 9456 | int err; |
---|
8124 | 9457 | |
---|
8125 | 9458 | ASSERT_RTNL(); |
---|
8126 | 9459 | |
---|
8127 | | - query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; |
---|
8128 | | - |
---|
8129 | | - bpf_op = bpf_chk = ops->ndo_bpf; |
---|
8130 | | - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) |
---|
8131 | | - return -EOPNOTSUPP; |
---|
8132 | | - if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) |
---|
8133 | | - bpf_op = generic_xdp_install; |
---|
8134 | | - if (bpf_op == bpf_chk) |
---|
8135 | | - bpf_chk = generic_xdp_install; |
---|
8136 | | - |
---|
8137 | 9460 | if (fd >= 0) { |
---|
8138 | | - if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || |
---|
8139 | | - __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) |
---|
8140 | | - return -EEXIST; |
---|
8141 | | - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && |
---|
8142 | | - __dev_xdp_query(dev, bpf_op, query)) |
---|
8143 | | - return -EBUSY; |
---|
| 9461 | + new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, |
---|
| 9462 | + mode != XDP_MODE_SKB); |
---|
| 9463 | + if (IS_ERR(new_prog)) |
---|
| 9464 | + return PTR_ERR(new_prog); |
---|
| 9465 | + } |
---|
8144 | 9466 | |
---|
8145 | | - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, |
---|
8146 | | - bpf_op == ops->ndo_bpf); |
---|
8147 | | - if (IS_ERR(prog)) |
---|
8148 | | - return PTR_ERR(prog); |
---|
8149 | | - |
---|
8150 | | - if (!(flags & XDP_FLAGS_HW_MODE) && |
---|
8151 | | - bpf_prog_is_dev_bound(prog->aux)) { |
---|
8152 | | - NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); |
---|
8153 | | - bpf_prog_put(prog); |
---|
8154 | | - return -EINVAL; |
---|
| 9467 | + if (expected_fd >= 0) { |
---|
| 9468 | + old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, |
---|
| 9469 | + mode != XDP_MODE_SKB); |
---|
| 9470 | + if (IS_ERR(old_prog)) { |
---|
| 9471 | + err = PTR_ERR(old_prog); |
---|
| 9472 | + old_prog = NULL; |
---|
| 9473 | + goto err_out; |
---|
8155 | 9474 | } |
---|
8156 | 9475 | } |
---|
8157 | 9476 | |
---|
8158 | | - err = dev_xdp_install(dev, bpf_op, extack, flags, prog); |
---|
8159 | | - if (err < 0 && prog) |
---|
8160 | | - bpf_prog_put(prog); |
---|
| 9477 | + err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); |
---|
8161 | 9478 | |
---|
| 9479 | +err_out: |
---|
| 9480 | + if (err && new_prog) |
---|
| 9481 | + bpf_prog_put(new_prog); |
---|
| 9482 | + if (old_prog) |
---|
| 9483 | + bpf_prog_put(old_prog); |
---|
8162 | 9484 | return err; |
---|
8163 | 9485 | } |
---|
8164 | 9486 | |
---|
.. | .. |
---|
8190 | 9512 | { |
---|
8191 | 9513 | list_add_tail(&dev->todo_list, &net_todo_list); |
---|
8192 | 9514 | dev_net(dev)->dev_unreg_count++; |
---|
8193 | | -} |
---|
8194 | | - |
---|
8195 | | -static void rollback_registered_many(struct list_head *head) |
---|
8196 | | -{ |
---|
8197 | | - struct net_device *dev, *tmp; |
---|
8198 | | - LIST_HEAD(close_head); |
---|
8199 | | - |
---|
8200 | | - BUG_ON(dev_boot_phase); |
---|
8201 | | - ASSERT_RTNL(); |
---|
8202 | | - |
---|
8203 | | - list_for_each_entry_safe(dev, tmp, head, unreg_list) { |
---|
8204 | | - /* Some devices call without registering |
---|
8205 | | - * for initialization unwind. Remove those |
---|
8206 | | - * devices and proceed with the remaining. |
---|
8207 | | - */ |
---|
8208 | | - if (dev->reg_state == NETREG_UNINITIALIZED) { |
---|
8209 | | - pr_debug("unregister_netdevice: device %s/%p never was registered\n", |
---|
8210 | | - dev->name, dev); |
---|
8211 | | - |
---|
8212 | | - WARN_ON(1); |
---|
8213 | | - list_del(&dev->unreg_list); |
---|
8214 | | - continue; |
---|
8215 | | - } |
---|
8216 | | - dev->dismantle = true; |
---|
8217 | | - BUG_ON(dev->reg_state != NETREG_REGISTERED); |
---|
8218 | | - } |
---|
8219 | | - |
---|
8220 | | - /* If device is running, close it first. */ |
---|
8221 | | - list_for_each_entry(dev, head, unreg_list) |
---|
8222 | | - list_add_tail(&dev->close_list, &close_head); |
---|
8223 | | - dev_close_many(&close_head, true); |
---|
8224 | | - |
---|
8225 | | - list_for_each_entry(dev, head, unreg_list) { |
---|
8226 | | - /* And unlink it from device chain. */ |
---|
8227 | | - unlist_netdevice(dev); |
---|
8228 | | - |
---|
8229 | | - dev->reg_state = NETREG_UNREGISTERING; |
---|
8230 | | - } |
---|
8231 | | - flush_all_backlogs(); |
---|
8232 | | - |
---|
8233 | | - synchronize_net(); |
---|
8234 | | - |
---|
8235 | | - list_for_each_entry(dev, head, unreg_list) { |
---|
8236 | | - struct sk_buff *skb = NULL; |
---|
8237 | | - |
---|
8238 | | - /* Shutdown queueing discipline. */ |
---|
8239 | | - dev_shutdown(dev); |
---|
8240 | | - |
---|
8241 | | - dev_xdp_uninstall(dev); |
---|
8242 | | - |
---|
8243 | | - /* Notify protocols, that we are about to destroy |
---|
8244 | | - * this device. They should clean all the things. |
---|
8245 | | - */ |
---|
8246 | | - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
---|
8247 | | - |
---|
8248 | | - if (!dev->rtnl_link_ops || |
---|
8249 | | - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
---|
8250 | | - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, |
---|
8251 | | - GFP_KERNEL, NULL, 0); |
---|
8252 | | - |
---|
8253 | | - /* |
---|
8254 | | - * Flush the unicast and multicast chains |
---|
8255 | | - */ |
---|
8256 | | - dev_uc_flush(dev); |
---|
8257 | | - dev_mc_flush(dev); |
---|
8258 | | - |
---|
8259 | | - if (dev->netdev_ops->ndo_uninit) |
---|
8260 | | - dev->netdev_ops->ndo_uninit(dev); |
---|
8261 | | - |
---|
8262 | | - if (skb) |
---|
8263 | | - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); |
---|
8264 | | - |
---|
8265 | | - /* Notifier chain MUST detach us all upper devices. */ |
---|
8266 | | - WARN_ON(netdev_has_any_upper_dev(dev)); |
---|
8267 | | - WARN_ON(netdev_has_any_lower_dev(dev)); |
---|
8268 | | - |
---|
8269 | | - /* Remove entries from kobject tree */ |
---|
8270 | | - netdev_unregister_kobject(dev); |
---|
8271 | | -#ifdef CONFIG_XPS |
---|
8272 | | - /* Remove XPS queueing entries */ |
---|
8273 | | - netif_reset_xps_queues_gt(dev, 0); |
---|
8274 | | -#endif |
---|
8275 | | - } |
---|
8276 | | - |
---|
8277 | | - synchronize_net(); |
---|
8278 | | - |
---|
8279 | | - list_for_each_entry(dev, head, unreg_list) |
---|
8280 | | - dev_put(dev); |
---|
8281 | | -} |
---|
8282 | | - |
---|
8283 | | -static void rollback_registered(struct net_device *dev) |
---|
8284 | | -{ |
---|
8285 | | - LIST_HEAD(single); |
---|
8286 | | - |
---|
8287 | | - list_add(&dev->unreg_list, &single); |
---|
8288 | | - rollback_registered_many(&single); |
---|
8289 | | - list_del(&single); |
---|
8290 | 9515 | } |
---|
8291 | 9516 | |
---|
8292 | 9517 | static netdev_features_t netdev_sync_upper_features(struct net_device *lower, |
---|
.. | .. |
---|
8434 | 9659 | /* driver might be less strict about feature dependencies */ |
---|
8435 | 9660 | features = netdev_fix_features(dev, features); |
---|
8436 | 9661 | |
---|
8437 | | - /* some features can't be enabled if they're off an an upper device */ |
---|
| 9662 | + /* some features can't be enabled if they're off on an upper device */ |
---|
8438 | 9663 | netdev_for_each_upper_dev_rcu(dev, upper, iter) |
---|
8439 | 9664 | features = netdev_sync_upper_features(dev, upper, features); |
---|
8440 | 9665 | |
---|
.. | .. |
---|
8558 | 9783 | else |
---|
8559 | 9784 | netif_dormant_off(dev); |
---|
8560 | 9785 | |
---|
| 9786 | + if (rootdev->operstate == IF_OPER_TESTING) |
---|
| 9787 | + netif_testing_on(dev); |
---|
| 9788 | + else |
---|
| 9789 | + netif_testing_off(dev); |
---|
| 9790 | + |
---|
8561 | 9791 | if (netif_carrier_ok(rootdev)) |
---|
8562 | 9792 | netif_carrier_on(dev); |
---|
8563 | 9793 | else |
---|
.. | .. |
---|
8619 | 9849 | /* Initialize queue lock */ |
---|
8620 | 9850 | spin_lock_init(&queue->_xmit_lock); |
---|
8621 | 9851 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); |
---|
8622 | | - netdev_queue_clear_owner(queue); |
---|
| 9852 | + queue->xmit_lock_owner = -1; |
---|
8623 | 9853 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); |
---|
8624 | 9854 | queue->dev = dev; |
---|
8625 | 9855 | #ifdef CONFIG_BQL |
---|
.. | .. |
---|
8698 | 9928 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
---|
8699 | 9929 | BUG_ON(!net); |
---|
8700 | 9930 | |
---|
| 9931 | + ret = ethtool_check_ops(dev->ethtool_ops); |
---|
| 9932 | + if (ret) |
---|
| 9933 | + return ret; |
---|
| 9934 | + |
---|
8701 | 9935 | spin_lock_init(&dev->addr_list_lock); |
---|
8702 | 9936 | netdev_set_addr_lockdep_class(dev); |
---|
8703 | 9937 | |
---|
8704 | 9938 | ret = dev_get_valid_name(net, dev, dev->name); |
---|
8705 | 9939 | if (ret < 0) |
---|
| 9940 | + goto out; |
---|
| 9941 | + |
---|
| 9942 | + ret = -ENOMEM; |
---|
| 9943 | + dev->name_node = netdev_name_node_head_alloc(dev); |
---|
| 9944 | + if (!dev->name_node) |
---|
8706 | 9945 | goto out; |
---|
8707 | 9946 | |
---|
8708 | 9947 | /* Init, if this function is available */ |
---|
.. | .. |
---|
8711 | 9950 | if (ret) { |
---|
8712 | 9951 | if (ret > 0) |
---|
8713 | 9952 | ret = -EIO; |
---|
8714 | | - goto out; |
---|
| 9953 | + goto err_free_name; |
---|
8715 | 9954 | } |
---|
8716 | 9955 | } |
---|
8717 | 9956 | |
---|
.. | .. |
---|
8733 | 9972 | /* Transfer changeable features to wanted_features and enable |
---|
8734 | 9973 | * software offloads (GSO and GRO). |
---|
8735 | 9974 | */ |
---|
8736 | | - dev->hw_features |= NETIF_F_SOFT_FEATURES; |
---|
| 9975 | + dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); |
---|
8737 | 9976 | dev->features |= NETIF_F_SOFT_FEATURES; |
---|
8738 | 9977 | |
---|
8739 | 9978 | if (dev->netdev_ops->ndo_udp_tunnel_add) { |
---|
.. | .. |
---|
8811 | 10050 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); |
---|
8812 | 10051 | ret = notifier_to_errno(ret); |
---|
8813 | 10052 | if (ret) { |
---|
8814 | | - rollback_registered(dev); |
---|
8815 | | - rcu_barrier(); |
---|
8816 | | - |
---|
8817 | | - dev->reg_state = NETREG_UNREGISTERED; |
---|
8818 | | - /* We should put the kobject that hold in |
---|
8819 | | - * netdev_unregister_kobject(), otherwise |
---|
8820 | | - * the net device cannot be freed when |
---|
8821 | | - * driver calls free_netdev(), because the |
---|
8822 | | - * kobject is being hold. |
---|
8823 | | - */ |
---|
8824 | | - kobject_put(&dev->dev.kobj); |
---|
| 10053 | + /* Expect explicit free_netdev() on failure */ |
---|
| 10054 | + dev->needs_free_netdev = false; |
---|
| 10055 | + unregister_netdevice_queue(dev, NULL); |
---|
| 10056 | + goto out; |
---|
8825 | 10057 | } |
---|
8826 | 10058 | /* |
---|
8827 | 10059 | * Prevent userspace races by waiting until the network |
---|
.. | .. |
---|
8839 | 10071 | dev->netdev_ops->ndo_uninit(dev); |
---|
8840 | 10072 | if (dev->priv_destructor) |
---|
8841 | 10073 | dev->priv_destructor(dev); |
---|
| 10074 | +err_free_name: |
---|
| 10075 | + netdev_name_node_free(dev->name_node); |
---|
8842 | 10076 | goto out; |
---|
8843 | 10077 | } |
---|
8844 | 10078 | EXPORT_SYMBOL(register_netdevice); |
---|
.. | .. |
---|
8922 | 10156 | } |
---|
8923 | 10157 | EXPORT_SYMBOL(netdev_refcnt_read); |
---|
8924 | 10158 | |
---|
| 10159 | +#define WAIT_REFS_MIN_MSECS 1 |
---|
| 10160 | +#define WAIT_REFS_MAX_MSECS 250 |
---|
8925 | 10161 | /** |
---|
8926 | 10162 | * netdev_wait_allrefs - wait until all references are gone. |
---|
8927 | 10163 | * @dev: target net_device |
---|
.. | .. |
---|
8937 | 10173 | static void netdev_wait_allrefs(struct net_device *dev) |
---|
8938 | 10174 | { |
---|
8939 | 10175 | unsigned long rebroadcast_time, warning_time; |
---|
8940 | | - int refcnt; |
---|
| 10176 | + int wait = 0, refcnt; |
---|
8941 | 10177 | |
---|
8942 | 10178 | linkwatch_forget_dev(dev); |
---|
8943 | 10179 | |
---|
.. | .. |
---|
8971 | 10207 | rebroadcast_time = jiffies; |
---|
8972 | 10208 | } |
---|
8973 | 10209 | |
---|
8974 | | - msleep(250); |
---|
| 10210 | + if (!wait) { |
---|
| 10211 | + rcu_barrier(); |
---|
| 10212 | + wait = WAIT_REFS_MIN_MSECS; |
---|
| 10213 | + } else { |
---|
| 10214 | + msleep(wait); |
---|
| 10215 | + wait = min(wait << 1, WAIT_REFS_MAX_MSECS); |
---|
| 10216 | + } |
---|
8975 | 10217 | |
---|
8976 | 10218 | refcnt = netdev_refcnt_read(dev); |
---|
8977 | 10219 | |
---|
.. | .. |
---|
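The wait loop above replaces the old flat msleep(250) with an rcu_barrier() on the first pass followed by a sleep that doubles from 1 ms up to a 250 ms cap, so a device whose last reference is dropped quickly is reaped within a few milliseconds instead of a quarter of a second. A self-contained userspace analogue of that capped exponential backoff (poll_done() is a hypothetical stand-in for netdev_refcnt_read() reaching zero):

        #include <stdbool.h>
        #include <unistd.h>

        #define WAIT_MIN_MSECS 1
        #define WAIT_MAX_MSECS 250

        static void wait_with_backoff(bool (*poll_done)(void))
        {
                int wait = 0;

                while (!poll_done()) {
                        if (!wait) {
                                /* First pass: re-check right away (the kernel
                                 * loop issues an rcu_barrier() here instead).
                                 */
                                wait = WAIT_MIN_MSECS;
                        } else {
                                usleep(wait * 1000);
                                wait = wait << 1;
                                if (wait > WAIT_MAX_MSECS)
                                        wait = WAIT_MAX_MSECS;
                        }
                }
        }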
9010 | 10252 | void netdev_run_todo(void) |
---|
9011 | 10253 | { |
---|
9012 | 10254 | struct list_head list; |
---|
| 10255 | +#ifdef CONFIG_LOCKDEP |
---|
| 10256 | + struct list_head unlink_list; |
---|
| 10257 | + |
---|
| 10258 | + list_replace_init(&net_unlink_list, &unlink_list); |
---|
| 10259 | + |
---|
| 10260 | + while (!list_empty(&unlink_list)) { |
---|
| 10261 | + struct net_device *dev = list_first_entry(&unlink_list, |
---|
| 10262 | + struct net_device, |
---|
| 10263 | + unlink_list); |
---|
| 10264 | + list_del_init(&dev->unlink_list); |
---|
| 10265 | + dev->nested_level = dev->lower_level - 1; |
---|
| 10266 | + } |
---|
| 10267 | +#endif |
---|
9013 | 10268 | |
---|
9014 | 10269 | /* Snapshot list, allow later requests */ |
---|
9015 | 10270 | list_replace_init(&net_todo_list, &list); |
---|
.. | .. |
---|
9121 | 10376 | } |
---|
9122 | 10377 | EXPORT_SYMBOL(dev_get_stats); |
---|
9123 | 10378 | |
---|
| 10379 | +/** |
---|
| 10380 | + * dev_fetch_sw_netstats - get per-cpu network device statistics |
---|
| 10381 | + * @s: place to store stats |
---|
| 10382 | + * @netstats: per-cpu network stats to read from |
---|
| 10383 | + * |
---|
| 10384 | + * Read per-cpu network statistics and populate the related fields in @s. |
---|
| 10385 | + */ |
---|
| 10386 | +void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, |
---|
| 10387 | + const struct pcpu_sw_netstats __percpu *netstats) |
---|
| 10388 | +{ |
---|
| 10389 | + int cpu; |
---|
| 10390 | + |
---|
| 10391 | + for_each_possible_cpu(cpu) { |
---|
| 10392 | + const struct pcpu_sw_netstats *stats; |
---|
| 10393 | + struct pcpu_sw_netstats tmp; |
---|
| 10394 | + unsigned int start; |
---|
| 10395 | + |
---|
| 10396 | + stats = per_cpu_ptr(netstats, cpu); |
---|
| 10397 | + do { |
---|
| 10398 | + start = u64_stats_fetch_begin_irq(&stats->syncp); |
---|
| 10399 | + tmp.rx_packets = stats->rx_packets; |
---|
| 10400 | + tmp.rx_bytes = stats->rx_bytes; |
---|
| 10401 | + tmp.tx_packets = stats->tx_packets; |
---|
| 10402 | + tmp.tx_bytes = stats->tx_bytes; |
---|
| 10403 | + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); |
---|
| 10404 | + |
---|
| 10405 | + s->rx_packets += tmp.rx_packets; |
---|
| 10406 | + s->rx_bytes += tmp.rx_bytes; |
---|
| 10407 | + s->tx_packets += tmp.tx_packets; |
---|
| 10408 | + s->tx_bytes += tmp.tx_bytes; |
---|
| 10409 | + } |
---|
| 10410 | +} |
---|
| 10411 | +EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); |
---|
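A hedged usage sketch for the new helper: a driver that keeps per-CPU counters in a struct pcpu_sw_netstats (for instance allocated with netdev_alloc_pcpu_stats() into dev->tstats) can fold them into the rtnl_link_stats64 reply from its .ndo_get_stats64 callback. The callback name below is hypothetical:

        static void example_get_stats64(struct net_device *dev,
                                        struct rtnl_link_stats64 *stats)
        {
                /* Base and error counters maintained in dev->stats. */
                netdev_stats_to_stats64(stats, &dev->stats);
                /* Per-cpu rx/tx packet and byte totals. */
                dev_fetch_sw_netstats(stats, dev->tstats);
        }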
| 10412 | + |
---|
9124 | 10413 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) |
---|
9125 | 10414 | { |
---|
9126 | 10415 | struct netdev_queue *queue = dev_ingress_queue(dev); |
---|
.. | .. |
---|
9222 | 10511 | dev->gso_max_segs = GSO_MAX_SEGS; |
---|
9223 | 10512 | dev->upper_level = 1; |
---|
9224 | 10513 | dev->lower_level = 1; |
---|
| 10514 | +#ifdef CONFIG_LOCKDEP |
---|
| 10515 | + dev->nested_level = 0; |
---|
| 10516 | + INIT_LIST_HEAD(&dev->unlink_list); |
---|
| 10517 | +#endif |
---|
9225 | 10518 | |
---|
9226 | 10519 | INIT_LIST_HEAD(&dev->napi_list); |
---|
9227 | 10520 | INIT_LIST_HEAD(&dev->unreg_list); |
---|
.. | .. |
---|
9231 | 10524 | INIT_LIST_HEAD(&dev->adj_list.lower); |
---|
9232 | 10525 | INIT_LIST_HEAD(&dev->ptype_all); |
---|
9233 | 10526 | INIT_LIST_HEAD(&dev->ptype_specific); |
---|
| 10527 | + INIT_LIST_HEAD(&dev->net_notifier_list); |
---|
9234 | 10528 | #ifdef CONFIG_NET_SCHED |
---|
9235 | 10529 | hash_init(dev->qdisc_hash); |
---|
9236 | 10530 | #endif |
---|
.. | .. |
---|
9288 | 10582 | struct napi_struct *p, *n; |
---|
9289 | 10583 | |
---|
9290 | 10584 | might_sleep(); |
---|
| 10585 | + |
---|
| 10586 | + /* When called immediately after register_netdevice() failed, the unwind
---|
| 10587 | + * handling may still be dismantling the device. Handle that case by |
---|
| 10588 | + * deferring the free. |
---|
| 10589 | + */ |
---|
| 10590 | + if (dev->reg_state == NETREG_UNREGISTERING) { |
---|
| 10591 | + ASSERT_RTNL(); |
---|
| 10592 | + dev->needs_free_netdev = true; |
---|
| 10593 | + return; |
---|
| 10594 | + } |
---|
| 10595 | + |
---|
9291 | 10596 | netif_free_tx_queues(dev); |
---|
9292 | 10597 | netif_free_rx_queues(dev); |
---|
9293 | 10598 | |
---|
.. | .. |
---|
9301 | 10606 | |
---|
9302 | 10607 | free_percpu(dev->pcpu_refcnt); |
---|
9303 | 10608 | dev->pcpu_refcnt = NULL; |
---|
| 10609 | + free_percpu(dev->xdp_bulkq); |
---|
| 10610 | + dev->xdp_bulkq = NULL; |
---|
9304 | 10611 | |
---|
9305 | 10612 | /* Compatibility with error handling in drivers */ |
---|
9306 | 10613 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
---|
.. | .. |
---|
9352 | 10659 | if (head) { |
---|
9353 | 10660 | list_move_tail(&dev->unreg_list, head); |
---|
9354 | 10661 | } else { |
---|
9355 | | - rollback_registered(dev); |
---|
9356 | | - /* Finish processing unregister after unlock */ |
---|
9357 | | - net_set_todo(dev); |
---|
| 10662 | + LIST_HEAD(single); |
---|
| 10663 | + |
---|
| 10664 | + list_add(&dev->unreg_list, &single); |
---|
| 10665 | + unregister_netdevice_many(&single); |
---|
9358 | 10666 | } |
---|
9359 | 10667 | } |
---|
9360 | 10668 | EXPORT_SYMBOL(unregister_netdevice_queue); |
---|
.. | .. |
---|
9368 | 10676 | */ |
---|
9369 | 10677 | void unregister_netdevice_many(struct list_head *head) |
---|
9370 | 10678 | { |
---|
9371 | | - struct net_device *dev; |
---|
| 10679 | + struct net_device *dev, *tmp; |
---|
| 10680 | + LIST_HEAD(close_head); |
---|
9372 | 10681 | |
---|
9373 | | - if (!list_empty(head)) { |
---|
9374 | | - rollback_registered_many(head); |
---|
9375 | | - list_for_each_entry(dev, head, unreg_list) |
---|
9376 | | - net_set_todo(dev); |
---|
9377 | | - list_del(head); |
---|
| 10682 | + BUG_ON(dev_boot_phase); |
---|
| 10683 | + ASSERT_RTNL(); |
---|
| 10684 | + |
---|
| 10685 | + if (list_empty(head)) |
---|
| 10686 | + return; |
---|
| 10687 | + |
---|
| 10688 | + list_for_each_entry_safe(dev, tmp, head, unreg_list) { |
---|
| 10689 | + /* Some devices call unregister without ever having
---|
| 10690 | + * registered, as part of initialization unwind. Remove
---|
| 10691 | + * those and proceed with the remaining devices.
---|
| 10692 | + */ |
---|
| 10693 | + if (dev->reg_state == NETREG_UNINITIALIZED) { |
---|
| 10694 | + pr_debug("unregister_netdevice: device %s/%p never was registered\n", |
---|
| 10695 | + dev->name, dev); |
---|
| 10696 | + |
---|
| 10697 | + WARN_ON(1); |
---|
| 10698 | + list_del(&dev->unreg_list); |
---|
| 10699 | + continue; |
---|
| 10700 | + } |
---|
| 10701 | + dev->dismantle = true; |
---|
| 10702 | + BUG_ON(dev->reg_state != NETREG_REGISTERED); |
---|
9378 | 10703 | } |
---|
| 10704 | + |
---|
| 10705 | + /* If device is running, close it first. */ |
---|
| 10706 | + list_for_each_entry(dev, head, unreg_list) |
---|
| 10707 | + list_add_tail(&dev->close_list, &close_head); |
---|
| 10708 | + dev_close_many(&close_head, true); |
---|
| 10709 | + |
---|
| 10710 | + list_for_each_entry(dev, head, unreg_list) { |
---|
| 10711 | + /* And unlink it from device chain. */ |
---|
| 10712 | + unlist_netdevice(dev); |
---|
| 10713 | + |
---|
| 10714 | + dev->reg_state = NETREG_UNREGISTERING; |
---|
| 10715 | + } |
---|
| 10716 | + flush_all_backlogs(); |
---|
| 10717 | + |
---|
| 10718 | + synchronize_net(); |
---|
| 10719 | + |
---|
| 10720 | + list_for_each_entry(dev, head, unreg_list) { |
---|
| 10721 | + struct sk_buff *skb = NULL; |
---|
| 10722 | + |
---|
| 10723 | + /* Shutdown queueing discipline. */ |
---|
| 10724 | + dev_shutdown(dev); |
---|
| 10725 | + |
---|
| 10726 | + dev_xdp_uninstall(dev); |
---|
| 10727 | + |
---|
| 10728 | + /* Notify protocols that we are about to destroy
---|
| 10729 | + * this device. They should clean up all of their state.
---|
| 10730 | + */ |
---|
| 10731 | + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
---|
| 10732 | + |
---|
| 10733 | + if (!dev->rtnl_link_ops || |
---|
| 10734 | + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
---|
| 10735 | + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, |
---|
| 10736 | + GFP_KERNEL, NULL, 0); |
---|
| 10737 | + |
---|
| 10738 | + /* |
---|
| 10739 | + * Flush the unicast and multicast chains |
---|
| 10740 | + */ |
---|
| 10741 | + dev_uc_flush(dev); |
---|
| 10742 | + dev_mc_flush(dev); |
---|
| 10743 | + |
---|
| 10744 | + netdev_name_node_alt_flush(dev); |
---|
| 10745 | + netdev_name_node_free(dev->name_node); |
---|
| 10746 | + |
---|
| 10747 | + if (dev->netdev_ops->ndo_uninit) |
---|
| 10748 | + dev->netdev_ops->ndo_uninit(dev); |
---|
| 10749 | + |
---|
| 10750 | + if (skb) |
---|
| 10751 | + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); |
---|
| 10752 | + |
---|
| 10753 | + /* Notifier chain MUST detach all upper devices from us. */
---|
| 10754 | + WARN_ON(netdev_has_any_upper_dev(dev)); |
---|
| 10755 | + WARN_ON(netdev_has_any_lower_dev(dev)); |
---|
| 10756 | + |
---|
| 10757 | + /* Remove entries from kobject tree */ |
---|
| 10758 | + netdev_unregister_kobject(dev); |
---|
| 10759 | +#ifdef CONFIG_XPS |
---|
| 10760 | + /* Remove XPS queueing entries */ |
---|
| 10761 | + netif_reset_xps_queues_gt(dev, 0); |
---|
| 10762 | +#endif |
---|
| 10763 | + } |
---|
| 10764 | + |
---|
| 10765 | + synchronize_net(); |
---|
| 10766 | + |
---|
| 10767 | + list_for_each_entry(dev, head, unreg_list) { |
---|
| 10768 | + dev_put(dev); |
---|
| 10769 | + net_set_todo(dev); |
---|
| 10770 | + } |
---|
| 10771 | + |
---|
| 10772 | + list_del(head); |
---|
9379 | 10773 | } |
---|
9380 | 10774 | EXPORT_SYMBOL(unregister_netdevice_many); |
---|
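unregister_netdevice_many() now performs the whole teardown inline (close, unlist, notifiers, kobject removal, todo queuing) rather than delegating to rollback_registered_many(). A hedged sketch of the batched caller pattern it serves, assuming a hypothetical array example_devs[] of n already-registered devices:

        LIST_HEAD(unreg_list);
        int i;

        rtnl_lock();
        for (i = 0; i < n; i++)
                unregister_netdevice_queue(example_devs[i], &unreg_list);
        /* One pass over the list: dev_close_many(), notifiers, kobject
         * removal and the final todo handling all happen in here.
         */
        unregister_netdevice_many(&unreg_list);
        rtnl_unlock();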
9381 | 10775 | |
---|
.. | .. |
---|
9414 | 10808 | |
---|
9415 | 10809 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) |
---|
9416 | 10810 | { |
---|
| 10811 | + struct net *net_old = dev_net(dev); |
---|
9417 | 10812 | int err, new_nsid, new_ifindex; |
---|
9418 | 10813 | |
---|
9419 | 10814 | ASSERT_RTNL(); |
---|
.. | .. |
---|
9429 | 10824 | |
---|
9430 | 10825 | /* Get out if there is nothing to do */
---|
9431 | 10826 | err = 0; |
---|
9432 | | - if (net_eq(dev_net(dev), net)) |
---|
| 10827 | + if (net_eq(net_old, net)) |
---|
9433 | 10828 | goto out; |
---|
9434 | 10829 | |
---|
9435 | 10830 | /* Pick the destination device name, and ensure |
---|
.. | .. |
---|
9490 | 10885 | kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); |
---|
9491 | 10886 | netdev_adjacent_del_links(dev); |
---|
9492 | 10887 | |
---|
| 10888 | + /* Move per-net netdevice notifiers that are following the netdevice */ |
---|
| 10889 | + move_netdevice_notifiers_dev_net(dev, net); |
---|
| 10890 | + |
---|
9493 | 10891 | /* Actually switch the network namespace */ |
---|
9494 | 10892 | dev_net_set(dev, net); |
---|
9495 | 10893 | dev->ifindex = new_ifindex; |
---|
.. | .. |
---|
9500 | 10898 | |
---|
9501 | 10899 | /* Fixup kobjects */ |
---|
9502 | 10900 | err = device_rename(&dev->dev, dev->name); |
---|
| 10901 | + WARN_ON(err); |
---|
| 10902 | + |
---|
| 10903 | + /* Adapt the owner in case the owning user namespace of the target
---|
| 10904 | + * network namespace differs from the original one.
---|
| 10905 | + */ |
---|
| 10906 | + err = netdev_change_owner(dev, net_old, net); |
---|
9503 | 10907 | WARN_ON(err); |
---|
9504 | 10908 | |
---|
9505 | 10909 | /* Add the device back in the hashes */ |
---|
.. | .. |
---|
9566 | 10970 | |
---|
9567 | 10971 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
---|
9568 | 10972 | local_irq_enable(); |
---|
9569 | | - preempt_check_resched_rt(); |
---|
9570 | 10973 | |
---|
9571 | 10974 | #ifdef CONFIG_RPS |
---|
9572 | 10975 | remsd = oldsd->rps_ipi_list; |
---|
.. | .. |
---|
9580 | 10983 | netif_rx_ni(skb); |
---|
9581 | 10984 | input_queue_head_incr(oldsd); |
---|
9582 | 10985 | } |
---|
9583 | | - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
---|
| 10986 | + while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { |
---|
9584 | 10987 | netif_rx_ni(skb); |
---|
9585 | 10988 | input_queue_head_incr(oldsd); |
---|
9586 | | - } |
---|
9587 | | - while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { |
---|
9588 | | - kfree_skb(skb); |
---|
9589 | 10989 | } |
---|
9590 | 10990 | |
---|
9591 | 10991 | return 0; |
---|
.. | .. |
---|
9636 | 11036 | static int __net_init netdev_init(struct net *net) |
---|
9637 | 11037 | { |
---|
9638 | 11038 | BUILD_BUG_ON(GRO_HASH_BUCKETS > |
---|
9639 | | - 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask)); |
---|
| 11039 | + 8 * sizeof_field(struct napi_struct, gro_bitmask)); |
---|
9640 | 11040 | |
---|
9641 | 11041 | if (net != &init_net) |
---|
9642 | 11042 | INIT_LIST_HEAD(&net->dev_base_head); |
---|
.. | .. |
---|
9648 | 11048 | net->dev_index_head = netdev_create_hash(); |
---|
9649 | 11049 | if (net->dev_index_head == NULL) |
---|
9650 | 11050 | goto err_idx; |
---|
| 11051 | + |
---|
| 11052 | + RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); |
---|
9651 | 11053 | |
---|
9652 | 11054 | return 0; |
---|
9653 | 11055 | |
---|
.. | .. |
---|
9770 | 11172 | continue; |
---|
9771 | 11173 | |
---|
9772 | 11174 | /* Leave virtual devices for the generic cleanup */ |
---|
9773 | | - if (dev->rtnl_link_ops) |
---|
| 11175 | + if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund) |
---|
9774 | 11176 | continue; |
---|
9775 | 11177 | |
---|
9776 | 11178 | /* Push remaining network devices to init_net */ |
---|
.. | .. |
---|
9897 | 11299 | |
---|
9898 | 11300 | INIT_WORK(flush, flush_backlog); |
---|
9899 | 11301 | |
---|
9900 | | - skb_queue_head_init_raw(&sd->input_pkt_queue); |
---|
9901 | | - skb_queue_head_init_raw(&sd->process_queue); |
---|
9902 | | - skb_queue_head_init_raw(&sd->tofree_queue); |
---|
| 11302 | + skb_queue_head_init(&sd->input_pkt_queue); |
---|
| 11303 | + skb_queue_head_init(&sd->process_queue); |
---|
9903 | 11304 | #ifdef CONFIG_XFRM_OFFLOAD |
---|
9904 | 11305 | skb_queue_head_init(&sd->xfrm_backlog); |
---|
9905 | 11306 | #endif |
---|