| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * NET3 Protocol independent device support routines. |
|---|
| 3 | | - * |
|---|
| 4 | | - * This program is free software; you can redistribute it and/or |
|---|
| 5 | | - * modify it under the terms of the GNU General Public License |
|---|
| 6 | | - * as published by the Free Software Foundation; either version |
|---|
| 7 | | - * 2 of the License, or (at your option) any later version. |
|---|
| 8 | 4 | * |
|---|
| 9 | 5 | * Derived from the non IP parts of dev.c 1.0.19 |
|---|
| 10 | 6 | * Authors: Ross Biro |
|---|
| .. | .. |
|---|
| 102 | 98 | #include <net/busy_poll.h> |
|---|
| 103 | 99 | #include <linux/rtnetlink.h> |
|---|
| 104 | 100 | #include <linux/stat.h> |
|---|
| 101 | +#include <net/dsa.h> |
|---|
| 105 | 102 | #include <net/dst.h> |
|---|
| 106 | 103 | #include <net/dst_metadata.h> |
|---|
| 107 | 104 | #include <net/pkt_sched.h> |
|---|
| .. | .. |
|---|
| 132 | 129 | #include <trace/events/napi.h> |
|---|
| 133 | 130 | #include <trace/events/net.h> |
|---|
| 134 | 131 | #include <trace/events/skb.h> |
|---|
| 135 | | -#include <linux/pci.h> |
|---|
| 136 | 132 | #include <linux/inetdevice.h> |
|---|
| 137 | 133 | #include <linux/cpu_rmap.h> |
|---|
| 138 | 134 | #include <linux/static_key.h> |
|---|
| .. | .. |
|---|
| 146 | 142 | #include <linux/sctp.h> |
|---|
| 147 | 143 | #include <net/udp_tunnel.h> |
|---|
| 148 | 144 | #include <linux/net_namespace.h> |
|---|
| 145 | +#include <linux/indirect_call_wrapper.h> |
|---|
| 146 | +#include <net/devlink.h> |
|---|
| 147 | +#include <linux/pm_runtime.h> |
|---|
| 148 | +#include <linux/prandom.h> |
|---|
| 149 | +#include <trace/hooks/net.h> |
|---|
| 149 | 150 | |
|---|
| 150 | 151 | #include "net-sysfs.h" |
|---|
| 151 | 152 | |
|---|
| 152 | 153 | #define MAX_GRO_SKBS 8 |
|---|
| 153 | | -#define MAX_NEST_DEV 8 |
|---|
| 154 | 154 | |
|---|
| 155 | 155 | /* This should be increased if a protocol with a bigger head is added. */ |
|---|
| 156 | 156 | #define GRO_MAX_HEAD (MAX_HEADER + 128) |
|---|
| .. | .. |
|---|
| 164 | 164 | static int netif_rx_internal(struct sk_buff *skb); |
|---|
| 165 | 165 | static int call_netdevice_notifiers_info(unsigned long val, |
|---|
| 166 | 166 | struct netdev_notifier_info *info); |
|---|
| 167 | +static int call_netdevice_notifiers_extack(unsigned long val, |
|---|
| 168 | + struct net_device *dev, |
|---|
| 169 | + struct netlink_ext_ack *extack); |
|---|
| 167 | 170 | static struct napi_struct *napi_by_id(unsigned int napi_id); |
|---|
| 168 | 171 | |
|---|
| 169 | 172 | /* |
|---|
| .. | .. |
|---|
| 219 | 222 | static inline void rps_lock(struct softnet_data *sd) |
|---|
| 220 | 223 | { |
|---|
| 221 | 224 | #ifdef CONFIG_RPS |
|---|
| 222 | | - raw_spin_lock(&sd->input_pkt_queue.raw_lock); |
|---|
| 225 | + spin_lock(&sd->input_pkt_queue.lock); |
|---|
| 223 | 226 | #endif |
|---|
| 224 | 227 | } |
|---|
| 225 | 228 | |
|---|
| 226 | 229 | static inline void rps_unlock(struct softnet_data *sd) |
|---|
| 227 | 230 | { |
|---|
| 228 | 231 | #ifdef CONFIG_RPS |
|---|
| 229 | | - raw_spin_unlock(&sd->input_pkt_queue.raw_lock); |
|---|
| 232 | + spin_unlock(&sd->input_pkt_queue.lock); |
|---|
| 230 | 233 | #endif |
|---|
| 234 | +} |
|---|
| 235 | + |
|---|
| 236 | +static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev, |
|---|
| 237 | + const char *name) |
|---|
| 238 | +{ |
|---|
| 239 | + struct netdev_name_node *name_node; |
|---|
| 240 | + |
|---|
| 241 | + name_node = kmalloc(sizeof(*name_node), GFP_KERNEL); |
|---|
| 242 | + if (!name_node) |
|---|
| 243 | + return NULL; |
|---|
| 244 | + INIT_HLIST_NODE(&name_node->hlist); |
|---|
| 245 | + name_node->dev = dev; |
|---|
| 246 | + name_node->name = name; |
|---|
| 247 | + return name_node; |
|---|
| 248 | +} |
|---|
| 249 | + |
|---|
| 250 | +static struct netdev_name_node * |
|---|
| 251 | +netdev_name_node_head_alloc(struct net_device *dev) |
|---|
| 252 | +{ |
|---|
| 253 | + struct netdev_name_node *name_node; |
|---|
| 254 | + |
|---|
| 255 | + name_node = netdev_name_node_alloc(dev, dev->name); |
|---|
| 256 | + if (!name_node) |
|---|
| 257 | + return NULL; |
|---|
| 258 | + INIT_LIST_HEAD(&name_node->list); |
|---|
| 259 | + return name_node; |
|---|
| 260 | +} |
|---|
| 261 | + |
|---|
| 262 | +static void netdev_name_node_free(struct netdev_name_node *name_node) |
|---|
| 263 | +{ |
|---|
| 264 | + kfree(name_node); |
|---|
| 265 | +} |
|---|
| 266 | + |
|---|
| 267 | +static void netdev_name_node_add(struct net *net, |
|---|
| 268 | + struct netdev_name_node *name_node) |
|---|
| 269 | +{ |
|---|
| 270 | + hlist_add_head_rcu(&name_node->hlist, |
|---|
| 271 | + dev_name_hash(net, name_node->name)); |
|---|
| 272 | +} |
|---|
| 273 | + |
|---|
| 274 | +static void netdev_name_node_del(struct netdev_name_node *name_node) |
|---|
| 275 | +{ |
|---|
| 276 | + hlist_del_rcu(&name_node->hlist); |
|---|
| 277 | +} |
|---|
| 278 | + |
|---|
| 279 | +static struct netdev_name_node *netdev_name_node_lookup(struct net *net, |
|---|
| 280 | + const char *name) |
|---|
| 281 | +{ |
|---|
| 282 | + struct hlist_head *head = dev_name_hash(net, name); |
|---|
| 283 | + struct netdev_name_node *name_node; |
|---|
| 284 | + |
|---|
| 285 | + hlist_for_each_entry(name_node, head, hlist) |
|---|
| 286 | + if (!strcmp(name_node->name, name)) |
|---|
| 287 | + return name_node; |
|---|
| 288 | + return NULL; |
|---|
| 289 | +} |
|---|
| 290 | + |
|---|
| 291 | +static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, |
|---|
| 292 | + const char *name) |
|---|
| 293 | +{ |
|---|
| 294 | + struct hlist_head *head = dev_name_hash(net, name); |
|---|
| 295 | + struct netdev_name_node *name_node; |
|---|
| 296 | + |
|---|
| 297 | + hlist_for_each_entry_rcu(name_node, head, hlist) |
|---|
| 298 | + if (!strcmp(name_node->name, name)) |
|---|
| 299 | + return name_node; |
|---|
| 300 | + return NULL; |
|---|
| 301 | +} |
|---|
| 302 | + |
|---|
| 303 | +int netdev_name_node_alt_create(struct net_device *dev, const char *name) |
|---|
| 304 | +{ |
|---|
| 305 | + struct netdev_name_node *name_node; |
|---|
| 306 | + struct net *net = dev_net(dev); |
|---|
| 307 | + |
|---|
| 308 | + name_node = netdev_name_node_lookup(net, name); |
|---|
| 309 | + if (name_node) |
|---|
| 310 | + return -EEXIST; |
|---|
| 311 | + name_node = netdev_name_node_alloc(dev, name); |
|---|
| 312 | + if (!name_node) |
|---|
| 313 | + return -ENOMEM; |
|---|
| 314 | + netdev_name_node_add(net, name_node); |
|---|
| 315 | + /* The node that holds dev->name acts as a head of per-device list. */ |
|---|
| 316 | + list_add_tail(&name_node->list, &dev->name_node->list); |
|---|
| 317 | + |
|---|
| 318 | + return 0; |
|---|
| 319 | +} |
|---|
| 320 | +EXPORT_SYMBOL(netdev_name_node_alt_create); |
|---|
| 321 | + |
|---|
| 322 | +static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) |
|---|
| 323 | +{ |
|---|
| 324 | + list_del(&name_node->list); |
|---|
| 325 | + netdev_name_node_del(name_node); |
|---|
| 326 | + kfree(name_node->name); |
|---|
| 327 | + netdev_name_node_free(name_node); |
|---|
| 328 | +} |
|---|
| 329 | + |
|---|
| 330 | +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) |
|---|
| 331 | +{ |
|---|
| 332 | + struct netdev_name_node *name_node; |
|---|
| 333 | + struct net *net = dev_net(dev); |
|---|
| 334 | + |
|---|
| 335 | + name_node = netdev_name_node_lookup(net, name); |
|---|
| 336 | + if (!name_node) |
|---|
| 337 | + return -ENOENT; |
|---|
| 338 | + /* lookup might have found our primary name or a name belonging |
|---|
| 339 | + * to another device. |
|---|
| 340 | + */ |
|---|
| 341 | + if (name_node == dev->name_node || name_node->dev != dev) |
|---|
| 342 | + return -EINVAL; |
|---|
| 343 | + |
|---|
| 344 | + __netdev_name_node_alt_destroy(name_node); |
|---|
| 345 | + |
|---|
| 346 | + return 0; |
|---|
| 347 | +} |
|---|
| 348 | +EXPORT_SYMBOL(netdev_name_node_alt_destroy); |
|---|
| 349 | + |
|---|
| 350 | +static void netdev_name_node_alt_flush(struct net_device *dev) |
|---|
| 351 | +{ |
|---|
| 352 | + struct netdev_name_node *name_node, *tmp; |
|---|
| 353 | + |
|---|
| 354 | + list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) |
|---|
| 355 | + __netdev_name_node_alt_destroy(name_node); |
|---|
| 231 | 356 | } |
|---|
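This hunk introduces the netdev_name_node machinery that backs alternative interface names (altnames): the node holding dev->name heads a per-device list, and every node hashes into the same name table, so a lookup by any of the names resolves to the device. A minimal sketch of attaching an extra name follows; `example_add_altname` and the "mgmt0" string are hypothetical (the real entry point is the RTM_NEWLINKPROP netlink handler, which holds RTNL and duplicates the name the same way):

```c
/* Hypothetical sketch, not part of this patch. Caller must hold RTNL.
 * The name must be heap-allocated: __netdev_name_node_alt_destroy()
 * kfree()s name_node->name when the altname is removed.
 */
static int example_add_altname(struct net_device *dev)
{
	char *alt = kstrdup("mgmt0", GFP_KERNEL);
	int err;

	if (!alt)
		return -ENOMEM;
	err = netdev_name_node_alt_create(dev, alt); /* -EEXIST if taken */
	if (err)
		kfree(alt);
	return err;
}
```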
| 232 | 357 | |
|---|
| 233 | 358 | /* Device list insertion */ |
|---|
| .. | .. |
|---|
| 239 | 364 | |
|---|
| 240 | 365 | write_lock_bh(&dev_base_lock); |
|---|
| 241 | 366 | list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); |
|---|
| 242 | | - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); |
|---|
| 367 | + netdev_name_node_add(net, dev->name_node); |
|---|
| 243 | 368 | hlist_add_head_rcu(&dev->index_hlist, |
|---|
| 244 | 369 | dev_index_hash(net, dev->ifindex)); |
|---|
| 245 | 370 | write_unlock_bh(&dev_base_lock); |
|---|
| .. | .. |
|---|
| 257 | 382 | /* Unlink dev from the device chain */ |
|---|
| 258 | 383 | write_lock_bh(&dev_base_lock); |
|---|
| 259 | 384 | list_del_rcu(&dev->dev_list); |
|---|
| 260 | | - hlist_del_rcu(&dev->name_hlist); |
|---|
| 385 | + netdev_name_node_del(dev->name_node); |
|---|
| 261 | 386 | hlist_del_rcu(&dev->index_hlist); |
|---|
| 262 | 387 | write_unlock_bh(&dev_base_lock); |
|---|
| 263 | 388 | |
|---|
| .. | .. |
|---|
| 355 | 480 | unsigned short dev_type) |
|---|
| 356 | 481 | { |
|---|
| 357 | 482 | } |
|---|
| 483 | + |
|---|
| 358 | 484 | static inline void netdev_set_addr_lockdep_class(struct net_device *dev) |
|---|
| 359 | 485 | { |
|---|
| 360 | 486 | } |
|---|
| .. | .. |
|---|
| 385 | 511 | |
|---|
| 386 | 512 | static inline struct list_head *ptype_head(const struct packet_type *pt) |
|---|
| 387 | 513 | { |
|---|
| 514 | + struct list_head vendor_pt = { .next = NULL, }; |
|---|
| 515 | + |
|---|
| 516 | + trace_android_vh_ptype_head(pt, &vendor_pt); |
|---|
| 517 | + if (vendor_pt.next) |
|---|
| 518 | + return vendor_pt.next; |
|---|
| 519 | + |
|---|
| 388 | 520 | if (pt->type == htons(ETH_P_ALL)) |
|---|
| 389 | 521 | return pt->dev ? &pt->dev->ptype_all : &ptype_all; |
|---|
| 390 | 522 | else |
|---|
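trace_android_vh_ptype_head() is an Android GKI vendor hook that lets an out-of-tree module substitute its own packet_type list: a handler that sets vendor_pt.next short-circuits the normal ptype_all/ptype_base selection above. A sketch of the module side, assuming the usual register_trace_android_vh_*() registration convention (the handler name and private list are hypothetical):

```c
/* Hypothetical vendor-module consumer of the hook. */
static LIST_HEAD(vendor_ptype_list);

static void example_vh_ptype_head(void *data, const struct packet_type *pt,
				  struct list_head *vendor_pt)
{
	/* a non-NULL ->next makes ptype_head() return this list instead */
	vendor_pt->next = &vendor_ptype_list;
}

/* at module init:
 *	register_trace_android_vh_ptype_head(example_vh_ptype_head, NULL);
 */
```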
| .. | .. |
|---|
| 735 | 867 | |
|---|
| 736 | 868 | struct net_device *__dev_get_by_name(struct net *net, const char *name) |
|---|
| 737 | 869 | { |
|---|
| 738 | | - struct net_device *dev; |
|---|
| 739 | | - struct hlist_head *head = dev_name_hash(net, name); |
|---|
| 870 | + struct netdev_name_node *node_name; |
|---|
| 740 | 871 | |
|---|
| 741 | | - hlist_for_each_entry(dev, head, name_hlist) |
|---|
| 742 | | - if (!strncmp(dev->name, name, IFNAMSIZ)) |
|---|
| 743 | | - return dev; |
|---|
| 744 | | - |
|---|
| 745 | | - return NULL; |
|---|
| 872 | + node_name = netdev_name_node_lookup(net, name); |
|---|
| 873 | + return node_name ? node_name->dev : NULL; |
|---|
| 746 | 874 | } |
|---|
| 747 | 875 | EXPORT_SYMBOL(__dev_get_by_name); |
|---|
| 748 | 876 | |
|---|
| .. | .. |
|---|
| 760 | 888 | |
|---|
| 761 | 889 | struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) |
|---|
| 762 | 890 | { |
|---|
| 763 | | - struct net_device *dev; |
|---|
| 764 | | - struct hlist_head *head = dev_name_hash(net, name); |
|---|
| 891 | + struct netdev_name_node *node_name; |
|---|
| 765 | 892 | |
|---|
| 766 | | - hlist_for_each_entry_rcu(dev, head, name_hlist) |
|---|
| 767 | | - if (!strncmp(dev->name, name, IFNAMSIZ)) |
|---|
| 768 | | - return dev; |
|---|
| 769 | | - |
|---|
| 770 | | - return NULL; |
|---|
| 893 | + node_name = netdev_name_node_lookup_rcu(net, name); |
|---|
| 894 | + return node_name ? node_name->dev : NULL; |
|---|
| 771 | 895 | } |
|---|
| 772 | 896 | EXPORT_SYMBOL(dev_get_by_name_rcu); |
|---|
| 773 | 897 | |
|---|
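Both name lookups now go through the shared name-node hash instead of dev->name_hlist, so alternative names resolve exactly like the primary one. Calling conventions are unchanged; for reference, the RCU variant pins the device only for the read-side section (sketch, "eth0" is illustrative):

```c
struct net_device *dev;
int ifindex = 0;

rcu_read_lock();
dev = dev_get_by_name_rcu(net, "eth0");	/* no reference taken */
if (dev)
	ifindex = dev->ifindex;		/* only valid inside the section */
rcu_read_unlock();
```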
| .. | .. |
|---|
| 1015 | 1139 | * @name: name string |
|---|
| 1016 | 1140 | * |
|---|
| 1017 | 1141 | * Network device names need to be valid file names to |
|---|
| 1018 | | - * to allow sysfs to work. We also disallow any kind of |
|---|
| 1142 | + * allow sysfs to work. We also disallow any kind of |
|---|
| 1019 | 1143 | * whitespace. |
|---|
| 1020 | 1144 | */ |
|---|
| 1021 | 1145 | bool dev_valid_name(const char *name) |
|---|
| .. | .. |
|---|
| 1078 | 1202 | return -ENOMEM; |
|---|
| 1079 | 1203 | |
|---|
| 1080 | 1204 | for_each_netdev(net, d) { |
|---|
| 1205 | + struct netdev_name_node *name_node; |
|---|
| 1206 | + list_for_each_entry(name_node, &d->name_node->list, list) { |
|---|
| 1207 | + if (!sscanf(name_node->name, name, &i)) |
|---|
| 1208 | + continue; |
|---|
| 1209 | + if (i < 0 || i >= max_netdevices) |
|---|
| 1210 | + continue; |
|---|
| 1211 | + |
|---|
| 1212 | + /* avoid cases where sscanf is not exact inverse of printf */ |
|---|
| 1213 | + snprintf(buf, IFNAMSIZ, name, i); |
|---|
| 1214 | + if (!strncmp(buf, name_node->name, IFNAMSIZ)) |
|---|
| 1215 | + set_bit(i, inuse); |
|---|
| 1216 | + } |
|---|
| 1081 | 1217 | if (!sscanf(d->name, name, &i)) |
|---|
| 1082 | 1218 | continue; |
|---|
| 1083 | 1219 | if (i < 0 || i >= max_netdevices) |
|---|
| .. | .. |
|---|
| 1138 | 1274 | } |
|---|
| 1139 | 1275 | EXPORT_SYMBOL(dev_alloc_name); |
|---|
| 1140 | 1276 | |
|---|
| 1141 | | -int dev_get_valid_name(struct net *net, struct net_device *dev, |
|---|
| 1142 | | - const char *name) |
|---|
| 1277 | +static int dev_get_valid_name(struct net *net, struct net_device *dev, |
|---|
| 1278 | + const char *name) |
|---|
| 1143 | 1279 | { |
|---|
| 1144 | 1280 | BUG_ON(!net); |
|---|
| 1145 | 1281 | |
|---|
| .. | .. |
|---|
| 1155 | 1291 | |
|---|
| 1156 | 1292 | return 0; |
|---|
| 1157 | 1293 | } |
|---|
| 1158 | | -EXPORT_SYMBOL(dev_get_valid_name); |
|---|
| 1159 | 1294 | |
|---|
| 1160 | 1295 | /** |
|---|
| 1161 | 1296 | * dev_change_name - change name of a device |
|---|
| .. | .. |
|---|
| 1229 | 1364 | netdev_adjacent_rename_links(dev, oldname); |
|---|
| 1230 | 1365 | |
|---|
| 1231 | 1366 | write_lock_bh(&dev_base_lock); |
|---|
| 1232 | | - hlist_del_rcu(&dev->name_hlist); |
|---|
| 1367 | + netdev_name_node_del(dev->name_node); |
|---|
| 1233 | 1368 | write_unlock_bh(&dev_base_lock); |
|---|
| 1234 | 1369 | |
|---|
| 1235 | 1370 | synchronize_rcu(); |
|---|
| 1236 | 1371 | |
|---|
| 1237 | 1372 | write_lock_bh(&dev_base_lock); |
|---|
| 1238 | | - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); |
|---|
| 1373 | + netdev_name_node_add(net, dev->name_node); |
|---|
| 1239 | 1374 | write_unlock_bh(&dev_base_lock); |
|---|
| 1240 | 1375 | |
|---|
| 1241 | 1376 | ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); |
|---|
| .. | .. |
|---|
| 1285 | 1420 | } |
|---|
| 1286 | 1421 | |
|---|
| 1287 | 1422 | mutex_lock(&ifalias_mutex); |
|---|
| 1288 | | - rcu_swap_protected(dev->ifalias, new_alias, |
|---|
| 1289 | | - mutex_is_locked(&ifalias_mutex)); |
|---|
| 1423 | + new_alias = rcu_replace_pointer(dev->ifalias, new_alias, |
|---|
| 1424 | + mutex_is_locked(&ifalias_mutex)); |
|---|
| 1290 | 1425 | mutex_unlock(&ifalias_mutex); |
|---|
| 1291 | 1426 | |
|---|
| 1292 | 1427 | if (new_alias) |
|---|
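rcu_swap_protected() exchanged the two pointers in place; its replacement rcu_replace_pointer() returns the previous value instead, which fits this pattern of publishing a new alias and freeing the old one. Under the lockdep condition c it behaves roughly like this (a sketch of the semantics, not the exact macro):

```c
/* old = rcu_replace_pointer(ptr, new, c) is approximately: */
old = rcu_dereference_protected(ptr, c);  /* lockdep-checked load */
rcu_assign_pointer(ptr, new);             /* publish the replacement */
/* 'old' may be freed after a grace period (e.g. via kfree_rcu()) */
```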
| .. | .. |
|---|
| 1372 | 1507 | } |
|---|
| 1373 | 1508 | EXPORT_SYMBOL(netdev_notify_peers); |
|---|
| 1374 | 1509 | |
|---|
| 1375 | | -static int __dev_open(struct net_device *dev) |
|---|
| 1510 | +static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) |
|---|
| 1376 | 1511 | { |
|---|
| 1377 | 1512 | const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 1378 | 1513 | int ret; |
|---|
| 1379 | 1514 | |
|---|
| 1380 | 1515 | ASSERT_RTNL(); |
|---|
| 1381 | 1516 | |
|---|
| 1382 | | - if (!netif_device_present(dev)) |
|---|
| 1383 | | - return -ENODEV; |
|---|
| 1517 | + if (!netif_device_present(dev)) { |
|---|
| 1518 | + /* may be detached because parent is runtime-suspended */ |
|---|
| 1519 | + if (dev->dev.parent) |
|---|
| 1520 | + pm_runtime_resume(dev->dev.parent); |
|---|
| 1521 | + if (!netif_device_present(dev)) |
|---|
| 1522 | + return -ENODEV; |
|---|
| 1523 | + } |
|---|
| 1384 | 1524 | |
|---|
| 1385 | 1525 | /* Block netpoll from trying to do any rx path servicing. |
|---|
| 1386 | 1526 | * If we don't do this there is a chance ndo_poll_controller |
|---|
| .. | .. |
|---|
| 1388 | 1528 | */ |
|---|
| 1389 | 1529 | netpoll_poll_disable(dev); |
|---|
| 1390 | 1530 | |
|---|
| 1391 | | - ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); |
|---|
| 1531 | + ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack); |
|---|
| 1392 | 1532 | ret = notifier_to_errno(ret); |
|---|
| 1393 | 1533 | if (ret) |
|---|
| 1394 | 1534 | return ret; |
|---|
| .. | .. |
|---|
| 1417 | 1557 | |
|---|
| 1418 | 1558 | /** |
|---|
| 1419 | 1559 | * dev_open - prepare an interface for use. |
|---|
| 1420 | | - * @dev: device to open |
|---|
| 1560 | + * @dev: device to open |
|---|
| 1561 | + * @extack: netlink extended ack |
|---|
| 1421 | 1562 | * |
|---|
| 1422 | 1563 | * Takes a device from down to up state. The device's private open |
|---|
| 1423 | 1564 | * function is invoked and then the multicast lists are loaded. Finally |
|---|
| .. | .. |
|---|
| 1427 | 1568 | * Calling this function on an active interface is a nop. On a failure |
|---|
| 1428 | 1569 | * a negative errno code is returned. |
|---|
| 1429 | 1570 | */ |
|---|
| 1430 | | -int dev_open(struct net_device *dev) |
|---|
| 1571 | +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) |
|---|
| 1431 | 1572 | { |
|---|
| 1432 | 1573 | int ret; |
|---|
| 1433 | 1574 | |
|---|
| 1434 | 1575 | if (dev->flags & IFF_UP) |
|---|
| 1435 | 1576 | return 0; |
|---|
| 1436 | 1577 | |
|---|
| 1437 | | - ret = __dev_open(dev); |
|---|
| 1578 | + ret = __dev_open(dev, extack); |
|---|
| 1438 | 1579 | if (ret < 0) |
|---|
| 1439 | 1580 | return ret; |
|---|
| 1440 | 1581 | |
|---|
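__dev_open()/dev_open() now carry a netlink extended ack so a NETDEV_PRE_UP listener that vetoes the open can report why. In-kernel callers without netlink context pass NULL; a netlink-driven caller looks roughly like this sketch:

```c
struct netlink_ext_ack extack = {};	/* a vetoing notifier may fill _msg */
int err;

rtnl_lock();
err = dev_open(dev, &extack);		/* was: dev_open(dev) */
rtnl_unlock();
if (err && extack._msg)
	pr_warn("%s: open refused: %s\n", dev->name, extack._msg);
```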
| .. | .. |
|---|
| 1596 | 1737 | N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) |
|---|
| 1597 | 1738 | N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) |
|---|
| 1598 | 1739 | N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) |
|---|
| 1740 | + N(PRE_CHANGEADDR) |
|---|
| 1599 | 1741 | } |
|---|
| 1600 | 1742 | #undef N |
|---|
| 1601 | 1743 | return "UNKNOWN_NETDEV_EVENT"; |
|---|
| .. | .. |
|---|
| 1610 | 1752 | }; |
|---|
| 1611 | 1753 | |
|---|
| 1612 | 1754 | return nb->notifier_call(nb, val, &info); |
|---|
| 1755 | +} |
|---|
| 1756 | + |
|---|
| 1757 | +static int call_netdevice_register_notifiers(struct notifier_block *nb, |
|---|
| 1758 | + struct net_device *dev) |
|---|
| 1759 | +{ |
|---|
| 1760 | + int err; |
|---|
| 1761 | + |
|---|
| 1762 | + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); |
|---|
| 1763 | + err = notifier_to_errno(err); |
|---|
| 1764 | + if (err) |
|---|
| 1765 | + return err; |
|---|
| 1766 | + |
|---|
| 1767 | + if (!(dev->flags & IFF_UP)) |
|---|
| 1768 | + return 0; |
|---|
| 1769 | + |
|---|
| 1770 | + call_netdevice_notifier(nb, NETDEV_UP, dev); |
|---|
| 1771 | + return 0; |
|---|
| 1772 | +} |
|---|
| 1773 | + |
|---|
| 1774 | +static void call_netdevice_unregister_notifiers(struct notifier_block *nb, |
|---|
| 1775 | + struct net_device *dev) |
|---|
| 1776 | +{ |
|---|
| 1777 | + if (dev->flags & IFF_UP) { |
|---|
| 1778 | + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, |
|---|
| 1779 | + dev); |
|---|
| 1780 | + call_netdevice_notifier(nb, NETDEV_DOWN, dev); |
|---|
| 1781 | + } |
|---|
| 1782 | + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
|---|
| 1783 | +} |
|---|
| 1784 | + |
|---|
| 1785 | +static int call_netdevice_register_net_notifiers(struct notifier_block *nb, |
|---|
| 1786 | + struct net *net) |
|---|
| 1787 | +{ |
|---|
| 1788 | + struct net_device *dev; |
|---|
| 1789 | + int err; |
|---|
| 1790 | + |
|---|
| 1791 | + for_each_netdev(net, dev) { |
|---|
| 1792 | + err = call_netdevice_register_notifiers(nb, dev); |
|---|
| 1793 | + if (err) |
|---|
| 1794 | + goto rollback; |
|---|
| 1795 | + } |
|---|
| 1796 | + return 0; |
|---|
| 1797 | + |
|---|
| 1798 | +rollback: |
|---|
| 1799 | + for_each_netdev_continue_reverse(net, dev) |
|---|
| 1800 | + call_netdevice_unregister_notifiers(nb, dev); |
|---|
| 1801 | + return err; |
|---|
| 1802 | +} |
|---|
| 1803 | + |
|---|
| 1804 | +static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb, |
|---|
| 1805 | + struct net *net) |
|---|
| 1806 | +{ |
|---|
| 1807 | + struct net_device *dev; |
|---|
| 1808 | + |
|---|
| 1809 | + for_each_netdev(net, dev) |
|---|
| 1810 | + call_netdevice_unregister_notifiers(nb, dev); |
|---|
| 1613 | 1811 | } |
|---|
| 1614 | 1812 | |
|---|
| 1615 | 1813 | static int dev_boot_phase = 1; |
|---|
| .. | .. |
|---|
| 1630 | 1828 | |
|---|
| 1631 | 1829 | int register_netdevice_notifier(struct notifier_block *nb) |
|---|
| 1632 | 1830 | { |
|---|
| 1633 | | - struct net_device *dev; |
|---|
| 1634 | | - struct net_device *last; |
|---|
| 1635 | 1831 | struct net *net; |
|---|
| 1636 | 1832 | int err; |
|---|
| 1637 | 1833 | |
|---|
| .. | .. |
|---|
| 1644 | 1840 | if (dev_boot_phase) |
|---|
| 1645 | 1841 | goto unlock; |
|---|
| 1646 | 1842 | for_each_net(net) { |
|---|
| 1647 | | - for_each_netdev(net, dev) { |
|---|
| 1648 | | - err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); |
|---|
| 1649 | | - err = notifier_to_errno(err); |
|---|
| 1650 | | - if (err) |
|---|
| 1651 | | - goto rollback; |
|---|
| 1652 | | - |
|---|
| 1653 | | - if (!(dev->flags & IFF_UP)) |
|---|
| 1654 | | - continue; |
|---|
| 1655 | | - |
|---|
| 1656 | | - call_netdevice_notifier(nb, NETDEV_UP, dev); |
|---|
| 1657 | | - } |
|---|
| 1843 | + err = call_netdevice_register_net_notifiers(nb, net); |
|---|
| 1844 | + if (err) |
|---|
| 1845 | + goto rollback; |
|---|
| 1658 | 1846 | } |
|---|
| 1659 | 1847 | |
|---|
| 1660 | 1848 | unlock: |
|---|
| .. | .. |
|---|
| 1663 | 1851 | return err; |
|---|
| 1664 | 1852 | |
|---|
| 1665 | 1853 | rollback: |
|---|
| 1666 | | - last = dev; |
|---|
| 1667 | | - for_each_net(net) { |
|---|
| 1668 | | - for_each_netdev(net, dev) { |
|---|
| 1669 | | - if (dev == last) |
|---|
| 1670 | | - goto outroll; |
|---|
| 1854 | + for_each_net_continue_reverse(net) |
|---|
| 1855 | + call_netdevice_unregister_net_notifiers(nb, net); |
|---|
| 1671 | 1856 | |
|---|
| 1672 | | - if (dev->flags & IFF_UP) { |
|---|
| 1673 | | - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, |
|---|
| 1674 | | - dev); |
|---|
| 1675 | | - call_netdevice_notifier(nb, NETDEV_DOWN, dev); |
|---|
| 1676 | | - } |
|---|
| 1677 | | - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
|---|
| 1678 | | - } |
|---|
| 1679 | | - } |
|---|
| 1680 | | - |
|---|
| 1681 | | -outroll: |
|---|
| 1682 | 1857 | raw_notifier_chain_unregister(&netdev_chain, nb); |
|---|
| 1683 | 1858 | goto unlock; |
|---|
| 1684 | 1859 | } |
|---|
| .. | .. |
|---|
| 1700 | 1875 | |
|---|
| 1701 | 1876 | int unregister_netdevice_notifier(struct notifier_block *nb) |
|---|
| 1702 | 1877 | { |
|---|
| 1703 | | - struct net_device *dev; |
|---|
| 1704 | 1878 | struct net *net; |
|---|
| 1705 | 1879 | int err; |
|---|
| 1706 | 1880 | |
|---|
| .. | .. |
|---|
| 1711 | 1885 | if (err) |
|---|
| 1712 | 1886 | goto unlock; |
|---|
| 1713 | 1887 | |
|---|
| 1714 | | - for_each_net(net) { |
|---|
| 1715 | | - for_each_netdev(net, dev) { |
|---|
| 1716 | | - if (dev->flags & IFF_UP) { |
|---|
| 1717 | | - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, |
|---|
| 1718 | | - dev); |
|---|
| 1719 | | - call_netdevice_notifier(nb, NETDEV_DOWN, dev); |
|---|
| 1720 | | - } |
|---|
| 1721 | | - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
|---|
| 1722 | | - } |
|---|
| 1723 | | - } |
|---|
| 1888 | + for_each_net(net) |
|---|
| 1889 | + call_netdevice_unregister_net_notifiers(nb, net); |
|---|
| 1890 | + |
|---|
| 1724 | 1891 | unlock: |
|---|
| 1725 | 1892 | rtnl_unlock(); |
|---|
| 1726 | 1893 | up_write(&pernet_ops_rwsem); |
|---|
| 1727 | 1894 | return err; |
|---|
| 1728 | 1895 | } |
|---|
| 1729 | 1896 | EXPORT_SYMBOL(unregister_netdevice_notifier); |
|---|
| 1897 | + |
|---|
| 1898 | +static int __register_netdevice_notifier_net(struct net *net, |
|---|
| 1899 | + struct notifier_block *nb, |
|---|
| 1900 | + bool ignore_call_fail) |
|---|
| 1901 | +{ |
|---|
| 1902 | + int err; |
|---|
| 1903 | + |
|---|
| 1904 | + err = raw_notifier_chain_register(&net->netdev_chain, nb); |
|---|
| 1905 | + if (err) |
|---|
| 1906 | + return err; |
|---|
| 1907 | + if (dev_boot_phase) |
|---|
| 1908 | + return 0; |
|---|
| 1909 | + |
|---|
| 1910 | + err = call_netdevice_register_net_notifiers(nb, net); |
|---|
| 1911 | + if (err && !ignore_call_fail) |
|---|
| 1912 | + goto chain_unregister; |
|---|
| 1913 | + |
|---|
| 1914 | + return 0; |
|---|
| 1915 | + |
|---|
| 1916 | +chain_unregister: |
|---|
| 1917 | + raw_notifier_chain_unregister(&net->netdev_chain, nb); |
|---|
| 1918 | + return err; |
|---|
| 1919 | +} |
|---|
| 1920 | + |
|---|
| 1921 | +static int __unregister_netdevice_notifier_net(struct net *net, |
|---|
| 1922 | + struct notifier_block *nb) |
|---|
| 1923 | +{ |
|---|
| 1924 | + int err; |
|---|
| 1925 | + |
|---|
| 1926 | + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); |
|---|
| 1927 | + if (err) |
|---|
| 1928 | + return err; |
|---|
| 1929 | + |
|---|
| 1930 | + call_netdevice_unregister_net_notifiers(nb, net); |
|---|
| 1931 | + return 0; |
|---|
| 1932 | +} |
|---|
| 1933 | + |
|---|
| 1934 | +/** |
|---|
| 1935 | + * register_netdevice_notifier_net - register a per-netns network notifier block |
|---|
| 1936 | + * @net: network namespace |
|---|
| 1937 | + * @nb: notifier |
|---|
| 1938 | + * |
|---|
| 1939 | + * Register a notifier to be called when network device events occur. |
|---|
| 1940 | + * The notifier passed is linked into the kernel structures and must |
|---|
| 1941 | + * not be reused until it has been unregistered. A negative errno code |
|---|
| 1942 | + * is returned on a failure. |
|---|
| 1943 | + * |
|---|
| 1944 | + * When registered all registration and up events are replayed |
|---|
| 1945 | + * to the new notifier to allow device to have a race free |
|---|
| 1946 | + * view of the network device list. |
|---|
| 1947 | + */ |
|---|
| 1948 | + |
|---|
| 1949 | +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) |
|---|
| 1950 | +{ |
|---|
| 1951 | + int err; |
|---|
| 1952 | + |
|---|
| 1953 | + rtnl_lock(); |
|---|
| 1954 | + err = __register_netdevice_notifier_net(net, nb, false); |
|---|
| 1955 | + rtnl_unlock(); |
|---|
| 1956 | + return err; |
|---|
| 1957 | +} |
|---|
| 1958 | +EXPORT_SYMBOL(register_netdevice_notifier_net); |
|---|
| 1959 | + |
|---|
| 1960 | +/** |
|---|
| 1961 | + * unregister_netdevice_notifier_net - unregister a per-netns |
|---|
| 1962 | + * network notifier block |
|---|
| 1963 | + * @net: network namespace |
|---|
| 1964 | + * @nb: notifier |
|---|
| 1965 | + * |
|---|
| 1966 | + * Unregister a notifier previously registered by |
|---|
| 1967 | + * register_netdevice_notifier(). The notifier is unlinked into the |
|---|
| 1968 | + * kernel structures and may then be reused. A negative errno code |
|---|
| 1969 | + * is returned on a failure. |
|---|
| 1970 | + * |
|---|
| 1971 | + * After unregistering unregister and down device events are synthesized |
|---|
| 1972 | + * for all devices on the device list to the removed notifier to remove |
|---|
| 1973 | + * the need for special case cleanup code. |
|---|
| 1974 | + */ |
|---|
| 1975 | + |
|---|
| 1976 | +int unregister_netdevice_notifier_net(struct net *net, |
|---|
| 1977 | + struct notifier_block *nb) |
|---|
| 1978 | +{ |
|---|
| 1979 | + int err; |
|---|
| 1980 | + |
|---|
| 1981 | + rtnl_lock(); |
|---|
| 1982 | + err = __unregister_netdevice_notifier_net(net, nb); |
|---|
| 1983 | + rtnl_unlock(); |
|---|
| 1984 | + return err; |
|---|
| 1985 | +} |
|---|
| 1986 | +EXPORT_SYMBOL(unregister_netdevice_notifier_net); |
|---|
| 1987 | + |
|---|
| 1988 | +int register_netdevice_notifier_dev_net(struct net_device *dev, |
|---|
| 1989 | + struct notifier_block *nb, |
|---|
| 1990 | + struct netdev_net_notifier *nn) |
|---|
| 1991 | +{ |
|---|
| 1992 | + int err; |
|---|
| 1993 | + |
|---|
| 1994 | + rtnl_lock(); |
|---|
| 1995 | + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); |
|---|
| 1996 | + if (!err) { |
|---|
| 1997 | + nn->nb = nb; |
|---|
| 1998 | + list_add(&nn->list, &dev->net_notifier_list); |
|---|
| 1999 | + } |
|---|
| 2000 | + rtnl_unlock(); |
|---|
| 2001 | + return err; |
|---|
| 2002 | +} |
|---|
| 2003 | +EXPORT_SYMBOL(register_netdevice_notifier_dev_net); |
|---|
| 2004 | + |
|---|
| 2005 | +int unregister_netdevice_notifier_dev_net(struct net_device *dev, |
|---|
| 2006 | + struct notifier_block *nb, |
|---|
| 2007 | + struct netdev_net_notifier *nn) |
|---|
| 2008 | +{ |
|---|
| 2009 | + int err; |
|---|
| 2010 | + |
|---|
| 2011 | + rtnl_lock(); |
|---|
| 2012 | + list_del(&nn->list); |
|---|
| 2013 | + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); |
|---|
| 2014 | + rtnl_unlock(); |
|---|
| 2015 | + return err; |
|---|
| 2016 | +} |
|---|
| 2017 | +EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net); |
|---|
| 2018 | + |
|---|
| 2019 | +static void move_netdevice_notifiers_dev_net(struct net_device *dev, |
|---|
| 2020 | + struct net *net) |
|---|
| 2021 | +{ |
|---|
| 2022 | + struct netdev_net_notifier *nn; |
|---|
| 2023 | + |
|---|
| 2024 | + list_for_each_entry(nn, &dev->net_notifier_list, list) { |
|---|
| 2025 | + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb); |
|---|
| 2026 | + __register_netdevice_notifier_net(net, nn->nb, true); |
|---|
| 2027 | + } |
|---|
| 2028 | +} |
|---|
| 1730 | 2029 | |
|---|
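The block above introduces per-netns notifier chains: a subsystem that only cares about one namespace can subscribe there instead of filtering inside a global notifier, and registration replays NETDEV_REGISTER/NETDEV_UP for existing devices just like the global API. A minimal subscriber sketch (`example_netdev_event` and the choice of init_net are illustrative):

```c
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UP)
		pr_info("%s is up\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_netdev_event,
};

/* only devices in init_net are replayed and reported: */
err = register_netdevice_notifier_net(&init_net, &example_nb);
```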
| 1731 | 2030 | /** |
|---|
| 1732 | 2031 | * call_netdevice_notifiers_info - call all network notifier blocks |
|---|
| .. | .. |
|---|
| 1740 | 2039 | static int call_netdevice_notifiers_info(unsigned long val, |
|---|
| 1741 | 2040 | struct netdev_notifier_info *info) |
|---|
| 1742 | 2041 | { |
|---|
| 2042 | + struct net *net = dev_net(info->dev); |
|---|
| 2043 | + int ret; |
|---|
| 2044 | + |
|---|
| 1743 | 2045 | ASSERT_RTNL(); |
|---|
| 2046 | + |
|---|
| 2047 | + /* Run per-netns notifier block chain first, then run the global one. |
|---|
| 2048 | + * Hopefully, one day, the global one is going to be removed after |
|---|
| 2049 | + * all notifier block registrators get converted to be per-netns. |
|---|
| 2050 | + */ |
|---|
| 2051 | + ret = raw_notifier_call_chain(&net->netdev_chain, val, info); |
|---|
| 2052 | + if (ret & NOTIFY_STOP_MASK) |
|---|
| 2053 | + return ret; |
|---|
| 1744 | 2054 | return raw_notifier_call_chain(&netdev_chain, val, info); |
|---|
| 2055 | +} |
|---|
| 2056 | + |
|---|
| 2057 | +static int call_netdevice_notifiers_extack(unsigned long val, |
|---|
| 2058 | + struct net_device *dev, |
|---|
| 2059 | + struct netlink_ext_ack *extack) |
|---|
| 2060 | +{ |
|---|
| 2061 | + struct netdev_notifier_info info = { |
|---|
| 2062 | + .dev = dev, |
|---|
| 2063 | + .extack = extack, |
|---|
| 2064 | + }; |
|---|
| 2065 | + |
|---|
| 2066 | + return call_netdevice_notifiers_info(val, &info); |
|---|
| 1745 | 2067 | } |
|---|
| 1746 | 2068 | |
|---|
| 1747 | 2069 | /** |
|---|
| .. | .. |
|---|
| 1755 | 2077 | |
|---|
| 1756 | 2078 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) |
|---|
| 1757 | 2079 | { |
|---|
| 1758 | | - struct netdev_notifier_info info = { |
|---|
| 1759 | | - .dev = dev, |
|---|
| 1760 | | - }; |
|---|
| 1761 | | - |
|---|
| 1762 | | - return call_netdevice_notifiers_info(val, &info); |
|---|
| 2080 | + return call_netdevice_notifiers_extack(val, dev, NULL); |
|---|
| 1763 | 2081 | } |
|---|
| 1764 | 2082 | EXPORT_SYMBOL(call_netdevice_notifiers); |
|---|
| 1765 | 2083 | |
|---|
| .. | .. |
|---|
| 1987 | 2305 | return false; |
|---|
| 1988 | 2306 | } |
|---|
| 1989 | 2307 | |
|---|
| 2308 | +/** |
|---|
| 2309 | + * dev_nit_active - return true if any network interface taps are in use |
|---|
| 2310 | + * |
|---|
| 2311 | + * @dev: network device to check for the presence of taps |
|---|
| 2312 | + */ |
|---|
| 2313 | +bool dev_nit_active(struct net_device *dev) |
|---|
| 2314 | +{ |
|---|
| 2315 | + return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all); |
|---|
| 2316 | +} |
|---|
| 2317 | +EXPORT_SYMBOL_GPL(dev_nit_active); |
|---|
| 2318 | + |
|---|
| 1990 | 2319 | /* |
|---|
| 1991 | 2320 | * Support routine. Sends outgoing frames to any network |
|---|
| 1992 | 2321 | * taps currently in use. |
|---|
| .. | .. |
|---|
| 2002 | 2331 | rcu_read_lock(); |
|---|
| 2003 | 2332 | again: |
|---|
| 2004 | 2333 | list_for_each_entry_rcu(ptype, ptype_list, list) { |
|---|
| 2334 | + if (ptype->ignore_outgoing) |
|---|
| 2335 | + continue; |
|---|
| 2336 | + |
|---|
| 2005 | 2337 | /* Never send packets back to the socket |
|---|
| 2006 | 2338 | * they originated from - MvS (miquels@drinkel.ow.org) |
|---|
| 2007 | 2339 | */ |
|---|
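ptype->ignore_outgoing is the kernel half of the PACKET_IGNORE_OUTGOING socket option (added in v4.20): packet sockets that set it are skipped when transmitted frames are looped to taps. Userspace sketch:

```c
#include <sys/socket.h>
#include <linux/if_packet.h>

int one = 1;

/* fd is an AF_PACKET socket; stop receiving our own transmitted frames */
if (setsockopt(fd, SOL_PACKET, PACKET_IGNORE_OUTGOING,
	       &one, sizeof(one)) < 0)
	perror("PACKET_IGNORE_OUTGOING");
```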
| .. | .. |
|---|
| 2723 | 3055 | sd->output_queue_tailp = &q->next_sched; |
|---|
| 2724 | 3056 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
|---|
| 2725 | 3057 | local_irq_restore(flags); |
|---|
| 2726 | | - preempt_check_resched_rt(); |
|---|
| 2727 | 3058 | } |
|---|
| 2728 | 3059 | |
|---|
| 2729 | 3060 | void __netif_schedule(struct Qdisc *q) |
|---|
| .. | .. |
|---|
| 2745 | 3076 | void netif_schedule_queue(struct netdev_queue *txq) |
|---|
| 2746 | 3077 | { |
|---|
| 2747 | 3078 | rcu_read_lock(); |
|---|
| 2748 | | - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { |
|---|
| 3079 | + if (!netif_xmit_stopped(txq)) { |
|---|
| 2749 | 3080 | struct Qdisc *q = rcu_dereference(txq->qdisc); |
|---|
| 2750 | 3081 | |
|---|
| 2751 | 3082 | __netif_schedule(q); |
|---|
| .. | .. |
|---|
| 2786 | 3117 | __this_cpu_write(softnet_data.completion_queue, skb); |
|---|
| 2787 | 3118 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
|---|
| 2788 | 3119 | local_irq_restore(flags); |
|---|
| 2789 | | - preempt_check_resched_rt(); |
|---|
| 2790 | 3120 | } |
|---|
| 2791 | 3121 | EXPORT_SYMBOL(__dev_kfree_skb_irq); |
|---|
| 2792 | 3122 | |
|---|
| .. | .. |
|---|
| 2883 | 3213 | else |
|---|
| 2884 | 3214 | name = netdev_name(dev); |
|---|
| 2885 | 3215 | } |
|---|
| 2886 | | - WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " |
|---|
| 2887 | | - "gso_type=%d ip_summed=%d\n", |
|---|
| 3216 | + skb_dump(KERN_WARNING, skb, false); |
|---|
| 3217 | + WARN(1, "%s: caps=(%pNF, %pNF)\n", |
|---|
| 2888 | 3218 | name, dev ? &dev->features : &null_features, |
|---|
| 2889 | | - skb->sk ? &skb->sk->sk_route_caps : &null_features, |
|---|
| 2890 | | - skb->len, skb->data_len, skb_shinfo(skb)->gso_size, |
|---|
| 2891 | | - skb_shinfo(skb)->gso_type, skb->ip_summed); |
|---|
| 3219 | + skb->sk ? &skb->sk->sk_route_caps : &null_features); |
|---|
| 2892 | 3220 | } |
|---|
| 2893 | 3221 | |
|---|
| 2894 | 3222 | /* |
|---|
| .. | .. |
|---|
| 2918 | 3246 | } |
|---|
| 2919 | 3247 | |
|---|
| 2920 | 3248 | offset = skb_checksum_start_offset(skb); |
|---|
| 2921 | | - BUG_ON(offset >= skb_headlen(skb)); |
|---|
| 3249 | + ret = -EINVAL; |
|---|
| 3250 | + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) |
|---|
| 3251 | + goto out; |
|---|
| 3252 | + |
|---|
| 2922 | 3253 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
|---|
| 2923 | 3254 | |
|---|
| 2924 | 3255 | offset += skb->csum_offset; |
|---|
| 2925 | | - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); |
|---|
| 3256 | + if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) |
|---|
| 3257 | + goto out; |
|---|
| 2926 | 3258 | |
|---|
| 2927 | | - if (skb_cloned(skb) && |
|---|
| 2928 | | - !skb_clone_writable(skb, offset + sizeof(__sum16))) { |
|---|
| 2929 | | - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
|---|
| 2930 | | - if (ret) |
|---|
| 2931 | | - goto out; |
|---|
| 2932 | | - } |
|---|
| 3259 | + ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); |
|---|
| 3260 | + if (ret) |
|---|
| 3261 | + goto out; |
|---|
| 2933 | 3262 | |
|---|
| 2934 | 3263 | *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; |
|---|
| 2935 | 3264 | out_set_summed: |
|---|
| .. | .. |
|---|
| 2964 | 3293 | ret = -EINVAL; |
|---|
| 2965 | 3294 | goto out; |
|---|
| 2966 | 3295 | } |
|---|
| 2967 | | - if (skb_cloned(skb) && |
|---|
| 2968 | | - !skb_clone_writable(skb, offset + sizeof(__le32))) { |
|---|
| 2969 | | - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
|---|
| 2970 | | - if (ret) |
|---|
| 2971 | | - goto out; |
|---|
| 2972 | | - } |
|---|
| 3296 | + |
|---|
| 3297 | + ret = skb_ensure_writable(skb, offset + sizeof(__le32)); |
|---|
| 3298 | + if (ret) |
|---|
| 3299 | + goto out; |
|---|
| 3300 | + |
|---|
| 2973 | 3301 | crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, |
|---|
| 2974 | 3302 | skb->len - start, ~(__u32)0, |
|---|
| 2975 | 3303 | crc32c_csum_stub)); |
|---|
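Both checksum helpers swap the open-coded clone check for skb_ensure_writable() and turn the old BUG_ON()s into recoverable -EINVAL exits. For orientation, skb_ensure_writable() is roughly equivalent to the pattern it replaces (a sketch of its semantics, not the exact implementation):

```c
/* Sketch: make the first write_len bytes of the skb safely writable. */
static int ensure_writable_sketch(struct sk_buff *skb, unsigned int write_len)
{
	if (!pskb_may_pull(skb, write_len))	/* enough linear data? */
		return -ENOMEM;
	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
		return 0;			/* already safe to write */
	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); /* unshare head */
}
```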
| .. | .. |
|---|
| 3054 | 3382 | * It may return NULL if the skb requires no segmentation. This is |
|---|
| 3055 | 3383 | * only possible when GSO is used for verifying header integrity. |
|---|
| 3056 | 3384 | * |
|---|
| 3057 | | - * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. |
|---|
| 3385 | + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. |
|---|
| 3058 | 3386 | */ |
|---|
| 3059 | 3387 | struct sk_buff *__skb_gso_segment(struct sk_buff *skb, |
|---|
| 3060 | 3388 | netdev_features_t features, bool tx_path) |
|---|
| .. | .. |
|---|
| 3083 | 3411 | features &= ~NETIF_F_GSO_PARTIAL; |
|---|
| 3084 | 3412 | } |
|---|
| 3085 | 3413 | |
|---|
| 3086 | | - BUILD_BUG_ON(SKB_SGO_CB_OFFSET + |
|---|
| 3414 | + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + |
|---|
| 3087 | 3415 | sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); |
|---|
| 3088 | 3416 | |
|---|
| 3089 | 3417 | SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); |
|---|
| .. | .. |
|---|
| 3094 | 3422 | |
|---|
| 3095 | 3423 | segs = skb_mac_gso_segment(skb, features); |
|---|
| 3096 | 3424 | |
|---|
| 3097 | | - if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) |
|---|
| 3425 | + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) |
|---|
| 3098 | 3426 | skb_warn_bad_offload(skb); |
|---|
| 3099 | 3427 | |
|---|
| 3100 | 3428 | return segs; |
|---|
| .. | .. |
|---|
| 3103 | 3431 | |
|---|
| 3104 | 3432 | /* Take action when hardware reception checksum errors are detected. */ |
|---|
| 3105 | 3433 | #ifdef CONFIG_BUG |
|---|
| 3106 | | -void netdev_rx_csum_fault(struct net_device *dev) |
|---|
| 3434 | +void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) |
|---|
| 3107 | 3435 | { |
|---|
| 3108 | 3436 | if (net_ratelimit()) { |
|---|
| 3109 | 3437 | pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); |
|---|
| 3438 | + skb_dump(KERN_ERR, skb, true); |
|---|
| 3110 | 3439 | dump_stack(); |
|---|
| 3111 | 3440 | } |
|---|
| 3112 | 3441 | } |
|---|
| .. | .. |
|---|
| 3156 | 3485 | static netdev_features_t harmonize_features(struct sk_buff *skb, |
|---|
| 3157 | 3486 | netdev_features_t features) |
|---|
| 3158 | 3487 | { |
|---|
| 3159 | | - int tmp; |
|---|
| 3160 | 3488 | __be16 type; |
|---|
| 3161 | 3489 | |
|---|
| 3162 | | - type = skb_network_protocol(skb, &tmp); |
|---|
| 3490 | + type = skb_network_protocol(skb, NULL); |
|---|
| 3163 | 3491 | features = net_mpls_features(skb, features, type); |
|---|
| 3164 | 3492 | |
|---|
| 3165 | 3493 | if (skb->ip_summed != CHECKSUM_NONE && |
|---|
| .. | .. |
|---|
| 3256 | 3584 | unsigned int len; |
|---|
| 3257 | 3585 | int rc; |
|---|
| 3258 | 3586 | |
|---|
| 3259 | | - if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) |
|---|
| 3587 | + if (dev_nit_active(dev)) |
|---|
| 3260 | 3588 | dev_queue_xmit_nit(skb, dev); |
|---|
| 3261 | 3589 | |
|---|
| 3262 | 3590 | len = skb->len; |
|---|
| 3591 | + PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies); |
|---|
| 3263 | 3592 | trace_net_dev_start_xmit(skb, dev); |
|---|
| 3264 | 3593 | rc = netdev_start_xmit(skb, dev, txq, more); |
|---|
| 3265 | 3594 | trace_net_dev_xmit(skb, rc, dev, len); |
|---|
| .. | .. |
|---|
| 3276 | 3605 | while (skb) { |
|---|
| 3277 | 3606 | struct sk_buff *next = skb->next; |
|---|
| 3278 | 3607 | |
|---|
| 3279 | | - skb->next = NULL; |
|---|
| 3608 | + skb_mark_not_on_list(skb); |
|---|
| 3280 | 3609 | rc = xmit_one(skb, dev, txq, next != NULL); |
|---|
| 3281 | 3610 | if (unlikely(!dev_xmit_complete(rc))) { |
|---|
| 3282 | 3611 | skb->next = next; |
|---|
| .. | .. |
|---|
| 3376 | 3705 | |
|---|
| 3377 | 3706 | for (; skb != NULL; skb = next) { |
|---|
| 3378 | 3707 | next = skb->next; |
|---|
| 3379 | | - skb->next = NULL; |
|---|
| 3708 | + skb_mark_not_on_list(skb); |
|---|
| 3380 | 3709 | |
|---|
| 3381 | 3710 | /* in case skb wont be segmented, point to itself */ |
|---|
| 3382 | 3711 | skb->prev = skb; |
|---|
| .. | .. |
|---|
| 3407 | 3736 | /* To get more precise estimation of bytes sent on wire, |
|---|
| 3408 | 3737 | * we add to pkt_len the headers size of all segments |
|---|
| 3409 | 3738 | */ |
|---|
| 3410 | | - if (shinfo->gso_size) { |
|---|
| 3739 | + if (shinfo->gso_size && skb_transport_header_was_set(skb)) { |
|---|
| 3411 | 3740 | unsigned int hdr_len; |
|---|
| 3412 | 3741 | u16 gso_segs = shinfo->gso_segs; |
|---|
| 3413 | 3742 | |
|---|
| .. | .. |
|---|
| 3451 | 3780 | qdisc_calculate_pkt_len(skb, q); |
|---|
| 3452 | 3781 | |
|---|
| 3453 | 3782 | if (q->flags & TCQ_F_NOLOCK) { |
|---|
| 3454 | | - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { |
|---|
| 3455 | | - __qdisc_drop(skb, &to_free); |
|---|
| 3456 | | - rc = NET_XMIT_DROP; |
|---|
| 3457 | | - } else { |
|---|
| 3458 | | - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; |
|---|
| 3783 | + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; |
|---|
| 3784 | + if (likely(!netif_xmit_frozen_or_stopped(txq))) |
|---|
| 3459 | 3785 | qdisc_run(q); |
|---|
| 3460 | | - } |
|---|
| 3461 | 3786 | |
|---|
| 3462 | 3787 | if (unlikely(to_free)) |
|---|
| 3463 | 3788 | kfree_skb_list(to_free); |
|---|
| .. | .. |
|---|
| 3470 | 3795 | * This permits qdisc->running owner to get the lock more |
|---|
| 3471 | 3796 | * often and dequeue packets faster. |
|---|
| 3472 | 3797 | */ |
|---|
| 3473 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
|---|
| 3474 | | - contended = true; |
|---|
| 3475 | | -#else |
|---|
| 3476 | 3798 | contended = qdisc_is_running(q); |
|---|
| 3477 | | -#endif |
|---|
| 3478 | 3799 | if (unlikely(contended)) |
|---|
| 3479 | 3800 | spin_lock(&q->busylock); |
|---|
| 3480 | 3801 | |
|---|
| .. | .. |
|---|
| 3557 | 3878 | skb_reset_mac_header(skb); |
|---|
| 3558 | 3879 | __skb_pull(skb, skb_network_offset(skb)); |
|---|
| 3559 | 3880 | skb->pkt_type = PACKET_LOOPBACK; |
|---|
| 3560 | | - skb->ip_summed = CHECKSUM_UNNECESSARY; |
|---|
| 3881 | + if (skb->ip_summed == CHECKSUM_NONE) |
|---|
| 3882 | + skb->ip_summed = CHECKSUM_UNNECESSARY; |
|---|
| 3561 | 3883 | WARN_ON(!skb_dst(skb)); |
|---|
| 3562 | 3884 | skb_dst_force(skb); |
|---|
| 3563 | 3885 | netif_rx_ni(skb); |
|---|
| .. | .. |
|---|
| 3576 | 3898 | return skb; |
|---|
| 3577 | 3899 | |
|---|
| 3578 | 3900 | /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ |
|---|
| 3901 | + qdisc_skb_cb(skb)->mru = 0; |
|---|
| 3579 | 3902 | mini_qdisc_bstats_cpu_update(miniq, skb); |
|---|
| 3580 | 3903 | |
|---|
| 3581 | 3904 | switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { |
|---|
| .. | .. |
|---|
| 3676 | 3999 | } |
|---|
| 3677 | 4000 | |
|---|
| 3678 | 4001 | u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, |
|---|
| 3679 | | - struct net_device *sb_dev, |
|---|
| 3680 | | - select_queue_fallback_t fallback) |
|---|
| 4002 | + struct net_device *sb_dev) |
|---|
| 3681 | 4003 | { |
|---|
| 3682 | 4004 | return 0; |
|---|
| 3683 | 4005 | } |
|---|
| 3684 | 4006 | EXPORT_SYMBOL(dev_pick_tx_zero); |
|---|
| 3685 | 4007 | |
|---|
| 3686 | 4008 | u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, |
|---|
| 3687 | | - struct net_device *sb_dev, |
|---|
| 3688 | | - select_queue_fallback_t fallback) |
|---|
| 4009 | + struct net_device *sb_dev) |
|---|
| 3689 | 4010 | { |
|---|
| 3690 | 4011 | return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; |
|---|
| 3691 | 4012 | } |
|---|
| 3692 | 4013 | EXPORT_SYMBOL(dev_pick_tx_cpu_id); |
|---|
| 3693 | 4014 | |
|---|
| 3694 | | -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, |
|---|
| 3695 | | - struct net_device *sb_dev) |
|---|
| 4015 | +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, |
|---|
| 4016 | + struct net_device *sb_dev) |
|---|
| 3696 | 4017 | { |
|---|
| 3697 | 4018 | struct sock *sk = skb->sk; |
|---|
| 3698 | 4019 | int queue_index = sk_tx_queue_get(sk); |
|---|
| .. | .. |
|---|
| 3716 | 4037 | |
|---|
| 3717 | 4038 | return queue_index; |
|---|
| 3718 | 4039 | } |
|---|
| 4040 | +EXPORT_SYMBOL(netdev_pick_tx); |
|---|
| 3719 | 4041 | |
|---|
| 3720 | | -struct netdev_queue *netdev_pick_tx(struct net_device *dev, |
|---|
| 3721 | | - struct sk_buff *skb, |
|---|
| 3722 | | - struct net_device *sb_dev) |
|---|
| 4042 | +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, |
|---|
| 4043 | + struct sk_buff *skb, |
|---|
| 4044 | + struct net_device *sb_dev) |
|---|
| 3723 | 4045 | { |
|---|
| 3724 | 4046 | int queue_index = 0; |
|---|
| 3725 | 4047 | |
|---|
| .. | .. |
|---|
| 3734 | 4056 | const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 3735 | 4057 | |
|---|
| 3736 | 4058 | if (ops->ndo_select_queue) |
|---|
| 3737 | | - queue_index = ops->ndo_select_queue(dev, skb, sb_dev, |
|---|
| 3738 | | - __netdev_pick_tx); |
|---|
| 4059 | + queue_index = ops->ndo_select_queue(dev, skb, sb_dev); |
|---|
| 3739 | 4060 | else |
|---|
| 3740 | | - queue_index = __netdev_pick_tx(dev, skb, sb_dev); |
|---|
| 4061 | + queue_index = netdev_pick_tx(dev, skb, sb_dev); |
|---|
| 3741 | 4062 | |
|---|
| 3742 | 4063 | queue_index = netdev_cap_txqueue(dev, queue_index); |
|---|
| 3743 | 4064 | } |
|---|
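With the fallback argument gone from ndo_select_queue(), drivers call the now-exported netdev_pick_tx() themselves. A driver under the new signature looks roughly like this (`example_select_queue` and its queue-0 policy are hypothetical):

```c
static u16 example_select_queue(struct net_device *dev, struct sk_buff *skb,
				struct net_device *sb_dev)
{
	/* hypothetical policy: pin control traffic to queue 0 */
	if (skb->priority == TC_PRIO_CONTROL)
		return 0;
	return netdev_pick_tx(dev, skb, sb_dev); /* former fallback path */
}
```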
| .. | .. |
|---|
| 3781 | 4102 | bool again = false; |
|---|
| 3782 | 4103 | |
|---|
| 3783 | 4104 | skb_reset_mac_header(skb); |
|---|
| 4105 | + skb_assert_len(skb); |
|---|
| 3784 | 4106 | |
|---|
| 3785 | 4107 | if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) |
|---|
| 3786 | 4108 | __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); |
|---|
| .. | .. |
|---|
| 3811 | 4133 | else |
|---|
| 3812 | 4134 | skb_dst_force(skb); |
|---|
| 3813 | 4135 | |
|---|
| 3814 | | - txq = netdev_pick_tx(dev, skb, sb_dev); |
|---|
| 4136 | + txq = netdev_core_pick_tx(dev, skb, sb_dev); |
|---|
| 3815 | 4137 | q = rcu_dereference_bh(txq->qdisc); |
|---|
| 3816 | 4138 | |
|---|
| 3817 | 4139 | trace_net_dev_queue(skb); |
|---|
| .. | .. |
|---|
| 3835 | 4157 | if (dev->flags & IFF_UP) { |
|---|
| 3836 | 4158 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
|---|
| 3837 | 4159 | |
|---|
| 3838 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
|---|
| 3839 | | - if (READ_ONCE(txq->xmit_lock_owner) != current) { |
|---|
| 3840 | | -#else |
|---|
| 3841 | 4160 | /* Other cpus might concurrently change txq->xmit_lock_owner |
|---|
| 3842 | 4161 | * to -1 or to their cpu id, but not to our id. |
|---|
| 3843 | 4162 | */ |
|---|
| 3844 | 4163 | if (READ_ONCE(txq->xmit_lock_owner) != cpu) { |
|---|
| 3845 | | -#endif |
|---|
| 3846 | 4164 | if (dev_xmit_recursion()) |
|---|
| 3847 | 4165 | goto recursion_alert; |
|---|
| 3848 | 4166 | |
|---|
| .. | .. |
|---|
| 3850 | 4168 | if (!skb) |
|---|
| 3851 | 4169 | goto out; |
|---|
| 3852 | 4170 | |
|---|
| 4171 | + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); |
|---|
| 3853 | 4172 | HARD_TX_LOCK(dev, txq, cpu); |
|---|
| 3854 | 4173 | |
|---|
| 3855 | 4174 | if (!netif_xmit_stopped(txq)) { |
|---|
| .. | .. |
|---|
| 3897 | 4216 | } |
|---|
| 3898 | 4217 | EXPORT_SYMBOL(dev_queue_xmit_accel); |
|---|
| 3899 | 4218 | |
|---|
| 3900 | | -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) |
|---|
| 4219 | +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) |
|---|
| 3901 | 4220 | { |
|---|
| 3902 | 4221 | struct net_device *dev = skb->dev; |
|---|
| 3903 | 4222 | struct sk_buff *orig_skb = skb; |
|---|
| .. | .. |
|---|
| 3915 | 4234 | |
|---|
| 3916 | 4235 | skb_set_queue_mapping(skb, queue_id); |
|---|
| 3917 | 4236 | txq = skb_get_tx_queue(dev, skb); |
|---|
| 4237 | + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); |
|---|
| 3918 | 4238 | |
|---|
| 3919 | 4239 | local_bh_disable(); |
|---|
| 3920 | 4240 | |
|---|
| .. | .. |
|---|
| 3926 | 4246 | dev_xmit_recursion_dec(); |
|---|
| 3927 | 4247 | |
|---|
| 3928 | 4248 | local_bh_enable(); |
|---|
| 3929 | | - |
|---|
| 3930 | | - if (!dev_xmit_complete(ret)) |
|---|
| 3931 | | - kfree_skb(skb); |
|---|
| 3932 | | - |
|---|
| 3933 | 4249 | return ret; |
|---|
| 3934 | 4250 | drop: |
|---|
| 3935 | 4251 | atomic_long_inc(&dev->tx_dropped); |
|---|
| 3936 | 4252 | kfree_skb_list(skb); |
|---|
| 3937 | 4253 | return NET_XMIT_DROP; |
|---|
| 3938 | 4254 | } |
|---|
| 3939 | | -EXPORT_SYMBOL(dev_direct_xmit); |
|---|
| 4255 | +EXPORT_SYMBOL(__dev_direct_xmit); |
|---|
| 3940 | 4256 | |
|---|
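dev_direct_xmit() is split so the core no longer frees the skb on a failed transmit, which lets AF_XDP requeue and retry it. The old behaviour survives as an inline wrapper in netdevice.h, roughly:

```c
static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
	int ret = __dev_direct_xmit(skb, queue_id);

	if (!dev_xmit_complete(ret))
		kfree_skb(skb);	/* legacy callers keep the old free-on-error */
	return ret;
}
```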
| 3941 | 4257 | /************************************************************************* |
|---|
| 3942 | 4258 | * Receiver routines |
|---|
| .. | .. |
|---|
| 3954 | 4270 | int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ |
|---|
| 3955 | 4271 | int dev_rx_weight __read_mostly = 64; |
|---|
| 3956 | 4272 | int dev_tx_weight __read_mostly = 64; |
|---|
| 4273 | +/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ |
|---|
| 4274 | +int gro_normal_batch __read_mostly = 8; |
|---|
| 3957 | 4275 | |
|---|
| 3958 | 4276 | /* Called with irq disabled */ |
|---|
| 3959 | 4277 | static inline void ____napi_schedule(struct softnet_data *sd, |
|---|
| .. | .. |
|---|
| 3971 | 4289 | u32 rps_cpu_mask __read_mostly; |
|---|
| 3972 | 4290 | EXPORT_SYMBOL(rps_cpu_mask); |
|---|
| 3973 | 4291 | |
|---|
| 3974 | | -struct static_key rps_needed __read_mostly; |
|---|
| 4292 | +struct static_key_false rps_needed __read_mostly; |
|---|
| 3975 | 4293 | EXPORT_SYMBOL(rps_needed); |
|---|
| 3976 | | -struct static_key rfs_needed __read_mostly; |
|---|
| 4294 | +struct static_key_false rfs_needed __read_mostly; |
|---|
| 3977 | 4295 | EXPORT_SYMBOL(rfs_needed); |
|---|
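rps_needed/rfs_needed move from the raw static_key API to static_key_false, so read sides become static_branch_unlikely() (see the netif_rx_internal() hunk below) while the control side pairs inc/dec (sketch; in practice this lives in the sysfs rps_map store handler):

```c
/* when the first rps_map is installed on a queue: */
static_branch_inc(&rps_needed);

/* when the last rps_map is removed again: */
static_branch_dec(&rps_needed);
```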
| 3978 | 4296 | |
|---|
| 3979 | 4297 | static struct rps_dev_flow * |
|---|
| .. | .. |
|---|
| 4204 | 4522 | struct softnet_data *sd; |
|---|
| 4205 | 4523 | unsigned int old_flow, new_flow; |
|---|
| 4206 | 4524 | |
|---|
| 4207 | | - if (qlen < (netdev_max_backlog >> 1)) |
|---|
| 4525 | + if (qlen < (READ_ONCE(netdev_max_backlog) >> 1)) |
|---|
| 4208 | 4526 | return false; |
|---|
| 4209 | 4527 | |
|---|
| 4210 | 4528 | sd = this_cpu_ptr(&softnet_data); |
|---|
| .. | .. |
|---|
| 4252 | 4570 | if (!netif_running(skb->dev)) |
|---|
| 4253 | 4571 | goto drop; |
|---|
| 4254 | 4572 | qlen = skb_queue_len(&sd->input_pkt_queue); |
|---|
| 4255 | | - if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { |
|---|
| 4573 | + if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) { |
|---|
| 4256 | 4574 | if (qlen) { |
|---|
| 4257 | 4575 | enqueue: |
|---|
| 4258 | 4576 | __skb_queue_tail(&sd->input_pkt_queue, skb); |
|---|
| .. | .. |
|---|
| 4277 | 4595 | rps_unlock(sd); |
|---|
| 4278 | 4596 | |
|---|
| 4279 | 4597 | local_irq_restore(flags); |
|---|
| 4280 | | - preempt_check_resched_rt(); |
|---|
| 4281 | 4598 | |
|---|
| 4282 | 4599 | atomic_long_inc(&skb->dev->rx_dropped); |
|---|
| 4283 | 4600 | kfree_skb(skb); |
|---|
| .. | .. |
|---|
| 4323 | 4640 | /* Reinjected packets coming from act_mirred or similar should |
|---|
| 4324 | 4641 | * not get XDP generic processing. |
|---|
| 4325 | 4642 | */ |
|---|
| 4326 | | - if (skb_is_tc_redirected(skb)) |
|---|
| 4643 | + if (skb_is_redirected(skb)) |
|---|
| 4327 | 4644 | return XDP_PASS; |
|---|
| 4328 | 4645 | |
|---|
| 4329 | 4646 | /* XDP packets must be linear and must have sufficient headroom |
|---|
| .. | .. |
|---|
| 4355 | 4672 | xdp->data_meta = xdp->data; |
|---|
| 4356 | 4673 | xdp->data_end = xdp->data + hlen; |
|---|
| 4357 | 4674 | xdp->data_hard_start = skb->data - skb_headroom(skb); |
|---|
| 4675 | + |
|---|
| 4676 | + /* SKB "head" area always have tailroom for skb_shared_info */ |
|---|
| 4677 | + xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; |
|---|
| 4678 | + xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
|---|
| 4679 | + |
|---|
| 4358 | 4680 | orig_data_end = xdp->data_end; |
|---|
| 4359 | 4681 | orig_data = xdp->data; |
|---|
| 4360 | 4682 | eth = (struct ethhdr *)xdp->data; |
|---|
| .. | .. |
|---|
| 4378 | 4700 | skb_reset_network_header(skb); |
|---|
| 4379 | 4701 | } |
|---|
| 4380 | 4702 | |
|---|
| 4381 | | - /* check if bpf_xdp_adjust_tail was used. it can only "shrink" |
|---|
| 4382 | | - * pckt. |
|---|
| 4383 | | - */ |
|---|
| 4384 | | - off = orig_data_end - xdp->data_end; |
|---|
| 4703 | + /* check if bpf_xdp_adjust_tail was used */ |
|---|
| 4704 | + off = xdp->data_end - orig_data_end; |
|---|
| 4385 | 4705 | if (off != 0) { |
|---|
| 4386 | 4706 | skb_set_tail_pointer(skb, xdp->data_end - xdp->data); |
|---|
| 4387 | | - skb->len -= off; |
|---|
| 4388 | | - |
|---|
| 4707 | + skb->len += off; /* positive on grow, negative on shrink */ |
|---|
| 4389 | 4708 | } |
|---|
| 4390 | 4709 | |
|---|
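xdp->frame_sz tells BPF how much real tailroom the skb head provides, which is what lets bpf_xdp_adjust_tail() grow frames as well as shrink them; the delta is accordingly applied with its sign (skb->len += off). On the program side, growth looks like this sketch in libbpf-style restricted C (assumes a kernel with growable tails):

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int pad_tail(struct xdp_md *ctx)
{
	/* positive delta grows the frame, within the tailroom
	 * implied by frame_sz */
	if (bpf_xdp_adjust_tail(ctx, 16))
		return XDP_ABORTED;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
```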
| 4391 | 4710 | /* check if XDP changed eth hdr such SKB needs update */ |
|---|
| .. | .. |
|---|
| 4408 | 4727 | break; |
|---|
| 4409 | 4728 | default: |
|---|
| 4410 | 4729 | bpf_warn_invalid_xdp_action(act); |
|---|
| 4411 | | - /* fall through */ |
|---|
| 4730 | + fallthrough; |
|---|
| 4412 | 4731 | case XDP_ABORTED: |
|---|
| 4413 | 4732 | trace_xdp_exception(skb->dev, xdp_prog, act); |
|---|
| 4414 | | - /* fall through */ |
|---|
| 4733 | + fallthrough; |
|---|
| 4415 | 4734 | case XDP_DROP: |
|---|
| 4416 | 4735 | do_drop: |
|---|
| 4417 | 4736 | kfree_skb(skb); |
|---|
| .. | .. |
|---|
| 4431 | 4750 | bool free_skb = true; |
|---|
| 4432 | 4751 | int cpu, rc; |
|---|
| 4433 | 4752 | |
|---|
| 4434 | | - txq = netdev_pick_tx(dev, skb, NULL); |
|---|
| 4753 | + txq = netdev_core_pick_tx(dev, skb, NULL); |
|---|
| 4435 | 4754 | cpu = smp_processor_id(); |
|---|
| 4436 | 4755 | HARD_TX_LOCK(dev, txq, cpu); |
|---|
| 4437 | 4756 | if (!netif_xmit_stopped(txq)) { |
|---|
| .. | .. |
|---|
| 4445 | 4764 | kfree_skb(skb); |
|---|
| 4446 | 4765 | } |
|---|
| 4447 | 4766 | } |
|---|
| 4448 | | -EXPORT_SYMBOL_GPL(generic_xdp_tx); |
|---|
| 4449 | 4767 | |
|---|
| 4450 | 4768 | static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); |
|---|
| 4451 | 4769 | |
|---|
| .. | .. |
|---|
| 4483 | 4801 | { |
|---|
| 4484 | 4802 | int ret; |
|---|
| 4485 | 4803 | |
|---|
| 4486 | | - net_timestamp_check(netdev_tstamp_prequeue, skb); |
|---|
| 4804 | + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); |
|---|
| 4487 | 4805 | |
|---|
| 4488 | 4806 | trace_netif_rx(skb); |
|---|
| 4489 | 4807 | |
|---|
| 4490 | 4808 | #ifdef CONFIG_RPS |
|---|
| 4491 | | - if (static_key_false(&rps_needed)) { |
|---|
| 4809 | + if (static_branch_unlikely(&rps_needed)) { |
|---|
| 4492 | 4810 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
|---|
| 4493 | 4811 | int cpu; |
|---|
| 4494 | 4812 | |
|---|
| 4495 | | - migrate_disable(); |
|---|
| 4813 | + preempt_disable(); |
|---|
| 4496 | 4814 | rcu_read_lock(); |
|---|
| 4497 | 4815 | |
|---|
| 4498 | 4816 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
|---|
| .. | .. |
|---|
| 4502 | 4820 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
|---|
| 4503 | 4821 | |
|---|
| 4504 | 4822 | rcu_read_unlock(); |
|---|
| 4505 | | - migrate_enable(); |
|---|
| 4823 | + preempt_enable(); |
|---|
| 4506 | 4824 | } else |
|---|
| 4507 | 4825 | #endif |
|---|
| 4508 | 4826 | { |
|---|
| 4509 | 4827 | unsigned int qtail; |
|---|
| 4510 | 4828 | |
|---|
| 4511 | | - ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); |
|---|
| 4512 | | - put_cpu_light(); |
|---|
| 4829 | + ret = enqueue_to_backlog(skb, get_cpu(), &qtail); |
|---|
| 4830 | + put_cpu(); |
|---|
| 4513 | 4831 | } |
|---|
| 4514 | 4832 | return ret; |
|---|
| 4515 | 4833 | } |
|---|
| .. | .. |
|---|
| 4531 | 4849 | |
|---|
| 4532 | 4850 | int netif_rx(struct sk_buff *skb) |
|---|
| 4533 | 4851 | { |
|---|
| 4852 | + int ret; |
|---|
| 4853 | + |
|---|
| 4534 | 4854 | trace_netif_rx_entry(skb); |
|---|
| 4535 | 4855 | |
|---|
| 4536 | | - return netif_rx_internal(skb); |
|---|
| 4856 | + ret = netif_rx_internal(skb); |
|---|
| 4857 | + trace_netif_rx_exit(ret); |
|---|
| 4858 | + |
|---|
| 4859 | + return ret; |
|---|
| 4537 | 4860 | } |
|---|
| 4538 | 4861 | EXPORT_SYMBOL(netif_rx); |
|---|
| 4539 | 4862 | |
|---|
| .. | .. |
|---|
| 4543 | 4866 | |
|---|
| 4544 | 4867 | trace_netif_rx_ni_entry(skb); |
|---|
| 4545 | 4868 | |
|---|
| 4546 | | - local_bh_disable(); |
|---|
| 4869 | + preempt_disable(); |
|---|
| 4547 | 4870 | err = netif_rx_internal(skb); |
|---|
| 4548 | | - local_bh_enable(); |
|---|
| 4871 | + if (local_softirq_pending()) |
|---|
| 4872 | + do_softirq(); |
|---|
| 4873 | + preempt_enable(); |
|---|
| 4874 | + trace_netif_rx_ni_exit(err); |
|---|
| 4549 | 4875 | |
|---|
| 4550 | 4876 | return err; |
|---|
| 4551 | 4877 | } |
|---|
| 4552 | 4878 | EXPORT_SYMBOL(netif_rx_ni); |
|---|
| 4879 | + |
|---|
| 4880 | +int netif_rx_any_context(struct sk_buff *skb) |
|---|
| 4881 | +{ |
|---|
| 4882 | + /* |
|---|
| 4883 | + * If invoked from contexts which do not invoke bottom half |
|---|
| 4884 | + * processing either at return from interrupt or when softrqs are |
|---|
| 4885 | + * reenabled, use netif_rx_ni() which invokes bottomhalf processing |
|---|
| 4886 | + * directly. |
|---|
| 4887 | + */ |
|---|
| 4888 | + if (in_interrupt()) |
|---|
| 4889 | + return netif_rx(skb); |
|---|
| 4890 | + else |
|---|
| 4891 | + return netif_rx_ni(skb); |
|---|
| 4892 | +} |
|---|
| 4893 | +EXPORT_SYMBOL(netif_rx_any_context); |
|---|
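`netif_rx_any_context()` gives drivers whose receive completion can run in hardirq, softirq, or process context a single entry point: `in_interrupt()` selects plain `netif_rx()` when bottom halves will run anyway, and `netif_rx_ni()` (which kicks softirqs itself) otherwise. A hypothetical caller, with `my_dev`/`my_rx_complete` as illustrative names:

```c
/* Hypothetical RX completion that may run in IRQ or process context,
 * e.g. a USB network driver handing a received frame to the stack.
 */
struct my_dev {
	struct net_device *netdev;
};

static void my_rx_complete(struct my_dev *md, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, md->netdev);
	netif_rx_any_context(skb);	/* dispatches on in_interrupt() */
}
```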
| 4553 | 4894 | |
|---|
| 4554 | 4895 | static __latent_entropy void net_tx_action(struct softirq_action *h) |
|---|
| 4555 | 4896 | { |
|---|
| .. | .. |
|---|
| 4592 | 4933 | sd->output_queue_tailp = &sd->output_queue; |
|---|
| 4593 | 4934 | local_irq_enable(); |
|---|
| 4594 | 4935 | |
|---|
| 4936 | + rcu_read_lock(); |
|---|
| 4937 | + |
|---|
| 4595 | 4938 | while (head) { |
|---|
| 4596 | 4939 | struct Qdisc *q = head; |
|---|
| 4597 | 4940 | spinlock_t *root_lock = NULL; |
|---|
| 4598 | 4941 | |
|---|
| 4599 | 4942 | head = head->next_sched; |
|---|
| 4600 | 4943 | |
|---|
| 4601 | | - if (!(q->flags & TCQ_F_NOLOCK)) { |
|---|
| 4602 | | - root_lock = qdisc_lock(q); |
|---|
| 4603 | | - spin_lock(root_lock); |
|---|
| 4604 | | - } |
|---|
| 4605 | 4944 | /* We need to make sure head->next_sched is read |
|---|
| 4606 | 4945 | * before clearing __QDISC_STATE_SCHED |
|---|
| 4607 | 4946 | */ |
|---|
| 4608 | 4947 | smp_mb__before_atomic(); |
|---|
| 4948 | + |
|---|
| 4949 | + if (!(q->flags & TCQ_F_NOLOCK)) { |
|---|
| 4950 | + root_lock = qdisc_lock(q); |
|---|
| 4951 | + spin_lock(root_lock); |
|---|
| 4952 | + } else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, |
|---|
| 4953 | + &q->state))) { |
|---|
| 4954 | + /* There is a synchronize_net() between |
|---|
| 4955 | + * STATE_DEACTIVATED flag being set and |
|---|
| 4956 | + * qdisc_reset()/some_qdisc_is_busy() in |
|---|
| 4957 | + * dev_deactivate(), so we can safely bail out |
|---|
| 4958 | + * early here to avoid data race between |
|---|
| 4959 | + * qdisc_deactivate() and some_qdisc_is_busy() |
|---|
| 4960 | + * for lockless qdisc. |
|---|
| 4961 | + */ |
|---|
| 4962 | + clear_bit(__QDISC_STATE_SCHED, &q->state); |
|---|
| 4963 | + continue; |
|---|
| 4964 | + } |
|---|
| 4965 | + |
|---|
| 4609 | 4966 | clear_bit(__QDISC_STATE_SCHED, &q->state); |
|---|
| 4610 | 4967 | qdisc_run(q); |
|---|
| 4611 | 4968 | if (root_lock) |
|---|
| 4612 | 4969 | spin_unlock(root_lock); |
|---|
| 4613 | 4970 | } |
|---|
| 4971 | + |
|---|
| 4972 | + rcu_read_unlock(); |
|---|
| 4614 | 4973 | } |
|---|
| 4615 | 4974 | |
|---|
| 4616 | 4975 | xfrm_dev_backlog(sd); |
|---|
| .. | .. |
|---|
| 4625 | 4984 | |
|---|
| 4626 | 4985 | static inline struct sk_buff * |
|---|
| 4627 | 4986 | sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, |
|---|
| 4628 | | - struct net_device *orig_dev) |
|---|
| 4987 | + struct net_device *orig_dev, bool *another) |
|---|
| 4629 | 4988 | { |
|---|
| 4630 | 4989 | #ifdef CONFIG_NET_CLS_ACT |
|---|
| 4631 | 4990 | struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); |
|---|
| .. | .. |
|---|
| 4645 | 5004 | } |
|---|
| 4646 | 5005 | |
|---|
| 4647 | 5006 | qdisc_skb_cb(skb)->pkt_len = skb->len; |
|---|
| 5007 | + qdisc_skb_cb(skb)->mru = 0; |
|---|
| 4648 | 5008 | skb->tc_at_ingress = 1; |
|---|
| 4649 | 5009 | mini_qdisc_bstats_cpu_update(miniq, skb); |
|---|
| 4650 | 5010 | |
|---|
| 4651 | | - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { |
|---|
| 5011 | + switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, |
|---|
| 5012 | + &cl_res, false)) { |
|---|
| 4652 | 5013 | case TC_ACT_OK: |
|---|
| 4653 | 5014 | case TC_ACT_RECLASSIFY: |
|---|
| 4654 | 5015 | skb->tc_index = TC_H_MIN(cl_res.classid); |
|---|
| .. | .. |
|---|
| 4668 | 5029 | * redirecting to another netdev |
|---|
| 4669 | 5030 | */ |
|---|
| 4670 | 5031 | __skb_push(skb, skb->mac_len); |
|---|
| 4671 | | - skb_do_redirect(skb); |
|---|
| 5032 | + if (skb_do_redirect(skb) == -EAGAIN) { |
|---|
| 5033 | + __skb_pull(skb, skb->mac_len); |
|---|
| 5034 | + *another = true; |
|---|
| 5035 | + break; |
|---|
| 5036 | + } |
|---|
| 4672 | 5037 | return NULL; |
|---|
| 4673 | | - case TC_ACT_REINSERT: |
|---|
| 4674 | | - /* this does not scrub the packet, and updates stats on error */ |
|---|
| 4675 | | - skb_tc_reinsert(skb, &cl_res); |
|---|
| 5038 | + case TC_ACT_CONSUMED: |
|---|
| 4676 | 5039 | return NULL; |
|---|
| 4677 | 5040 | default: |
|---|
| 4678 | 5041 | break; |
|---|
| .. | .. |
|---|
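`skb_do_redirect()` now reports ingress-to-ingress redirects by returning -EAGAIN: the mac-header push is undone, `*another` is set, and the caller (`__netif_receive_skb_core()`, further below) re-runs the whole receive path on the redirect target via its `another_round` label. `TC_ACT_CONSUMED` replaces `TC_ACT_REINSERT`; in both cases the action has taken ownership of the skb, so NULL is returned without freeing it.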
| 4772 | 5135 | static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, |
|---|
| 4773 | 5136 | int *ret, struct net_device *orig_dev) |
|---|
| 4774 | 5137 | { |
|---|
| 4775 | | -#ifdef CONFIG_NETFILTER_INGRESS |
|---|
| 4776 | 5138 | if (nf_hook_ingress_active(skb)) { |
|---|
| 4777 | 5139 | int ingress_retval; |
|---|
| 4778 | 5140 | |
|---|
| .. | .. |
|---|
| 4786 | 5148 | rcu_read_unlock(); |
|---|
| 4787 | 5149 | return ingress_retval; |
|---|
| 4788 | 5150 | } |
|---|
| 4789 | | -#endif /* CONFIG_NETFILTER_INGRESS */ |
|---|
| 4790 | 5151 | return 0; |
|---|
| 4791 | 5152 | } |
|---|
| 4792 | 5153 | |
|---|
| .. | .. |
|---|
| 4801 | 5162 | int ret = NET_RX_DROP; |
|---|
| 4802 | 5163 | __be16 type; |
|---|
| 4803 | 5164 | |
|---|
| 4804 | | - net_timestamp_check(!netdev_tstamp_prequeue, skb); |
|---|
| 5165 | + net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); |
|---|
| 4805 | 5166 | |
|---|
| 4806 | 5167 | trace_netif_receive_skb(skb); |
|---|
| 4807 | 5168 | |
|---|
| .. | .. |
|---|
| 4861 | 5222 | skip_taps: |
|---|
| 4862 | 5223 | #ifdef CONFIG_NET_INGRESS |
|---|
| 4863 | 5224 | if (static_branch_unlikely(&ingress_needed_key)) { |
|---|
| 4864 | | - skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); |
|---|
| 5225 | + bool another = false; |
|---|
| 5226 | + |
|---|
| 5227 | + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, |
|---|
| 5228 | + &another); |
|---|
| 5229 | + if (another) |
|---|
| 5230 | + goto another_round; |
|---|
| 4865 | 5231 | if (!skb) |
|---|
| 4866 | 5232 | goto out; |
|---|
| 4867 | 5233 | |
|---|
| .. | .. |
|---|
| 4869 | 5235 | goto out; |
|---|
| 4870 | 5236 | } |
|---|
| 4871 | 5237 | #endif |
|---|
| 4872 | | - skb_reset_tc(skb); |
|---|
| 5238 | + skb_reset_redirect(skb); |
|---|
| 4873 | 5239 | skip_classify: |
|---|
| 4874 | 5240 | if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) |
|---|
| 4875 | 5241 | goto drop; |
|---|
| .. | .. |
|---|
| 4906 | 5272 | } |
|---|
| 4907 | 5273 | } |
|---|
| 4908 | 5274 | |
|---|
| 4909 | | - if (unlikely(skb_vlan_tag_present(skb))) { |
|---|
| 4910 | | - if (skb_vlan_tag_get_id(skb)) |
|---|
| 5275 | + if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) { |
|---|
| 5276 | +check_vlan_id: |
|---|
| 5277 | + if (skb_vlan_tag_get_id(skb)) { |
|---|
| 5278 | + /* VLAN id is non-zero and vlan_do_receive() above couldn't
|---|
| 5279 | + * find vlan device. |
|---|
| 5280 | + */ |
|---|
| 4911 | 5281 | skb->pkt_type = PACKET_OTHERHOST; |
|---|
| 5282 | + } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || |
|---|
| 5283 | + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { |
|---|
| 5284 | + /* Outer header is 802.1P with vlan 0, inner header is |
|---|
| 5285 | + * 802.1Q or 802.1AD and vlan_do_receive() above could |
|---|
| 5286 | + * not find vlan dev for vlan id 0. |
|---|
| 5287 | + */ |
|---|
| 5288 | + __vlan_hwaccel_clear_tag(skb); |
|---|
| 5289 | + skb = skb_vlan_untag(skb); |
|---|
| 5290 | + if (unlikely(!skb)) |
|---|
| 5291 | + goto out; |
|---|
| 5292 | + if (vlan_do_receive(&skb)) |
|---|
| 5293 | + /* After stripping off 802.1P header with vlan 0 |
|---|
| 5294 | + * vlan dev is found for inner header. |
|---|
| 5295 | + */ |
|---|
| 5296 | + goto another_round; |
|---|
| 5297 | + else if (unlikely(!skb)) |
|---|
| 5298 | + goto out; |
|---|
| 5299 | + else |
|---|
| 5300 | + /* We have stripped outer 802.1P vlan 0 header. |
|---|
| 5301 | + * But could not find vlan dev. |
|---|
| 5302 | + * Check again for a vlan id to set OTHERHOST.
|---|
| 5303 | + */ |
|---|
| 5304 | + goto check_vlan_id; |
|---|
| 5305 | + } |
|---|
| 4912 | 5306 | /* Note: we might in the future use prio bits |
|---|
| 4913 | 5307 | * and set skb->priority like in vlan_do_receive().
|---|
| 4914 | 5308 | * For the time being, just ignore Priority Code Point |
|---|
| 4915 | 5309 | */ |
|---|
| 4916 | | - skb->vlan_tci = 0; |
|---|
| 5310 | + __vlan_hwaccel_clear_tag(skb); |
|---|
| 4917 | 5311 | } |
|---|
| 4918 | 5312 | |
|---|
| 4919 | 5313 | type = skb->protocol; |
|---|
| .. | .. |
|---|
| 4969 | 5363 | |
|---|
| 4970 | 5364 | ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); |
|---|
| 4971 | 5365 | if (pt_prev) |
|---|
| 4972 | | - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
|---|
| 5366 | + ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, |
|---|
| 5367 | + skb->dev, pt_prev, orig_dev); |
|---|
| 4973 | 5368 | return ret; |
|---|
| 4974 | 5369 | } |
|---|
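`INDIRECT_CALL_INET()` sidesteps a retpoline-hardened indirect call by comparing the function pointer against the two dominant handlers (`ipv6_rcv`/`ip_rcv` here) and calling the match directly. A simplified sketch of the wrappers from include/linux/indirect_call_wrapper.h; `INDIRECT_CALL_INET` reduces to the two-candidate form when both IPv4 and IPv6 are built in:

```c
/* Simplified sketch of the indirect-call wrappers. */
#define INDIRECT_CALL_1(f, f1, ...)					\
	({								\
		likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__);	\
	})

#define INDIRECT_CALL_2(f, f2, f1, ...)					\
	({								\
		likely(f == f2) ? f2(__VA_ARGS__) :			\
				  INDIRECT_CALL_1(f, f1, __VA_ARGS__);	\
	})
```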
| 4975 | 5370 | |
|---|
| .. | .. |
|---|
| 4979 | 5374 | * |
|---|
| 4980 | 5375 | * More direct receive version of netif_receive_skb(). It should |
|---|
| 4981 | 5376 | * only be used by callers that have a need to skip RPS and Generic XDP. |
|---|
| 4982 | | - * Caller must also take care of handling if (page_is_)pfmemalloc. |
|---|
| 5377 | + * Caller must also take care of handling if ``(page_is_)pfmemalloc``. |
|---|
| 4983 | 5378 | * |
|---|
| 4984 | 5379 | * This function may only be called from softirq context and interrupts |
|---|
| 4985 | 5380 | * should be enabled. |
|---|
| .. | .. |
|---|
| 5011 | 5406 | if (list_empty(head)) |
|---|
| 5012 | 5407 | return; |
|---|
| 5013 | 5408 | if (pt_prev->list_func != NULL) |
|---|
| 5014 | | - pt_prev->list_func(head, pt_prev, orig_dev); |
|---|
| 5409 | + INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv, |
|---|
| 5410 | + ip_list_rcv, head, pt_prev, orig_dev); |
|---|
| 5015 | 5411 | else |
|---|
| 5016 | 5412 | list_for_each_entry_safe(skb, next, head, list) { |
|---|
| 5017 | 5413 | skb_list_del_init(skb); |
|---|
| .. | .. |
|---|
| 5122 | 5518 | struct bpf_prog *new = xdp->prog; |
|---|
| 5123 | 5519 | int ret = 0; |
|---|
| 5124 | 5520 | |
|---|
| 5521 | + if (new) { |
|---|
| 5522 | + u32 i; |
|---|
| 5523 | + |
|---|
| 5524 | + mutex_lock(&new->aux->used_maps_mutex); |
|---|
| 5525 | + |
|---|
| 5526 | + /* generic XDP does not work with DEVMAPs that can |
|---|
| 5527 | + * have a bpf_prog installed on an entry |
|---|
| 5528 | + */ |
|---|
| 5529 | + for (i = 0; i < new->aux->used_map_cnt; i++) { |
|---|
| 5530 | + if (dev_map_can_have_prog(new->aux->used_maps[i]) || |
|---|
| 5531 | + cpu_map_prog_allowed(new->aux->used_maps[i])) { |
|---|
| 5532 | + mutex_unlock(&new->aux->used_maps_mutex); |
|---|
| 5533 | + return -EINVAL; |
|---|
| 5534 | + } |
|---|
| 5535 | + } |
|---|
| 5536 | + |
|---|
| 5537 | + mutex_unlock(&new->aux->used_maps_mutex); |
|---|
| 5538 | + } |
|---|
| 5539 | + |
|---|
| 5125 | 5540 | switch (xdp->command) { |
|---|
| 5126 | 5541 | case XDP_SETUP_PROG: |
|---|
| 5127 | 5542 | rcu_assign_pointer(dev->xdp_prog, new); |
|---|
| .. | .. |
|---|
| 5137 | 5552 | } |
|---|
| 5138 | 5553 | break; |
|---|
| 5139 | 5554 | |
|---|
| 5140 | | - case XDP_QUERY_PROG: |
|---|
| 5141 | | - xdp->prog_id = old ? old->aux->id : 0; |
|---|
| 5142 | | - break; |
|---|
| 5143 | | - |
|---|
| 5144 | 5555 | default: |
|---|
| 5145 | 5556 | ret = -EINVAL; |
|---|
| 5146 | 5557 | break; |
|---|
| .. | .. |
|---|
| 5153 | 5564 | { |
|---|
| 5154 | 5565 | int ret; |
|---|
| 5155 | 5566 | |
|---|
| 5156 | | - net_timestamp_check(netdev_tstamp_prequeue, skb); |
|---|
| 5567 | + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); |
|---|
| 5157 | 5568 | |
|---|
| 5158 | 5569 | if (skb_defer_rx_timestamp(skb)) |
|---|
| 5159 | 5570 | return NET_RX_SUCCESS; |
|---|
| 5160 | 5571 | |
|---|
| 5161 | 5572 | rcu_read_lock(); |
|---|
| 5162 | 5573 | #ifdef CONFIG_RPS |
|---|
| 5163 | | - if (static_key_false(&rps_needed)) { |
|---|
| 5574 | + if (static_branch_unlikely(&rps_needed)) { |
|---|
| 5164 | 5575 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
|---|
| 5165 | 5576 | int cpu = get_rps_cpu(skb->dev, skb, &rflow); |
|---|
| 5166 | 5577 | |
|---|
| .. | .. |
|---|
| 5183 | 5594 | |
|---|
| 5184 | 5595 | INIT_LIST_HEAD(&sublist); |
|---|
| 5185 | 5596 | list_for_each_entry_safe(skb, next, head, list) { |
|---|
| 5186 | | - net_timestamp_check(netdev_tstamp_prequeue, skb); |
|---|
| 5597 | + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); |
|---|
| 5187 | 5598 | skb_list_del_init(skb); |
|---|
| 5188 | 5599 | if (!skb_defer_rx_timestamp(skb)) |
|---|
| 5189 | 5600 | list_add_tail(&skb->list, &sublist); |
|---|
| .. | .. |
|---|
| 5192 | 5603 | |
|---|
| 5193 | 5604 | rcu_read_lock(); |
|---|
| 5194 | 5605 | #ifdef CONFIG_RPS |
|---|
| 5195 | | - if (static_key_false(&rps_needed)) { |
|---|
| 5606 | + if (static_branch_unlikely(&rps_needed)) { |
|---|
| 5196 | 5607 | list_for_each_entry_safe(skb, next, head, list) { |
|---|
| 5197 | 5608 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
|---|
| 5198 | 5609 | int cpu = get_rps_cpu(skb->dev, skb, &rflow); |
|---|
| .. | .. |
|---|
| 5226 | 5637 | */ |
|---|
| 5227 | 5638 | int netif_receive_skb(struct sk_buff *skb) |
|---|
| 5228 | 5639 | { |
|---|
| 5640 | + int ret; |
|---|
| 5641 | + |
|---|
| 5229 | 5642 | trace_netif_receive_skb_entry(skb); |
|---|
| 5230 | 5643 | |
|---|
| 5231 | | - return netif_receive_skb_internal(skb); |
|---|
| 5644 | + ret = netif_receive_skb_internal(skb); |
|---|
| 5645 | + trace_netif_receive_skb_exit(ret); |
|---|
| 5646 | + |
|---|
| 5647 | + return ret; |
|---|
| 5232 | 5648 | } |
|---|
| 5233 | 5649 | EXPORT_SYMBOL(netif_receive_skb); |
|---|
| 5234 | 5650 | |
|---|
| .. | .. |
|---|
| 5248 | 5664 | |
|---|
| 5249 | 5665 | if (list_empty(head)) |
|---|
| 5250 | 5666 | return; |
|---|
| 5251 | | - list_for_each_entry(skb, head, list) |
|---|
| 5252 | | - trace_netif_receive_skb_list_entry(skb); |
|---|
| 5667 | + if (trace_netif_receive_skb_list_entry_enabled()) { |
|---|
| 5668 | + list_for_each_entry(skb, head, list) |
|---|
| 5669 | + trace_netif_receive_skb_list_entry(skb); |
|---|
| 5670 | + } |
|---|
| 5253 | 5671 | netif_receive_skb_list_internal(head); |
|---|
| 5672 | + trace_netif_receive_skb_list_exit(0); |
|---|
| 5254 | 5673 | } |
|---|
| 5255 | 5674 | EXPORT_SYMBOL(netif_receive_skb_list); |
|---|
| 5256 | 5675 | |
|---|
| 5257 | | -DEFINE_PER_CPU(struct work_struct, flush_works); |
|---|
| 5676 | +static DEFINE_PER_CPU(struct work_struct, flush_works); |
|---|
| 5258 | 5677 | |
|---|
| 5259 | 5678 | /* Network device is going away, flush any packets still pending */ |
|---|
| 5260 | 5679 | static void flush_backlog(struct work_struct *work) |
|---|
| .. | .. |
|---|
| 5287 | 5706 | local_bh_enable(); |
|---|
| 5288 | 5707 | } |
|---|
| 5289 | 5708 | |
|---|
| 5709 | +static bool flush_required(int cpu) |
|---|
| 5710 | +{ |
|---|
| 5711 | +#if IS_ENABLED(CONFIG_RPS) |
|---|
| 5712 | + struct softnet_data *sd = &per_cpu(softnet_data, cpu); |
|---|
| 5713 | + bool do_flush; |
|---|
| 5714 | + |
|---|
| 5715 | + local_irq_disable(); |
|---|
| 5716 | + rps_lock(sd); |
|---|
| 5717 | + |
|---|
| 5718 | + /* as insertion into process_queue happens with the rps lock held, |
|---|
| 5719 | + * process_queue access may race only with dequeue |
|---|
| 5720 | + */ |
|---|
| 5721 | + do_flush = !skb_queue_empty(&sd->input_pkt_queue) || |
|---|
| 5722 | + !skb_queue_empty_lockless(&sd->process_queue); |
|---|
| 5723 | + rps_unlock(sd); |
|---|
| 5724 | + local_irq_enable(); |
|---|
| 5725 | + |
|---|
| 5726 | + return do_flush; |
|---|
| 5727 | +#endif |
|---|
| 5728 | + /* without RPS we can't safely check input_pkt_queue: during a |
|---|
| 5729 | + * concurrent remote skb_queue_splice() we can detect as empty both |
|---|
| 5730 | + * input_pkt_queue and process_queue even if the latter could end up
|---|
| 5731 | + * containing a lot of packets. |
|---|
| 5732 | + */ |
|---|
| 5733 | + return true; |
|---|
| 5734 | +} |
|---|
| 5735 | + |
|---|
| 5290 | 5736 | static void flush_all_backlogs(void) |
|---|
| 5291 | 5737 | { |
|---|
| 5738 | + static cpumask_t flush_cpus; |
|---|
| 5292 | 5739 | unsigned int cpu; |
|---|
| 5740 | + |
|---|
| 5741 | + /* since we are under rtnl lock protection we can use static data |
|---|
| 5742 | + * for the cpumask and avoid allocating the possibly large
|---|
| 5743 | + * mask on the stack
|---|
| 5744 | + */ |
|---|
| 5745 | + ASSERT_RTNL(); |
|---|
| 5293 | 5746 | |
|---|
| 5294 | 5747 | get_online_cpus(); |
|---|
| 5295 | 5748 | |
|---|
| 5296 | | - for_each_online_cpu(cpu) |
|---|
| 5297 | | - queue_work_on(cpu, system_highpri_wq, |
|---|
| 5298 | | - per_cpu_ptr(&flush_works, cpu)); |
|---|
| 5749 | + cpumask_clear(&flush_cpus); |
|---|
| 5750 | + for_each_online_cpu(cpu) { |
|---|
| 5751 | + if (flush_required(cpu)) { |
|---|
| 5752 | + queue_work_on(cpu, system_highpri_wq, |
|---|
| 5753 | + per_cpu_ptr(&flush_works, cpu)); |
|---|
| 5754 | + cpumask_set_cpu(cpu, &flush_cpus); |
|---|
| 5755 | + } |
|---|
| 5756 | + } |
|---|
| 5299 | 5757 | |
|---|
| 5300 | | - for_each_online_cpu(cpu) |
|---|
| 5758 | + /* we can have in flight packet[s] on the cpus we are not flushing, |
|---|
| 5759 | + * synchronize_net() in unregister_netdevice_many() will take care of |
|---|
| 5760 | + * them |
|---|
| 5761 | + */ |
|---|
| 5762 | + for_each_cpu(cpu, &flush_cpus) |
|---|
| 5301 | 5763 | flush_work(per_cpu_ptr(&flush_works, cpu)); |
|---|
| 5302 | 5764 | |
|---|
| 5303 | 5765 | put_online_cpus(); |
|---|
| 5304 | 5766 | } |
|---|
| 5305 | 5767 | |
|---|
| 5306 | | -static int napi_gro_complete(struct sk_buff *skb) |
|---|
| 5768 | +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ |
|---|
| 5769 | +static void gro_normal_list(struct napi_struct *napi) |
|---|
| 5770 | +{ |
|---|
| 5771 | + if (!napi->rx_count) |
|---|
| 5772 | + return; |
|---|
| 5773 | + netif_receive_skb_list_internal(&napi->rx_list); |
|---|
| 5774 | + INIT_LIST_HEAD(&napi->rx_list); |
|---|
| 5775 | + napi->rx_count = 0; |
|---|
| 5776 | +} |
|---|
| 5777 | + |
|---|
| 5778 | +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, |
|---|
| 5779 | + * pass the whole batch up to the stack. |
|---|
| 5780 | + */ |
|---|
| 5781 | +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) |
|---|
| 5782 | +{ |
|---|
| 5783 | + list_add_tail(&skb->list, &napi->rx_list); |
|---|
| 5784 | + napi->rx_count += segs; |
|---|
| 5785 | + if (napi->rx_count >= gro_normal_batch) |
|---|
| 5786 | + gro_normal_list(napi); |
|---|
| 5787 | +} |
|---|
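`gro_normal_one()` counts segments rather than skbs, so a GRO-coalesced skb carrying N segments fills the batch as quickly as N singletons; once `rx_count` reaches `gro_normal_batch` (a sysctl, default 8) the whole `rx_list` goes to `netif_receive_skb_list_internal()` in one call, amortizing per-packet stack entry. For example, two coalesced skbs of 5 and 3 segments reach the default threshold on the second call and flush an rx_list of just two skbs.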
| 5788 | + |
|---|
| 5789 | +INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); |
|---|
| 5790 | +INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); |
|---|
| 5791 | +static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) |
|---|
| 5307 | 5792 | { |
|---|
| 5308 | 5793 | struct packet_offload *ptype; |
|---|
| 5309 | 5794 | __be16 type = skb->protocol; |
|---|
| .. | .. |
|---|
| 5322 | 5807 | if (ptype->type != type || !ptype->callbacks.gro_complete) |
|---|
| 5323 | 5808 | continue; |
|---|
| 5324 | 5809 | |
|---|
| 5325 | | - err = ptype->callbacks.gro_complete(skb, 0); |
|---|
| 5810 | + err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, |
|---|
| 5811 | + ipv6_gro_complete, inet_gro_complete, |
|---|
| 5812 | + skb, 0); |
|---|
| 5326 | 5813 | break; |
|---|
| 5327 | 5814 | } |
|---|
| 5328 | 5815 | rcu_read_unlock(); |
|---|
| .. | .. |
|---|
| 5334 | 5821 | } |
|---|
| 5335 | 5822 | |
|---|
| 5336 | 5823 | out: |
|---|
| 5337 | | - return netif_receive_skb_internal(skb); |
|---|
| 5824 | + gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); |
|---|
| 5825 | + return NET_RX_SUCCESS; |
|---|
| 5338 | 5826 | } |
|---|
| 5339 | 5827 | |
|---|
| 5340 | 5828 | static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, |
|---|
| .. | .. |
|---|
| 5346 | 5834 | list_for_each_entry_safe_reverse(skb, p, head, list) { |
|---|
| 5347 | 5835 | if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) |
|---|
| 5348 | 5836 | return; |
|---|
| 5349 | | - list_del(&skb->list); |
|---|
| 5350 | | - skb->next = NULL; |
|---|
| 5351 | | - napi_gro_complete(skb); |
|---|
| 5837 | + skb_list_del_init(skb); |
|---|
| 5838 | + napi_gro_complete(napi, skb); |
|---|
| 5352 | 5839 | napi->gro_hash[index].count--; |
|---|
| 5353 | 5840 | } |
|---|
| 5354 | 5841 | |
|---|
| .. | .. |
|---|
| 5362 | 5849 | */ |
|---|
| 5363 | 5850 | void napi_gro_flush(struct napi_struct *napi, bool flush_old) |
|---|
| 5364 | 5851 | { |
|---|
| 5365 | | - u32 i; |
|---|
| 5852 | + unsigned long bitmask = napi->gro_bitmask; |
|---|
| 5853 | + unsigned int i, base = ~0U; |
|---|
| 5366 | 5854 | |
|---|
| 5367 | | - for (i = 0; i < GRO_HASH_BUCKETS; i++) { |
|---|
| 5368 | | - if (test_bit(i, &napi->gro_bitmask)) |
|---|
| 5369 | | - __napi_gro_flush_chain(napi, i, flush_old); |
|---|
| 5855 | + while ((i = ffs(bitmask)) != 0) { |
|---|
| 5856 | + bitmask >>= i; |
|---|
| 5857 | + base += i; |
|---|
| 5858 | + __napi_gro_flush_chain(napi, base, flush_old); |
|---|
| 5370 | 5859 | } |
|---|
| 5371 | 5860 | } |
|---|
| 5372 | 5861 | EXPORT_SYMBOL(napi_gro_flush); |
|---|
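The rewritten loop visits only the set bits of `gro_bitmask` instead of testing every one of the `GRO_HASH_BUCKETS` positions: `ffs()` returns the 1-based index of the lowest set bit (0 once none remain), so starting `base` at `~0U` makes `base += i` land on the 0-based bucket index after each shift. A standalone userspace sketch of the same walk, using the libc `ffs()`:

```c
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int bitmask = 0x12;	/* buckets 1 and 4 set */
	unsigned int i, base = ~0U;

	while ((i = ffs(bitmask)) != 0) {
		bitmask >>= i;
		base += i;
		printf("flush bucket %u\n", base);	/* 1, then 4 */
	}
	return 0;
}
```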
| .. | .. |
|---|
| 5391 | 5880 | } |
|---|
| 5392 | 5881 | |
|---|
| 5393 | 5882 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; |
|---|
| 5394 | | - diffs |= p->vlan_tci ^ skb->vlan_tci; |
|---|
| 5883 | + diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb); |
|---|
| 5884 | + if (skb_vlan_tag_present(p)) |
|---|
| 5885 | + diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb); |
|---|
| 5395 | 5886 | diffs |= skb_metadata_dst_cmp(p, skb); |
|---|
| 5396 | 5887 | diffs |= skb_metadata_differs(p, skb); |
|---|
| 5397 | 5888 | if (maclen == ETH_HLEN) |
|---|
| .. | .. |
|---|
| 5401 | 5892 | diffs = memcmp(skb_mac_header(p), |
|---|
| 5402 | 5893 | skb_mac_header(skb), |
|---|
| 5403 | 5894 | maclen); |
|---|
| 5895 | + |
|---|
| 5896 | + diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); |
|---|
| 5897 | +#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) |
|---|
| 5898 | + if (!diffs) { |
|---|
| 5899 | + struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT); |
|---|
| 5900 | + struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT); |
|---|
| 5901 | + |
|---|
| 5902 | + diffs |= (!!p_ext) ^ (!!skb_ext); |
|---|
| 5903 | + if (!diffs && unlikely(skb_ext)) |
|---|
| 5904 | + diffs |= p_ext->chain ^ skb_ext->chain; |
|---|
| 5905 | + } |
|---|
| 5906 | +#endif |
|---|
| 5907 | + |
|---|
| 5404 | 5908 | NAPI_GRO_CB(p)->same_flow = !diffs; |
|---|
| 5405 | 5909 | } |
|---|
| 5406 | 5910 | |
|---|
| 5407 | 5911 | return head; |
|---|
| 5408 | 5912 | } |
|---|
| 5409 | 5913 | |
|---|
| 5410 | | -static void skb_gro_reset_offset(struct sk_buff *skb) |
|---|
| 5914 | +static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) |
|---|
| 5411 | 5915 | { |
|---|
| 5412 | 5916 | const struct skb_shared_info *pinfo = skb_shinfo(skb); |
|---|
| 5413 | 5917 | const skb_frag_t *frag0 = &pinfo->frags[0]; |
|---|
| .. | .. |
|---|
| 5416 | 5920 | NAPI_GRO_CB(skb)->frag0 = NULL; |
|---|
| 5417 | 5921 | NAPI_GRO_CB(skb)->frag0_len = 0; |
|---|
| 5418 | 5922 | |
|---|
| 5419 | | - if (skb_mac_header(skb) == skb_tail_pointer(skb) && |
|---|
| 5420 | | - pinfo->nr_frags && |
|---|
| 5923 | + if (!skb_headlen(skb) && pinfo->nr_frags && |
|---|
| 5421 | 5924 | !PageHighMem(skb_frag_page(frag0)) && |
|---|
| 5422 | | - (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { |
|---|
| 5925 | + (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { |
|---|
| 5423 | 5926 | NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); |
|---|
| 5424 | 5927 | NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, |
|---|
| 5425 | 5928 | skb_frag_size(frag0), |
|---|
| .. | .. |
|---|
| 5438 | 5941 | skb->data_len -= grow; |
|---|
| 5439 | 5942 | skb->tail += grow; |
|---|
| 5440 | 5943 | |
|---|
| 5441 | | - pinfo->frags[0].page_offset += grow; |
|---|
| 5944 | + skb_frag_off_add(&pinfo->frags[0], grow); |
|---|
| 5442 | 5945 | skb_frag_size_sub(&pinfo->frags[0], grow); |
|---|
| 5443 | 5946 | |
|---|
| 5444 | 5947 | if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { |
|---|
| .. | .. |
|---|
| 5448 | 5951 | } |
|---|
| 5449 | 5952 | } |
|---|
| 5450 | 5953 | |
|---|
| 5451 | | -static void gro_flush_oldest(struct list_head *head) |
|---|
| 5954 | +static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) |
|---|
| 5452 | 5955 | { |
|---|
| 5453 | 5956 | struct sk_buff *oldest; |
|---|
| 5454 | 5957 | |
|---|
| .. | .. |
|---|
| 5463 | 5966 | /* Do not adjust napi->gro_hash[].count, caller is adding a new |
|---|
| 5464 | 5967 | * SKB to the chain. |
|---|
| 5465 | 5968 | */ |
|---|
| 5466 | | - list_del(&oldest->list); |
|---|
| 5467 | | - oldest->next = NULL; |
|---|
| 5468 | | - napi_gro_complete(oldest); |
|---|
| 5969 | + skb_list_del_init(oldest); |
|---|
| 5970 | + napi_gro_complete(napi, oldest); |
|---|
| 5469 | 5971 | } |
|---|
| 5470 | 5972 | |
|---|
| 5973 | +INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, |
|---|
| 5974 | + struct sk_buff *)); |
|---|
| 5975 | +INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, |
|---|
| 5976 | + struct sk_buff *)); |
|---|
| 5471 | 5977 | static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
|---|
| 5472 | 5978 | { |
|---|
| 5473 | 5979 | u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); |
|---|
| .. | .. |
|---|
| 5517 | 6023 | NAPI_GRO_CB(skb)->csum_valid = 0; |
|---|
| 5518 | 6024 | } |
|---|
| 5519 | 6025 | |
|---|
| 5520 | | - pp = ptype->callbacks.gro_receive(gro_head, skb); |
|---|
| 6026 | + pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, |
|---|
| 6027 | + ipv6_gro_receive, inet_gro_receive, |
|---|
| 6028 | + gro_head, skb); |
|---|
| 5521 | 6029 | break; |
|---|
| 5522 | 6030 | } |
|---|
| 5523 | 6031 | rcu_read_unlock(); |
|---|
| .. | .. |
|---|
| 5525 | 6033 | if (&ptype->list == head) |
|---|
| 5526 | 6034 | goto normal; |
|---|
| 5527 | 6035 | |
|---|
| 5528 | | - if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { |
|---|
| 6036 | + if (PTR_ERR(pp) == -EINPROGRESS) { |
|---|
| 5529 | 6037 | ret = GRO_CONSUMED; |
|---|
| 5530 | 6038 | goto ok; |
|---|
| 5531 | 6039 | } |
|---|
| .. | .. |
|---|
| 5534 | 6042 | ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; |
|---|
| 5535 | 6043 | |
|---|
| 5536 | 6044 | if (pp) { |
|---|
| 5537 | | - list_del(&pp->list); |
|---|
| 5538 | | - pp->next = NULL; |
|---|
| 5539 | | - napi_gro_complete(pp); |
|---|
| 6045 | + skb_list_del_init(pp); |
|---|
| 6046 | + napi_gro_complete(napi, pp); |
|---|
| 5540 | 6047 | napi->gro_hash[hash].count--; |
|---|
| 5541 | 6048 | } |
|---|
| 5542 | 6049 | |
|---|
| .. | .. |
|---|
| 5547 | 6054 | goto normal; |
|---|
| 5548 | 6055 | |
|---|
| 5549 | 6056 | if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { |
|---|
| 5550 | | - gro_flush_oldest(gro_head); |
|---|
| 6057 | + gro_flush_oldest(napi, gro_head); |
|---|
| 5551 | 6058 | } else { |
|---|
| 5552 | 6059 | napi->gro_hash[hash].count++; |
|---|
| 5553 | 6060 | } |
|---|
| .. | .. |
|---|
| 5608 | 6115 | static void napi_skb_free_stolen_head(struct sk_buff *skb) |
|---|
| 5609 | 6116 | { |
|---|
| 5610 | 6117 | skb_dst_drop(skb); |
|---|
| 5611 | | - secpath_reset(skb); |
|---|
| 6118 | + skb_ext_put(skb); |
|---|
| 5612 | 6119 | kmem_cache_free(skbuff_head_cache, skb); |
|---|
| 5613 | 6120 | } |
|---|
| 5614 | 6121 | |
|---|
| 5615 | | -static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) |
|---|
| 6122 | +static gro_result_t napi_skb_finish(struct napi_struct *napi, |
|---|
| 6123 | + struct sk_buff *skb, |
|---|
| 6124 | + gro_result_t ret) |
|---|
| 5616 | 6125 | { |
|---|
| 5617 | 6126 | switch (ret) { |
|---|
| 5618 | 6127 | case GRO_NORMAL: |
|---|
| 5619 | | - if (netif_receive_skb_internal(skb)) |
|---|
| 5620 | | - ret = GRO_DROP; |
|---|
| 6128 | + gro_normal_one(napi, skb, 1); |
|---|
| 5621 | 6129 | break; |
|---|
| 5622 | 6130 | |
|---|
| 5623 | 6131 | case GRO_DROP: |
|---|
| .. | .. |
|---|
| 5642 | 6150 | |
|---|
| 5643 | 6151 | gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
|---|
| 5644 | 6152 | { |
|---|
| 6153 | + gro_result_t ret; |
|---|
| 6154 | + |
|---|
| 5645 | 6155 | skb_mark_napi_id(skb, napi); |
|---|
| 5646 | 6156 | trace_napi_gro_receive_entry(skb); |
|---|
| 5647 | 6157 | |
|---|
| 5648 | | - skb_gro_reset_offset(skb); |
|---|
| 6158 | + skb_gro_reset_offset(skb, 0); |
|---|
| 5649 | 6159 | |
|---|
| 5650 | | - return napi_skb_finish(dev_gro_receive(napi, skb), skb); |
|---|
| 6160 | + ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); |
|---|
| 6161 | + trace_napi_gro_receive_exit(ret); |
|---|
| 6162 | + |
|---|
| 6163 | + return ret; |
|---|
| 5651 | 6164 | } |
|---|
| 5652 | 6165 | EXPORT_SYMBOL(napi_gro_receive); |
|---|
| 5653 | 6166 | |
|---|
| .. | .. |
|---|
| 5660 | 6173 | __skb_pull(skb, skb_headlen(skb)); |
|---|
| 5661 | 6174 | /* restore the reserve we had after netdev_alloc_skb_ip_align() */ |
|---|
| 5662 | 6175 | skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); |
|---|
| 5663 | | - skb->vlan_tci = 0; |
|---|
| 6176 | + __vlan_hwaccel_clear_tag(skb); |
|---|
| 5664 | 6177 | skb->dev = napi->dev; |
|---|
| 5665 | 6178 | skb->skb_iif = 0; |
|---|
| 5666 | 6179 | |
|---|
| .. | .. |
|---|
| 5670 | 6183 | skb->encapsulation = 0; |
|---|
| 5671 | 6184 | skb_shinfo(skb)->gso_type = 0; |
|---|
| 5672 | 6185 | skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); |
|---|
| 5673 | | - secpath_reset(skb); |
|---|
| 6186 | + skb_ext_reset(skb); |
|---|
| 6187 | + nf_reset_ct(skb); |
|---|
| 5674 | 6188 | |
|---|
| 5675 | 6189 | napi->skb = skb; |
|---|
| 5676 | 6190 | } |
|---|
| .. | .. |
|---|
| 5699 | 6213 | case GRO_HELD: |
|---|
| 5700 | 6214 | __skb_push(skb, ETH_HLEN); |
|---|
| 5701 | 6215 | skb->protocol = eth_type_trans(skb, skb->dev); |
|---|
| 5702 | | - if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) |
|---|
| 5703 | | - ret = GRO_DROP; |
|---|
| 6216 | + if (ret == GRO_NORMAL) |
|---|
| 6217 | + gro_normal_one(napi, skb, 1); |
|---|
| 5704 | 6218 | break; |
|---|
| 5705 | 6219 | |
|---|
| 5706 | 6220 | case GRO_DROP: |
|---|
| .. | .. |
|---|
| 5735 | 6249 | napi->skb = NULL; |
|---|
| 5736 | 6250 | |
|---|
| 5737 | 6251 | skb_reset_mac_header(skb); |
|---|
| 5738 | | - skb_gro_reset_offset(skb); |
|---|
| 6252 | + skb_gro_reset_offset(skb, hlen); |
|---|
| 5739 | 6253 | |
|---|
| 5740 | 6254 | if (unlikely(skb_gro_header_hard(skb, hlen))) { |
|---|
| 5741 | 6255 | eth = skb_gro_header_slow(skb, hlen, 0); |
|---|
| .. | .. |
|---|
| 5765 | 6279 | |
|---|
| 5766 | 6280 | gro_result_t napi_gro_frags(struct napi_struct *napi) |
|---|
| 5767 | 6281 | { |
|---|
| 6282 | + gro_result_t ret; |
|---|
| 5768 | 6283 | struct sk_buff *skb = napi_frags_skb(napi); |
|---|
| 5769 | 6284 | |
|---|
| 5770 | 6285 | if (!skb) |
|---|
| .. | .. |
|---|
| 5772 | 6287 | |
|---|
| 5773 | 6288 | trace_napi_gro_frags_entry(skb); |
|---|
| 5774 | 6289 | |
|---|
| 5775 | | - return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); |
|---|
| 6290 | + ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); |
|---|
| 6291 | + trace_napi_gro_frags_exit(ret); |
|---|
| 6292 | + |
|---|
| 6293 | + return ret; |
|---|
| 5776 | 6294 | } |
|---|
| 5777 | 6295 | EXPORT_SYMBOL(napi_gro_frags); |
|---|
| 5778 | 6296 | |
|---|
| .. | .. |
|---|
| 5788 | 6306 | |
|---|
| 5789 | 6307 | /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ |
|---|
| 5790 | 6308 | sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); |
|---|
| 6309 | + /* See comments in __skb_checksum_complete(). */ |
|---|
| 5791 | 6310 | if (likely(!sum)) { |
|---|
| 5792 | 6311 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && |
|---|
| 5793 | 6312 | !skb->csum_complete_sw) |
|---|
| 5794 | | - netdev_rx_csum_fault(skb->dev); |
|---|
| 6313 | + netdev_rx_csum_fault(skb->dev, skb); |
|---|
| 5795 | 6314 | } |
|---|
| 5796 | 6315 | |
|---|
| 5797 | 6316 | NAPI_GRO_CB(skb)->csum = wsum; |
|---|
| .. | .. |
|---|
| 5827 | 6346 | sd->rps_ipi_list = NULL; |
|---|
| 5828 | 6347 | |
|---|
| 5829 | 6348 | local_irq_enable(); |
|---|
| 5830 | | - preempt_check_resched_rt(); |
|---|
| 5831 | 6349 | |
|---|
| 5832 | 6350 | /* Send pending IPI's to kick RPS processing on remote cpus. */ |
|---|
| 5833 | 6351 | net_rps_send_ipi(remsd); |
|---|
| 5834 | 6352 | } else |
|---|
| 5835 | 6353 | #endif |
|---|
| 5836 | 6354 | local_irq_enable(); |
|---|
| 5837 | | - preempt_check_resched_rt(); |
|---|
| 5838 | 6355 | } |
|---|
| 5839 | 6356 | |
|---|
| 5840 | 6357 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) |
|---|
| .. | .. |
|---|
| 5860 | 6377 | net_rps_action_and_irq_enable(sd); |
|---|
| 5861 | 6378 | } |
|---|
| 5862 | 6379 | |
|---|
| 5863 | | - napi->weight = dev_rx_weight; |
|---|
| 6380 | + napi->weight = READ_ONCE(dev_rx_weight); |
|---|
| 5864 | 6381 | while (again) { |
|---|
| 5865 | 6382 | struct sk_buff *skb; |
|---|
| 5866 | 6383 | |
|---|
| 5867 | | - local_irq_disable(); |
|---|
| 5868 | 6384 | while ((skb = __skb_dequeue(&sd->process_queue))) { |
|---|
| 5869 | | - local_irq_enable(); |
|---|
| 5870 | 6385 | rcu_read_lock(); |
|---|
| 5871 | 6386 | __netif_receive_skb(skb); |
|---|
| 5872 | 6387 | rcu_read_unlock(); |
|---|
| 5873 | 6388 | input_queue_head_incr(sd); |
|---|
| 5874 | 6389 | if (++work >= quota) |
|---|
| 5875 | | - goto state_changed; |
|---|
| 5876 | | - local_irq_disable(); |
|---|
| 6390 | + return work; |
|---|
| 6391 | + |
|---|
| 5877 | 6392 | } |
|---|
| 5878 | 6393 | |
|---|
| 6394 | + local_irq_disable(); |
|---|
| 5879 | 6395 | rps_lock(sd); |
|---|
| 5880 | 6396 | if (skb_queue_empty(&sd->input_pkt_queue)) { |
|---|
| 5881 | 6397 | /* |
|---|
| .. | .. |
|---|
| 5896 | 6412 | local_irq_enable(); |
|---|
| 5897 | 6413 | } |
|---|
| 5898 | 6414 | |
|---|
| 5899 | | -state_changed: |
|---|
| 5900 | | - napi_gro_flush(napi, false); |
|---|
| 5901 | | - sd->current_napi = NULL; |
|---|
| 5902 | | - |
|---|
| 5903 | 6415 | return work; |
|---|
| 5904 | 6416 | } |
|---|
| 5905 | 6417 | |
|---|
| .. | .. |
|---|
| 5917 | 6429 | local_irq_save(flags); |
|---|
| 5918 | 6430 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); |
|---|
| 5919 | 6431 | local_irq_restore(flags); |
|---|
| 5920 | | - preempt_check_resched_rt(); |
|---|
| 5921 | 6432 | } |
|---|
| 5922 | 6433 | EXPORT_SYMBOL(__napi_schedule); |
|---|
| 5923 | 6434 | |
|---|
| .. | .. |
|---|
| 5926 | 6437 | * @n: napi context |
|---|
| 5927 | 6438 | * |
|---|
| 5928 | 6439 | * Test if NAPI routine is already running, and if not mark |
|---|
| 5929 | | - * it as running. This is used as a condition variable |
|---|
| 6440 | + * it as running. This is used as a condition variable to |
|---|
| 5930 | 6441 | * ensure only one NAPI poll instance runs. We also make
|---|
| 5931 | 6442 | * sure there is no pending NAPI disable. |
|---|
| 5932 | 6443 | */ |
|---|
| .. | .. |
|---|
| 5954 | 6465 | } |
|---|
| 5955 | 6466 | EXPORT_SYMBOL(napi_schedule_prep); |
|---|
| 5956 | 6467 | |
|---|
| 5957 | | -#ifndef CONFIG_PREEMPT_RT_FULL |
|---|
| 5958 | 6468 | /** |
|---|
| 5959 | 6469 | * __napi_schedule_irqoff - schedule for receive |
|---|
| 5960 | 6470 | * @n: entry to schedule |
|---|
| .. | .. |
|---|
| 5973 | 6483 | __napi_schedule(n); |
|---|
| 5974 | 6484 | } |
|---|
| 5975 | 6485 | EXPORT_SYMBOL(__napi_schedule_irqoff); |
|---|
| 5976 | | -#endif |
|---|
| 5977 | 6486 | |
|---|
| 5978 | 6487 | bool napi_complete_done(struct napi_struct *n, int work_done) |
|---|
| 5979 | 6488 | { |
|---|
| 5980 | | - unsigned long flags, val, new; |
|---|
| 6489 | + unsigned long flags, val, new, timeout = 0; |
|---|
| 6490 | + bool ret = true; |
|---|
| 5981 | 6491 | |
|---|
| 5982 | 6492 | /* |
|---|
| 5983 | 6493 | * 1) Don't let napi dequeue from the cpu poll list |
|---|
| .. | .. |
|---|
| 5989 | 6499 | NAPIF_STATE_IN_BUSY_POLL))) |
|---|
| 5990 | 6500 | return false; |
|---|
| 5991 | 6501 | |
|---|
| 6502 | + if (work_done) { |
|---|
| 6503 | + if (n->gro_bitmask) |
|---|
| 6504 | + timeout = READ_ONCE(n->dev->gro_flush_timeout); |
|---|
| 6505 | + n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); |
|---|
| 6506 | + } |
|---|
| 6507 | + if (n->defer_hard_irqs_count > 0) { |
|---|
| 6508 | + n->defer_hard_irqs_count--; |
|---|
| 6509 | + timeout = READ_ONCE(n->dev->gro_flush_timeout); |
|---|
| 6510 | + if (timeout) |
|---|
| 6511 | + ret = false; |
|---|
| 6512 | + } |
|---|
| 5992 | 6513 | if (n->gro_bitmask) { |
|---|
| 5993 | | - unsigned long timeout = 0; |
|---|
| 5994 | | - |
|---|
| 5995 | | - if (work_done) |
|---|
| 5996 | | - timeout = n->dev->gro_flush_timeout; |
|---|
| 5997 | | - |
|---|
| 5998 | 6514 | /* When the NAPI instance uses a timeout and keeps postponing |
|---|
| 5999 | 6515 | * it, we need to bound somehow the time packets are kept in |
|---|
| 6000 | 6516 | * the GRO layer |
|---|
| 6001 | 6517 | */ |
|---|
| 6002 | 6518 | napi_gro_flush(n, !!timeout); |
|---|
| 6003 | | - if (timeout) |
|---|
| 6004 | | - hrtimer_start(&n->timer, ns_to_ktime(timeout), |
|---|
| 6005 | | - HRTIMER_MODE_REL_PINNED); |
|---|
| 6006 | 6519 | } |
|---|
| 6007 | | - if (unlikely(!list_empty(&n->poll_list))) { |
|---|
| 6008 | | - struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
|---|
| 6009 | 6520 | |
|---|
| 6521 | + gro_normal_list(n); |
|---|
| 6522 | + |
|---|
| 6523 | + if (unlikely(!list_empty(&n->poll_list))) { |
|---|
| 6010 | 6524 | /* If n->poll_list is not empty, we need to mask irqs */ |
|---|
| 6011 | 6525 | local_irq_save(flags); |
|---|
| 6012 | 6526 | list_del_init(&n->poll_list); |
|---|
| 6013 | | - sd->current_napi = NULL; |
|---|
| 6014 | 6527 | local_irq_restore(flags); |
|---|
| 6015 | 6528 | } |
|---|
| 6016 | 6529 | |
|---|
| .. | .. |
|---|
| 6034 | 6547 | return false; |
|---|
| 6035 | 6548 | } |
|---|
| 6036 | 6549 | |
|---|
| 6037 | | - return true; |
|---|
| 6550 | + if (timeout) |
|---|
| 6551 | + hrtimer_start(&n->timer, ns_to_ktime(timeout), |
|---|
| 6552 | + HRTIMER_MODE_REL_PINNED); |
|---|
| 6553 | + return ret; |
|---|
| 6038 | 6554 | } |
|---|
| 6039 | 6555 | EXPORT_SYMBOL(napi_complete_done); |
|---|
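The reworked completion lets a driver keep its hard interrupt masked across idle polls: a productive poll reloads `defer_hard_irqs_count` from the device's `napi_defer_hard_irqs`, and while that countdown is non-zero `napi_complete_done()` returns false and rearms the `gro_flush_timeout` hrtimer (nanoseconds) instead of signalling the driver to unmask. With illustrative settings of `napi_defer_hard_irqs = 2` and `gro_flush_timeout = 200000`, up to two consecutive empty polls each get re-polled 200 us later through `napi_watchdog()` before the interrupt is finally re-enabled; both knobs are per-device attributes under /sys/class/net/<dev>/.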
| 6040 | 6556 | |
|---|
| .. | .. |
|---|
| 6077 | 6593 | * Ideally, a new ndo_busy_poll_stop() could avoid another round. |
|---|
| 6078 | 6594 | */ |
|---|
| 6079 | 6595 | rc = napi->poll(napi, BUSY_POLL_BUDGET); |
|---|
| 6596 | + /* We can't gro_normal_list() here, because napi->poll() might have |
|---|
| 6597 | + * rearmed the napi (napi_complete_done()) in which case it could |
|---|
| 6598 | + * already be running on another CPU. |
|---|
| 6599 | + */ |
|---|
| 6080 | 6600 | trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); |
|---|
| 6081 | 6601 | netpoll_poll_unlock(have_poll_lock); |
|---|
| 6082 | | - if (rc == BUSY_POLL_BUDGET) |
|---|
| 6602 | + if (rc == BUSY_POLL_BUDGET) { |
|---|
| 6603 | + /* As the whole budget was spent, we still own the napi so can |
|---|
| 6604 | + * safely handle the rx_list. |
|---|
| 6605 | + */ |
|---|
| 6606 | + gro_normal_list(napi); |
|---|
| 6083 | 6607 | __napi_schedule(napi); |
|---|
| 6608 | + } |
|---|
| 6084 | 6609 | local_bh_enable(); |
|---|
| 6085 | 6610 | } |
|---|
| 6086 | 6611 | |
|---|
| .. | .. |
|---|
| 6125 | 6650 | } |
|---|
| 6126 | 6651 | work = napi_poll(napi, BUSY_POLL_BUDGET); |
|---|
| 6127 | 6652 | trace_napi_poll(napi, work, BUSY_POLL_BUDGET); |
|---|
| 6653 | + gro_normal_list(napi); |
|---|
| 6128 | 6654 | count: |
|---|
| 6129 | 6655 | if (work > 0) |
|---|
| 6130 | 6656 | __NET_ADD_STATS(dev_net(napi->dev), |
|---|
| .. | .. |
|---|
| 6158 | 6684 | |
|---|
| 6159 | 6685 | static void napi_hash_add(struct napi_struct *napi) |
|---|
| 6160 | 6686 | { |
|---|
| 6161 | | - if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || |
|---|
| 6162 | | - test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) |
|---|
| 6687 | + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) |
|---|
| 6163 | 6688 | return; |
|---|
| 6164 | 6689 | |
|---|
| 6165 | 6690 | spin_lock(&napi_hash_lock); |
|---|
| .. | .. |
|---|
| 6180 | 6705 | /* Warning : caller is responsible to make sure rcu grace period |
|---|
| 6181 | 6706 | * is respected before freeing memory containing @napi |
|---|
| 6182 | 6707 | */ |
|---|
| 6183 | | -bool napi_hash_del(struct napi_struct *napi) |
|---|
| 6708 | +static void napi_hash_del(struct napi_struct *napi) |
|---|
| 6184 | 6709 | { |
|---|
| 6185 | | - bool rcu_sync_needed = false; |
|---|
| 6186 | | - |
|---|
| 6187 | 6710 | spin_lock(&napi_hash_lock); |
|---|
| 6188 | 6711 | |
|---|
| 6189 | | - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { |
|---|
| 6190 | | - rcu_sync_needed = true; |
|---|
| 6191 | | - hlist_del_rcu(&napi->napi_hash_node); |
|---|
| 6192 | | - } |
|---|
| 6712 | + hlist_del_init_rcu(&napi->napi_hash_node); |
|---|
| 6713 | + |
|---|
| 6193 | 6714 | spin_unlock(&napi_hash_lock); |
|---|
| 6194 | | - return rcu_sync_needed; |
|---|
| 6195 | 6715 | } |
|---|
| 6196 | | -EXPORT_SYMBOL_GPL(napi_hash_del); |
|---|
| 6197 | 6716 | |
|---|
| 6198 | 6717 | static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) |
|---|
| 6199 | 6718 | { |
|---|
| .. | .. |
|---|
| 6204 | 6723 | /* Note : we use a relaxed variant of napi_schedule_prep() not setting |
|---|
| 6205 | 6724 | * NAPI_STATE_MISSED, since we do not react to a device IRQ. |
|---|
| 6206 | 6725 | */ |
|---|
| 6207 | | - if (napi->gro_bitmask && !napi_disable_pending(napi) && |
|---|
| 6726 | + if (!napi_disable_pending(napi) && |
|---|
| 6208 | 6727 | !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) |
|---|
| 6209 | 6728 | __napi_schedule_irqoff(napi); |
|---|
| 6210 | 6729 | |
|---|
| .. | .. |
|---|
| 6225 | 6744 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, |
|---|
| 6226 | 6745 | int (*poll)(struct napi_struct *, int), int weight) |
|---|
| 6227 | 6746 | { |
|---|
| 6747 | + if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) |
|---|
| 6748 | + return; |
|---|
| 6749 | + |
|---|
| 6228 | 6750 | INIT_LIST_HEAD(&napi->poll_list); |
|---|
| 6751 | + INIT_HLIST_NODE(&napi->napi_hash_node); |
|---|
| 6229 | 6752 | hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); |
|---|
| 6230 | 6753 | napi->timer.function = napi_watchdog; |
|---|
| 6231 | 6754 | init_gro_hash(napi); |
|---|
| 6232 | 6755 | napi->skb = NULL; |
|---|
| 6756 | + INIT_LIST_HEAD(&napi->rx_list); |
|---|
| 6757 | + napi->rx_count = 0; |
|---|
| 6233 | 6758 | napi->poll = poll; |
|---|
| 6234 | 6759 | if (weight > NAPI_POLL_WEIGHT) |
|---|
| 6235 | | - pr_err_once("netif_napi_add() called with weight %d on device %s\n", |
|---|
| 6236 | | - weight, dev->name); |
|---|
| 6760 | + netdev_err_once(dev, "%s() called with weight %d\n", __func__, |
|---|
| 6761 | + weight); |
|---|
| 6237 | 6762 | napi->weight = weight; |
|---|
| 6238 | 6763 | napi->dev = dev; |
|---|
| 6239 | 6764 | #ifdef CONFIG_NETPOLL |
|---|
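`NAPI_STATE_LISTED` now guards registration, so adding the same `napi_struct` twice warns and bails instead of corrupting the device's NAPI list. Driver usage is unchanged; a minimal sketch with hypothetical `my_priv`/`my_poll` names:

```c
/* Hypothetical driver fragment; the API calls match the code above. */
struct my_priv {
	struct napi_struct napi;
	/* ... device state ... */
};

static int my_poll(struct napi_struct *napi, int budget)
{
	int work = 0;

	/* ... receive up to 'budget' packets, napi_gro_receive() each ... */
	if (work < budget)
		napi_complete_done(napi, work);
	return work;
}

static void my_setup(struct net_device *dev, struct my_priv *priv)
{
	netif_napi_add(dev, &priv->napi, my_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->napi);
}
```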
| .. | .. |
|---|
| 6276 | 6801 | } |
|---|
| 6277 | 6802 | |
|---|
| 6278 | 6803 | /* Must be called in process context */ |
|---|
| 6279 | | -void netif_napi_del(struct napi_struct *napi) |
|---|
| 6804 | +void __netif_napi_del(struct napi_struct *napi) |
|---|
| 6280 | 6805 | { |
|---|
| 6281 | | - might_sleep(); |
|---|
| 6282 | | - if (napi_hash_del(napi)) |
|---|
| 6283 | | - synchronize_net(); |
|---|
| 6284 | | - list_del_init(&napi->dev_list); |
|---|
| 6806 | + if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) |
|---|
| 6807 | + return; |
|---|
| 6808 | + |
|---|
| 6809 | + napi_hash_del(napi); |
|---|
| 6810 | + list_del_rcu(&napi->dev_list); |
|---|
| 6285 | 6811 | napi_free_frags(napi); |
|---|
| 6286 | 6812 | |
|---|
| 6287 | 6813 | flush_gro_hash(napi); |
|---|
| 6288 | 6814 | napi->gro_bitmask = 0; |
|---|
| 6289 | 6815 | } |
|---|
| 6290 | | -EXPORT_SYMBOL(netif_napi_del); |
|---|
| 6291 | | - |
|---|
| 6292 | | -struct napi_struct *get_current_napi_context(void) |
|---|
| 6293 | | -{ |
|---|
| 6294 | | - struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
|---|
| 6295 | | - |
|---|
| 6296 | | - return sd->current_napi; |
|---|
| 6297 | | -} |
|---|
| 6298 | | -EXPORT_SYMBOL(get_current_napi_context); |
|---|
| 6816 | +EXPORT_SYMBOL(__netif_napi_del); |
|---|
| 6299 | 6817 | |
|---|
| 6300 | 6818 | static int napi_poll(struct napi_struct *n, struct list_head *repoll) |
|---|
| 6301 | 6819 | { |
|---|
| .. | .. |
|---|
| 6316 | 6834 | */ |
|---|
| 6317 | 6835 | work = 0; |
|---|
| 6318 | 6836 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
|---|
| 6319 | | - struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
|---|
| 6320 | | - |
|---|
| 6321 | | - sd->current_napi = n; |
|---|
| 6322 | 6837 | work = n->poll(n, weight); |
|---|
| 6323 | 6838 | trace_napi_poll(n, work, weight); |
|---|
| 6324 | 6839 | } |
|---|
| 6325 | 6840 | |
|---|
| 6326 | | - WARN_ON_ONCE(work > weight); |
|---|
| 6841 | + if (unlikely(work > weight)) |
|---|
| 6842 | + pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", |
|---|
| 6843 | + n->poll, work, weight); |
|---|
| 6327 | 6844 | |
|---|
| 6328 | 6845 | if (likely(work < weight)) |
|---|
| 6329 | 6846 | goto out_unlock; |
|---|
| .. | .. |
|---|
| 6344 | 6861 | */ |
|---|
| 6345 | 6862 | napi_gro_flush(n, HZ >= 1000); |
|---|
| 6346 | 6863 | } |
|---|
| 6864 | + |
|---|
| 6865 | + gro_normal_list(n); |
|---|
| 6347 | 6866 | |
|---|
| 6348 | 6867 | /* Some drivers may have called napi_schedule |
|---|
| 6349 | 6868 | * prior to exhausting their budget. |
|---|
| .. | .. |
|---|
| 6366 | 6885 | { |
|---|
| 6367 | 6886 | struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
|---|
| 6368 | 6887 | unsigned long time_limit = jiffies + |
|---|
| 6369 | | - usecs_to_jiffies(netdev_budget_usecs); |
|---|
| 6370 | | - int budget = netdev_budget; |
|---|
| 6371 | | - struct sk_buff_head tofree_q; |
|---|
| 6372 | | - struct sk_buff *skb; |
|---|
| 6888 | + usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); |
|---|
| 6889 | + int budget = READ_ONCE(netdev_budget); |
|---|
| 6373 | 6890 | LIST_HEAD(list); |
|---|
| 6374 | 6891 | LIST_HEAD(repoll); |
|---|
| 6375 | 6892 | |
|---|
| 6376 | | - __skb_queue_head_init(&tofree_q); |
|---|
| 6377 | | - |
|---|
| 6378 | 6893 | local_irq_disable(); |
|---|
| 6379 | | - skb_queue_splice_init(&sd->tofree_queue, &tofree_q); |
|---|
| 6380 | 6894 | list_splice_init(&sd->poll_list, &list); |
|---|
| 6381 | 6895 | local_irq_enable(); |
|---|
| 6382 | | - |
|---|
| 6383 | | - while ((skb = __skb_dequeue(&tofree_q))) |
|---|
| 6384 | | - kfree_skb(skb); |
|---|
| 6385 | 6896 | |
|---|
| 6386 | 6897 | for (;;) { |
|---|
| 6387 | 6898 | struct napi_struct *n; |
|---|
| .. | .. |
|---|
| 6412 | 6923 | list_splice_tail(&repoll, &list); |
|---|
| 6413 | 6924 | list_splice(&list, &sd->poll_list); |
|---|
| 6414 | 6925 | if (!list_empty(&sd->poll_list)) |
|---|
| 6415 | | - __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); |
|---|
| 6926 | + __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
|---|
| 6416 | 6927 | |
|---|
| 6417 | 6928 | net_rps_action_and_irq_enable(sd); |
|---|
| 6418 | 6929 | out: |
|---|
| .. | .. |
|---|
| 6424 | 6935 | |
|---|
| 6425 | 6936 | /* upper master flag, there can only be one master device per list */ |
|---|
| 6426 | 6937 | bool master; |
|---|
| 6938 | + |
|---|
| 6939 | + /* lookup ignore flag */ |
|---|
| 6940 | + bool ignore; |
|---|
| 6427 | 6941 | |
|---|
| 6428 | 6942 | /* counter for the number of times this device was added to us */ |
|---|
| 6429 | 6943 | u16 ref_nr; |
|---|
| .. | .. |
|---|
| 6447 | 6961 | return NULL; |
|---|
| 6448 | 6962 | } |
|---|
| 6449 | 6963 | |
|---|
| 6450 | | -static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) |
|---|
| 6964 | +static int ____netdev_has_upper_dev(struct net_device *upper_dev, |
|---|
| 6965 | + struct netdev_nested_priv *priv) |
|---|
| 6451 | 6966 | { |
|---|
| 6452 | | - struct net_device *dev = data; |
|---|
| 6967 | + struct net_device *dev = (struct net_device *)priv->data; |
|---|
| 6453 | 6968 | |
|---|
| 6454 | 6969 | return upper_dev == dev; |
|---|
| 6455 | 6970 | } |
|---|
| .. | .. |
|---|
| 6466 | 6981 | bool netdev_has_upper_dev(struct net_device *dev, |
|---|
| 6467 | 6982 | struct net_device *upper_dev) |
|---|
| 6468 | 6983 | { |
|---|
| 6984 | + struct netdev_nested_priv priv = { |
|---|
| 6985 | + .data = (void *)upper_dev, |
|---|
| 6986 | + }; |
|---|
| 6987 | + |
|---|
| 6469 | 6988 | ASSERT_RTNL(); |
|---|
| 6470 | 6989 | |
|---|
| 6471 | | - return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, |
|---|
| 6472 | | - upper_dev); |
|---|
| 6990 | + return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, |
|---|
| 6991 | + &priv); |
|---|
| 6473 | 6992 | } |
|---|
| 6474 | 6993 | EXPORT_SYMBOL(netdev_has_upper_dev); |
|---|
| 6475 | 6994 | |
|---|
| .. | .. |
|---|
| 6486 | 7005 | bool netdev_has_upper_dev_all_rcu(struct net_device *dev, |
|---|
| 6487 | 7006 | struct net_device *upper_dev) |
|---|
| 6488 | 7007 | { |
|---|
| 6489 | | - return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, |
|---|
| 6490 | | - upper_dev); |
|---|
| 7008 | + struct netdev_nested_priv priv = { |
|---|
| 7009 | + .data = (void *)upper_dev, |
|---|
| 7010 | + }; |
|---|
| 7011 | + |
|---|
| 7012 | + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, |
|---|
| 7013 | + &priv); |
|---|
| 6491 | 7014 | } |
|---|
| 6492 | 7015 | EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); |
|---|
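Walker callbacks now take a `struct netdev_nested_priv` instead of a bare `void *`, so the core can thread a `flags` word alongside the caller's `data` through nested traversals. A sketch of a caller adapted to the new signature (hypothetical `count_uppers` helper):

```c
/* Hypothetical walker that counts upper devices. */
static int count_uppers(struct net_device *dev,
			struct netdev_nested_priv *priv)
{
	int *count = (int *)priv->data;

	(*count)++;
	return 0;	/* non-zero would abort the walk */
}

/* Caller must hold rcu_read_lock() for the _rcu walk. */
static int my_upper_count(struct net_device *dev)
{
	int count = 0;
	struct netdev_nested_priv priv = {
		.flags = 0,
		.data  = (void *)&count,
	};

	netdev_walk_all_upper_dev_rcu(dev, count_uppers, &priv);
	return count;
}
```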
| 6493 | 7016 | |
|---|
| .. | .. |
|---|
| 6529 | 7052 | return NULL; |
|---|
| 6530 | 7053 | } |
|---|
| 6531 | 7054 | EXPORT_SYMBOL(netdev_master_upper_dev_get); |
|---|
| 7055 | + |
|---|
| 7056 | +static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev) |
|---|
| 7057 | +{ |
|---|
| 7058 | + struct netdev_adjacent *upper; |
|---|
| 7059 | + |
|---|
| 7060 | + ASSERT_RTNL(); |
|---|
| 7061 | + |
|---|
| 7062 | + if (list_empty(&dev->adj_list.upper)) |
|---|
| 7063 | + return NULL; |
|---|
| 7064 | + |
|---|
| 7065 | + upper = list_first_entry(&dev->adj_list.upper, |
|---|
| 7066 | + struct netdev_adjacent, list); |
|---|
| 7067 | + if (likely(upper->master) && !upper->ignore) |
|---|
| 7068 | + return upper->dev; |
|---|
| 7069 | + return NULL; |
|---|
| 7070 | +} |
|---|
| 6532 | 7071 | |
|---|
| 6533 | 7072 | /** |
|---|
| 6534 | 7073 | * netdev_has_any_lower_dev - Check if device is linked to some device |
|---|
| .. | .. |
|---|
| 6580 | 7119 | } |
|---|
| 6581 | 7120 | EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); |
|---|
| 6582 | 7121 | |
|---|
| 6583 | | -static struct net_device *netdev_next_upper_dev(struct net_device *dev, |
|---|
| 6584 | | - struct list_head **iter) |
|---|
| 7122 | +static struct net_device *__netdev_next_upper_dev(struct net_device *dev, |
|---|
| 7123 | + struct list_head **iter, |
|---|
| 7124 | + bool *ignore) |
|---|
| 6585 | 7125 | { |
|---|
| 6586 | 7126 | struct netdev_adjacent *upper; |
|---|
| 6587 | 7127 | |
|---|
| .. | .. |
|---|
| 6591 | 7131 | return NULL; |
|---|
| 6592 | 7132 | |
|---|
| 6593 | 7133 | *iter = &upper->list; |
|---|
| 7134 | + *ignore = upper->ignore; |
|---|
| 6594 | 7135 | |
|---|
| 6595 | 7136 | return upper->dev; |
|---|
| 6596 | 7137 | } |
|---|
| .. | .. |
|---|
| 6612 | 7153 | return upper->dev; |
|---|
| 6613 | 7154 | } |
|---|
| 6614 | 7155 | |
|---|
| 6615 | | -static int netdev_walk_all_upper_dev(struct net_device *dev, |
|---|
| 6616 | | - int (*fn)(struct net_device *dev, |
|---|
| 6617 | | - void *data), |
|---|
| 6618 | | - void *data) |
|---|
| 7156 | +static int __netdev_walk_all_upper_dev(struct net_device *dev, |
|---|
| 7157 | + int (*fn)(struct net_device *dev, |
|---|
| 7158 | + struct netdev_nested_priv *priv), |
|---|
| 7159 | + struct netdev_nested_priv *priv) |
|---|
| 6619 | 7160 | { |
|---|
| 6620 | 7161 | struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
|---|
| 6621 | 7162 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
|---|
| 6622 | 7163 | int ret, cur = 0; |
|---|
| 7164 | + bool ignore; |
|---|
| 6623 | 7165 | |
|---|
| 6624 | 7166 | now = dev; |
|---|
| 6625 | 7167 | iter = &dev->adj_list.upper; |
|---|
| 6626 | 7168 | |
|---|
| 6627 | 7169 | while (1) { |
|---|
| 6628 | 7170 | if (now != dev) { |
|---|
| 6629 | | - ret = fn(now, data); |
|---|
| 7171 | + ret = fn(now, priv); |
|---|
| 6630 | 7172 | if (ret) |
|---|
| 6631 | 7173 | return ret; |
|---|
| 6632 | 7174 | } |
|---|
| 6633 | 7175 | |
|---|
| 6634 | 7176 | next = NULL; |
|---|
| 6635 | 7177 | while (1) { |
|---|
| 6636 | | - udev = netdev_next_upper_dev(now, &iter); |
|---|
| 7178 | + udev = __netdev_next_upper_dev(now, &iter, &ignore); |
|---|
| 6637 | 7179 | if (!udev) |
|---|
| 6638 | 7180 | break; |
|---|
| 7181 | + if (ignore) |
|---|
| 7182 | + continue; |
|---|
| 6639 | 7183 | |
|---|
| 6640 | 7184 | next = udev; |
|---|
| 6641 | 7185 | niter = &udev->adj_list.upper; |
|---|
| .. | .. |
|---|
| 6660 | 7204 | |
|---|
| 6661 | 7205 | int netdev_walk_all_upper_dev_rcu(struct net_device *dev, |
|---|
| 6662 | 7206 | int (*fn)(struct net_device *dev, |
|---|
| 6663 | | - void *data), |
|---|
| 6664 | | - void *data) |
|---|
| 7207 | + struct netdev_nested_priv *priv), |
|---|
| 7208 | + struct netdev_nested_priv *priv) |
|---|
| 6665 | 7209 | { |
|---|
| 6666 | 7210 | struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
|---|
| 6667 | 7211 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
|---|
| .. | .. |
|---|
| 6672 | 7216 | |
|---|
| 6673 | 7217 | while (1) { |
|---|
| 6674 | 7218 | if (now != dev) { |
|---|
| 6675 | | - ret = fn(now, data); |
|---|
| 7219 | + ret = fn(now, priv); |
|---|
| 6676 | 7220 | if (ret) |
|---|
| 6677 | 7221 | return ret; |
|---|
| 6678 | 7222 | } |
|---|
| .. | .. |
|---|
| 6704 | 7248 | return 0; |
|---|
| 6705 | 7249 | } |
|---|
| 6706 | 7250 | EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); |
|---|
| 7251 | + |
|---|
| 7252 | +static bool __netdev_has_upper_dev(struct net_device *dev, |
|---|
| 7253 | + struct net_device *upper_dev) |
|---|
| 7254 | +{ |
|---|
| 7255 | + struct netdev_nested_priv priv = { |
|---|
| 7256 | + .flags = 0, |
|---|
| 7257 | + .data = (void *)upper_dev, |
|---|
| 7258 | + }; |
|---|
| 7259 | + |
|---|
| 7260 | + ASSERT_RTNL(); |
|---|
| 7261 | + |
|---|
| 7262 | + return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, |
|---|
| 7263 | + &priv); |
|---|
| 7264 | +} |
|---|
| 6707 | 7265 | |
|---|
| 6708 | 7266 | /** |
|---|
| 6709 | 7267 | * netdev_lower_get_next_private - Get the next ->private from the |
|---|
| .. | .. |
|---|
| 6801 | 7359 | return lower->dev; |
|---|
| 6802 | 7360 | } |
|---|
| 6803 | 7361 | |
|---|
| 7362 | +static struct net_device *__netdev_next_lower_dev(struct net_device *dev, |
|---|
| 7363 | + struct list_head **iter, |
|---|
| 7364 | + bool *ignore) |
|---|
| 7365 | +{ |
|---|
| 7366 | + struct netdev_adjacent *lower; |
|---|
| 7367 | + |
|---|
| 7368 | + lower = list_entry((*iter)->next, struct netdev_adjacent, list); |
|---|
| 7369 | + |
|---|
| 7370 | + if (&lower->list == &dev->adj_list.lower) |
|---|
| 7371 | + return NULL; |
|---|
| 7372 | + |
|---|
| 7373 | + *iter = &lower->list; |
|---|
| 7374 | + *ignore = lower->ignore; |
|---|
| 7375 | + |
|---|
| 7376 | + return lower->dev; |
|---|
| 7377 | +} |
|---|
| 7378 | + |
|---|
| 6804 | 7379 | int netdev_walk_all_lower_dev(struct net_device *dev, |
|---|
| 6805 | 7380 | int (*fn)(struct net_device *dev, |
|---|
| 6806 | | - void *data), |
|---|
| 6807 | | - void *data) |
|---|
| 7381 | + struct netdev_nested_priv *priv), |
|---|
| 7382 | + struct netdev_nested_priv *priv) |
|---|
| 6808 | 7383 | { |
|---|
| 6809 | 7384 | struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
|---|
| 6810 | 7385 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
|---|
| .. | .. |
|---|
| 6815 | 7390 | |
|---|
| 6816 | 7391 | while (1) { |
|---|
| 6817 | 7392 | if (now != dev) { |
|---|
| 6818 | | - ret = fn(now, data); |
|---|
| 7393 | + ret = fn(now, priv); |
|---|
| 6819 | 7394 | if (ret) |
|---|
| 6820 | 7395 | return ret; |
|---|
| 6821 | 7396 | } |
|---|
| .. | .. |
|---|
| 6848 | 7423 | } |
|---|
| 6849 | 7424 | EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); |
|---|
| 6850 | 7425 | |
|---|
| 6851 | | -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, |
|---|
| 6852 | | - struct list_head **iter) |
|---|
| 7426 | +static int __netdev_walk_all_lower_dev(struct net_device *dev, |
|---|
| 7427 | + int (*fn)(struct net_device *dev, |
|---|
| 7428 | + struct netdev_nested_priv *priv), |
|---|
| 7429 | + struct netdev_nested_priv *priv) |
|---|
| 7430 | +{ |
|---|
| 7431 | + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
|---|
| 7432 | + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
|---|
| 7433 | + int ret, cur = 0; |
|---|
| 7434 | + bool ignore; |
|---|
| 7435 | + |
|---|
| 7436 | + now = dev; |
|---|
| 7437 | + iter = &dev->adj_list.lower; |
|---|
| 7438 | + |
|---|
| 7439 | + while (1) { |
|---|
| 7440 | + if (now != dev) { |
|---|
| 7441 | + ret = fn(now, priv); |
|---|
| 7442 | + if (ret) |
|---|
| 7443 | + return ret; |
|---|
| 7444 | + } |
|---|
| 7445 | + |
|---|
| 7446 | + next = NULL; |
|---|
| 7447 | + while (1) { |
|---|
| 7448 | + ldev = __netdev_next_lower_dev(now, &iter, &ignore); |
|---|
| 7449 | + if (!ldev) |
|---|
| 7450 | + break; |
|---|
| 7451 | + if (ignore) |
|---|
| 7452 | + continue; |
|---|
| 7453 | + |
|---|
| 7454 | + next = ldev; |
|---|
| 7455 | + niter = &ldev->adj_list.lower; |
|---|
| 7456 | + dev_stack[cur] = now; |
|---|
| 7457 | + iter_stack[cur++] = iter; |
|---|
| 7458 | + break; |
|---|
| 7459 | + } |
|---|
| 7460 | + |
|---|
| 7461 | + if (!next) { |
|---|
| 7462 | + if (!cur) |
|---|
| 7463 | + return 0; |
|---|
| 7464 | + next = dev_stack[--cur]; |
|---|
| 7465 | + niter = iter_stack[cur]; |
|---|
| 7466 | + } |
|---|
| 7467 | + |
|---|
| 7468 | + now = next; |
|---|
| 7469 | + iter = niter; |
|---|
| 7470 | + } |
|---|
| 7471 | + |
|---|
| 7472 | + return 0; |
|---|
| 7473 | +} |
|---|
| 7474 | + |
|---|
| 7475 | +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, |
|---|
| 7476 | + struct list_head **iter) |
|---|
| 6853 | 7477 | { |
|---|
| 6854 | 7478 | struct netdev_adjacent *lower; |
|---|
| 6855 | 7479 | |
|---|
| .. | .. |
|---|
| 6861 | 7485 | |
|---|
| 6862 | 7486 | return lower->dev; |
|---|
| 6863 | 7487 | } |
|---|
| 7488 | +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); |
|---|
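With `netdev_next_lower_dev_rcu()` now non-static and exported (mainline keeps this iterator static; the export here appears to serve out-of-tree callers), modules can walk a device's direct lower links under RCU. A minimal sketch, assuming the caller already holds the RCU read lock; the helper name is illustrative:

```c
/* Hypothetical consumer: count the direct lower devices of @dev.
 * The iterator starts at the list head and returns NULL at the end.
 * Caller must hold rcu_read_lock().
 */
static unsigned int count_lower_devs_rcu(struct net_device *dev)
{
	struct list_head *iter = &dev->adj_list.lower;
	struct net_device *ldev;
	unsigned int n = 0;

	while ((ldev = netdev_next_lower_dev_rcu(dev, &iter)))
		n++;

	return n;
}
```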
| 6864 | 7489 | |
|---|
| 6865 | 7490 | static u8 __netdev_upper_depth(struct net_device *dev) |
|---|
| 6866 | 7491 | { |
|---|
| 6867 | 7492 | struct net_device *udev; |
|---|
| 6868 | 7493 | struct list_head *iter; |
|---|
| 6869 | 7494 | u8 max_depth = 0; |
|---|
| 7495 | + bool ignore; |
|---|
| 6870 | 7496 | |
|---|
| 6871 | 7497 | for (iter = &dev->adj_list.upper, |
|---|
| 6872 | | - udev = netdev_next_upper_dev(dev, &iter); |
|---|
| 7498 | + udev = __netdev_next_upper_dev(dev, &iter, &ignore); |
|---|
| 6873 | 7499 | udev; |
|---|
| 6874 | | - udev = netdev_next_upper_dev(dev, &iter)) { |
|---|
| 7500 | + udev = __netdev_next_upper_dev(dev, &iter, &ignore)) { |
|---|
| 7501 | + if (ignore) |
|---|
| 7502 | + continue; |
|---|
| 6875 | 7503 | if (max_depth < udev->upper_level) |
|---|
| 6876 | 7504 | max_depth = udev->upper_level; |
|---|
| 6877 | 7505 | } |
|---|
| .. | .. |
|---|
| 6884 | 7512 | struct net_device *ldev; |
|---|
| 6885 | 7513 | struct list_head *iter; |
|---|
| 6886 | 7514 | u8 max_depth = 0; |
|---|
| 7515 | + bool ignore; |
|---|
| 6887 | 7516 | |
|---|
| 6888 | 7517 | for (iter = &dev->adj_list.lower, |
|---|
| 6889 | | - ldev = netdev_next_lower_dev(dev, &iter); |
|---|
| 7518 | + ldev = __netdev_next_lower_dev(dev, &iter, &ignore); |
|---|
| 6890 | 7519 | ldev; |
|---|
| 6891 | | - ldev = netdev_next_lower_dev(dev, &iter)) { |
|---|
| 7520 | + ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) { |
|---|
| 7521 | + if (ignore) |
|---|
| 7522 | + continue; |
|---|
| 6892 | 7523 | if (max_depth < ldev->lower_level) |
|---|
| 6893 | 7524 | max_depth = ldev->lower_level; |
|---|
| 6894 | 7525 | } |
|---|
| .. | .. |
|---|
| 6896 | 7527 | return max_depth; |
|---|
| 6897 | 7528 | } |
|---|
| 6898 | 7529 | |
|---|
| 6899 | | -static int __netdev_update_upper_level(struct net_device *dev, void *data) |
|---|
| 7530 | +static int __netdev_update_upper_level(struct net_device *dev, |
|---|
| 7531 | + struct netdev_nested_priv *__unused) |
|---|
| 6900 | 7532 | { |
|---|
| 6901 | 7533 | dev->upper_level = __netdev_upper_depth(dev) + 1; |
|---|
| 6902 | 7534 | return 0; |
|---|
| 6903 | 7535 | } |
|---|
| 6904 | 7536 | |
|---|
| 6905 | | -static int __netdev_update_lower_level(struct net_device *dev, void *data) |
|---|
| 7537 | +static int __netdev_update_lower_level(struct net_device *dev, |
|---|
| 7538 | + struct netdev_nested_priv *priv) |
|---|
| 6906 | 7539 | { |
|---|
| 6907 | 7540 | dev->lower_level = __netdev_lower_depth(dev) + 1; |
|---|
| 7541 | + |
|---|
| 7542 | +#ifdef CONFIG_LOCKDEP |
|---|
| 7543 | + if (!priv) |
|---|
| 7544 | + return 0; |
|---|
| 7545 | + |
|---|
| 7546 | + if (priv->flags & NESTED_SYNC_IMM) |
|---|
| 7547 | + dev->nested_level = dev->lower_level - 1; |
|---|
| 7548 | + if (priv->flags & NESTED_SYNC_TODO) |
|---|
| 7549 | + net_unlink_todo(dev); |
|---|
| 7550 | +#endif |
|---|
| 6908 | 7551 | return 0; |
|---|
| 6909 | 7552 | } |
|---|
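The `netdev_nested_priv` container and the `NESTED_SYNC_*` flags used above are declared in include/linux/netdevice.h rather than in this file. For reference, their shape in the corresponding mainline change:

```c
/* From include/linux/netdevice.h (not part of this hunk). */
enum {
	NESTED_SYNC_IMM_BIT,	/* sync nested_level immediately */
	NESTED_SYNC_TODO_BIT,	/* defer the sync via the unlink todo list */
};

#define __NESTED_SYNC_BIT(bit)	((u32)1 << (bit))
#define __NESTED_SYNC(name)	__NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT)

#define NESTED_SYNC_IMM		__NESTED_SYNC(IMM)
#define NESTED_SYNC_TODO	__NESTED_SYNC(TODO)

struct netdev_nested_priv {
	unsigned char flags;
	void *data;
};
```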
| 6910 | 7553 | |
|---|
| 6911 | 7554 | int netdev_walk_all_lower_dev_rcu(struct net_device *dev, |
|---|
| 6912 | 7555 | int (*fn)(struct net_device *dev, |
|---|
| 6913 | | - void *data), |
|---|
| 6914 | | - void *data) |
|---|
| 7556 | + struct netdev_nested_priv *priv), |
|---|
| 7557 | + struct netdev_nested_priv *priv) |
|---|
| 6915 | 7558 | { |
|---|
| 6916 | 7559 | struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; |
|---|
| 6917 | 7560 | struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; |
|---|
| .. | .. |
|---|
| 6922 | 7565 | |
|---|
| 6923 | 7566 | while (1) { |
|---|
| 6924 | 7567 | if (now != dev) { |
|---|
| 6925 | | - ret = fn(now, data); |
|---|
| 7568 | + ret = fn(now, priv); |
|---|
| 6926 | 7569 | if (ret) |
|---|
| 6927 | 7570 | return ret; |
|---|
| 6928 | 7571 | } |
|---|
| .. | .. |
|---|
| 7052 | 7695 | adj->master = master; |
|---|
| 7053 | 7696 | adj->ref_nr = 1; |
|---|
| 7054 | 7697 | adj->private = private; |
|---|
| 7698 | + adj->ignore = false; |
|---|
| 7055 | 7699 | dev_hold(adj_dev); |
|---|
| 7056 | 7700 | |
|---|
| 7057 | 7701 | pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", |
|---|
| .. | .. |
|---|
| 7181 | 7825 | static int __netdev_upper_dev_link(struct net_device *dev, |
|---|
| 7182 | 7826 | struct net_device *upper_dev, bool master, |
|---|
| 7183 | 7827 | void *upper_priv, void *upper_info, |
|---|
| 7828 | + struct netdev_nested_priv *priv, |
|---|
| 7184 | 7829 | struct netlink_ext_ack *extack) |
|---|
| 7185 | 7830 | { |
|---|
| 7186 | 7831 | struct netdev_notifier_changeupper_info changeupper_info = { |
|---|
| .. | .. |
|---|
| 7202 | 7847 | return -EBUSY; |
|---|
| 7203 | 7848 | |
|---|
| 7204 | 7849 | /* To prevent loops, check that dev is not an upper device of upper_dev. */
|---|
| 7205 | | - if (netdev_has_upper_dev(upper_dev, dev)) |
|---|
| 7850 | + if (__netdev_has_upper_dev(upper_dev, dev)) |
|---|
| 7206 | 7851 | return -EBUSY; |
|---|
| 7207 | 7852 | |
|---|
| 7208 | 7853 | if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) |
|---|
| 7209 | 7854 | return -EMLINK; |
|---|
| 7210 | 7855 | |
|---|
| 7211 | 7856 | if (!master) { |
|---|
| 7212 | | - if (netdev_has_upper_dev(dev, upper_dev)) |
|---|
| 7857 | + if (__netdev_has_upper_dev(dev, upper_dev)) |
|---|
| 7213 | 7858 | return -EEXIST; |
|---|
| 7214 | 7859 | } else { |
|---|
| 7215 | | - master_dev = netdev_master_upper_dev_get(dev); |
|---|
| 7860 | + master_dev = __netdev_master_upper_dev_get(dev); |
|---|
| 7216 | 7861 | if (master_dev) |
|---|
| 7217 | 7862 | return master_dev == upper_dev ? -EEXIST : -EBUSY; |
|---|
| 7218 | 7863 | } |
|---|
| .. | .. |
|---|
| 7235 | 7880 | goto rollback; |
|---|
| 7236 | 7881 | |
|---|
| 7237 | 7882 | __netdev_update_upper_level(dev, NULL); |
|---|
| 7238 | | - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
|---|
| 7883 | + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
|---|
| 7239 | 7884 | |
|---|
| 7240 | | - __netdev_update_lower_level(upper_dev, NULL); |
|---|
| 7241 | | - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); |
|---|
| 7885 | + __netdev_update_lower_level(upper_dev, priv); |
|---|
| 7886 | + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, |
|---|
| 7887 | + priv); |
|---|
| 7242 | 7888 | |
|---|
| 7243 | 7889 | return 0; |
|---|
| 7244 | 7890 | |
|---|
| .. | .. |
|---|
| 7263 | 7909 | struct net_device *upper_dev, |
|---|
| 7264 | 7910 | struct netlink_ext_ack *extack) |
|---|
| 7265 | 7911 | { |
|---|
| 7912 | + struct netdev_nested_priv priv = { |
|---|
| 7913 | + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, |
|---|
| 7914 | + .data = NULL, |
|---|
| 7915 | + }; |
|---|
| 7916 | + |
|---|
| 7266 | 7917 | return __netdev_upper_dev_link(dev, upper_dev, false, |
|---|
| 7267 | | - NULL, NULL, extack); |
|---|
| 7918 | + NULL, NULL, &priv, extack); |
|---|
| 7268 | 7919 | } |
|---|
| 7269 | 7920 | EXPORT_SYMBOL(netdev_upper_dev_link); |
|---|
| 7270 | 7921 | |
|---|
| .. | .. |
|---|
| 7287 | 7938 | void *upper_priv, void *upper_info, |
|---|
| 7288 | 7939 | struct netlink_ext_ack *extack) |
|---|
| 7289 | 7940 | { |
|---|
| 7941 | + struct netdev_nested_priv priv = { |
|---|
| 7942 | + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, |
|---|
| 7943 | + .data = NULL, |
|---|
| 7944 | + }; |
|---|
| 7945 | + |
|---|
| 7290 | 7946 | return __netdev_upper_dev_link(dev, upper_dev, true, |
|---|
| 7291 | | - upper_priv, upper_info, extack); |
|---|
| 7947 | + upper_priv, upper_info, &priv, extack); |
|---|
| 7292 | 7948 | } |
|---|
| 7293 | 7949 | EXPORT_SYMBOL(netdev_master_upper_dev_link); |
|---|
| 7294 | 7950 | |
|---|
| 7295 | | -/** |
|---|
| 7296 | | - * netdev_upper_dev_unlink - Removes a link to upper device |
|---|
| 7297 | | - * @dev: device |
|---|
| 7298 | | - * @upper_dev: new upper device |
|---|
| 7299 | | - * |
|---|
| 7300 | | - * Removes a link to device which is upper to this one. The caller must hold |
|---|
| 7301 | | - * the RTNL lock. |
|---|
| 7302 | | - */ |
|---|
| 7303 | | -void netdev_upper_dev_unlink(struct net_device *dev, |
|---|
| 7304 | | - struct net_device *upper_dev) |
|---|
| 7951 | +static void __netdev_upper_dev_unlink(struct net_device *dev, |
|---|
| 7952 | + struct net_device *upper_dev, |
|---|
| 7953 | + struct netdev_nested_priv *priv) |
|---|
| 7305 | 7954 | { |
|---|
| 7306 | 7955 | struct netdev_notifier_changeupper_info changeupper_info = { |
|---|
| 7307 | 7956 | .info = { |
|---|
| .. | .. |
|---|
| 7324 | 7973 | &changeupper_info.info); |
|---|
| 7325 | 7974 | |
|---|
| 7326 | 7975 | __netdev_update_upper_level(dev, NULL); |
|---|
| 7327 | | - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
|---|
| 7976 | + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); |
|---|
| 7328 | 7977 | |
|---|
| 7329 | | - __netdev_update_lower_level(upper_dev, NULL); |
|---|
| 7330 | | - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); |
|---|
| 7978 | + __netdev_update_lower_level(upper_dev, priv); |
|---|
| 7979 | + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, |
|---|
| 7980 | + priv); |
|---|
| 7981 | +} |
|---|
| 7982 | + |
|---|
| 7983 | +/** |
|---|
| 7984 | + * netdev_upper_dev_unlink - Removes a link to upper device |
|---|
| 7985 | + * @dev: device |
|---|
| 7986 | + * @upper_dev: new upper device |
|---|
| 7987 | + * |
|---|
| 7988 | + * Removes a link to device which is upper to this one. The caller must hold |
|---|
| 7989 | + * the RTNL lock. |
|---|
| 7990 | + */ |
|---|
| 7991 | +void netdev_upper_dev_unlink(struct net_device *dev, |
|---|
| 7992 | + struct net_device *upper_dev) |
|---|
| 7993 | +{ |
|---|
| 7994 | + struct netdev_nested_priv priv = { |
|---|
| 7995 | + .flags = NESTED_SYNC_TODO, |
|---|
| 7996 | + .data = NULL, |
|---|
| 7997 | + }; |
|---|
| 7998 | + |
|---|
| 7999 | + __netdev_upper_dev_unlink(dev, upper_dev, &priv); |
|---|
| 7331 | 8000 | } |
|---|
| 7332 | 8001 | EXPORT_SYMBOL(netdev_upper_dev_unlink); |
|---|
| 8002 | + |
|---|
| 8003 | +static void __netdev_adjacent_dev_set(struct net_device *upper_dev, |
|---|
| 8004 | + struct net_device *lower_dev, |
|---|
| 8005 | + bool val) |
|---|
| 8006 | +{ |
|---|
| 8007 | + struct netdev_adjacent *adj; |
|---|
| 8008 | + |
|---|
| 8009 | + adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower); |
|---|
| 8010 | + if (adj) |
|---|
| 8011 | + adj->ignore = val; |
|---|
| 8012 | + |
|---|
| 8013 | + adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper); |
|---|
| 8014 | + if (adj) |
|---|
| 8015 | + adj->ignore = val; |
|---|
| 8016 | +} |
|---|
| 8017 | + |
|---|
| 8018 | +static void netdev_adjacent_dev_disable(struct net_device *upper_dev, |
|---|
| 8019 | + struct net_device *lower_dev) |
|---|
| 8020 | +{ |
|---|
| 8021 | + __netdev_adjacent_dev_set(upper_dev, lower_dev, true); |
|---|
| 8022 | +} |
|---|
| 8023 | + |
|---|
| 8024 | +static void netdev_adjacent_dev_enable(struct net_device *upper_dev, |
|---|
| 8025 | + struct net_device *lower_dev) |
|---|
| 8026 | +{ |
|---|
| 8027 | + __netdev_adjacent_dev_set(upper_dev, lower_dev, false); |
|---|
| 8028 | +} |
|---|
| 8029 | + |
|---|
| 8030 | +int netdev_adjacent_change_prepare(struct net_device *old_dev, |
|---|
| 8031 | + struct net_device *new_dev, |
|---|
| 8032 | + struct net_device *dev, |
|---|
| 8033 | + struct netlink_ext_ack *extack) |
|---|
| 8034 | +{ |
|---|
| 8035 | + struct netdev_nested_priv priv = { |
|---|
| 8036 | + .flags = 0, |
|---|
| 8037 | + .data = NULL, |
|---|
| 8038 | + }; |
|---|
| 8039 | + int err; |
|---|
| 8040 | + |
|---|
| 8041 | + if (!new_dev) |
|---|
| 8042 | + return 0; |
|---|
| 8043 | + |
|---|
| 8044 | + if (old_dev && new_dev != old_dev) |
|---|
| 8045 | + netdev_adjacent_dev_disable(dev, old_dev); |
|---|
| 8046 | + err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, |
|---|
| 8047 | + extack); |
|---|
| 8048 | + if (err) { |
|---|
| 8049 | + if (old_dev && new_dev != old_dev) |
|---|
| 8050 | + netdev_adjacent_dev_enable(dev, old_dev); |
|---|
| 8051 | + return err; |
|---|
| 8052 | + } |
|---|
| 8053 | + |
|---|
| 8054 | + return 0; |
|---|
| 8055 | +} |
|---|
| 8056 | +EXPORT_SYMBOL(netdev_adjacent_change_prepare); |
|---|
| 8057 | + |
|---|
| 8058 | +void netdev_adjacent_change_commit(struct net_device *old_dev, |
|---|
| 8059 | + struct net_device *new_dev, |
|---|
| 8060 | + struct net_device *dev) |
|---|
| 8061 | +{ |
|---|
| 8062 | + struct netdev_nested_priv priv = { |
|---|
| 8063 | + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, |
|---|
| 8064 | + .data = NULL, |
|---|
| 8065 | + }; |
|---|
| 8066 | + |
|---|
| 8067 | + if (!new_dev || !old_dev) |
|---|
| 8068 | + return; |
|---|
| 8069 | + |
|---|
| 8070 | + if (new_dev == old_dev) |
|---|
| 8071 | + return; |
|---|
| 8072 | + |
|---|
| 8073 | + netdev_adjacent_dev_enable(dev, old_dev); |
|---|
| 8074 | + __netdev_upper_dev_unlink(old_dev, dev, &priv); |
|---|
| 8075 | +} |
|---|
| 8076 | +EXPORT_SYMBOL(netdev_adjacent_change_commit); |
|---|
| 8077 | + |
|---|
| 8078 | +void netdev_adjacent_change_abort(struct net_device *old_dev, |
|---|
| 8079 | + struct net_device *new_dev, |
|---|
| 8080 | + struct net_device *dev) |
|---|
| 8081 | +{ |
|---|
| 8082 | + struct netdev_nested_priv priv = { |
|---|
| 8083 | + .flags = 0, |
|---|
| 8084 | + .data = NULL, |
|---|
| 8085 | + }; |
|---|
| 8086 | + |
|---|
| 8087 | + if (!new_dev) |
|---|
| 8088 | + return; |
|---|
| 8089 | + |
|---|
| 8090 | + if (old_dev && new_dev != old_dev) |
|---|
| 8091 | + netdev_adjacent_dev_enable(dev, old_dev); |
|---|
| 8092 | + |
|---|
| 8093 | + __netdev_upper_dev_unlink(new_dev, dev, &priv); |
|---|
| 8094 | +} |
|---|
| 8095 | +EXPORT_SYMBOL(netdev_adjacent_change_abort); |
|---|
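The prepare/commit/abort trio gives masters a two-phase way to swap an adjacency link: `prepare` marks the old link ignored (so loop checks skip it) and links the new device, `commit` finalizes by unlinking the old device, and `abort` rolls everything back. A hedged sketch of the intended call flow; the function name and `do_hw_reconfig()` are illustrative, not from this patch:

```c
/* Sketch only, loosely modeled on how a bonding/team-style master
 * would change its active slave under RTNL.
 */
static int change_active(struct net_device *master,
			 struct net_device *old_active,
			 struct net_device *new_active,
			 struct netlink_ext_ack *extack)
{
	int err;

	err = netdev_adjacent_change_prepare(old_active, new_active,
					     master, extack);
	if (err)
		return err;

	err = do_hw_reconfig(new_active);	/* hypothetical driver work */
	if (err) {
		netdev_adjacent_change_abort(old_active, new_active, master);
		return err;
	}

	netdev_adjacent_change_commit(old_active, new_active, master);
	return 0;
}
```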
| 7333 | 8096 | |
|---|
| 7334 | 8097 | /** |
|---|
| 7335 | 8098 | * netdev_bonding_info_change - Dispatch event about slave change |
|---|
| .. | .. |
|---|
| 7352 | 8115 | &info.info); |
|---|
| 7353 | 8116 | } |
|---|
| 7354 | 8117 | EXPORT_SYMBOL(netdev_bonding_info_change); |
|---|
| 8118 | + |
|---|
| 8119 | +/** |
|---|
| 8120 | + * netdev_get_xmit_slave - Get the xmit slave of master device |
|---|
| 8121 | + * @dev: device |
|---|
| 8122 | + * @skb: The packet |
|---|
| 8123 | + * @all_slaves: assume all the slaves are active |
|---|
| 8124 | + * |
|---|
| 8125 | + * The reference counters are not incremented, so the caller must be
|---|
| 8126 | + * careful with locks. The caller must hold the RCU read lock.
|---|
| 8127 | + * %NULL is returned if no slave is found. |
|---|
| 8128 | + */ |
|---|
| 8129 | + |
|---|
| 8130 | +struct net_device *netdev_get_xmit_slave(struct net_device *dev, |
|---|
| 8131 | + struct sk_buff *skb, |
|---|
| 8132 | + bool all_slaves) |
|---|
| 8133 | +{ |
|---|
| 8134 | + const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 8135 | + |
|---|
| 8136 | + if (!ops->ndo_get_xmit_slave) |
|---|
| 8137 | + return NULL; |
|---|
| 8138 | + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); |
|---|
| 8139 | +} |
|---|
| 8140 | +EXPORT_SYMBOL(netdev_get_xmit_slave); |
|---|
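Since no reference is taken on the returned slave, it is only safe to use inside the same RCU read-side section. A minimal hypothetical caller:

```c
/* Illustrative only: peek at the slave that would carry @skb on a
 * bond/team master. The result must not be used after
 * rcu_read_unlock() without taking a reference.
 */
static void log_xmit_slave(struct net_device *master, struct sk_buff *skb)
{
	struct net_device *slave;

	rcu_read_lock();
	slave = netdev_get_xmit_slave(master, skb, false);
	if (slave)
		netdev_dbg(master, "skb would egress via %s\n", slave->name);
	rcu_read_unlock();
}
```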
| 7355 | 8141 | |
|---|
| 7356 | 8142 | static void netdev_adjacent_add_links(struct net_device *dev) |
|---|
| 7357 | 8143 | { |
|---|
| .. | .. |
|---|
| 7443 | 8229 | } |
|---|
| 7444 | 8230 | EXPORT_SYMBOL(netdev_lower_dev_get_private); |
|---|
| 7445 | 8231 | |
|---|
| 7446 | | - |
|---|
| 7447 | | -int dev_get_nest_level(struct net_device *dev) |
|---|
| 7448 | | -{ |
|---|
| 7449 | | - struct net_device *lower = NULL; |
|---|
| 7450 | | - struct list_head *iter; |
|---|
| 7451 | | - int max_nest = -1; |
|---|
| 7452 | | - int nest; |
|---|
| 7453 | | - |
|---|
| 7454 | | - ASSERT_RTNL(); |
|---|
| 7455 | | - |
|---|
| 7456 | | - netdev_for_each_lower_dev(dev, lower, iter) { |
|---|
| 7457 | | - nest = dev_get_nest_level(lower); |
|---|
| 7458 | | - if (max_nest < nest) |
|---|
| 7459 | | - max_nest = nest; |
|---|
| 7460 | | - } |
|---|
| 7461 | | - |
|---|
| 7462 | | - return max_nest + 1; |
|---|
| 7463 | | -} |
|---|
| 7464 | | -EXPORT_SYMBOL(dev_get_nest_level); |
|---|
| 7465 | 8232 | |
|---|
| 7466 | 8233 | /** |
|---|
| 7467 | 8234 | * netdev_lower_change - Dispatch event about lower device state change |
|---|
| .. | .. |
|---|
| 7689 | 8456 | } |
|---|
| 7690 | 8457 | EXPORT_SYMBOL(dev_get_flags); |
|---|
| 7691 | 8458 | |
|---|
| 7692 | | -int __dev_change_flags(struct net_device *dev, unsigned int flags) |
|---|
| 8459 | +int __dev_change_flags(struct net_device *dev, unsigned int flags, |
|---|
| 8460 | + struct netlink_ext_ack *extack) |
|---|
| 7693 | 8461 | { |
|---|
| 7694 | 8462 | unsigned int old_flags = dev->flags; |
|---|
| 7695 | 8463 | int ret; |
|---|
| .. | .. |
|---|
| 7726 | 8494 | if (old_flags & IFF_UP) |
|---|
| 7727 | 8495 | __dev_close(dev); |
|---|
| 7728 | 8496 | else |
|---|
| 7729 | | - ret = __dev_open(dev); |
|---|
| 8497 | + ret = __dev_open(dev, extack); |
|---|
| 7730 | 8498 | } |
|---|
| 7731 | 8499 | |
|---|
| 7732 | 8500 | if ((flags ^ dev->gflags) & IFF_PROMISC) { |
|---|
| .. | .. |
|---|
| 7786 | 8554 | * dev_change_flags - change device settings |
|---|
| 7787 | 8555 | * @dev: device |
|---|
| 7788 | 8556 | * @flags: device state flags |
|---|
| 8557 | + * @extack: netlink extended ack |
|---|
| 7789 | 8558 | * |
|---|
| 7790 | 8559 | * Change device settings based on the supplied state flags. The flags are
|---|
| 7791 | 8560 | * in the userspace exported format. |
|---|
| 7792 | 8561 | */ |
|---|
| 7793 | | -int dev_change_flags(struct net_device *dev, unsigned int flags) |
|---|
| 8562 | +int dev_change_flags(struct net_device *dev, unsigned int flags, |
|---|
| 8563 | + struct netlink_ext_ack *extack) |
|---|
| 7794 | 8564 | { |
|---|
| 7795 | 8565 | int ret; |
|---|
| 7796 | 8566 | unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; |
|---|
| 7797 | 8567 | |
|---|
| 7798 | | - ret = __dev_change_flags(dev, flags); |
|---|
| 8568 | + ret = __dev_change_flags(dev, flags, extack); |
|---|
| 7799 | 8569 | if (ret < 0) |
|---|
| 7800 | 8570 | return ret; |
|---|
| 7801 | 8571 | |
|---|
| .. | .. |
|---|
| 7938 | 8708 | EXPORT_SYMBOL(dev_set_group); |
|---|
| 7939 | 8709 | |
|---|
| 7940 | 8710 | /** |
|---|
| 8711 | + * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR. |
|---|
| 8712 | + * @dev: device |
|---|
| 8713 | + * @addr: new address |
|---|
| 8714 | + * @extack: netlink extended ack |
|---|
| 8715 | + */ |
|---|
| 8716 | +int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, |
|---|
| 8717 | + struct netlink_ext_ack *extack) |
|---|
| 8718 | +{ |
|---|
| 8719 | + struct netdev_notifier_pre_changeaddr_info info = { |
|---|
| 8720 | + .info.dev = dev, |
|---|
| 8721 | + .info.extack = extack, |
|---|
| 8722 | + .dev_addr = addr, |
|---|
| 8723 | + }; |
|---|
| 8724 | + int rc; |
|---|
| 8725 | + |
|---|
| 8726 | + rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info); |
|---|
| 8727 | + return notifier_to_errno(rc); |
|---|
| 8728 | +} |
|---|
| 8729 | +EXPORT_SYMBOL(dev_pre_changeaddr_notify); |
|---|
| 8730 | + |
|---|
| 8731 | +/** |
|---|
| 7941 | 8732 | * dev_set_mac_address - Change Media Access Control Address |
|---|
| 7942 | 8733 | * @dev: device |
|---|
| 7943 | 8734 | * @sa: new address |
|---|
| 8735 | + * @extack: netlink extended ack |
|---|
| 7944 | 8736 | * |
|---|
| 7945 | 8737 | * Change the hardware (MAC) address of the device |
|---|
| 7946 | 8738 | */ |
|---|
| 7947 | | -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) |
|---|
| 8739 | +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, |
|---|
| 8740 | + struct netlink_ext_ack *extack) |
|---|
| 7948 | 8741 | { |
|---|
| 7949 | 8742 | const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 7950 | 8743 | int err; |
|---|
| .. | .. |
|---|
| 7955 | 8748 | return -EINVAL; |
|---|
| 7956 | 8749 | if (!netif_device_present(dev)) |
|---|
| 7957 | 8750 | return -ENODEV; |
|---|
| 8751 | + err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack); |
|---|
| 8752 | + if (err) |
|---|
| 8753 | + return err; |
|---|
| 7958 | 8754 | err = ops->ndo_set_mac_address(dev, sa); |
|---|
| 7959 | 8755 | if (err) |
|---|
| 7960 | 8756 | return err; |
|---|
| .. | .. |
|---|
| 7964 | 8760 | return 0; |
|---|
| 7965 | 8761 | } |
|---|
| 7966 | 8762 | EXPORT_SYMBOL(dev_set_mac_address); |
|---|
| 8763 | + |
|---|
| 8764 | +static DECLARE_RWSEM(dev_addr_sem); |
|---|
| 8765 | + |
|---|
| 8766 | +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, |
|---|
| 8767 | + struct netlink_ext_ack *extack) |
|---|
| 8768 | +{ |
|---|
| 8769 | + int ret; |
|---|
| 8770 | + |
|---|
| 8771 | + down_write(&dev_addr_sem); |
|---|
| 8772 | + ret = dev_set_mac_address(dev, sa, extack); |
|---|
| 8773 | + up_write(&dev_addr_sem); |
|---|
| 8774 | + return ret; |
|---|
| 8775 | +} |
|---|
| 8776 | +EXPORT_SYMBOL(dev_set_mac_address_user); |
|---|
| 8777 | + |
|---|
| 8778 | +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) |
|---|
| 8779 | +{ |
|---|
| 8780 | + size_t size = sizeof(sa->sa_data); |
|---|
| 8781 | + struct net_device *dev; |
|---|
| 8782 | + int ret = 0; |
|---|
| 8783 | + |
|---|
| 8784 | + down_read(&dev_addr_sem); |
|---|
| 8785 | + rcu_read_lock(); |
|---|
| 8786 | + |
|---|
| 8787 | + dev = dev_get_by_name_rcu(net, dev_name); |
|---|
| 8788 | + if (!dev) { |
|---|
| 8789 | + ret = -ENODEV; |
|---|
| 8790 | + goto unlock; |
|---|
| 8791 | + } |
|---|
| 8792 | + if (!dev->addr_len) |
|---|
| 8793 | + memset(sa->sa_data, 0, size); |
|---|
| 8794 | + else |
|---|
| 8795 | + memcpy(sa->sa_data, dev->dev_addr, |
|---|
| 8796 | + min_t(size_t, size, dev->addr_len)); |
|---|
| 8797 | + sa->sa_family = dev->type; |
|---|
| 8798 | + |
|---|
| 8799 | +unlock: |
|---|
| 8800 | + rcu_read_unlock(); |
|---|
| 8801 | + up_read(&dev_addr_sem); |
|---|
| 8802 | + return ret; |
|---|
| 8803 | +} |
|---|
| 8804 | +EXPORT_SYMBOL(dev_get_mac_address); |
|---|
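`dev_addr_sem` serializes user-initiated address changes against `dev_get_mac_address()` readers, so userspace never observes a half-updated address. A hedged sketch of a caller; the function name is illustrative:

```c
/* Hypothetical: a user-triggered MAC change goes through the _user
 * variant so it takes dev_addr_sem for writing.
 */
static int example_set_mac(struct net_device *dev, const u8 *mac,
			   struct netlink_ext_ack *extack)
{
	struct sockaddr sa;

	/* sa_data is only 14 bytes; guard against longer hw addresses */
	if (dev->addr_len > sizeof(sa.sa_data))
		return -EINVAL;

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, mac, dev->addr_len);

	return dev_set_mac_address_user(dev, &sa, extack);
}
```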
| 7967 | 8805 | |
|---|
| 7968 | 8806 | /** |
|---|
| 7969 | 8807 | * dev_change_carrier - Change device carrier |
|---|
| .. | .. |
|---|
| 8014 | 8852 | char *name, size_t len) |
|---|
| 8015 | 8853 | { |
|---|
| 8016 | 8854 | const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 8855 | + int err; |
|---|
| 8017 | 8856 | |
|---|
| 8018 | | - if (!ops->ndo_get_phys_port_name) |
|---|
| 8019 | | - return -EOPNOTSUPP; |
|---|
| 8020 | | - return ops->ndo_get_phys_port_name(dev, name, len); |
|---|
| 8857 | + if (ops->ndo_get_phys_port_name) { |
|---|
| 8858 | + err = ops->ndo_get_phys_port_name(dev, name, len); |
|---|
| 8859 | + if (err != -EOPNOTSUPP) |
|---|
| 8860 | + return err; |
|---|
| 8861 | + } |
|---|
| 8862 | + return devlink_compat_phys_port_name_get(dev, name, len); |
|---|
| 8021 | 8863 | } |
|---|
| 8022 | 8864 | EXPORT_SYMBOL(dev_get_phys_port_name); |
|---|
| 8865 | + |
|---|
| 8866 | +/** |
|---|
| 8867 | + * dev_get_port_parent_id - Get the device's port parent identifier |
|---|
| 8868 | + * @dev: network device |
|---|
| 8869 | + * @ppid: pointer to storage for the port's parent identifier
|---|
| 8870 | + * @recurse: allow/disallow recursion to lower devices
|---|
| 8871 | + *
|---|
| 8872 | + * Get the device's port parent identifier
|---|
| 8873 | + */ |
|---|
| 8874 | +int dev_get_port_parent_id(struct net_device *dev, |
|---|
| 8875 | + struct netdev_phys_item_id *ppid, |
|---|
| 8876 | + bool recurse) |
|---|
| 8877 | +{ |
|---|
| 8878 | + const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 8879 | + struct netdev_phys_item_id first = { }; |
|---|
| 8880 | + struct net_device *lower_dev; |
|---|
| 8881 | + struct list_head *iter; |
|---|
| 8882 | + int err; |
|---|
| 8883 | + |
|---|
| 8884 | + if (ops->ndo_get_port_parent_id) { |
|---|
| 8885 | + err = ops->ndo_get_port_parent_id(dev, ppid); |
|---|
| 8886 | + if (err != -EOPNOTSUPP) |
|---|
| 8887 | + return err; |
|---|
| 8888 | + } |
|---|
| 8889 | + |
|---|
| 8890 | + err = devlink_compat_switch_id_get(dev, ppid); |
|---|
| 8891 | + if (!err || err != -EOPNOTSUPP) |
|---|
| 8892 | + return err; |
|---|
| 8893 | + |
|---|
| 8894 | + if (!recurse) |
|---|
| 8895 | + return -EOPNOTSUPP; |
|---|
| 8896 | + |
|---|
| 8897 | + netdev_for_each_lower_dev(dev, lower_dev, iter) { |
|---|
| 8898 | + err = dev_get_port_parent_id(lower_dev, ppid, recurse); |
|---|
| 8899 | + if (err) |
|---|
| 8900 | + break; |
|---|
| 8901 | + if (!first.id_len) |
|---|
| 8902 | + first = *ppid; |
|---|
| 8903 | + else if (memcmp(&first, ppid, sizeof(*ppid))) |
|---|
| 8904 | + return -EOPNOTSUPP; |
|---|
| 8905 | + } |
|---|
| 8906 | + |
|---|
| 8907 | + return err; |
|---|
| 8908 | +} |
|---|
| 8909 | +EXPORT_SYMBOL(dev_get_port_parent_id); |
|---|
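The recursion handles stacked topologies: a device with no `ndo_get_port_parent_id` and no devlink compat ID borrows the ID from its lower devices, but only if they all report the same one. An illustrative caller; the helper name is hypothetical and the caller is assumed to hold RTNL, since the lower-device walk is not RCU-protected:

```c
/* Illustrative: e.g. a LAG master whose ports all sit behind one
 * switch ASIC resolves to that ASIC's ID with recurse=true.
 */
static void show_switch_id(struct net_device *lag_dev)
{
	struct netdev_phys_item_id ppid;

	if (!dev_get_port_parent_id(lag_dev, &ppid, true))
		netdev_info(lag_dev, "behind switch %*phN\n",
			    (int)ppid.id_len, ppid.id);
}
```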
| 8910 | + |
|---|
| 8911 | +/** |
|---|
| 8912 | + * netdev_port_same_parent_id - Indicate if two network devices have |
|---|
| 8913 | + * the same port parent identifier |
|---|
| 8914 | + * @a: first network device |
|---|
| 8915 | + * @b: second network device |
|---|
| 8916 | + */ |
|---|
| 8917 | +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) |
|---|
| 8918 | +{ |
|---|
| 8919 | + struct netdev_phys_item_id a_id = { }; |
|---|
| 8920 | + struct netdev_phys_item_id b_id = { }; |
|---|
| 8921 | + |
|---|
| 8922 | + if (dev_get_port_parent_id(a, &a_id, true) || |
|---|
| 8923 | + dev_get_port_parent_id(b, &b_id, true)) |
|---|
| 8924 | + return false; |
|---|
| 8925 | + |
|---|
| 8926 | + return netdev_phys_item_id_same(&a_id, &b_id); |
|---|
| 8927 | +} |
|---|
| 8928 | +EXPORT_SYMBOL(netdev_port_same_parent_id); |
|---|
| 8023 | 8929 | |
|---|
| 8024 | 8930 | /** |
|---|
| 8025 | 8931 | * dev_change_proto_down - update protocol port state information |
|---|
| .. | .. |
|---|
| 8041 | 8947 | } |
|---|
| 8042 | 8948 | EXPORT_SYMBOL(dev_change_proto_down); |
|---|
| 8043 | 8949 | |
|---|
| 8044 | | -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, |
|---|
| 8045 | | - enum bpf_netdev_command cmd) |
|---|
| 8950 | +/** |
|---|
| 8951 | + * dev_change_proto_down_generic - generic implementation for |
|---|
| 8952 | + * ndo_change_proto_down that sets carrier according to |
|---|
| 8953 | + * proto_down. |
|---|
| 8954 | + * |
|---|
| 8955 | + * @dev: device |
|---|
| 8956 | + * @proto_down: new value |
|---|
| 8957 | + */ |
|---|
| 8958 | +int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) |
|---|
| 8046 | 8959 | { |
|---|
| 8047 | | - struct netdev_bpf xdp; |
|---|
| 8960 | + if (proto_down) |
|---|
| 8961 | + netif_carrier_off(dev); |
|---|
| 8962 | + else |
|---|
| 8963 | + netif_carrier_on(dev); |
|---|
| 8964 | + dev->proto_down = proto_down; |
|---|
| 8965 | + return 0; |
|---|
| 8966 | +} |
|---|
| 8967 | +EXPORT_SYMBOL(dev_change_proto_down_generic); |
|---|
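A driver with no extra bookkeeping can point its ndo straight at this helper; the ops name below is hypothetical:

```c
/* Carrier then simply tracks the proto_down state set from userspace. */
static const struct net_device_ops foo_netdev_ops = {
	.ndo_change_proto_down	= dev_change_proto_down_generic,
};
```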
| 8048 | 8968 | |
|---|
| 8049 | | - if (!bpf_op) |
|---|
| 8050 | | - return 0; |
|---|
| 8969 | +/** |
|---|
| 8970 | + * dev_change_proto_down_reason - update the proto_down reason bits
|---|
| 8971 | + * |
|---|
| 8972 | + * @dev: device |
|---|
| 8973 | + * @mask: mask of reason bits to update; 0 means replace the whole word
|---|
| 8974 | + * @value: new value for the bits selected by @mask
|---|
| 8975 | + */ |
|---|
| 8976 | +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, |
|---|
| 8977 | + u32 value) |
|---|
| 8978 | +{ |
|---|
| 8979 | + int b; |
|---|
| 8051 | 8980 | |
|---|
| 8052 | | - memset(&xdp, 0, sizeof(xdp)); |
|---|
| 8053 | | - xdp.command = cmd; |
|---|
| 8981 | + if (!mask) { |
|---|
| 8982 | + dev->proto_down_reason = value; |
|---|
| 8983 | + } else { |
|---|
| 8984 | + for_each_set_bit(b, &mask, 32) { |
|---|
| 8985 | + if (value & (1 << b)) |
|---|
| 8986 | + dev->proto_down_reason |= BIT(b); |
|---|
| 8987 | + else |
|---|
| 8988 | + dev->proto_down_reason &= ~BIT(b); |
|---|
| 8989 | + } |
|---|
| 8990 | + } |
|---|
| 8991 | +} |
|---|
| 8992 | +EXPORT_SYMBOL(dev_change_proto_down_reason); |
|---|
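A short worked example of the masked update; the wrapper function is illustrative:

```c
static void example_update_reason(struct net_device *dev)
{
	/* Select bits 1 and 2: bit 2 is set, bit 1 is cleared, and all
	 * other bits keep their previous state. With mask == 0 the
	 * whole reason word would be replaced by value instead.
	 */
	dev_change_proto_down_reason(dev, BIT(1) | BIT(2), BIT(2));
}
```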
| 8054 | 8993 | |
|---|
| 8055 | | - /* Query must always succeed. */ |
|---|
| 8056 | | - WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); |
|---|
| 8994 | +struct bpf_xdp_link { |
|---|
| 8995 | + struct bpf_link link; |
|---|
| 8996 | + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ |
|---|
| 8997 | + int flags; |
|---|
| 8998 | +}; |
|---|
| 8057 | 8999 | |
|---|
| 8058 | | - return xdp.prog_id; |
|---|
| 9000 | +static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) |
|---|
| 9001 | +{ |
|---|
| 9002 | + if (flags & XDP_FLAGS_HW_MODE) |
|---|
| 9003 | + return XDP_MODE_HW; |
|---|
| 9004 | + if (flags & XDP_FLAGS_DRV_MODE) |
|---|
| 9005 | + return XDP_MODE_DRV; |
|---|
| 9006 | + if (flags & XDP_FLAGS_SKB_MODE) |
|---|
| 9007 | + return XDP_MODE_SKB; |
|---|
| 9008 | + return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; |
|---|
| 8059 | 9009 | } |
|---|
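Mode resolution precedence, restated: an explicit `XDP_FLAGS_*_MODE` bit always wins; with no mode bit set, native (driver) mode is chosen when the driver implements `ndo_bpf`, else generic (skb) mode. A tiny hypothetical helper built on it:

```c
/* Would this attach request end up in the driver's native XDP path? */
static bool xdp_request_is_native(struct net_device *dev, u32 flags)
{
	return dev_xdp_mode(dev, flags) == XDP_MODE_DRV;
}
```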
| 8060 | 9010 | |
|---|
| 8061 | | -static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, |
|---|
| 8062 | | - struct netlink_ext_ack *extack, u32 flags, |
|---|
| 8063 | | - struct bpf_prog *prog) |
|---|
| 9011 | +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) |
|---|
| 9012 | +{ |
|---|
| 9013 | + switch (mode) { |
|---|
| 9014 | + case XDP_MODE_SKB: |
|---|
| 9015 | + return generic_xdp_install; |
|---|
| 9016 | + case XDP_MODE_DRV: |
|---|
| 9017 | + case XDP_MODE_HW: |
|---|
| 9018 | + return dev->netdev_ops->ndo_bpf; |
|---|
| 9019 | + default: |
|---|
| 9020 | + return NULL; |
|---|
| 9021 | + }
|---|
| 9022 | +} |
|---|
| 9023 | + |
|---|
| 9024 | +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, |
|---|
| 9025 | + enum bpf_xdp_mode mode) |
|---|
| 9026 | +{ |
|---|
| 9027 | + return dev->xdp_state[mode].link; |
|---|
| 9028 | +} |
|---|
| 9029 | + |
|---|
| 9030 | +static struct bpf_prog *dev_xdp_prog(struct net_device *dev, |
|---|
| 9031 | + enum bpf_xdp_mode mode) |
|---|
| 9032 | +{ |
|---|
| 9033 | + struct bpf_xdp_link *link = dev_xdp_link(dev, mode); |
|---|
| 9034 | + |
|---|
| 9035 | + if (link) |
|---|
| 9036 | + return link->link.prog; |
|---|
| 9037 | + return dev->xdp_state[mode].prog; |
|---|
| 9038 | +} |
|---|
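The `xdp_state[]` array these accessors read is declared in struct net_device (netdevice.h, outside this hunk). In the corresponding mainline change, each per-mode slot holds either a BPF link or a raw prog pointer, never both:

```c
/* From include/linux/netdevice.h (not part of this hunk). */
struct bpf_xdp_entity {
	struct bpf_prog *prog;
	struct bpf_xdp_link *link;
};

/* ... and inside struct net_device: */
/*	struct bpf_xdp_entity	xdp_state[__MAX_XDP_MODE]; */
```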
| 9039 | + |
|---|
| 9040 | +static u8 dev_xdp_prog_count(struct net_device *dev) |
|---|
| 9041 | +{ |
|---|
| 9042 | + u8 count = 0; |
|---|
| 9043 | + int i; |
|---|
| 9044 | + |
|---|
| 9045 | + for (i = 0; i < __MAX_XDP_MODE; i++) |
|---|
| 9046 | + if (dev->xdp_state[i].prog || dev->xdp_state[i].link) |
|---|
| 9047 | + count++; |
|---|
| 9048 | + return count; |
|---|
| 9049 | +} |
|---|
| 9050 | + |
|---|
| 9051 | +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) |
|---|
| 9052 | +{ |
|---|
| 9053 | + struct bpf_prog *prog = dev_xdp_prog(dev, mode); |
|---|
| 9054 | + |
|---|
| 9055 | + return prog ? prog->aux->id : 0; |
|---|
| 9056 | +} |
|---|
| 9057 | + |
|---|
| 9058 | +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, |
|---|
| 9059 | + struct bpf_xdp_link *link) |
|---|
| 9060 | +{ |
|---|
| 9061 | + dev->xdp_state[mode].link = link; |
|---|
| 9062 | + dev->xdp_state[mode].prog = NULL; |
|---|
| 9063 | +} |
|---|
| 9064 | + |
|---|
| 9065 | +static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, |
|---|
| 9066 | + struct bpf_prog *prog) |
|---|
| 9067 | +{ |
|---|
| 9068 | + dev->xdp_state[mode].link = NULL; |
|---|
| 9069 | + dev->xdp_state[mode].prog = prog; |
|---|
| 9070 | +} |
|---|
| 9071 | + |
|---|
| 9072 | +static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, |
|---|
| 9073 | + bpf_op_t bpf_op, struct netlink_ext_ack *extack, |
|---|
| 9074 | + u32 flags, struct bpf_prog *prog) |
|---|
| 8064 | 9075 | { |
|---|
| 8065 | 9076 | struct netdev_bpf xdp; |
|---|
| 9077 | + int err; |
|---|
| 8066 | 9078 | |
|---|
| 8067 | 9079 | memset(&xdp, 0, sizeof(xdp)); |
|---|
| 8068 | | - if (flags & XDP_FLAGS_HW_MODE) |
|---|
| 8069 | | - xdp.command = XDP_SETUP_PROG_HW; |
|---|
| 8070 | | - else |
|---|
| 8071 | | - xdp.command = XDP_SETUP_PROG; |
|---|
| 9080 | + xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; |
|---|
| 8072 | 9081 | xdp.extack = extack; |
|---|
| 8073 | 9082 | xdp.flags = flags; |
|---|
| 8074 | 9083 | xdp.prog = prog; |
|---|
| 8075 | 9084 | |
|---|
| 8076 | | - return bpf_op(dev, &xdp); |
|---|
| 9085 | + /* Drivers assume refcnt is already incremented (i.e., prog pointer is
|---|
| 9086 | + * "moved" into driver), so they don't increment it on their own, but |
|---|
| 9087 | + * they do decrement refcnt when program is detached or replaced. |
|---|
| 9088 | + * Given net_device also owns link/prog, we need to bump refcnt here |
|---|
| 9089 | + * to prevent drivers from underflowing it. |
|---|
| 9090 | + */ |
|---|
| 9091 | + if (prog) |
|---|
| 9092 | + bpf_prog_inc(prog); |
|---|
| 9093 | + err = bpf_op(dev, &xdp); |
|---|
| 9094 | + if (err) { |
|---|
| 9095 | + if (prog) |
|---|
| 9096 | + bpf_prog_put(prog); |
|---|
| 9097 | + return err; |
|---|
| 9098 | + } |
|---|
| 9099 | + |
|---|
| 9100 | + if (mode != XDP_MODE_HW) |
|---|
| 9101 | + bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); |
|---|
| 9102 | + |
|---|
| 9103 | + return 0; |
|---|
| 8077 | 9104 | } |
|---|
| 8078 | 9105 | |
|---|
| 8079 | 9106 | static void dev_xdp_uninstall(struct net_device *dev) |
|---|
| 8080 | 9107 | { |
|---|
| 8081 | | - struct netdev_bpf xdp; |
|---|
| 8082 | | - bpf_op_t ndo_bpf; |
|---|
| 9108 | + struct bpf_xdp_link *link; |
|---|
| 9109 | + struct bpf_prog *prog; |
|---|
| 9110 | + enum bpf_xdp_mode mode; |
|---|
| 9111 | + bpf_op_t bpf_op; |
|---|
| 8083 | 9112 | |
|---|
| 8084 | | - /* Remove generic XDP */ |
|---|
| 8085 | | - WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); |
|---|
| 9113 | + ASSERT_RTNL(); |
|---|
| 8086 | 9114 | |
|---|
| 8087 | | - /* Remove from the driver */ |
|---|
| 8088 | | - ndo_bpf = dev->netdev_ops->ndo_bpf; |
|---|
| 8089 | | - if (!ndo_bpf) |
|---|
| 8090 | | - return; |
|---|
| 9115 | + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { |
|---|
| 9116 | + prog = dev_xdp_prog(dev, mode); |
|---|
| 9117 | + if (!prog) |
|---|
| 9118 | + continue; |
|---|
| 8091 | 9119 | |
|---|
| 8092 | | - memset(&xdp, 0, sizeof(xdp)); |
|---|
| 8093 | | - xdp.command = XDP_QUERY_PROG; |
|---|
| 8094 | | - WARN_ON(ndo_bpf(dev, &xdp)); |
|---|
| 8095 | | - if (xdp.prog_id) |
|---|
| 8096 | | - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, |
|---|
| 8097 | | - NULL)); |
|---|
| 9120 | + bpf_op = dev_xdp_bpf_op(dev, mode); |
|---|
| 9121 | + if (!bpf_op) |
|---|
| 9122 | + continue; |
|---|
| 8098 | 9123 | |
|---|
| 8099 | | - /* Remove HW offload */ |
|---|
| 8100 | | - memset(&xdp, 0, sizeof(xdp)); |
|---|
| 8101 | | - xdp.command = XDP_QUERY_PROG_HW; |
|---|
| 8102 | | - if (!ndo_bpf(dev, &xdp) && xdp.prog_id) |
|---|
| 8103 | | - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, |
|---|
| 8104 | | - NULL)); |
|---|
| 9124 | + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); |
|---|
| 9125 | + |
|---|
| 9126 | + /* auto-detach link from net device */ |
|---|
| 9127 | + link = dev_xdp_link(dev, mode); |
|---|
| 9128 | + if (link) |
|---|
| 9129 | + link->dev = NULL; |
|---|
| 9130 | + else |
|---|
| 9131 | + bpf_prog_put(prog); |
|---|
| 9132 | + |
|---|
| 9133 | + dev_xdp_set_link(dev, mode, NULL); |
|---|
| 9134 | + } |
|---|
| 9135 | +} |
|---|
| 9136 | + |
|---|
| 9137 | +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, |
|---|
| 9138 | + struct bpf_xdp_link *link, struct bpf_prog *new_prog, |
|---|
| 9139 | + struct bpf_prog *old_prog, u32 flags) |
|---|
| 9140 | +{ |
|---|
| 9141 | + unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); |
|---|
| 9142 | + struct bpf_prog *cur_prog; |
|---|
| 9143 | + enum bpf_xdp_mode mode; |
|---|
| 9144 | + bpf_op_t bpf_op; |
|---|
| 9145 | + int err; |
|---|
| 9146 | + |
|---|
| 9147 | + ASSERT_RTNL(); |
|---|
| 9148 | + |
|---|
| 9149 | + /* either link or prog attachment, never both */ |
|---|
| 9150 | + if (link && (new_prog || old_prog)) |
|---|
| 9151 | + return -EINVAL; |
|---|
| 9152 | + /* link supports only XDP mode flags */ |
|---|
| 9153 | + if (link && (flags & ~XDP_FLAGS_MODES)) { |
|---|
| 9154 | + NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); |
|---|
| 9155 | + return -EINVAL; |
|---|
| 9156 | + } |
|---|
| 9157 | + /* just one XDP mode bit should be set, zero defaults to drv/skb mode */ |
|---|
| 9158 | + if (num_modes > 1) { |
|---|
| 9159 | + NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); |
|---|
| 9160 | + return -EINVAL; |
|---|
| 9161 | + } |
|---|
| 9162 | + /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ |
|---|
| 9163 | + if (!num_modes && dev_xdp_prog_count(dev) > 1) { |
|---|
| 9164 | + NL_SET_ERR_MSG(extack, |
|---|
| 9165 | + "More than one program loaded, unset mode is ambiguous"); |
|---|
| 9166 | + return -EINVAL; |
|---|
| 9167 | + } |
|---|
| 9168 | + /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ |
|---|
| 9169 | + if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { |
|---|
| 9170 | + NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); |
|---|
| 9171 | + return -EINVAL; |
|---|
| 9172 | + } |
|---|
| 9173 | + |
|---|
| 9174 | + mode = dev_xdp_mode(dev, flags); |
|---|
| 9175 | + /* can't replace attached link */ |
|---|
| 9176 | + if (dev_xdp_link(dev, mode)) { |
|---|
| 9177 | + NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); |
|---|
| 9178 | + return -EBUSY; |
|---|
| 9179 | + } |
|---|
| 9180 | + |
|---|
| 9181 | + cur_prog = dev_xdp_prog(dev, mode); |
|---|
| 9182 | + /* can't replace attached prog with link */ |
|---|
| 9183 | + if (link && cur_prog) { |
|---|
| 9184 | + NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); |
|---|
| 9185 | + return -EBUSY; |
|---|
| 9186 | + } |
|---|
| 9187 | + if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { |
|---|
| 9188 | + NL_SET_ERR_MSG(extack, "Active program does not match expected"); |
|---|
| 9189 | + return -EEXIST; |
|---|
| 9190 | + } |
|---|
| 9191 | + |
|---|
| 9192 | + /* put effective new program into new_prog */ |
|---|
| 9193 | + if (link) |
|---|
| 9194 | + new_prog = link->link.prog; |
|---|
| 9195 | + |
|---|
| 9196 | + if (new_prog) { |
|---|
| 9197 | + bool offload = mode == XDP_MODE_HW; |
|---|
| 9198 | + enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB |
|---|
| 9199 | + ? XDP_MODE_DRV : XDP_MODE_SKB; |
|---|
| 9200 | + |
|---|
| 9201 | + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { |
|---|
| 9202 | + NL_SET_ERR_MSG(extack, "XDP program already attached"); |
|---|
| 9203 | + return -EBUSY; |
|---|
| 9204 | + } |
|---|
| 9205 | + if (!offload && dev_xdp_prog(dev, other_mode)) { |
|---|
| 9206 | + NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); |
|---|
| 9207 | + return -EEXIST; |
|---|
| 9208 | + } |
|---|
| 9209 | + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { |
|---|
| 9210 | + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); |
|---|
| 9211 | + return -EINVAL; |
|---|
| 9212 | + } |
|---|
| 9213 | + if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { |
|---|
| 9214 | + NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); |
|---|
| 9215 | + return -EINVAL; |
|---|
| 9216 | + } |
|---|
| 9217 | + if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { |
|---|
| 9218 | + NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); |
|---|
| 9219 | + return -EINVAL; |
|---|
| 9220 | + } |
|---|
| 9221 | + } |
|---|
| 9222 | + |
|---|
| 9223 | + /* don't call drivers if the effective program didn't change */ |
|---|
| 9224 | + if (new_prog != cur_prog) { |
|---|
| 9225 | + bpf_op = dev_xdp_bpf_op(dev, mode); |
|---|
| 9226 | + if (!bpf_op) { |
|---|
| 9227 | + NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); |
|---|
| 9228 | + return -EOPNOTSUPP; |
|---|
| 9229 | + } |
|---|
| 9230 | + |
|---|
| 9231 | + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); |
|---|
| 9232 | + if (err) |
|---|
| 9233 | + return err; |
|---|
| 9234 | + } |
|---|
| 9235 | + |
|---|
| 9236 | + if (link) |
|---|
| 9237 | + dev_xdp_set_link(dev, mode, link); |
|---|
| 9238 | + else |
|---|
| 9239 | + dev_xdp_set_prog(dev, mode, new_prog); |
|---|
| 9240 | + if (cur_prog) |
|---|
| 9241 | + bpf_prog_put(cur_prog); |
|---|
| 9242 | + |
|---|
| 9243 | + return 0; |
|---|
| 9244 | +} |
|---|
| 9245 | + |
|---|
| 9246 | +static int dev_xdp_attach_link(struct net_device *dev, |
|---|
| 9247 | + struct netlink_ext_ack *extack, |
|---|
| 9248 | + struct bpf_xdp_link *link) |
|---|
| 9249 | +{ |
|---|
| 9250 | + return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); |
|---|
| 9251 | +} |
|---|
| 9252 | + |
|---|
| 9253 | +static int dev_xdp_detach_link(struct net_device *dev, |
|---|
| 9254 | + struct netlink_ext_ack *extack, |
|---|
| 9255 | + struct bpf_xdp_link *link) |
|---|
| 9256 | +{ |
|---|
| 9257 | + enum bpf_xdp_mode mode; |
|---|
| 9258 | + bpf_op_t bpf_op; |
|---|
| 9259 | + |
|---|
| 9260 | + ASSERT_RTNL(); |
|---|
| 9261 | + |
|---|
| 9262 | + mode = dev_xdp_mode(dev, link->flags); |
|---|
| 9263 | + if (dev_xdp_link(dev, mode) != link) |
|---|
| 9264 | + return -EINVAL; |
|---|
| 9265 | + |
|---|
| 9266 | + bpf_op = dev_xdp_bpf_op(dev, mode); |
|---|
| 9267 | + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); |
|---|
| 9268 | + dev_xdp_set_link(dev, mode, NULL); |
|---|
| 9269 | + return 0; |
|---|
| 9270 | +} |
|---|
| 9271 | + |
|---|
| 9272 | +static void bpf_xdp_link_release(struct bpf_link *link) |
|---|
| 9273 | +{ |
|---|
| 9274 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
|---|
| 9275 | + |
|---|
| 9276 | + rtnl_lock(); |
|---|
| 9277 | + |
|---|
| 9278 | + /* if racing with net_device's teardown, xdp_link->dev might be
|---|
| 9279 | + * already NULL, in which case link was already auto-detached |
|---|
| 9280 | + */ |
|---|
| 9281 | + if (xdp_link->dev) { |
|---|
| 9282 | + WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); |
|---|
| 9283 | + xdp_link->dev = NULL; |
|---|
| 9284 | + } |
|---|
| 9285 | + |
|---|
| 9286 | + rtnl_unlock(); |
|---|
| 9287 | +} |
|---|
| 9288 | + |
|---|
| 9289 | +static int bpf_xdp_link_detach(struct bpf_link *link) |
|---|
| 9290 | +{ |
|---|
| 9291 | + bpf_xdp_link_release(link); |
|---|
| 9292 | + return 0; |
|---|
| 9293 | +} |
|---|
| 9294 | + |
|---|
| 9295 | +static void bpf_xdp_link_dealloc(struct bpf_link *link) |
|---|
| 9296 | +{ |
|---|
| 9297 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
|---|
| 9298 | + |
|---|
| 9299 | + kfree(xdp_link); |
|---|
| 9300 | +} |
|---|
| 9301 | + |
|---|
| 9302 | +static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, |
|---|
| 9303 | + struct seq_file *seq) |
|---|
| 9304 | +{ |
|---|
| 9305 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
|---|
| 9306 | + u32 ifindex = 0; |
|---|
| 9307 | + |
|---|
| 9308 | + rtnl_lock(); |
|---|
| 9309 | + if (xdp_link->dev) |
|---|
| 9310 | + ifindex = xdp_link->dev->ifindex; |
|---|
| 9311 | + rtnl_unlock(); |
|---|
| 9312 | + |
|---|
| 9313 | + seq_printf(seq, "ifindex:\t%u\n", ifindex); |
|---|
| 9314 | +} |
|---|
| 9315 | + |
|---|
| 9316 | +static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, |
|---|
| 9317 | + struct bpf_link_info *info) |
|---|
| 9318 | +{ |
|---|
| 9319 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
|---|
| 9320 | + u32 ifindex = 0; |
|---|
| 9321 | + |
|---|
| 9322 | + rtnl_lock(); |
|---|
| 9323 | + if (xdp_link->dev) |
|---|
| 9324 | + ifindex = xdp_link->dev->ifindex; |
|---|
| 9325 | + rtnl_unlock(); |
|---|
| 9326 | + |
|---|
| 9327 | + info->xdp.ifindex = ifindex; |
|---|
| 9328 | + return 0; |
|---|
| 9329 | +} |
|---|
| 9330 | + |
|---|
| 9331 | +static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, |
|---|
| 9332 | + struct bpf_prog *old_prog) |
|---|
| 9333 | +{ |
|---|
| 9334 | + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); |
|---|
| 9335 | + enum bpf_xdp_mode mode; |
|---|
| 9336 | + bpf_op_t bpf_op; |
|---|
| 9337 | + int err = 0; |
|---|
| 9338 | + |
|---|
| 9339 | + rtnl_lock(); |
|---|
| 9340 | + |
|---|
| 9341 | + /* link might have been auto-released already, so fail */ |
|---|
| 9342 | + if (!xdp_link->dev) { |
|---|
| 9343 | + err = -ENOLINK; |
|---|
| 9344 | + goto out_unlock; |
|---|
| 9345 | + } |
|---|
| 9346 | + |
|---|
| 9347 | + if (old_prog && link->prog != old_prog) { |
|---|
| 9348 | + err = -EPERM; |
|---|
| 9349 | + goto out_unlock; |
|---|
| 9350 | + } |
|---|
| 9351 | + old_prog = link->prog; |
|---|
| 9352 | + if (old_prog->type != new_prog->type || |
|---|
| 9353 | + old_prog->expected_attach_type != new_prog->expected_attach_type) { |
|---|
| 9354 | + err = -EINVAL; |
|---|
| 9355 | + goto out_unlock; |
|---|
| 9356 | + } |
|---|
| 9357 | + |
|---|
| 9358 | + if (old_prog == new_prog) { |
|---|
| 9359 | + /* no-op, don't disturb drivers */ |
|---|
| 9360 | + bpf_prog_put(new_prog); |
|---|
| 9361 | + goto out_unlock; |
|---|
| 9362 | + } |
|---|
| 9363 | + |
|---|
| 9364 | + mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); |
|---|
| 9365 | + bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); |
|---|
| 9366 | + err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, |
|---|
| 9367 | + xdp_link->flags, new_prog); |
|---|
| 9368 | + if (err) |
|---|
| 9369 | + goto out_unlock; |
|---|
| 9370 | + |
|---|
| 9371 | + old_prog = xchg(&link->prog, new_prog); |
|---|
| 9372 | + bpf_prog_put(old_prog); |
|---|
| 9373 | + |
|---|
| 9374 | +out_unlock: |
|---|
| 9375 | + rtnl_unlock(); |
|---|
| 9376 | + return err; |
|---|
| 9377 | +} |
|---|
| 9378 | + |
|---|
| 9379 | +static const struct bpf_link_ops bpf_xdp_link_lops = { |
|---|
| 9380 | + .release = bpf_xdp_link_release, |
|---|
| 9381 | + .dealloc = bpf_xdp_link_dealloc, |
|---|
| 9382 | + .detach = bpf_xdp_link_detach, |
|---|
| 9383 | + .show_fdinfo = bpf_xdp_link_show_fdinfo, |
|---|
| 9384 | + .fill_link_info = bpf_xdp_link_fill_link_info, |
|---|
| 9385 | + .update_prog = bpf_xdp_link_update, |
|---|
| 9386 | +}; |
|---|
| 9387 | + |
|---|
| 9388 | +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
|---|
| 9389 | +{ |
|---|
| 9390 | + struct net *net = current->nsproxy->net_ns; |
|---|
| 9391 | + struct bpf_link_primer link_primer; |
|---|
| 9392 | + struct bpf_xdp_link *link; |
|---|
| 9393 | + struct net_device *dev; |
|---|
| 9394 | + int err, fd; |
|---|
| 9395 | + |
|---|
| 9396 | + rtnl_lock(); |
|---|
| 9397 | + dev = dev_get_by_index(net, attr->link_create.target_ifindex); |
|---|
| 9398 | + if (!dev) { |
|---|
| 9399 | + rtnl_unlock(); |
|---|
| 9400 | + return -EINVAL; |
|---|
| 9401 | + } |
|---|
| 9402 | + |
|---|
| 9403 | + link = kzalloc(sizeof(*link), GFP_USER); |
|---|
| 9404 | + if (!link) { |
|---|
| 9405 | + err = -ENOMEM; |
|---|
| 9406 | + goto unlock; |
|---|
| 9407 | + } |
|---|
| 9408 | + |
|---|
| 9409 | + bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); |
|---|
| 9410 | + link->dev = dev; |
|---|
| 9411 | + link->flags = attr->link_create.flags; |
|---|
| 9412 | + |
|---|
| 9413 | + err = bpf_link_prime(&link->link, &link_primer); |
|---|
| 9414 | + if (err) { |
|---|
| 9415 | + kfree(link); |
|---|
| 9416 | + goto unlock; |
|---|
| 9417 | + } |
|---|
| 9418 | + |
|---|
| 9419 | + err = dev_xdp_attach_link(dev, NULL, link); |
|---|
| 9420 | + rtnl_unlock(); |
|---|
| 9421 | + |
|---|
| 9422 | + if (err) { |
|---|
| 9423 | + link->dev = NULL; |
|---|
| 9424 | + bpf_link_cleanup(&link_primer); |
|---|
| 9425 | + goto out_put_dev; |
|---|
| 9426 | + } |
|---|
| 9427 | + |
|---|
| 9428 | + fd = bpf_link_settle(&link_primer); |
|---|
| 9429 | + /* the link itself doesn't hold dev's refcnt, so as not to complicate shutdown */
|---|
| 9430 | + dev_put(dev); |
|---|
| 9431 | + return fd; |
|---|
| 9432 | + |
|---|
| 9433 | +unlock: |
|---|
| 9434 | + rtnl_unlock(); |
|---|
| 9435 | + |
|---|
| 9436 | +out_put_dev: |
|---|
| 9437 | + dev_put(dev); |
|---|
| 9438 | + return err; |
|---|
| 8105 | 9439 | } |
|---|
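From userspace, this kernel entry point is reached via the `BPF_LINK_CREATE` syscall command. A hedged sketch using libbpf's high-level wrapper (available since the libbpf release that added XDP link support; error conventions differ across versions):

```c
#include <bpf/libbpf.h>

/* @prog is a loaded struct bpf_program * of type BPF_PROG_TYPE_XDP. */
static struct bpf_link *attach_xdp(struct bpf_program *prog, int ifindex)
{
	struct bpf_link *link = bpf_program__attach_xdp(prog, ifindex);

	/* Pre-1.0 libbpf returns an ERR_PTR-style pointer (check with
	 * libbpf_get_error()); libbpf 1.0+ returns NULL and sets errno.
	 */
	return link;
}
```

Because the link does not pin the net_device, tearing down the device auto-detaches the program and NULLs `xdp_link->dev`, which is exactly the race `bpf_xdp_link_release()` above guards against.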
| 8106 | 9440 | |
|---|
| 8107 | 9441 | /** |
|---|
| .. | .. |
|---|
| 8109 | 9443 | * @dev: device |
|---|
| 8110 | 9444 | * @extack: netlink extended ack |
|---|
| 8111 | 9445 | * @fd: new program fd or negative value to clear |
|---|
| 9446 | + * @expected_fd: old program fd that userspace expects to replace or clear |
|---|
| 8112 | 9447 | * @flags: xdp-related flags |
|---|
| 8113 | 9448 | * |
|---|
| 8114 | 9449 | * Set or clear a bpf program for a device |
|---|
| 8115 | 9450 | */ |
|---|
| 8116 | 9451 | int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, |
|---|
| 8117 | | - int fd, u32 flags) |
|---|
| 9452 | + int fd, int expected_fd, u32 flags) |
|---|
| 8118 | 9453 | { |
|---|
| 8119 | | - const struct net_device_ops *ops = dev->netdev_ops; |
|---|
| 8120 | | - enum bpf_netdev_command query; |
|---|
| 8121 | | - struct bpf_prog *prog = NULL; |
|---|
| 8122 | | - bpf_op_t bpf_op, bpf_chk; |
|---|
| 9454 | + enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); |
|---|
| 9455 | + struct bpf_prog *new_prog = NULL, *old_prog = NULL; |
|---|
| 8123 | 9456 | int err; |
|---|
| 8124 | 9457 | |
|---|
| 8125 | 9458 | ASSERT_RTNL(); |
|---|
| 8126 | 9459 | |
|---|
| 8127 | | - query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; |
|---|
| 8128 | | - |
|---|
| 8129 | | - bpf_op = bpf_chk = ops->ndo_bpf; |
|---|
| 8130 | | - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) |
|---|
| 8131 | | - return -EOPNOTSUPP; |
|---|
| 8132 | | - if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) |
|---|
| 8133 | | - bpf_op = generic_xdp_install; |
|---|
| 8134 | | - if (bpf_op == bpf_chk) |
|---|
| 8135 | | - bpf_chk = generic_xdp_install; |
|---|
| 8136 | | - |
|---|
| 8137 | 9460 | if (fd >= 0) { |
|---|
| 8138 | | - if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || |
|---|
| 8139 | | - __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) |
|---|
| 8140 | | - return -EEXIST; |
|---|
| 8141 | | - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && |
|---|
| 8142 | | - __dev_xdp_query(dev, bpf_op, query)) |
|---|
| 8143 | | - return -EBUSY; |
|---|
| 9461 | + new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, |
|---|
| 9462 | + mode != XDP_MODE_SKB); |
|---|
| 9463 | + if (IS_ERR(new_prog)) |
|---|
| 9464 | + return PTR_ERR(new_prog); |
|---|
| 9465 | + } |
|---|
| 8144 | 9466 | |
|---|
| 8145 | | - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, |
|---|
| 8146 | | - bpf_op == ops->ndo_bpf); |
|---|
| 8147 | | - if (IS_ERR(prog)) |
|---|
| 8148 | | - return PTR_ERR(prog); |
|---|
| 8149 | | - |
|---|
| 8150 | | - if (!(flags & XDP_FLAGS_HW_MODE) && |
|---|
| 8151 | | - bpf_prog_is_dev_bound(prog->aux)) { |
|---|
| 8152 | | - NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); |
|---|
| 8153 | | - bpf_prog_put(prog); |
|---|
| 8154 | | - return -EINVAL; |
|---|
| 9467 | + if (expected_fd >= 0) { |
|---|
| 9468 | + old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, |
|---|
| 9469 | + mode != XDP_MODE_SKB); |
|---|
| 9470 | + if (IS_ERR(old_prog)) { |
|---|
| 9471 | + err = PTR_ERR(old_prog); |
|---|
| 9472 | + old_prog = NULL; |
|---|
| 9473 | + goto err_out; |
|---|
| 8155 | 9474 | } |
|---|
| 8156 | 9475 | } |
|---|
| 8157 | 9476 | |
|---|
| 8158 | | - err = dev_xdp_install(dev, bpf_op, extack, flags, prog); |
|---|
| 8159 | | - if (err < 0 && prog) |
|---|
| 8160 | | - bpf_prog_put(prog); |
|---|
| 9477 | + err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); |
|---|
| 8161 | 9478 | |
|---|
| 9479 | +err_out: |
|---|
| 9480 | + if (err && new_prog) |
|---|
| 9481 | + bpf_prog_put(new_prog); |
|---|
| 9482 | + if (old_prog) |
|---|
| 9483 | + bpf_prog_put(old_prog); |
|---|
| 8162 | 9484 | return err; |
|---|
| 8163 | 9485 | } |
|---|
| 8164 | 9486 | |
|---|
| .. | .. |
|---|
| 8190 | 9512 | { |
|---|
| 8191 | 9513 | list_add_tail(&dev->todo_list, &net_todo_list); |
|---|
| 8192 | 9514 | dev_net(dev)->dev_unreg_count++; |
|---|
| 8193 | | -} |
|---|
| 8194 | | - |
|---|
| 8195 | | -static void rollback_registered_many(struct list_head *head) |
|---|
| 8196 | | -{ |
|---|
| 8197 | | - struct net_device *dev, *tmp; |
|---|
| 8198 | | - LIST_HEAD(close_head); |
|---|
| 8199 | | - |
|---|
| 8200 | | - BUG_ON(dev_boot_phase); |
|---|
| 8201 | | - ASSERT_RTNL(); |
|---|
| 8202 | | - |
|---|
| 8203 | | - list_for_each_entry_safe(dev, tmp, head, unreg_list) { |
|---|
| 8204 | | - /* Some devices call without registering |
|---|
| 8205 | | - * for initialization unwind. Remove those |
|---|
| 8206 | | - * devices and proceed with the remaining. |
|---|
| 8207 | | - */ |
|---|
| 8208 | | - if (dev->reg_state == NETREG_UNINITIALIZED) { |
|---|
| 8209 | | - pr_debug("unregister_netdevice: device %s/%p never was registered\n", |
|---|
| 8210 | | - dev->name, dev); |
|---|
| 8211 | | - |
|---|
| 8212 | | - WARN_ON(1); |
|---|
| 8213 | | - list_del(&dev->unreg_list); |
|---|
| 8214 | | - continue; |
|---|
| 8215 | | - } |
|---|
| 8216 | | - dev->dismantle = true; |
|---|
| 8217 | | - BUG_ON(dev->reg_state != NETREG_REGISTERED); |
|---|
| 8218 | | - } |
|---|
| 8219 | | - |
|---|
| 8220 | | - /* If device is running, close it first. */ |
|---|
| 8221 | | - list_for_each_entry(dev, head, unreg_list) |
|---|
| 8222 | | - list_add_tail(&dev->close_list, &close_head); |
|---|
| 8223 | | - dev_close_many(&close_head, true); |
|---|
| 8224 | | - |
|---|
| 8225 | | - list_for_each_entry(dev, head, unreg_list) { |
|---|
| 8226 | | - /* And unlink it from device chain. */ |
|---|
| 8227 | | - unlist_netdevice(dev); |
|---|
| 8228 | | - |
|---|
| 8229 | | - dev->reg_state = NETREG_UNREGISTERING; |
|---|
| 8230 | | - } |
|---|
| 8231 | | - flush_all_backlogs(); |
|---|
| 8232 | | - |
|---|
| 8233 | | - synchronize_net(); |
|---|
| 8234 | | - |
|---|
| 8235 | | - list_for_each_entry(dev, head, unreg_list) { |
|---|
| 8236 | | - struct sk_buff *skb = NULL; |
|---|
| 8237 | | - |
|---|
| 8238 | | - /* Shutdown queueing discipline. */ |
|---|
| 8239 | | - dev_shutdown(dev); |
|---|
| 8240 | | - |
|---|
| 8241 | | - dev_xdp_uninstall(dev); |
|---|
| 8242 | | - |
|---|
| 8243 | | - /* Notify protocols, that we are about to destroy |
|---|
| 8244 | | - * this device. They should clean all the things. |
|---|
| 8245 | | - */ |
|---|
| 8246 | | - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
|---|
| 8247 | | - |
|---|
| 8248 | | - if (!dev->rtnl_link_ops || |
|---|
| 8249 | | - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
|---|
| 8250 | | - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, |
|---|
| 8251 | | - GFP_KERNEL, NULL, 0); |
|---|
| 8252 | | - |
|---|
| 8253 | | - /* |
|---|
| 8254 | | - * Flush the unicast and multicast chains |
|---|
| 8255 | | - */ |
|---|
| 8256 | | - dev_uc_flush(dev); |
|---|
| 8257 | | - dev_mc_flush(dev); |
|---|
| 8258 | | - |
|---|
| 8259 | | - if (dev->netdev_ops->ndo_uninit) |
|---|
| 8260 | | - dev->netdev_ops->ndo_uninit(dev); |
|---|
| 8261 | | - |
|---|
| 8262 | | - if (skb) |
|---|
| 8263 | | - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); |
|---|
| 8264 | | - |
|---|
| 8265 | | - /* Notifier chain MUST detach us all upper devices. */ |
|---|
| 8266 | | - WARN_ON(netdev_has_any_upper_dev(dev)); |
|---|
| 8267 | | - WARN_ON(netdev_has_any_lower_dev(dev)); |
|---|
| 8268 | | - |
|---|
| 8269 | | - /* Remove entries from kobject tree */ |
|---|
| 8270 | | - netdev_unregister_kobject(dev); |
|---|
| 8271 | | -#ifdef CONFIG_XPS |
|---|
| 8272 | | - /* Remove XPS queueing entries */ |
|---|
| 8273 | | - netif_reset_xps_queues_gt(dev, 0); |
|---|
| 8274 | | -#endif |
|---|
| 8275 | | - } |
|---|
| 8276 | | - |
|---|
| 8277 | | - synchronize_net(); |
|---|
| 8278 | | - |
|---|
| 8279 | | - list_for_each_entry(dev, head, unreg_list) |
|---|
| 8280 | | - dev_put(dev); |
|---|
| 8281 | | -} |
|---|
| 8282 | | - |
|---|
| 8283 | | -static void rollback_registered(struct net_device *dev) |
|---|
| 8284 | | -{ |
|---|
| 8285 | | - LIST_HEAD(single); |
|---|
| 8286 | | - |
|---|
| 8287 | | - list_add(&dev->unreg_list, &single); |
|---|
| 8288 | | - rollback_registered_many(&single); |
|---|
| 8289 | | - list_del(&single); |
|---|
| 8290 | 9515 | } |
|---|
| 8291 | 9516 | |
|---|
| 8292 | 9517 | static netdev_features_t netdev_sync_upper_features(struct net_device *lower, |
|---|
| .. | .. |
|---|
| 8434 | 9659 | /* driver might be less strict about feature dependencies */ |
|---|
| 8435 | 9660 | features = netdev_fix_features(dev, features); |
|---|
| 8436 | 9661 | |
|---|
| 8437 | | - /* some features can't be enabled if they're off an an upper device */ |
|---|
| 9662 | + /* some features can't be enabled if they're off on an upper device */ |
|---|
| 8438 | 9663 | netdev_for_each_upper_dev_rcu(dev, upper, iter) |
|---|
| 8439 | 9664 | features = netdev_sync_upper_features(dev, upper, features); |
|---|
| 8440 | 9665 | |
|---|
| .. | .. |
|---|
| 8558 | 9783 | else |
|---|
| 8559 | 9784 | netif_dormant_off(dev); |
|---|
| 8560 | 9785 | |
|---|
| 9786 | + if (rootdev->operstate == IF_OPER_TESTING) |
|---|
| 9787 | + netif_testing_on(dev); |
|---|
| 9788 | + else |
|---|
| 9789 | + netif_testing_off(dev); |
|---|
| 9790 | + |
|---|
| 8561 | 9791 | if (netif_carrier_ok(rootdev)) |
|---|
| 8562 | 9792 | netif_carrier_on(dev); |
|---|
| 8563 | 9793 | else |
|---|
| .. | .. |
|---|
| 8619 | 9849 | /* Initialize queue lock */ |
|---|
| 8620 | 9850 | spin_lock_init(&queue->_xmit_lock); |
|---|
| 8621 | 9851 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); |
|---|
| 8622 | | - netdev_queue_clear_owner(queue); |
|---|
| 9852 | + queue->xmit_lock_owner = -1; |
|---|
| 8623 | 9853 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); |
|---|
| 8624 | 9854 | queue->dev = dev; |
|---|
| 8625 | 9855 | #ifdef CONFIG_BQL |
|---|
| .. | .. |
|---|
| 8698 | 9928 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
|---|
| 8699 | 9929 | BUG_ON(!net); |
|---|
| 8700 | 9930 | |
|---|
| 9931 | + ret = ethtool_check_ops(dev->ethtool_ops); |
|---|
| 9932 | + if (ret) |
|---|
| 9933 | + return ret; |
|---|
| 9934 | + |
|---|
| 8701 | 9935 | spin_lock_init(&dev->addr_list_lock); |
|---|
| 8702 | 9936 | netdev_set_addr_lockdep_class(dev); |
|---|
| 8703 | 9937 | |
|---|
| 8704 | 9938 | ret = dev_get_valid_name(net, dev, dev->name); |
|---|
| 8705 | 9939 | if (ret < 0) |
|---|
| 9940 | + goto out; |
|---|
| 9941 | + |
|---|
| 9942 | + ret = -ENOMEM; |
|---|
| 9943 | + dev->name_node = netdev_name_node_head_alloc(dev); |
|---|
| 9944 | + if (!dev->name_node) |
|---|
| 8706 | 9945 | goto out; |
|---|
| 8707 | 9946 | |
|---|
| 8708 | 9947 | /* Init, if this function is available */ |
|---|
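Two early additions in this hunk: register_netdevice() now validates the driver's ethtool_ops before doing anything else, and it allocates the name_node that backs the alternative-name (altname) infrastructure, with a matching err_free_name unwind label below. For the ethtool check, the practical effect in kernels of this vintage is that a driver implementing set_coalesce must also declare supported_coalesce_params; a hedged sketch with hypothetical callbacks:

```c
/* Hedged sketch: an ethtool_ops that passes ethtool_check_ops().
 * The myvdev_* callbacks are hypothetical; the rule illustrated is
 * that set_coalesce without supported_coalesce_params is rejected
 * at registration time.
 */
#include <linux/ethtool.h>

static int myvdev_get_coalesce(struct net_device *dev,
			       struct ethtool_coalesce *ec);
static int myvdev_set_coalesce(struct net_device *dev,
			       struct ethtool_coalesce *ec);

static const struct ethtool_ops myvdev_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
				     ETHTOOL_COALESCE_MAX_FRAMES,
	.get_coalesce	= myvdev_get_coalesce,
	.set_coalesce	= myvdev_set_coalesce,
};
```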
| .. | .. |
|---|
| 8711 | 9950 | if (ret) { |
|---|
| 8712 | 9951 | if (ret > 0) |
|---|
| 8713 | 9952 | ret = -EIO; |
|---|
| 8714 | | - goto out; |
|---|
| 9953 | + goto err_free_name; |
|---|
| 8715 | 9954 | } |
|---|
| 8716 | 9955 | } |
|---|
| 8717 | 9956 | |
|---|
| .. | .. |
|---|
| 8733 | 9972 | /* Transfer changeable features to wanted_features and enable |
|---|
| 8734 | 9973 | * software offloads (GSO and GRO). |
|---|
| 8735 | 9974 | */ |
|---|
| 8736 | | - dev->hw_features |= NETIF_F_SOFT_FEATURES; |
|---|
| 9975 | + dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); |
|---|
| 8737 | 9976 | dev->features |= NETIF_F_SOFT_FEATURES; |
|---|
| 8738 | 9977 | |
|---|
| 8739 | 9978 | if (dev->netdev_ops->ndo_udp_tunnel_add) { |
|---|
| .. | .. |
|---|
| 8811 | 10050 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); |
|---|
| 8812 | 10051 | ret = notifier_to_errno(ret); |
|---|
| 8813 | 10052 | if (ret) { |
|---|
| 8814 | | - rollback_registered(dev); |
|---|
| 8815 | | - rcu_barrier(); |
|---|
| 8816 | | - |
|---|
| 8817 | | - dev->reg_state = NETREG_UNREGISTERED; |
|---|
| 8818 | | - /* We should put the kobject that hold in |
|---|
| 8819 | | - * netdev_unregister_kobject(), otherwise |
|---|
| 8820 | | - * the net device cannot be freed when |
|---|
| 8821 | | - * driver calls free_netdev(), because the |
|---|
| 8822 | | - * kobject is being hold. |
|---|
| 8823 | | - */ |
|---|
| 8824 | | - kobject_put(&dev->dev.kobj); |
|---|
| 10053 | + /* Expect explicit free_netdev() on failure */ |
|---|
| 10054 | + dev->needs_free_netdev = false; |
|---|
| 10055 | + unregister_netdevice_queue(dev, NULL); |
|---|
| 10056 | + goto out; |
|---|
| 8825 | 10057 | } |
|---|
| 8826 | 10058 | /* |
|---|
| 8827 | 10059 | * Prevent userspace races by waiting until the network |
|---|
| .. | .. |
|---|
| 8839 | 10071 | dev->netdev_ops->ndo_uninit(dev); |
|---|
| 8840 | 10072 | if (dev->priv_destructor) |
|---|
| 8841 | 10073 | dev->priv_destructor(dev); |
|---|
| 10074 | +err_free_name: |
|---|
| 10075 | + netdev_name_node_free(dev->name_node); |
|---|
| 8842 | 10076 | goto out; |
|---|
| 8843 | 10077 | } |
|---|
| 8844 | 10078 | EXPORT_SYMBOL(register_netdevice); |
|---|
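The failure handling above is a contract change: a failed NETDEV_REGISTER notifier no longer rolls everything back and drops the kobject inside register_netdevice(); it funnels through unregister_netdevice_queue(), and the caller is expected to call free_netdev() itself (dev->needs_free_netdev is cleared to make that explicit). Paired with the free_netdev() change later in this diff, which defers the free while the device is still NETREG_UNREGISTERING, the caller-side pattern looks roughly like this (hypothetical driver):

```c
/* Hedged caller-side sketch of the new contract: on register failure
 * the driver still owns the struct and calls free_netdev(), which is
 * now safe even while the unregistration unwind is still in flight.
 * "mydrv" is hypothetical.
 */
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>

static int mydrv_create(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(0);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();
	err = register_netdevice(dev);
	if (err)
		free_netdev(dev);	/* deferred if unwind still running */
	rtnl_unlock();
	return err;
}
```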
| .. | .. |
|---|
| 8922 | 10156 | } |
|---|
| 8923 | 10157 | EXPORT_SYMBOL(netdev_refcnt_read); |
|---|
| 8924 | 10158 | |
|---|
| 10159 | +#define WAIT_REFS_MIN_MSECS 1 |
|---|
| 10160 | +#define WAIT_REFS_MAX_MSECS 250 |
|---|
| 8925 | 10161 | /** |
|---|
| 8926 | 10162 | * netdev_wait_allrefs - wait until all references are gone. |
|---|
| 8927 | 10163 | * @dev: target net_device |
|---|
| .. | .. |
|---|
| 8937 | 10173 | static void netdev_wait_allrefs(struct net_device *dev) |
|---|
| 8938 | 10174 | { |
|---|
| 8939 | 10175 | unsigned long rebroadcast_time, warning_time; |
|---|
| 8940 | | - int refcnt; |
|---|
| 10176 | + int wait = 0, refcnt; |
|---|
| 8941 | 10177 | |
|---|
| 8942 | 10178 | linkwatch_forget_dev(dev); |
|---|
| 8943 | 10179 | |
|---|
| .. | .. |
|---|
| 8971 | 10207 | rebroadcast_time = jiffies; |
|---|
| 8972 | 10208 | } |
|---|
| 8973 | 10209 | |
|---|
| 8974 | | - msleep(250); |
|---|
| 10210 | + if (!wait) { |
|---|
| 10211 | + rcu_barrier(); |
|---|
| 10212 | + wait = WAIT_REFS_MIN_MSECS; |
|---|
| 10213 | + } else { |
|---|
| 10214 | + msleep(wait); |
|---|
| 10215 | + wait = min(wait << 1, WAIT_REFS_MAX_MSECS); |
|---|
| 10216 | + } |
|---|
| 8975 | 10217 | |
|---|
| 8976 | 10218 | refcnt = netdev_refcnt_read(dev); |
|---|
| 8977 | 10219 | |
|---|
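The flat msleep(250) becomes an adaptive wait: the first iteration pays only an rcu_barrier(), after which the sleep doubles from 1ms up to the 250ms cap, so a device whose references drop promptly is released far sooner. A toy userspace model of the schedule (not kernel code):

```c
/* Toy model of the wait-doubling above; prints the per-round cost.
 * Round 0 is rcu_barrier() only, then 1, 2, 4, ... 128ms sleeps,
 * capped at 250ms from round 9 on.
 */
#include <stdio.h>

#define WAIT_REFS_MIN_MSECS 1
#define WAIT_REFS_MAX_MSECS 250

int main(void)
{
	int wait = 0;

	for (int round = 0; round < 11; round++) {
		if (!wait) {
			printf("round %d: rcu_barrier() only\n", round);
			wait = WAIT_REFS_MIN_MSECS;
		} else {
			printf("round %d: msleep(%d)\n", round, wait);
			wait = wait << 1 < WAIT_REFS_MAX_MSECS ?
			       wait << 1 : WAIT_REFS_MAX_MSECS;
		}
	}
	return 0;
}
```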
| .. | .. |
|---|
| 9010 | 10252 | void netdev_run_todo(void) |
|---|
| 9011 | 10253 | { |
|---|
| 9012 | 10254 | struct list_head list; |
|---|
| 10255 | +#ifdef CONFIG_LOCKDEP |
|---|
| 10256 | + struct list_head unlink_list; |
|---|
| 10257 | + |
|---|
| 10258 | + list_replace_init(&net_unlink_list, &unlink_list); |
|---|
| 10259 | + |
|---|
| 10260 | + while (!list_empty(&unlink_list)) { |
|---|
| 10261 | + struct net_device *dev = list_first_entry(&unlink_list, |
|---|
| 10262 | + struct net_device, |
|---|
| 10263 | + unlink_list); |
|---|
| 10264 | + list_del_init(&dev->unlink_list); |
|---|
| 10265 | + dev->nested_level = dev->lower_level - 1; |
|---|
| 10266 | + } |
|---|
| 10267 | +#endif |
|---|
| 9013 | 10268 | |
|---|
| 9014 | 10269 | /* Snapshot list, allow later requests */ |
|---|
| 9015 | 10270 | list_replace_init(&net_todo_list, &list); |
|---|
| .. | .. |
|---|
| 9121 | 10376 | } |
|---|
| 9122 | 10377 | EXPORT_SYMBOL(dev_get_stats); |
|---|
| 9123 | 10378 | |
|---|
| 10379 | +/** |
|---|
| 10380 | + * dev_fetch_sw_netstats - get per-cpu network device statistics |
|---|
| 10381 | + * @s: place to store stats |
|---|
| 10382 | + * @netstats: per-cpu network stats to read from |
|---|
| 10383 | + * |
|---|
| 10384 | + * Read per-cpu network statistics and populate the related fields in @s. |
|---|
| 10385 | + */ |
|---|
| 10386 | +void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, |
|---|
| 10387 | + const struct pcpu_sw_netstats __percpu *netstats) |
|---|
| 10388 | +{ |
|---|
| 10389 | + int cpu; |
|---|
| 10390 | + |
|---|
| 10391 | + for_each_possible_cpu(cpu) { |
|---|
| 10392 | + const struct pcpu_sw_netstats *stats; |
|---|
| 10393 | + struct pcpu_sw_netstats tmp; |
|---|
| 10394 | + unsigned int start; |
|---|
| 10395 | + |
|---|
| 10396 | + stats = per_cpu_ptr(netstats, cpu); |
|---|
| 10397 | + do { |
|---|
| 10398 | + start = u64_stats_fetch_begin_irq(&stats->syncp); |
|---|
| 10399 | + tmp.rx_packets = stats->rx_packets; |
|---|
| 10400 | + tmp.rx_bytes = stats->rx_bytes; |
|---|
| 10401 | + tmp.tx_packets = stats->tx_packets; |
|---|
| 10402 | + tmp.tx_bytes = stats->tx_bytes; |
|---|
| 10403 | + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); |
|---|
| 10404 | + |
|---|
| 10405 | + s->rx_packets += tmp.rx_packets; |
|---|
| 10406 | + s->rx_bytes += tmp.rx_bytes; |
|---|
| 10407 | + s->tx_packets += tmp.tx_packets; |
|---|
| 10408 | + s->tx_bytes += tmp.tx_bytes; |
|---|
| 10409 | + } |
|---|
| 10410 | +} |
|---|
| 10411 | +EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); |
|---|
| 10412 | + |
|---|
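dev_fetch_sw_netstats() folds the u64_stats snapshot loop that many drivers open-coded into one exported helper. The intended consumer is a driver whose per-cpu counters live behind dev->tstats; a hedged sketch (the driver name is hypothetical, the helper calls are real):

```c
/* Hedged sketch: ndo_get_stats64 built on the new helper.  "myvdev"
 * is hypothetical; dev->tstats is the per-cpu pcpu_sw_netstats slot
 * in struct net_device.
 */
#include <linux/netdevice.h>

static void myvdev_get_stats64(struct net_device *dev,
			       struct rtnl_link_stats64 *s)
{
	netdev_stats_to_stats64(s, &dev->stats);  /* slow-path counters */
	dev_fetch_sw_netstats(s, dev->tstats);    /* per-cpu rx/tx counts */
}
```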
| 9124 | 10413 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) |
|---|
| 9125 | 10414 | { |
|---|
| 9126 | 10415 | struct netdev_queue *queue = dev_ingress_queue(dev); |
|---|
| .. | .. |
|---|
| 9222 | 10511 | dev->gso_max_segs = GSO_MAX_SEGS; |
|---|
| 9223 | 10512 | dev->upper_level = 1; |
|---|
| 9224 | 10513 | dev->lower_level = 1; |
|---|
| 10514 | +#ifdef CONFIG_LOCKDEP |
|---|
| 10515 | + dev->nested_level = 0; |
|---|
| 10516 | + INIT_LIST_HEAD(&dev->unlink_list); |
|---|
| 10517 | +#endif |
|---|
| 9225 | 10518 | |
|---|
| 9226 | 10519 | INIT_LIST_HEAD(&dev->napi_list); |
|---|
| 9227 | 10520 | INIT_LIST_HEAD(&dev->unreg_list); |
|---|
| .. | .. |
|---|
| 9231 | 10524 | INIT_LIST_HEAD(&dev->adj_list.lower); |
|---|
| 9232 | 10525 | INIT_LIST_HEAD(&dev->ptype_all); |
|---|
| 9233 | 10526 | INIT_LIST_HEAD(&dev->ptype_specific); |
|---|
| 10527 | + INIT_LIST_HEAD(&dev->net_notifier_list); |
|---|
| 9234 | 10528 | #ifdef CONFIG_NET_SCHED |
|---|
| 9235 | 10529 | hash_init(dev->qdisc_hash); |
|---|
| 9236 | 10530 | #endif |
|---|
| .. | .. |
|---|
| 9288 | 10582 | struct napi_struct *p, *n; |
|---|
| 9289 | 10583 | |
|---|
| 9290 | 10584 | might_sleep(); |
|---|
| 10585 | + |
|---|
| 10586 | + /* When called immediately after register_netdevice() failed the unwind |
|---|
| 10587 | + * handling may still be dismantling the device. Handle that case by |
|---|
| 10588 | + * deferring the free. |
|---|
| 10589 | + */ |
|---|
| 10590 | + if (dev->reg_state == NETREG_UNREGISTERING) { |
|---|
| 10591 | + ASSERT_RTNL(); |
|---|
| 10592 | + dev->needs_free_netdev = true; |
|---|
| 10593 | + return; |
|---|
| 10594 | + } |
|---|
| 10595 | + |
|---|
| 9291 | 10596 | netif_free_tx_queues(dev); |
|---|
| 9292 | 10597 | netif_free_rx_queues(dev); |
|---|
| 9293 | 10598 | |
|---|
| .. | .. |
|---|
| 9301 | 10606 | |
|---|
| 9302 | 10607 | free_percpu(dev->pcpu_refcnt); |
|---|
| 9303 | 10608 | dev->pcpu_refcnt = NULL; |
|---|
| 10609 | + free_percpu(dev->xdp_bulkq); |
|---|
| 10610 | + dev->xdp_bulkq = NULL; |
|---|
| 9304 | 10611 | |
|---|
| 9305 | 10612 | /* Compatibility with error handling in drivers */ |
|---|
| 9306 | 10613 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
|---|
| .. | .. |
|---|
| 9352 | 10659 | if (head) { |
|---|
| 9353 | 10660 | list_move_tail(&dev->unreg_list, head); |
|---|
| 9354 | 10661 | } else { |
|---|
| 9355 | | - rollback_registered(dev); |
|---|
| 9356 | | - /* Finish processing unregister after unlock */ |
|---|
| 9357 | | - net_set_todo(dev); |
|---|
| 10662 | + LIST_HEAD(single); |
|---|
| 10663 | + |
|---|
| 10664 | + list_add(&dev->unreg_list, &single); |
|---|
| 10665 | + unregister_netdevice_many(&single); |
|---|
| 9358 | 10666 | } |
|---|
| 9359 | 10667 | } |
|---|
| 9360 | 10668 | EXPORT_SYMBOL(unregister_netdevice_queue); |
|---|
| .. | .. |
|---|
| 9368 | 10676 | */ |
|---|
| 9369 | 10677 | void unregister_netdevice_many(struct list_head *head) |
|---|
| 9370 | 10678 | { |
|---|
| 9371 | | - struct net_device *dev; |
|---|
| 10679 | + struct net_device *dev, *tmp; |
|---|
| 10680 | + LIST_HEAD(close_head); |
|---|
| 9372 | 10681 | |
|---|
| 9373 | | - if (!list_empty(head)) { |
|---|
| 9374 | | - rollback_registered_many(head); |
|---|
| 9375 | | - list_for_each_entry(dev, head, unreg_list) |
|---|
| 9376 | | - net_set_todo(dev); |
|---|
| 9377 | | - list_del(head); |
|---|
| 10682 | + BUG_ON(dev_boot_phase); |
|---|
| 10683 | + ASSERT_RTNL(); |
|---|
| 10684 | + |
|---|
| 10685 | + if (list_empty(head)) |
|---|
| 10686 | + return; |
|---|
| 10687 | + |
|---|
| 10688 | + list_for_each_entry_safe(dev, tmp, head, unreg_list) { |
|---|
| 10689 | + /* Some devices call without registering |
|---|
| 10690 | + * for initialization unwind. Remove those |
|---|
| 10691 | + * devices and proceed with the remaining. |
|---|
| 10692 | + */ |
|---|
| 10693 | + if (dev->reg_state == NETREG_UNINITIALIZED) { |
|---|
| 10694 | + pr_debug("unregister_netdevice: device %s/%p never was registered\n", |
|---|
| 10695 | + dev->name, dev); |
|---|
| 10696 | + |
|---|
| 10697 | + WARN_ON(1); |
|---|
| 10698 | + list_del(&dev->unreg_list); |
|---|
| 10699 | + continue; |
|---|
| 10700 | + } |
|---|
| 10701 | + dev->dismantle = true; |
|---|
| 10702 | + BUG_ON(dev->reg_state != NETREG_REGISTERED); |
|---|
| 9378 | 10703 | } |
|---|
| 10704 | + |
|---|
| 10705 | + /* If device is running, close it first. */ |
|---|
| 10706 | + list_for_each_entry(dev, head, unreg_list) |
|---|
| 10707 | + list_add_tail(&dev->close_list, &close_head); |
|---|
| 10708 | + dev_close_many(&close_head, true); |
|---|
| 10709 | + |
|---|
| 10710 | + list_for_each_entry(dev, head, unreg_list) { |
|---|
| 10711 | + /* And unlink it from device chain. */ |
|---|
| 10712 | + unlist_netdevice(dev); |
|---|
| 10713 | + |
|---|
| 10714 | + dev->reg_state = NETREG_UNREGISTERING; |
|---|
| 10715 | + } |
|---|
| 10716 | + flush_all_backlogs(); |
|---|
| 10717 | + |
|---|
| 10718 | + synchronize_net(); |
|---|
| 10719 | + |
|---|
| 10720 | + list_for_each_entry(dev, head, unreg_list) { |
|---|
| 10721 | + struct sk_buff *skb = NULL; |
|---|
| 10722 | + |
|---|
| 10723 | + /* Shutdown queueing discipline. */ |
|---|
| 10724 | + dev_shutdown(dev); |
|---|
| 10725 | + |
|---|
| 10726 | + dev_xdp_uninstall(dev); |
|---|
| 10727 | + |
|---|
| 10728 | + /* Notify protocols, that we are about to destroy |
|---|
| 10729 | + * this device. They should clean all the things. |
|---|
| 10730 | + */ |
|---|
| 10731 | + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
|---|
| 10732 | + |
|---|
| 10733 | + if (!dev->rtnl_link_ops || |
|---|
| 10734 | + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
|---|
| 10735 | + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, |
|---|
| 10736 | + GFP_KERNEL, NULL, 0); |
|---|
| 10737 | + |
|---|
| 10738 | + /* |
|---|
| 10739 | + * Flush the unicast and multicast chains |
|---|
| 10740 | + */ |
|---|
| 10741 | + dev_uc_flush(dev); |
|---|
| 10742 | + dev_mc_flush(dev); |
|---|
| 10743 | + |
|---|
| 10744 | + netdev_name_node_alt_flush(dev); |
|---|
| 10745 | + netdev_name_node_free(dev->name_node); |
|---|
| 10746 | + |
|---|
| 10747 | + if (dev->netdev_ops->ndo_uninit) |
|---|
| 10748 | + dev->netdev_ops->ndo_uninit(dev); |
|---|
| 10749 | + |
|---|
| 10750 | + if (skb) |
|---|
| 10751 | + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); |
|---|
| 10752 | + |
|---|
| 10753 | + /* Notifier chain MUST detach us all upper devices. */ |
|---|
| 10754 | + WARN_ON(netdev_has_any_upper_dev(dev)); |
|---|
| 10755 | + WARN_ON(netdev_has_any_lower_dev(dev)); |
|---|
| 10756 | + |
|---|
| 10757 | + /* Remove entries from kobject tree */ |
|---|
| 10758 | + netdev_unregister_kobject(dev); |
|---|
| 10759 | +#ifdef CONFIG_XPS |
|---|
| 10760 | + /* Remove XPS queueing entries */ |
|---|
| 10761 | + netif_reset_xps_queues_gt(dev, 0); |
|---|
| 10762 | +#endif |
|---|
| 10763 | + } |
|---|
| 10764 | + |
|---|
| 10765 | + synchronize_net(); |
|---|
| 10766 | + |
|---|
| 10767 | + list_for_each_entry(dev, head, unreg_list) { |
|---|
| 10768 | + dev_put(dev); |
|---|
| 10769 | + net_set_todo(dev); |
|---|
| 10770 | + } |
|---|
| 10771 | + |
|---|
| 10772 | + list_del(head); |
|---|
| 9379 | 10773 | } |
|---|
| 9380 | 10774 | EXPORT_SYMBOL(unregister_netdevice_many); |
|---|
| 9381 | 10775 | |
|---|
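With rollback_registered{,_many}() folded in, unregister_netdevice_many() is now the single teardown path and queues the todo work itself. Batching remains the point: every device in the list shares one dev_close_many(), one flush_all_backlogs() and the two synchronize_net() calls. A hedged sketch of the pattern from a driver's perspective (the list bookkeeping is hypothetical):

```c
/* Hedged sketch: batched teardown under RTNL so the per-batch costs
 * are paid once for N devices.  struct mydrv_priv and its list are
 * hypothetical driver bookkeeping.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

struct mydrv_priv {
	struct net_device *netdev;
	struct list_head list_node;
};

static void mydrv_destroy_all(struct list_head *devices)
{
	struct mydrv_priv *p, *tmp;
	LIST_HEAD(unreg);

	rtnl_lock();
	list_for_each_entry_safe(p, tmp, devices, list_node)
		unregister_netdevice_queue(p->netdev, &unreg);
	unregister_netdevice_many(&unreg);	/* one flush for the batch */
	rtnl_unlock();
}
```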
| .. | .. |
|---|
| 9414 | 10808 | |
|---|
| 9415 | 10809 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) |
|---|
| 9416 | 10810 | { |
|---|
| 10811 | + struct net *net_old = dev_net(dev); |
|---|
| 9417 | 10812 | int err, new_nsid, new_ifindex; |
|---|
| 9418 | 10813 | |
|---|
| 9419 | 10814 | ASSERT_RTNL(); |
|---|
| .. | .. |
|---|
| 9429 | 10824 | |
|---|
| 9430 | 10825 | /* Get out if there is nothing todo */ |
|---|
| 9431 | 10826 | err = 0; |
|---|
| 9432 | | - if (net_eq(dev_net(dev), net)) |
|---|
| 10827 | + if (net_eq(net_old, net)) |
|---|
| 9433 | 10828 | goto out; |
|---|
| 9434 | 10829 | |
|---|
| 9435 | 10830 | /* Pick the destination device name, and ensure |
|---|
| .. | .. |
|---|
| 9490 | 10885 | kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); |
|---|
| 9491 | 10886 | netdev_adjacent_del_links(dev); |
|---|
| 9492 | 10887 | |
|---|
| 10888 | + /* Move per-net netdevice notifiers that are following the netdevice */ |
|---|
| 10889 | + move_netdevice_notifiers_dev_net(dev, net); |
|---|
| 10890 | + |
|---|
| 9493 | 10891 | /* Actually switch the network namespace */ |
|---|
| 9494 | 10892 | dev_net_set(dev, net); |
|---|
| 9495 | 10893 | dev->ifindex = new_ifindex; |
|---|
| .. | .. |
|---|
| 9500 | 10898 | |
|---|
| 9501 | 10899 | /* Fixup kobjects */ |
|---|
| 9502 | 10900 | err = device_rename(&dev->dev, dev->name); |
|---|
| 10901 | + WARN_ON(err); |
|---|
| 10902 | + |
|---|
| 10903 | + /* Adapt owner in case owning user namespace of target network |
|---|
| 10904 | + * namespace is different from the original one. |
|---|
| 10905 | + */ |
|---|
| 10906 | + err = netdev_change_owner(dev, net_old, net); |
|---|
| 9503 | 10907 | WARN_ON(err); |
|---|
| 9504 | 10908 | |
|---|
| 9505 | 10909 | /* Add the device back in the hashes */ |
|---|
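Two fixups join the namespace switch here: per-net netdevice notifiers that were registered against this device move with it, and netdev_change_owner() re-chowns the sysfs entries when the target namespace belongs to a different user namespace. The notifier half pairs with register_netdevice_notifier_dev_net(); a hedged sketch of the registration side (the callback is hypothetical):

```c
/* Hedged sketch: a per-device, per-netns notifier.  The third argument
 * is the tracking cookie that lets move_netdevice_notifiers_dev_net()
 * re-home the registration when the device changes namespaces.
 * "myvdev_event" is hypothetical.
 */
#include <linux/netdevice.h>

static int myvdev_event(struct notifier_block *nb,
			unsigned long event, void *ptr);

static struct netdev_net_notifier myvdev_nn;
static struct notifier_block myvdev_nb = {
	.notifier_call = myvdev_event,
};

static int myvdev_watch(struct net_device *dev)
{
	return register_netdevice_notifier_dev_net(dev, &myvdev_nb,
						   &myvdev_nn);
}
```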
| .. | .. |
|---|
| 9566 | 10970 | |
|---|
| 9567 | 10971 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
|---|
| 9568 | 10972 | local_irq_enable(); |
|---|
| 9569 | | - preempt_check_resched_rt(); |
|---|
| 9570 | 10973 | |
|---|
| 9571 | 10974 | #ifdef CONFIG_RPS |
|---|
| 9572 | 10975 | remsd = oldsd->rps_ipi_list; |
|---|
| .. | .. |
|---|
| 9580 | 10983 | netif_rx_ni(skb); |
|---|
| 9581 | 10984 | input_queue_head_incr(oldsd); |
|---|
| 9582 | 10985 | } |
|---|
| 9583 | | - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
|---|
| 10986 | + while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { |
|---|
| 9584 | 10987 | netif_rx_ni(skb); |
|---|
| 9585 | 10988 | input_queue_head_incr(oldsd); |
|---|
| 9586 | | - } |
|---|
| 9587 | | - while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { |
|---|
| 9588 | | - kfree_skb(skb); |
|---|
| 9589 | 10989 | } |
|---|
| 9590 | 10990 | |
|---|
| 9591 | 10991 | return 0; |
|---|
| .. | .. |
|---|
| 9636 | 11036 | static int __net_init netdev_init(struct net *net) |
|---|
| 9637 | 11037 | { |
|---|
| 9638 | 11038 | BUILD_BUG_ON(GRO_HASH_BUCKETS > |
|---|
| 9639 | | - 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask)); |
|---|
| 11039 | + 8 * sizeof_field(struct napi_struct, gro_bitmask)); |
|---|
| 9640 | 11040 | |
|---|
| 9641 | 11041 | if (net != &init_net) |
|---|
| 9642 | 11042 | INIT_LIST_HEAD(&net->dev_base_head); |
|---|
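FIELD_SIZEOF() was renamed treewide to sizeof_field(); the expansion is unchanged. For reference, the macro as defined in include/linux/stddef.h:

```c
/* Identical expansion to the old FIELD_SIZEOF(); only the name changed. */
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
```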
| .. | .. |
|---|
| 9648 | 11048 | net->dev_index_head = netdev_create_hash(); |
|---|
| 9649 | 11049 | if (net->dev_index_head == NULL) |
|---|
| 9650 | 11050 | goto err_idx; |
|---|
| 11051 | + |
|---|
| 11052 | + RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); |
|---|
| 9651 | 11053 | |
|---|
| 9652 | 11054 | return 0; |
|---|
| 9653 | 11055 | |
|---|
| .. | .. |
|---|
| 9770 | 11172 | continue; |
|---|
| 9771 | 11173 | |
|---|
| 9772 | 11174 | /* Leave virtual devices for the generic cleanup */ |
|---|
| 9773 | | - if (dev->rtnl_link_ops) |
|---|
| 11175 | + if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund) |
|---|
| 9774 | 11176 | continue; |
|---|
| 9775 | 11177 | |
|---|
| 9776 | 11178 | /* Push remaining network devices to init_net */ |
|---|
| .. | .. |
|---|
| 9897 | 11299 | |
|---|
| 9898 | 11300 | INIT_WORK(flush, flush_backlog); |
|---|
| 9899 | 11301 | |
|---|
| 9900 | | - skb_queue_head_init_raw(&sd->input_pkt_queue); |
|---|
| 9901 | | - skb_queue_head_init_raw(&sd->process_queue); |
|---|
| 9902 | | - skb_queue_head_init_raw(&sd->tofree_queue); |
|---|
| 11302 | + skb_queue_head_init(&sd->input_pkt_queue); |
|---|
| 11303 | + skb_queue_head_init(&sd->process_queue); |
|---|
| 9903 | 11304 | #ifdef CONFIG_XFRM_OFFLOAD |
|---|
| 9904 | 11305 | skb_queue_head_init(&sd->xfrm_backlog); |
|---|
| 9905 | 11306 | #endif |
|---|