From d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 02:45:28 +0000 Subject: [PATCH] add boot partition size --- kernel/drivers/net/bonding/bond_main.c | 1329 ++++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 932 insertions(+), 397 deletions(-) diff --git a/kernel/drivers/net/bonding/bond_main.c b/kernel/drivers/net/bonding/bond_main.c index 2d70cdd..f38a6ce 100644 --- a/kernel/drivers/net/bonding/bond_main.c +++ b/kernel/drivers/net/bonding/bond_main.c @@ -41,6 +41,8 @@ #include <linux/in.h> #include <net/ip.h> #include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/slab.h> @@ -77,7 +79,7 @@ #include <net/pkt_sched.h> #include <linux/rculist.h> #include <net/flow_dissector.h> -#include <net/switchdev.h> +#include <net/xfrm.h> #include <net/bonding.h> #include <net/bond_3ad.h> #include <net/bond_alb.h> @@ -201,6 +203,51 @@ unsigned int bond_net_id __read_mostly; +static const struct flow_dissector_key flow_keys_bonding_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_TIPC, + .offset = offsetof(struct flow_keys, addrs.tipckey), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, + { + .key_id = FLOW_DISSECTOR_KEY_ICMP, + .offset = offsetof(struct flow_keys, icmp), + }, + { + .key_id = FLOW_DISSECTOR_KEY_VLAN, + .offset = offsetof(struct flow_keys, vlan), + }, + { + .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, + .offset = offsetof(struct flow_keys, tags), + }, + { + .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, + .offset = offsetof(struct flow_keys, keyid), + }, +}; + +static struct flow_dissector flow_keys_bonding __read_mostly; + /*-------------------------- Forward declarations ---------------------------*/ static int bond_init(struct net_device *bond_dev); @@ -232,8 +279,6 @@ return names[mode]; } -/*---------------------------------- VLAN -----------------------------------*/ - /** * bond_dev_queue_xmit - Prepare skb for xmit. * @@ -241,7 +286,7 @@ * @skb: hw accel VLAN tagged skb to transmit * @slave_dev: slave that is supposed to xmit this skbuff */ -void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, +netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) { skb->dev = slave_dev; @@ -251,10 +296,12 @@ skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); if (unlikely(netpoll_tx_running(bond->dev))) - bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); - else - dev_queue_xmit(skb); + return bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); + + return dev_queue_xmit(skb); } + +/*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, * We don't protect the slave list iteration with a lock because: @@ -275,6 +322,7 @@ /** * bond_vlan_rx_add_vid - Propagates adding an id to slaves * @bond_dev: bonding net device that got called + * @proto: network protocol ID * @vid: vlan id being added */ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, @@ -308,6 +356,7 @@ /** * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves * @bond_dev: bonding net device that got called + * @proto: network protocol ID * @vid: vlan id being removed */ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, @@ -325,6 +374,225 @@ return 0; } + +/*---------------------------------- XFRM -----------------------------------*/ + +#ifdef CONFIG_XFRM_OFFLOAD +/** + * bond_ipsec_add_sa - program device with a security association + * @xs: pointer to transformer state struct + **/ +static int bond_ipsec_add_sa(struct xfrm_state *xs) +{ + struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; + struct bonding *bond; + struct slave *slave; + int err; + + if (!bond_dev) + return -EINVAL; + + rcu_read_lock(); + bond = netdev_priv(bond_dev); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) { + rcu_read_unlock(); + return -ENODEV; + } + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(slave->dev)) { + slave_warn(bond_dev, slave->dev, "Slave does not support ipsec offload\n"); + rcu_read_unlock(); + return -EINVAL; + } + + ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC); + if (!ipsec) { + rcu_read_unlock(); + return -ENOMEM; + } + xs->xso.real_dev = slave->dev; + + err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs); + if (!err) { + ipsec->xs = xs; + INIT_LIST_HEAD(&ipsec->list); + spin_lock_bh(&bond->ipsec_lock); + list_add(&ipsec->list, &bond->ipsec_list); + spin_unlock_bh(&bond->ipsec_lock); + } else { + kfree(ipsec); + } + rcu_read_unlock(); + return err; +} + +static void bond_ipsec_add_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) + goto out; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(slave->dev)) { + spin_lock_bh(&bond->ipsec_lock); + if (!list_empty(&bond->ipsec_list)) + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_add\n", + __func__); + spin_unlock_bh(&bond->ipsec_lock); + goto out; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + ipsec->xs->xso.real_dev = slave->dev; + if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs)) { + slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__); + ipsec->xs->xso.real_dev = NULL; + } + } + spin_unlock_bh(&bond->ipsec_lock); +out: + rcu_read_unlock(); +} + +/** + * bond_ipsec_del_sa - clear out this specific SA + * @xs: pointer to transformer state struct + **/ +static void bond_ipsec_del_sa(struct xfrm_state *xs) +{ + struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; + struct bonding *bond; + struct slave *slave; + + if (!bond_dev) + return; + + rcu_read_lock(); + bond = netdev_priv(bond_dev); + slave = rcu_dereference(bond->curr_active_slave); + + if (!slave) + goto out; + + if (!xs->xso.real_dev) + goto out; + + WARN_ON(xs->xso.real_dev != slave->dev); + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(slave->dev)) { + slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__); + goto out; + } + + slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs); +out: + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (ipsec->xs == xs) { + list_del(&ipsec->list); + kfree(ipsec); + break; + } + } + spin_unlock_bh(&bond->ipsec_lock); + rcu_read_unlock(); +} + +static void bond_ipsec_del_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) { + rcu_read_unlock(); + return; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (!ipsec->xs->xso.real_dev) + continue; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(slave->dev)) { + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_delete\n", + __func__); + } else { + slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + } + ipsec->xs->xso.real_dev = NULL; + } + spin_unlock_bh(&bond->ipsec_lock); + rcu_read_unlock(); +} + +/** + * bond_ipsec_offload_ok - can this packet use the xfrm hw offload + * @skb: current data packet + * @xs: pointer to transformer state struct + **/ +static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) +{ + struct net_device *bond_dev = xs->xso.dev; + struct net_device *real_dev; + struct slave *curr_active; + struct bonding *bond; + int err; + + bond = netdev_priv(bond_dev); + rcu_read_lock(); + curr_active = rcu_dereference(bond->curr_active_slave); + real_dev = curr_active->dev; + + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + err = false; + goto out; + } + + if (!xs->xso.real_dev) { + err = false; + goto out; + } + + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_offload_ok || + netif_is_bond_master(real_dev)) { + err = false; + goto out; + } + + err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); +out: + rcu_read_unlock(); + return err; +} + +static const struct xfrmdev_ops bond_xfrmdev_ops = { + .xdo_dev_state_add = bond_ipsec_add_sa, + .xdo_dev_state_delete = bond_ipsec_del_sa, + .xdo_dev_offload_ok = bond_ipsec_offload_ok, +}; +#endif /* CONFIG_XFRM_OFFLOAD */ /*------------------------------- Link status -------------------------------*/ @@ -559,12 +827,8 @@ dev_uc_unsync(slave_dev, bond_dev); dev_mc_unsync(slave_dev, bond_dev); - if (BOND_MODE(bond) == BOND_MODE_8023AD) { - /* del lacpdu mc addr from mc list */ - u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; - - dev_mc_del(slave_dev, lacpdu_multicast); - } + if (BOND_MODE(bond) == BOND_MODE_8023AD) + dev_mc_del(slave_dev, lacpdu_mcast_addr); } /*--------------------------- Active slave change ---------------------------*/ @@ -584,7 +848,8 @@ if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(old_active->dev, -1); - bond_hw_addr_flush(bond->dev, old_active->dev); + if (bond->dev->flags & IFF_UP) + bond_hw_addr_flush(bond->dev, old_active->dev); } if (new_active) { @@ -595,10 +860,12 @@ if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(new_active->dev, 1); - netif_addr_lock_bh(bond->dev); - dev_uc_sync(new_active->dev, bond->dev); - dev_mc_sync(new_active->dev, bond->dev); - netif_addr_unlock_bh(bond->dev); + if (bond->dev->flags & IFF_UP) { + netif_addr_lock_bh(bond->dev); + dev_uc_sync(new_active->dev, bond->dev); + dev_mc_sync(new_active->dev, bond->dev); + netif_addr_unlock_bh(bond->dev); + } } } @@ -609,14 +876,21 @@ * * Should be called with RTNL held. */ -static void bond_set_dev_addr(struct net_device *bond_dev, - struct net_device *slave_dev) +static int bond_set_dev_addr(struct net_device *bond_dev, + struct net_device *slave_dev) { - netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->name=%s slave_dev->addr_len=%d\n", - bond_dev, slave_dev, slave_dev->name, slave_dev->addr_len); + int err; + + slave_dbg(bond_dev, slave_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", + bond_dev, slave_dev, slave_dev->addr_len); + err = dev_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL); + if (err) + return err; + memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); bond_dev->addr_assign_type = NET_ADDR_STOLEN; call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); + return 0; } static struct slave *bond_get_old_active(struct bonding *bond, @@ -652,8 +926,12 @@ switch (bond->params.fail_over_mac) { case BOND_FOM_ACTIVE: - if (new_active) - bond_set_dev_addr(bond->dev, new_active->dev); + if (new_active) { + rv = bond_set_dev_addr(bond->dev, new_active->dev); + if (rv) + slave_err(bond->dev, new_active->dev, "Error %d setting bond MAC from slave\n", + -rv); + } break; case BOND_FOM_FOLLOW: /* if new_active && old_active, swap them @@ -680,10 +958,10 @@ } rv = dev_set_mac_address(new_active->dev, - (struct sockaddr *)&ss); + (struct sockaddr *)&ss, NULL); if (rv) { - netdev_err(bond->dev, "Error %d setting MAC of slave %s\n", - -rv, new_active->dev->name); + slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n", + -rv); goto out; } @@ -695,10 +973,10 @@ ss.ss_family = old_active->dev->type; rv = dev_set_mac_address(old_active->dev, - (struct sockaddr *)&ss); + (struct sockaddr *)&ss, NULL); if (rv) - netdev_err(bond->dev, "Error %d setting MAC of slave %s\n", - -rv, new_active->dev->name); + slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n", + -rv); out: break; default: @@ -783,6 +1061,8 @@ rcu_read_unlock(); if (!slave || !bond->send_peer_notif || + bond->send_peer_notif % + max(1, bond->params.peer_notif_delay) != 0 || !netif_carrier_ok(bond->dev) || test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return false; @@ -796,7 +1076,7 @@ /** * change_active_interface - change the active slave into the specified one * @bond: our bonding struct - * @new: the new slave to make the active one + * @new_active: the new slave to make the active one * * Set the new slave to the bond's settings and unset them on the old * curr_active_slave. @@ -819,14 +1099,17 @@ if (old_active == new_active) return; +#ifdef CONFIG_XFRM_OFFLOAD + bond_ipsec_del_sa_all(bond); +#endif /* CONFIG_XFRM_OFFLOAD */ + if (new_active) { new_active->last_link_up = jiffies; if (new_active->link == BOND_LINK_BACK) { if (bond_uses_primary(bond)) { - netdev_info(bond->dev, "making interface %s the new active one %d ms earlier\n", - new_active->dev->name, - (bond->params.updelay - new_active->delay) * bond->params.miimon); + slave_info(bond->dev, new_active->dev, "making interface the new active one %d ms earlier\n", + (bond->params.updelay - new_active->delay) * bond->params.miimon); } new_active->delay = 0; @@ -840,8 +1123,7 @@ bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); } else { if (bond_uses_primary(bond)) { - netdev_info(bond->dev, "making interface %s the new active one\n", - new_active->dev->name); + slave_info(bond->dev, new_active->dev, "making interface the new active one\n"); } } } @@ -878,17 +1160,24 @@ if (netif_running(bond->dev)) { bond->send_peer_notif = - bond->params.num_peer_notif; + bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay); should_notify_peers = bond_should_notify_peers(bond); } call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); - if (should_notify_peers) + if (should_notify_peers) { + bond->send_peer_notif--; call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + } } } + +#ifdef CONFIG_XFRM_OFFLOAD + bond_ipsec_add_sa_all(bond); +#endif /* CONFIG_XFRM_OFFLOAD */ /* resend IGMP joins since active slave has changed or * all were sent on curr_active_slave. @@ -929,7 +1218,7 @@ return; if (netif_carrier_ok(bond->dev)) - netdev_info(bond->dev, "first active interface up!\n"); + netdev_info(bond->dev, "active interface up!\n"); else netdev_info(bond->dev, "now running without any active interface!\n"); } @@ -963,7 +1252,8 @@ return; slave->np = NULL; - __netpoll_free_async(np); + + __netpoll_free(np); } static void bond_poll_controller(struct net_device *bond_dev) @@ -1066,12 +1356,20 @@ #define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_RXCSUM | NETIF_F_ALL_TSO) +#define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_ALL_TSO) + + static void bond_compute_features(struct bonding *bond) { unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; netdev_features_t vlan_features = BOND_VLAN_FEATURES; netdev_features_t enc_features = BOND_ENC_FEATURES; +#ifdef CONFIG_XFRM_OFFLOAD + netdev_features_t xfrm_features = BOND_XFRM_FEATURES; +#endif /* CONFIG_XFRM_OFFLOAD */ + netdev_features_t mpls_features = BOND_MPLS_FEATURES; struct net_device *bond_dev = bond->dev; struct list_head *iter; struct slave *slave; @@ -1082,6 +1380,7 @@ if (!bond_has_slaves(bond)) goto done; vlan_features &= NETIF_F_ALL_FOR_ALL; + mpls_features &= NETIF_F_ALL_FOR_ALL; bond_for_each_slave(bond, slave, iter) { vlan_features = netdev_increment_features(vlan_features, @@ -1090,6 +1389,17 @@ enc_features = netdev_increment_features(enc_features, slave->dev->hw_enc_features, BOND_ENC_FEATURES); + +#ifdef CONFIG_XFRM_OFFLOAD + xfrm_features = netdev_increment_features(xfrm_features, + slave->dev->hw_enc_features, + BOND_XFRM_FEATURES); +#endif /* CONFIG_XFRM_OFFLOAD */ + + mpls_features = netdev_increment_features(mpls_features, + slave->dev->mpls_features, + BOND_MPLS_FEATURES); + dst_release_flag &= slave->dev->priv_flags; if (slave->dev->hard_header_len > max_hard_header_len) max_hard_header_len = slave->dev->hard_header_len; @@ -1105,6 +1415,10 @@ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX | NETIF_F_GSO_UDP_L4; +#ifdef CONFIG_XFRM_OFFLOAD + bond_dev->hw_enc_features |= xfrm_features; +#endif /* CONFIG_XFRM_OFFLOAD */ + bond_dev->mpls_features = mpls_features; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); @@ -1194,7 +1508,7 @@ skb->dev = bond->dev; if (BOND_MODE(bond) == BOND_MODE_ALB && - bond->dev->priv_flags & IFF_BRIDGE_PORT && + netif_is_bridge_port(bond->dev) && skb->pkt_type == PACKET_HOST) { if (unlikely(skb_cow_head(skb, @@ -1388,15 +1702,14 @@ if (!bond->params.use_carrier && slave_dev->ethtool_ops->get_link == NULL && slave_ops->ndo_do_ioctl == NULL) { - netdev_warn(bond_dev, "no link monitoring support for %s\n", - slave_dev->name); + slave_warn(bond_dev, slave_dev, "no link monitoring support\n"); } /* already in-use? */ if (netdev_is_rx_handler_busy(slave_dev)) { NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved"); - netdev_err(bond_dev, - "Error: Device is in use and cannot be enslaved\n"); + slave_err(bond_dev, slave_dev, + "Error: Device is in use and cannot be enslaved\n"); return -EBUSY; } @@ -1409,22 +1722,20 @@ /* vlan challenged mutual exclusion */ /* no need to lock since we're protected by rtnl_lock */ if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { - netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n", - slave_dev->name); + slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n"); if (vlan_uses_dev(bond_dev)) { NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond"); - netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n", - slave_dev->name, bond_dev->name); + slave_err(bond_dev, slave_dev, "Error: cannot enslave VLAN challenged slave on VLAN enabled bond\n"); return -EPERM; } else { - netdev_warn(bond_dev, "enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", - slave_dev->name, slave_dev->name, - bond_dev->name); + slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n"); } } else { - netdev_dbg(bond_dev, "%s is !NETIF_F_VLAN_CHALLENGED\n", - slave_dev->name); + slave_dbg(bond_dev, slave_dev, "is !NETIF_F_VLAN_CHALLENGED\n"); } + + if (slave_dev->features & NETIF_F_HW_ESP) + slave_dbg(bond_dev, slave_dev, "is esp-hw-offload capable\n"); /* Old ifenslave binaries are no longer supported. These can * be identified with moderate accuracy by the state of the slave: @@ -1433,8 +1744,7 @@ */ if (slave_dev->flags & IFF_UP) { NL_SET_ERR_MSG(extack, "Device can not be enslaved while up"); - netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n", - slave_dev->name); + slave_err(bond_dev, slave_dev, "slave is up - this may be due to an out of date ifenslave\n"); return -EPERM; } @@ -1447,14 +1757,14 @@ */ if (!bond_has_slaves(bond)) { if (bond_dev->type != slave_dev->type) { - netdev_dbg(bond_dev, "change device type from %d to %d\n", - bond_dev->type, slave_dev->type); + slave_dbg(bond_dev, slave_dev, "change device type from %d to %d\n", + bond_dev->type, slave_dev->type); res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, bond_dev); res = notifier_to_errno(res); if (res) { - netdev_err(bond_dev, "refused to change device type\n"); + slave_err(bond_dev, slave_dev, "refused to change device type\n"); return -EBUSY; } @@ -1474,31 +1784,31 @@ } } else if (bond_dev->type != slave_dev->type) { NL_SET_ERR_MSG(extack, "Device type is different from other slaves"); - netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n", - slave_dev->name, slave_dev->type, bond_dev->type); + slave_err(bond_dev, slave_dev, "ether type (%d) is different from other slaves (%d), can not enslave it\n", + slave_dev->type, bond_dev->type); return -EINVAL; } if (slave_dev->type == ARPHRD_INFINIBAND && BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves"); - netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n", - slave_dev->type); + slave_warn(bond_dev, slave_dev, "Type (%d) supports only active-backup mode\n", + slave_dev->type); res = -EOPNOTSUPP; goto err_undo_flags; } if (!slave_ops->ndo_set_mac_address || slave_dev->type == ARPHRD_INFINIBAND) { - netdev_warn(bond_dev, "The slave device specified does not support setting the MAC address\n"); + slave_warn(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address\n"); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac != BOND_FOM_ACTIVE) { if (!bond_has_slaves(bond)) { bond->params.fail_over_mac = BOND_FOM_ACTIVE; - netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n"); + slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n"); } else { NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active"); - netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n"); + slave_err(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n"); res = -EOPNOTSUPP; goto err_undo_flags; } @@ -1511,8 +1821,11 @@ * address to be the same as the slave's. */ if (!bond_has_slaves(bond) && - bond->dev->addr_assign_type == NET_ADDR_RANDOM) - bond_set_dev_addr(bond->dev, slave_dev); + bond->dev->addr_assign_type == NET_ADDR_RANDOM) { + res = bond_set_dev_addr(bond->dev, slave_dev); + if (res) + goto err_undo_flags; + } new_slave = bond_alloc_slave(bond, slave_dev); if (!new_slave) { @@ -1529,7 +1842,7 @@ new_slave->original_mtu = slave_dev->mtu; res = dev_set_mtu(slave_dev, bond->dev->mtu); if (res) { - netdev_dbg(bond_dev, "Error %d calling dev_set_mtu\n", res); + slave_err(bond_dev, slave_dev, "Error %d calling dev_set_mtu\n", res); goto err_free; } @@ -1547,9 +1860,10 @@ */ memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); ss.ss_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss); + res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, + extack); if (res) { - netdev_dbg(bond_dev, "Error %d calling set_mac_address\n", res); + slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); goto err_restore_mtu; } } @@ -1558,9 +1872,9 @@ slave_dev->flags |= IFF_SLAVE; /* open the slave since the application closed it */ - res = dev_open(slave_dev); + res = dev_open(slave_dev, extack); if (res) { - netdev_dbg(bond_dev, "Opening slave %s failed\n", slave_dev->name); + slave_err(bond_dev, slave_dev, "Opening slave failed\n"); goto err_restore_mac; } @@ -1579,8 +1893,7 @@ res = vlan_vids_add_by_dev(slave_dev, bond_dev); if (res) { - netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n", - slave_dev->name); + slave_err(bond_dev, slave_dev, "Couldn't add bond vlan ids\n"); goto err_close; } @@ -1610,12 +1923,10 @@ * supported); thus, we don't need to change * the messages for netif_carrier. */ - netdev_warn(bond_dev, "MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n", - slave_dev->name); + slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n"); } else if (link_reporting == -1) { /* unable get link status using mii/ethtool */ - netdev_warn(bond_dev, "can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n", - slave_dev->name); + slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n"); } } @@ -1649,9 +1960,9 @@ if (new_slave->link != BOND_LINK_DOWN) new_slave->last_link_up = jiffies; - netdev_dbg(bond_dev, "Initial state of slave_dev is BOND_LINK_%s\n", - new_slave->link == BOND_LINK_DOWN ? "DOWN" : - (new_slave->link == BOND_LINK_UP ? "UP" : "BACK")); + slave_dbg(bond_dev, slave_dev, "Initial state of slave is BOND_LINK_%s\n", + new_slave->link == BOND_LINK_DOWN ? "DOWN" : + (new_slave->link == BOND_LINK_UP ? "UP" : "BACK")); if (bond_uses_primary(bond) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ @@ -1692,7 +2003,7 @@ bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); break; default: - netdev_dbg(bond_dev, "This slave is always active in trunk mode\n"); + slave_dbg(bond_dev, slave_dev, "This slave is always active in trunk mode\n"); /* always active in trunk mode */ bond_set_active_slave(new_slave); @@ -1711,7 +2022,7 @@ #ifdef CONFIG_NET_POLL_CONTROLLER if (bond->dev->npinfo) { if (slave_enable_netpoll(new_slave)) { - netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n"); + slave_info(bond_dev, slave_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n"); res = -EBUSY; goto err_detach; } @@ -1724,23 +2035,21 @@ res = netdev_rx_handler_register(slave_dev, bond_handle_frame, new_slave); if (res) { - netdev_dbg(bond_dev, "Error %d calling netdev_rx_handler_register\n", res); + slave_dbg(bond_dev, slave_dev, "Error %d calling netdev_rx_handler_register\n", res); goto err_detach; } res = bond_master_upper_dev_link(bond, new_slave, extack); if (res) { - netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res); + slave_dbg(bond_dev, slave_dev, "Error %d calling bond_master_upper_dev_link\n", res); goto err_unregister; } res = bond_sysfs_slave_add(new_slave); if (res) { - netdev_dbg(bond_dev, "Error %d calling bond_sysfs_slave_add\n", res); + slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res); goto err_upper_unlink; } - - bond->nest_level = dev_get_nest_level(bond_dev) + 1; /* If the mode uses primary, then the following is handled by * bond_change_active_slave(). @@ -1763,16 +2072,14 @@ } } - netif_addr_lock_bh(bond_dev); - dev_mc_sync_multiple(slave_dev, bond_dev); - dev_uc_sync_multiple(slave_dev, bond_dev); - netif_addr_unlock_bh(bond_dev); + if (bond_dev->flags & IFF_UP) { + netif_addr_lock_bh(bond_dev); + dev_mc_sync_multiple(slave_dev, bond_dev); + dev_uc_sync_multiple(slave_dev, bond_dev); + netif_addr_unlock_bh(bond_dev); - if (BOND_MODE(bond) == BOND_MODE_8023AD) { - /* add lacpdu mc addr to mc list */ - u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; - - dev_mc_add(slave_dev, lacpdu_multicast); + if (BOND_MODE(bond) == BOND_MODE_8023AD) + dev_mc_add(slave_dev, lacpdu_mcast_addr); } } @@ -1790,10 +2097,9 @@ bond_update_slave_arr(bond, NULL); - netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", - slave_dev->name, - bond_is_active_slave(new_slave) ? "an active" : "a backup", - new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", + bond_is_active_slave(new_slave) ? "an active" : "a backup", + new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); /* enslave is successful */ bond_queue_slave_event(new_slave); @@ -1839,7 +2145,7 @@ bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr, new_slave->dev->addr_len); ss.ss_family = slave_dev->type; - dev_set_mac_address(slave_dev, (struct sockaddr *)&ss); + dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); } err_restore_mtu: @@ -1889,8 +2195,7 @@ /* slave is not a slave or master is not master of this slave */ if (!(slave_dev->flags & IFF_SLAVE) || !netdev_has_upper_dev(slave_dev, bond_dev)) { - netdev_dbg(bond_dev, "cannot release %s\n", - slave_dev->name); + slave_dbg(bond_dev, slave_dev, "cannot release slave\n"); return -EINVAL; } @@ -1899,8 +2204,7 @@ slave = bond_get_slave_by_dev(bond, slave_dev); if (!slave) { /* not a slave of this bond */ - netdev_info(bond_dev, "%s not enslaved\n", - slave_dev->name); + slave_info(bond_dev, slave_dev, "interface not enslaved\n"); unblock_netpoll_tx(); return -EINVAL; } @@ -1925,9 +2229,8 @@ if (bond_mode_can_use_xmit_hash(bond)) bond_update_slave_arr(bond, slave); - netdev_info(bond_dev, "Releasing %s interface %s\n", - bond_is_active_slave(slave) ? "active" : "backup", - slave_dev->name); + slave_info(bond_dev, slave_dev, "Releasing %s interface\n", + bond_is_active_slave(slave) ? "active" : "backup"); oldcurrent = rcu_access_pointer(bond->curr_active_slave); @@ -1937,9 +2240,8 @@ BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && bond_has_slaves(bond)) - netdev_warn(bond_dev, "the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n", - slave_dev->name, slave->perm_hwaddr, - bond_dev->name, slave_dev->name); + slave_warn(bond_dev, slave_dev, "the permanent HWaddr of slave - %pM - is still in use by bond - set the HWaddr of slave to a different address to avoid conflicts\n", + slave->perm_hwaddr); } if (rtnl_dereference(bond->primary_slave) == slave) @@ -1967,13 +2269,9 @@ bond_select_active_slave(bond); } - if (!bond_has_slaves(bond)) { - bond_set_carrier(bond); + bond_set_carrier(bond); + if (!bond_has_slaves(bond)) eth_hw_addr_random(bond_dev); - bond->nest_level = SINGLE_DEPTH_NESTING; - } else { - bond->nest_level = dev_get_nest_level(bond_dev) + 1; - } unblock_netpoll_tx(); synchronize_rcu(); @@ -1987,8 +2285,7 @@ bond_compute_features(bond); if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && (old_features & NETIF_F_VLAN_CHALLENGED)) - netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n", - slave_dev->name, bond_dev->name); + slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n"); vlan_vids_del_by_dev(slave_dev, bond_dev); @@ -2010,7 +2307,8 @@ if (old_flags & IFF_ALLMULTI) dev_set_allmulti(slave_dev, -1); - bond_hw_addr_flush(bond_dev, slave_dev); + if (old_flags & IFF_UP) + bond_hw_addr_flush(bond_dev, slave_dev); } slave_disable_netpoll(slave); @@ -2024,7 +2322,7 @@ bond_hw_addr_copy(ss.__data, slave->perm_hwaddr, slave->dev->addr_len); ss.ss_family = slave_dev->type; - dev_set_mac_address(slave_dev, (struct sockaddr *)&ss); + dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); } if (unregister) @@ -2049,8 +2347,8 @@ /* First release a slave and then destroy the bond if no more slaves are left. * Must be under rtnl_lock when this function is called. */ -static int bond_release_and_destroy(struct net_device *bond_dev, - struct net_device *slave_dev) +static int bond_release_and_destroy(struct net_device *bond_dev, + struct net_device *slave_dev) { struct bonding *bond = netdev_priv(bond_dev); int ret; @@ -2059,8 +2357,7 @@ if (ret == 0 && !bond_has_slaves(bond) && bond_dev->reg_state != NETREG_UNREGISTERING) { bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; - netdev_info(bond_dev, "Destroying bond %s\n", - bond_dev->name); + netdev_info(bond_dev, "Destroying bond\n"); bond_remove_proc_entry(bond); unregister_netdevice(bond_dev); } @@ -2117,24 +2414,22 @@ commit++; slave->delay = bond->params.downdelay; if (slave->delay) { - netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", - (BOND_MODE(bond) == - BOND_MODE_ACTIVEBACKUP) ? - (bond_is_active_slave(slave) ? - "active " : "backup ") : "", - slave->dev->name, - bond->params.downdelay * bond->params.miimon); + slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n", + (BOND_MODE(bond) == + BOND_MODE_ACTIVEBACKUP) ? + (bond_is_active_slave(slave) ? + "active " : "backup ") : "", + bond->params.downdelay * bond->params.miimon); } - /*FALLTHRU*/ + fallthrough; case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ bond_propose_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; - netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", - (bond->params.downdelay - slave->delay) * - bond->params.miimon, - slave->dev->name); + slave_info(bond->dev, slave->dev, "link status up again after %d ms\n", + (bond->params.downdelay - slave->delay) * + bond->params.miimon); commit++; continue; } @@ -2157,20 +2452,18 @@ slave->delay = bond->params.updelay; if (slave->delay) { - netdev_info(bond->dev, "link status up for interface %s, enabling it in %d ms\n", - slave->dev->name, - ignore_updelay ? 0 : - bond->params.updelay * - bond->params.miimon); + slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n", + ignore_updelay ? 0 : + bond->params.updelay * + bond->params.miimon); } - /*FALLTHRU*/ + fallthrough; case BOND_LINK_BACK: if (!link_state) { bond_propose_link_state(slave, BOND_LINK_DOWN); - netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", - (bond->params.updelay - slave->delay) * - bond->params.miimon, - slave->dev->name); + slave_info(bond->dev, slave->dev, "link status down again after %d ms\n", + (bond->params.updelay - slave->delay) * + bond->params.miimon); commit++; continue; } @@ -2235,9 +2528,8 @@ bond_needs_speed_duplex(bond)) { slave->link = BOND_LINK_DOWN; if (net_ratelimit()) - netdev_warn(bond->dev, - "failed to get link speed/duplex for %s\n", - slave->dev->name); + slave_warn(bond->dev, slave->dev, + "failed to get link speed/duplex\n"); continue; } bond_set_slave_link_state(slave, BOND_LINK_UP, @@ -2253,10 +2545,9 @@ bond_set_active_slave(slave); } - netdev_info(bond->dev, "link status definitely up for interface %s, %u Mbps %s duplex\n", - slave->dev->name, - slave->speed == SPEED_UNKNOWN ? 0 : slave->speed, - slave->duplex ? "full" : "half"); + slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n", + slave->speed == SPEED_UNKNOWN ? 0 : slave->speed, + slave->duplex ? "full" : "half"); bond_miimon_link_change(bond, slave, BOND_LINK_UP); @@ -2277,8 +2568,7 @@ bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); - netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n"); bond_miimon_link_change(bond, slave, BOND_LINK_DOWN); @@ -2288,8 +2578,8 @@ continue; default: - netdev_err(bond->dev, "invalid new link %d on slave %s\n", - slave->link_new_state, slave->dev->name); + slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", + slave->link_new_state); bond_propose_link_state(slave, BOND_LINK_NOCHANGE); continue; @@ -2316,6 +2606,7 @@ struct bonding *bond = container_of(work, struct bonding, mii_work.work); bool should_notify_peers = false; + bool commit; unsigned long delay; struct slave *slave; struct list_head *iter; @@ -2326,12 +2617,19 @@ goto re_arm; rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); - - if (bond_miimon_inspect(bond)) { + commit = !!bond_miimon_inspect(bond); + if (bond->send_peer_notif) { rcu_read_unlock(); + if (rtnl_trylock()) { + bond->send_peer_notif--; + rtnl_unlock(); + } + } else { + rcu_read_unlock(); + } + if (commit) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; @@ -2345,8 +2643,7 @@ bond_miimon_commit(bond); rtnl_unlock(); /* might sleep, hold no other locks */ - } else - rcu_read_unlock(); + } re_arm: if (bond->params.miimon) @@ -2360,22 +2657,26 @@ } } -static int bond_upper_dev_walk(struct net_device *upper, void *data) +static int bond_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - __be32 ip = *((__be32 *)data); + __be32 ip = *(__be32 *)priv->data; return ip == bond_confirm_addr(upper, 0, ip); } static bool bond_has_this_ip(struct bonding *bond, __be32 ip) { + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; bool ret = false; if (ip == bond_confirm_addr(bond->dev, 0, ip)) return true; rcu_read_lock(); - if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip)) + if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &priv)) ret = true; rcu_read_unlock(); @@ -2386,15 +2687,16 @@ * switches in VLAN mode (especially if ports are configured as * "native" to a VLAN) might not pass non-tagged frames. */ -static void bond_arp_send(struct net_device *slave_dev, int arp_op, - __be32 dest_ip, __be32 src_ip, - struct bond_vlan_tag *tags) +static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, + __be32 src_ip, struct bond_vlan_tag *tags) { struct sk_buff *skb; struct bond_vlan_tag *outer_tag = tags; + struct net_device *slave_dev = slave->dev; + struct net_device *bond_dev = slave->bond->dev; - netdev_dbg(slave_dev, "arp %d on slave %s: dst %pI4 src %pI4\n", - arp_op, slave_dev->name, &dest_ip, &src_ip); + slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n", + arp_op, &dest_ip, &src_ip); skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, NULL, slave_dev->dev_addr, NULL); @@ -2416,8 +2718,8 @@ continue; } - netdev_dbg(slave_dev, "inner tag: proto %X vid %X\n", - ntohs(outer_tag->vlan_proto), tags->vlan_id); + slave_dbg(bond_dev, slave_dev, "inner tag: proto %X vid %X\n", + ntohs(outer_tag->vlan_proto), tags->vlan_id); skb = vlan_insert_tag_set_proto(skb, tags->vlan_proto, tags->vlan_id); if (!skb) { @@ -2429,8 +2731,8 @@ } /* Set the outer tag */ if (outer_tag->vlan_id) { - netdev_dbg(slave_dev, "outer tag: proto %X vid %X\n", - ntohs(outer_tag->vlan_proto), outer_tag->vlan_id); + slave_dbg(bond_dev, slave_dev, "outer tag: proto %X vid %X\n", + ntohs(outer_tag->vlan_proto), outer_tag->vlan_id); __vlan_hwaccel_put_tag(skb, outer_tag->vlan_proto, outer_tag->vlan_id); } @@ -2487,7 +2789,8 @@ int i; for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { - netdev_dbg(bond->dev, "basa: target %pI4\n", &targets[i]); + slave_dbg(bond->dev, slave->dev, "%s: target %pI4\n", + __func__, &targets[i]); tags = NULL; /* Find out through which dev should the packet go */ @@ -2501,7 +2804,7 @@ net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", bond->dev->name, &targets[i]); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], + bond_arp_send(slave, ARPOP_REQUEST, targets[i], 0, tags); continue; } @@ -2518,7 +2821,7 @@ goto found; /* Not our device - skip */ - netdev_dbg(bond->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n", + slave_dbg(bond->dev, slave->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n", &targets[i], rt->dst.dev ? rt->dst.dev->name : "NULL"); ip_rt_put(rt); @@ -2527,8 +2830,7 @@ found: addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); ip_rt_put(rt); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, tags); + bond_arp_send(slave, ARPOP_REQUEST, targets[i], addr, tags); kfree(tags); } } @@ -2538,15 +2840,15 @@ int i; if (!sip || !bond_has_this_ip(bond, tip)) { - netdev_dbg(bond->dev, "bva: sip %pI4 tip %pI4 not found\n", - &sip, &tip); + slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 tip %pI4 not found\n", + __func__, &sip, &tip); return; } i = bond_get_targets_ip(bond->params.arp_targets, sip); if (i == -1) { - netdev_dbg(bond->dev, "bva: sip %pI4 not found in targets\n", - &sip); + slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 not found in targets\n", + __func__, &sip); return; } slave->last_rx = jiffies; @@ -2574,8 +2876,8 @@ alen = arp_hdr_len(bond->dev); - netdev_dbg(bond->dev, "bond_arp_rcv: skb->dev %s\n", - skb->dev->name); + slave_dbg(bond->dev, slave->dev, "%s: skb->dev %s\n", + __func__, skb->dev->name); if (alen > skb_headlen(skb)) { arp = kmalloc(alen, GFP_ATOMIC); @@ -2599,10 +2901,10 @@ arp_ptr += 4 + bond->dev->addr_len; memcpy(&tip, arp_ptr, 4); - netdev_dbg(bond->dev, "bond_arp_rcv: %s/%d av %d sv %d sip %pI4 tip %pI4\n", - slave->dev->name, bond_slave_state(slave), - bond->params.arp_validate, slave_do_arp_validate(bond, slave), - &sip, &tip); + slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI4 tip %pI4\n", + __func__, slave->dev->name, bond_slave_state(slave), + bond->params.arp_validate, slave_do_arp_validate(bond, slave), + &sip, &tip); curr_active_slave = rcu_dereference(bond->curr_active_slave); curr_arp_slave = rcu_dereference(bond->current_arp_slave); @@ -2705,12 +3007,10 @@ * is closed. */ if (!oldcurrent) { - netdev_info(bond->dev, "link status definitely up for interface %s\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "link status definitely up\n"); do_failover = 1; } else { - netdev_info(bond->dev, "interface %s is now up\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "interface is now up\n"); } } } else { @@ -2729,8 +3029,7 @@ if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - netdev_info(bond->dev, "interface %s is now down\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "interface is now down\n"); if (slave == oldcurrent) do_failover = 1; @@ -2883,8 +3182,7 @@ RCU_INIT_POINTER(bond->current_arp_slave, NULL); } - netdev_info(bond->dev, "link status definitely up for interface %s\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "link status definitely up\n"); if (!rtnl_dereference(bond->curr_active_slave) || slave == rtnl_dereference(bond->primary_slave)) @@ -2903,8 +3201,7 @@ bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); - netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n"); if (slave == rtnl_dereference(bond->curr_active_slave)) { RCU_INIT_POINTER(bond->current_arp_slave, NULL); @@ -2927,8 +3224,9 @@ continue; default: - netdev_err(bond->dev, "impossible: new_link %d on slave %s\n", - slave->link_new_state, slave->dev->name); + slave_err(bond->dev, slave->dev, + "impossible: link_new_state %d on slave\n", + slave->link_new_state); continue; } @@ -2997,8 +3295,7 @@ bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_LATER); - netdev_info(bond->dev, "backup interface %s is now down\n", - slave->dev->name); + slave_info(bond->dev, slave->dev, "backup interface is now down\n"); } if (slave == curr_arp_slave) found = true; @@ -3069,9 +3366,11 @@ if (!rtnl_trylock()) return; - if (should_notify_peers) + if (should_notify_peers) { + bond->send_peer_notif--; call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + } if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); @@ -3110,18 +3409,19 @@ { struct bonding *event_bond = netdev_priv(bond_dev); + netdev_dbg(bond_dev, "%s called\n", __func__); + switch (event) { case NETDEV_CHANGENAME: return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); +#ifdef CONFIG_XFRM_OFFLOAD + xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); +#endif /* CONFIG_XFRM_OFFLOAD */ break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); - break; - case NETDEV_NOTIFY_PEERS: - if (event_bond->send_peer_notif) - event_bond->send_peer_notif--; break; default: break; @@ -3141,11 +3441,16 @@ * before netdev_rx_handler_register is called in which case * slave will be NULL */ - if (!slave) + if (!slave) { + netdev_dbg(slave_dev, "%s called on NULL slave\n", __func__); return NOTIFY_DONE; + } + bond_dev = slave->bond->dev; bond = slave->bond; primary = rtnl_dereference(bond->primary_slave); + + slave_dbg(bond_dev, slave_dev, "%s called\n", __func__); switch (event) { case NETDEV_UNREGISTER: @@ -3173,7 +3478,7 @@ if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_adapter_speed_duplex_changed(slave); - /* Fallthrough */ + fallthrough; case NETDEV_DOWN: /* Refresh slave-array if applicable! * If the setup does not use miimon or arpmon (mode-specific!), @@ -3248,7 +3553,8 @@ { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); - netdev_dbg(event_dev, "event: %lx\n", event); + netdev_dbg(event_dev, "%s received %s\n", + __func__, netdev_cmd_to_name(event)); if (!(event_dev->priv_flags & IFF_BONDING)) return NOTIFY_DONE; @@ -3256,16 +3562,13 @@ if (event_dev->flags & IFF_MASTER) { int ret; - netdev_dbg(event_dev, "IFF_MASTER\n"); ret = bond_master_netdev_event(event, event_dev); if (ret != NOTIFY_DONE) return ret; } - if (event_dev->flags & IFF_SLAVE) { - netdev_dbg(event_dev, "IFF_SLAVE\n"); + if (event_dev->flags & IFF_SLAVE) return bond_slave_netdev_event(event, event_dev); - } return NOTIFY_DONE; } @@ -3287,39 +3590,78 @@ return 0; } +static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, + int *noff, int *proto, bool l34) +{ + const struct ipv6hdr *iph6; + const struct iphdr *iph; + + if (skb->protocol == htons(ETH_P_IP)) { + if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph)))) + return false; + iph = (const struct iphdr *)(skb->data + *noff); + iph_to_flow_copy_v4addrs(fk, iph); + *noff += iph->ihl << 2; + if (!ip_is_fragment(iph)) + *proto = iph->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6)))) + return false; + iph6 = (const struct ipv6hdr *)(skb->data + *noff); + iph_to_flow_copy_v6addrs(fk, iph6); + *noff += sizeof(*iph6); + *proto = iph6->nexthdr; + } else { + return false; + } + + if (l34 && *proto >= 0) + fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto); + + return true; +} + /* Extract the appropriate headers based on bond's xmit policy */ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, struct flow_keys *fk) { - const struct ipv6hdr *iph6; - const struct iphdr *iph; + bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; int noff, proto = -1; - if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect_flow_keys(skb, fk, 0); + if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) { + memset(fk, 0, sizeof(*fk)); + return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, + fk, NULL, 0, 0, 0, 0); + } fk->ports.ports = 0; + memset(&fk->icmp, 0, sizeof(fk->icmp)); noff = skb_network_offset(skb); - if (skb->protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) - return false; - iph = ip_hdr(skb); - iph_to_flow_copy_v4addrs(fk, iph); - noff += iph->ihl << 2; - if (!ip_is_fragment(iph)) - proto = iph->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) - return false; - iph6 = ipv6_hdr(skb); - iph_to_flow_copy_v6addrs(fk, iph6); - noff += sizeof(*iph6); - proto = iph6->nexthdr; - } else { + if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) return false; + + /* ICMP error packets contains at least 8 bytes of the header + * of the packet which generated the error. Use this information + * to correlate ICMP error packets within the same flow which + * generated the error. + */ + if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { + skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data, + skb_transport_offset(skb), + skb_headlen(skb)); + if (proto == IPPROTO_ICMP) { + if (!icmp_is_err(fk->icmp.type)) + return true; + + noff += sizeof(struct icmphdr); + } else if (proto == IPPROTO_ICMPV6) { + if (!icmpv6_is_err(fk->icmp.type)) + return true; + + noff += sizeof(struct icmp6hdr); + } + return bond_flow_ip(skb, fk, &noff, &proto, l34); } - if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports.ports = skb_flow_get_ports(skb, noff, proto); return true; } @@ -3346,10 +3688,14 @@ return bond_eth_hash(skb); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || - bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) + bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { hash = bond_eth_hash(skb); - else - hash = (__force u32)flow.ports.ports; + } else { + if (flow.icmp.id) + memcpy(&hash, &flow.icmp, sizeof(hash)); + else + memcpy(&hash, &flow.ports.ports, sizeof(hash)); + } hash ^= (__force u32)flow_get_u32_dst(&flow) ^ (__force u32)flow_get_u32_src(&flow); hash ^= (hash >> 16); @@ -3424,6 +3770,9 @@ /* register to receive LACPDUs */ bond->recv_probe = bond_3ad_lacpdu_recv; bond_3ad_initiate_agg_selection(bond, 1); + + bond_for_each_slave(bond, slave, iter) + dev_mc_add(slave->dev, lacpdu_mcast_addr); } if (bond_mode_can_use_xmit_hash(bond)) @@ -3435,12 +3784,26 @@ static int bond_close(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; bond_work_cancel_all(bond); bond->send_peer_notif = 0; if (bond_is_lb(bond)) bond_alb_deinitialize(bond); bond->recv_probe = NULL; + + if (bond_uses_primary(bond)) { + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (slave) + bond_hw_addr_flush(bond_dev, slave->dev); + rcu_read_unlock(); + } else { + struct list_head *iter; + + bond_for_each_slave(bond, slave, iter) + bond_hw_addr_flush(bond_dev, slave->dev); + } return 0; } @@ -3474,12 +3837,46 @@ } } -static int bond_get_nest_level(struct net_device *bond_dev) +#ifdef CONFIG_LOCKDEP +static int bond_get_lowest_level_rcu(struct net_device *dev) { - struct bonding *bond = netdev_priv(bond_dev); + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int cur = 0, max = 0; - return bond->nest_level; + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + if (max <= cur) + max = cur; + break; + } + + if (!next) { + if (!cur) + return max; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return max; } +#endif static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) @@ -3488,11 +3885,17 @@ struct rtnl_link_stats64 temp; struct list_head *iter; struct slave *slave; + int nest_level = 0; - spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev)); - memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + nest_level = bond_get_lowest_level_rcu(bond_dev); +#endif + + spin_lock_nested(&bond->stats_lock, nest_level); + memcpy(stats, &bond->bond_stats, sizeof(*stats)); + bond_for_each_slave_rcu(bond, slave, iter) { const struct rtnl_link_stats64 *new = dev_get_stats(slave->dev, &temp); @@ -3502,10 +3905,10 @@ /* save off the slave stats for the next run */ memcpy(&slave->slave_stats, new, sizeof(*new)); } - rcu_read_unlock(); memcpy(&bond->bond_stats, stats, sizeof(*stats)); spin_unlock(&bond->stats_lock); + rcu_read_unlock(); } static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) @@ -3530,7 +3933,7 @@ return -EINVAL; mii->phy_id = 0; - /* Fall Through */ + fallthrough; case SIOCGMIIREG: /* We do this again just in case we were called by SIOCGMIIREG * instead of SIOCGMIIPHY. @@ -3582,12 +3985,11 @@ slave_dev = __dev_get_by_name(net, ifr->ifr_slave); - netdev_dbg(bond_dev, "slave_dev=%p:\n", slave_dev); + slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); if (!slave_dev) return -ENODEV; - netdev_dbg(bond_dev, "slave_dev->name=%s:\n", slave_dev->name); switch (cmd) { case BOND_ENSLAVE_OLD: case SIOCBONDENSLAVE: @@ -3599,8 +4001,7 @@ break; case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: - bond_set_dev_addr(bond_dev, slave_dev); - res = 0; + res = bond_set_dev_addr(bond_dev, slave_dev); break; case BOND_CHANGE_ACTIVE_OLD: case SIOCBONDCHANGEACTIVE: @@ -3656,32 +4057,35 @@ const struct net_device_ops *slave_ops; struct neigh_parms parms; struct slave *slave; - int ret; + int ret = 0; - slave = bond_first_slave(bond); + rcu_read_lock(); + slave = bond_first_slave_rcu(bond); if (!slave) - return 0; + goto out; slave_ops = slave->dev->netdev_ops; if (!slave_ops->ndo_neigh_setup) - return 0; + goto out; - parms.neigh_setup = NULL; - parms.neigh_cleanup = NULL; - ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); - if (ret) - return ret; - - /* Assign slave's neigh_cleanup to neighbour in case cleanup is called - * after the last slave has been detached. Assumes that all slaves - * utilize the same neigh_cleanup (true at this writing as only user - * is ipoib). + /* TODO: find another way [1] to implement this. + * Passing a zeroed structure is fragile, + * but at least we do not pass garbage. + * + * [1] One way would be that ndo_neigh_setup() never touch + * struct neigh_parms, but propagate the new neigh_setup() + * back to ___neigh_create() / neigh_parms_alloc() */ - n->parms->neigh_cleanup = parms.neigh_cleanup; + memset(&parms, 0, sizeof(parms)); + ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); - if (!parms.neigh_setup) - return 0; + if (ret) + goto out; - return parms.neigh_setup(n); + if (parms.neigh_setup) + ret = parms.neigh_setup(n); +out: + rcu_read_unlock(); + return ret; } /* The bonding ndo_neigh_setup is called at init time beofre any @@ -3713,7 +4117,7 @@ netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu); bond_for_each_slave(bond, slave, iter) { - netdev_dbg(bond_dev, "s %p c_m %p\n", + slave_dbg(bond_dev, slave->dev, "s %p c_m %p\n", slave, slave->dev->netdev_ops->ndo_change_mtu); res = dev_set_mtu(slave->dev, new_mtu); @@ -3727,8 +4131,8 @@ * means changing their mtu from timer context, which * is probably not a good idea. */ - netdev_dbg(bond_dev, "err %d %s\n", res, - slave->dev->name); + slave_dbg(bond_dev, slave->dev, "err %d setting mtu to %d\n", + res, new_mtu); goto unwind; } } @@ -3746,10 +4150,9 @@ break; tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu); - if (tmp_res) { - netdev_dbg(bond_dev, "unwind err %d dev %s\n", - tmp_res, rollback_slave->dev->name); - } + if (tmp_res) + slave_dbg(bond_dev, rollback_slave->dev, "unwind err %d\n", + tmp_res); } return res; @@ -3773,7 +4176,7 @@ return bond_alb_set_mac_address(bond_dev, addr); - netdev_dbg(bond_dev, "bond=%p\n", bond); + netdev_dbg(bond_dev, "%s: bond=%p\n", __func__, bond); /* If fail_over_mac is enabled, do nothing and return success. * Returning an error causes ifenslave to fail. @@ -3786,8 +4189,9 @@ return -EADDRNOTAVAIL; bond_for_each_slave(bond, slave, iter) { - netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name); - res = dev_set_mac_address(slave->dev, addr); + slave_dbg(bond_dev, slave->dev, "%s: slave=%p\n", + __func__, slave); + res = dev_set_mac_address(slave->dev, addr, NULL); if (res) { /* TODO: consider downing the slave * and retry ? @@ -3795,7 +4199,8 @@ * breakage anyway until ARP finish * updating, so... */ - netdev_dbg(bond_dev, "err %d %s\n", res, slave->dev->name); + slave_dbg(bond_dev, slave->dev, "%s: err %d\n", + __func__, res); goto unwind; } } @@ -3816,10 +4221,10 @@ break; tmp_res = dev_set_mac_address(rollback_slave->dev, - (struct sockaddr *)&tmp_ss); + (struct sockaddr *)&tmp_ss, NULL); if (tmp_res) { - netdev_dbg(bond_dev, "unwind err %d dev %s\n", - tmp_res, rollback_slave->dev->name); + slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n", + __func__, tmp_res); } } @@ -3827,16 +4232,15 @@ } /** - * bond_xmit_slave_id - transmit skb through slave with slave_id + * bond_get_slave_by_id - get xmit slave with slave_id * @bond: bonding device that is transmitting - * @skb: buffer to transmit * @slave_id: slave id up to slave_cnt-1 through which to transmit * - * This function tries to transmit through slave with slave_id but in case + * This function tries to get slave with slave_id but in case * it fails, it tries to find the first available slave for transmission. - * The skb is consumed in all cases, thus the function is void. */ -static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +static struct slave *bond_get_slave_by_id(struct bonding *bond, + int slave_id) { struct list_head *iter; struct slave *slave; @@ -3845,10 +4249,8 @@ /* Here we start from the slave with slave_id */ bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } } @@ -3857,13 +4259,11 @@ bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } /* no slave that can tx has been found */ - bond_tx_drop(bond->dev, skb); + return NULL; } /** @@ -3899,10 +4299,9 @@ return slave_id; } -static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, - struct net_device *bond_dev) +static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; int slave_cnt; u32 slave_id; @@ -3924,22 +4323,37 @@ if (iph->protocol == IPPROTO_IGMP) { slave = rcu_dereference(bond->curr_active_slave); if (slave) - bond_dev_queue_xmit(bond, skb, slave->dev); - else - bond_xmit_slave_id(bond, skb, 0); - return NETDEV_TX_OK; + return slave; + return bond_get_slave_by_id(bond, 0); } } non_igmp: slave_cnt = READ_ONCE(bond->slave_cnt); if (likely(slave_cnt)) { - slave_id = bond_rr_gen_slave_id(bond); - bond_xmit_slave_id(bond, skb, slave_id % slave_cnt); - } else { - bond_tx_drop(bond_dev, skb); + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); } - return NETDEV_TX_OK; + return NULL; +} + +static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, + struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + slave = bond_xmit_roundrobin_slave_get(bond, skb); + if (likely(slave)) + return bond_dev_queue_xmit(bond, skb, slave->dev); + + return bond_tx_drop(bond_dev, skb); +} + +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ + return rcu_dereference(bond->curr_active_slave); } /* In active-backup mode, we know that bond->curr_active_slave is always valid if @@ -3951,13 +4365,11 @@ struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = rcu_dereference(bond->curr_active_slave); + slave = bond_xmit_activebackup_slave_get(bond, skb); if (slave) - bond_dev_queue_xmit(bond, skb, slave->dev); - else - bond_tx_drop(bond_dev, skb); + return bond_dev_queue_xmit(bond, skb, slave->dev); - return NETDEV_TX_OK; + return bond_tx_drop(bond_dev, skb); } /* Use this to update slave_array when (a) it's not appropriate to update @@ -3991,6 +4403,61 @@ bond_slave_arr_work_rearm(bond, 1); } +static void bond_skip_slave(struct bond_up_slave *slaves, + struct slave *skipslave) +{ + int idx; + + /* Rare situation where caller has asked to skip a specific + * slave but allocation failed (most likely!). BTW this is + * only possible when the call is initiated from + * __bond_release_one(). In this situation; overwrite the + * skipslave entry in the array with the last entry from the + * array to avoid a situation where the xmit path may choose + * this to-be-skipped slave to send a packet out. + */ + for (idx = 0; slaves && idx < slaves->count; idx++) { + if (skipslave == slaves->arr[idx]) { + slaves->arr[idx] = + slaves->arr[slaves->count - 1]; + slaves->count--; + break; + } + } +} + +static void bond_set_slave_arr(struct bonding *bond, + struct bond_up_slave *usable_slaves, + struct bond_up_slave *all_slaves) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + kfree_rcu(usable, rcu); + + all = rtnl_dereference(bond->all_slaves); + rcu_assign_pointer(bond->all_slaves, all_slaves); + kfree_rcu(all, rcu); +} + +static void bond_reset_slave_arr(struct bonding *bond) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); + } +} + /* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -4001,9 +4468,9 @@ */ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { + struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL; struct slave *slave; struct list_head *iter; - struct bond_up_slave *new_arr, *old_arr; int agg_id = 0; int ret = 0; @@ -4011,11 +4478,12 @@ WARN_ON(lockdep_is_held(&bond->mode_lock)); #endif - new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), - GFP_KERNEL); - if (!new_arr) { + usable_slaves = kzalloc(struct_size(usable_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + all_slaves = kzalloc(struct_size(all_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + if (!usable_slaves || !all_slaves) { ret = -ENOMEM; - pr_err("Failed to build slave-array.\n"); goto out; } if (BOND_MODE(bond) == BOND_MODE_8023AD) { @@ -4023,20 +4491,19 @@ if (bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("bond_3ad_get_active_agg_info failed\n"); - kfree_rcu(new_arr, rcu); /* No active aggragator means it's not safe to use * the previous array. */ - old_arr = rtnl_dereference(bond->slave_arr); - if (old_arr) { - RCU_INIT_POINTER(bond->slave_arr, NULL); - kfree_rcu(old_arr, rcu); - } + bond_reset_slave_arr(bond); goto out; } agg_id = ad_info.aggregator_id; } bond_for_each_slave(bond, slave, iter) { + if (skipslave == slave) + continue; + + all_slaves->arr[all_slaves->count++] = slave; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct aggregator *agg; @@ -4046,43 +4513,43 @@ } if (!bond_slave_can_tx(slave)) continue; - if (skipslave == slave) - continue; - netdev_dbg(bond->dev, - "Adding slave dev %s to tx hash array[%d]\n", - slave->dev->name, new_arr->count); + slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", + usable_slaves->count); - new_arr->arr[new_arr->count++] = slave; + usable_slaves->arr[usable_slaves->count++] = slave; } - old_arr = rtnl_dereference(bond->slave_arr); - rcu_assign_pointer(bond->slave_arr, new_arr); - if (old_arr) - kfree_rcu(old_arr, rcu); + bond_set_slave_arr(bond, usable_slaves, all_slaves); + return ret; out: if (ret != 0 && skipslave) { - int idx; - - /* Rare situation where caller has asked to skip a specific - * slave but allocation failed (most likely!). BTW this is - * only possible when the call is initiated from - * __bond_release_one(). In this situation; overwrite the - * skipslave entry in the array with the last entry from the - * array to avoid a situation where the xmit path may choose - * this to-be-skipped slave to send a packet out. - */ - old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { - if (skipslave == old_arr->arr[idx]) { - old_arr->arr[idx] = - old_arr->arr[old_arr->count-1]; - old_arr->count--; - break; - } - } + bond_skip_slave(rtnl_dereference(bond->all_slaves), + skipslave); + bond_skip_slave(rtnl_dereference(bond->usable_slaves), + skipslave); } + kfree_rcu(all_slaves, rcu); + kfree_rcu(usable_slaves, rcu); + return ret; +} + +static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, + struct sk_buff *skb, + struct bond_up_slave *slaves) +{ + struct slave *slave; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash(bond, skb); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + slave = slaves->arr[hash % count]; + return slave; } /* Use this Xmit function for 3AD as well as XOR modes. The current @@ -4093,20 +4560,15 @@ struct net_device *dev) { struct bonding *bond = netdev_priv(dev); - struct slave *slave; struct bond_up_slave *slaves; - unsigned int count; + struct slave *slave; - slaves = rcu_dereference(bond->slave_arr); - count = slaves ? READ_ONCE(slaves->count) : 0; - if (likely(count)) { - slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; - bond_dev_queue_xmit(bond, skb, slave->dev); - } else { - bond_tx_drop(dev, skb); - } + slaves = rcu_dereference(bond->usable_slaves); + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + if (likely(slave)) + return bond_dev_queue_xmit(bond, skb, slave->dev); - return NETDEV_TX_OK; + return bond_tx_drop(dev, skb); } /* in broadcast mode, we send everything to all usable interfaces. */ @@ -4116,27 +4578,39 @@ struct bonding *bond = netdev_priv(bond_dev); struct slave *slave = NULL; struct list_head *iter; + bool xmit_suc = false; + bool skb_used = false; bond_for_each_slave_rcu(bond, slave, iter) { - if (bond_is_last_slave(bond, slave)) - break; - if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skb2; + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) + continue; + + if (bond_is_last_slave(bond, slave)) { + skb2 = skb; + skb_used = true; + } else { + skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) { net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", bond_dev->name, __func__); continue; } - bond_dev_queue_xmit(bond, skb2, slave->dev); } - } - if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) - bond_dev_queue_xmit(bond, skb, slave->dev); - else - bond_tx_drop(bond_dev, skb); - return NETDEV_TX_OK; + if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK) + xmit_suc = true; + } + + if (!skb_used) + dev_kfree_skb_any(skb); + + if (xmit_suc) + return NETDEV_TX_OK; + + atomic_long_inc(&bond_dev->tx_dropped); + return NET_XMIT_DROP; } /*------------------------- Device initialization ---------------------------*/ @@ -4169,8 +4643,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { /* This helper function exists to help dev_pick_tx get the correct * destination queue. Using a helper function skips a call to @@ -4188,6 +4661,48 @@ } while (txq >= dev->real_num_tx_queues); } return txq; +} + +static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, + struct sk_buff *skb, + bool all_slaves) +{ + struct bonding *bond = netdev_priv(master_dev); + struct bond_up_slave *slaves; + struct slave *slave = NULL; + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xmit_roundrobin_slave_get(bond, skb); + break; + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond, skb); + break; + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + if (all_slaves) + slaves = rcu_dereference(bond->all_slaves); + else + slaves = rcu_dereference(bond->usable_slaves); + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + break; + case BOND_MODE_BROADCAST: + break; + case BOND_MODE_ALB: + slave = bond_xmit_alb_slave_get(bond, skb); + break; + case BOND_MODE_TLB: + slave = bond_xmit_tlb_slave_get(bond, skb); + break; + default: + /* Should never happen, mode already checked */ + WARN_ONCE(true, "Unknown bonding mode"); + break; + } + + if (slave) + return slave->dev; + return NULL; } static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -4216,8 +4731,7 @@ /* Should never happen, mode already checked */ netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond)); WARN_ON_ONCE(1); - bond_tx_drop(dev, skb); - return NETDEV_TX_OK; + return bond_tx_drop(dev, skb); } } @@ -4236,7 +4750,7 @@ if (bond_has_slaves(bond)) ret = __bond_start_xmit(skb, dev); else - bond_tx_drop(dev, skb); + ret = bond_tx_drop(dev, skb); rcu_read_unlock(); return ret; @@ -4291,7 +4805,6 @@ struct ethtool_drvinfo *drvinfo) { strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d", BOND_ABI_VERSION); } @@ -4318,7 +4831,6 @@ .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, - .ndo_get_lock_subclass = bond_get_nest_level, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = bond_netpoll_setup, .ndo_netpoll_cleanup = bond_netpoll_cleanup, @@ -4328,6 +4840,7 @@ .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, .ndo_features_check = passthru_features_check, + .ndo_get_xmit_slave = bond_xmit_get_slave, }; static const struct device_type bond_type = { @@ -4346,7 +4859,6 @@ struct bonding *bond = netdev_priv(bond_dev); spin_lock_init(&bond->mode_lock); - spin_lock_init(&bond->stats_lock); bond->params = bonding_defaults; /* Initialize pointers */ @@ -4368,6 +4880,13 @@ bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE; bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); +#ifdef CONFIG_XFRM_OFFLOAD + /* set up xfrm device ops (only supported in active-backup right now) */ + bond_dev->xfrmdev_ops = &bond_xfrmdev_ops; + INIT_LIST_HEAD(&bond->ipsec_list); + spin_lock_init(&bond->ipsec_lock); +#endif /* CONFIG_XFRM_OFFLOAD */ + /* don't acquire bond device's netif_tx_lock when transmitting */ bond_dev->features |= NETIF_F_LLTX; @@ -4386,8 +4905,16 @@ NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; +#ifdef CONFIG_XFRM_OFFLOAD + bond_dev->hw_features |= BOND_XFRM_FEATURES; +#endif /* CONFIG_XFRM_OFFLOAD */ bond_dev->features |= bond_dev->hw_features; bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; +#ifdef CONFIG_XFRM_OFFLOAD + /* Disable XFRM features if this isn't an active-backup config */ + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) + bond_dev->features &= ~BOND_XFRM_FEATURES; +#endif /* CONFIG_XFRM_OFFLOAD */ } /* Destroy a bonding device. @@ -4396,9 +4923,9 @@ static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct bond_up_slave *usable, *all; struct list_head *iter; struct slave *slave; - struct bond_up_slave *arr; bond_netpoll_cleanup(bond_dev); @@ -4407,10 +4934,16 @@ __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); - arr = rtnl_dereference(bond->slave_arr); - if (arr) { - RCU_INIT_POINTER(bond->slave_arr, NULL); - kfree_rcu(arr, rcu); + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); } list_del(&bond->bond_list); @@ -4769,6 +5302,7 @@ params->arp_all_targets = arp_all_targets_value; params->updelay = updelay; params->downdelay = downdelay; + params->peer_notif_delay = 0; params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; @@ -4817,7 +5351,7 @@ if (!bond->wq) return -ENOMEM; - bond->nest_level = SINGLE_DEPTH_NESTING; + spin_lock_init(&bond->stats_lock); netdev_lockdep_set_classes(bond_dev); list_add_tail(&bond->bond_list, &bn->dev_list); @@ -4932,8 +5466,6 @@ int i; int res; - pr_info("%s", bond_version); - res = bond_check_params(&bonding_defaults); if (res) goto out; @@ -4953,6 +5485,10 @@ if (res) goto err; } + + skb_flow_dissector_init(&flow_keys_bonding, + flow_keys_bonding_keys, + ARRAY_SIZE(flow_keys_bonding_keys)); register_netdevice_notifier(&bond_netdev_notifier); out: @@ -4984,6 +5520,5 @@ module_init(bonding_init); module_exit(bonding_exit); MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); -MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); +MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); -- Gitblit v1.6.2