From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] add new system file
---
kernel/drivers/net/bonding/bond_main.c | 1359 +++++++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 958 insertions(+), 401 deletions(-)
diff --git a/kernel/drivers/net/bonding/bond_main.c b/kernel/drivers/net/bonding/bond_main.c
index 2d70cdd..bcb0191 100644
--- a/kernel/drivers/net/bonding/bond_main.c
+++ b/kernel/drivers/net/bonding/bond_main.c
@@ -41,6 +41,8 @@
#include <linux/in.h>
#include <net/ip.h>
#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/slab.h>
@@ -77,7 +79,7 @@
#include <net/pkt_sched.h>
#include <linux/rculist.h>
#include <net/flow_dissector.h>
-#include <net/switchdev.h>
+#include <net/xfrm.h>
#include <net/bonding.h>
#include <net/bond_3ad.h>
#include <net/bond_alb.h>
@@ -201,6 +203,51 @@
unsigned int bond_net_id __read_mostly;
+/* Flow dissector key table used by the bonding driver. Each entry pairs a
+ * FLOW_DISSECTOR_KEY_* identifier with the offset of the corresponding
+ * member inside struct flow_keys. Note that the TIPC key lives in the
+ * addrs union (addrs.tipckey) and the flow label key maps to the tags
+ * member.
+ */
+static const struct flow_dissector_key flow_keys_bonding_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v4addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v6addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_TIPC,
+ .offset = offsetof(struct flow_keys, addrs.tipckey),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_ICMP,
+ .offset = offsetof(struct flow_keys, icmp),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_VLAN,
+ .offset = offsetof(struct flow_keys, vlan),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
+ .offset = offsetof(struct flow_keys, tags),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+ .offset = offsetof(struct flow_keys, keyid),
+ },
+};
+
+static struct flow_dissector flow_keys_bonding __read_mostly;
+
/*-------------------------- Forward declarations ---------------------------*/
static int bond_init(struct net_device *bond_dev);
@@ -232,8 +279,6 @@
return names[mode];
}
-/*---------------------------------- VLAN -----------------------------------*/
-
/**
* bond_dev_queue_xmit - Prepare skb for xmit.
*
@@ -241,7 +286,7 @@
* @skb: hw accel VLAN tagged skb to transmit
* @slave_dev: slave that is supposed to xmit this skbuff
*/
-void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
+netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
struct net_device *slave_dev)
{
skb->dev = slave_dev;
@@ -251,10 +296,12 @@
skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
if (unlikely(netpoll_tx_running(bond->dev)))
- bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
- else
- dev_queue_xmit(skb);
+ return bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
+
+ return dev_queue_xmit(skb);
}
+
+/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
* We don't protect the slave list iteration with a lock because:
@@ -275,6 +322,7 @@
/**
* bond_vlan_rx_add_vid - Propagates adding an id to slaves
* @bond_dev: bonding net device that got called
+ * @proto: network protocol ID
* @vid: vlan id being added
*/
static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
@@ -308,6 +356,7 @@
/**
* bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
* @bond_dev: bonding net device that got called
+ * @proto: network protocol ID
* @vid: vlan id being removed
*/
static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
@@ -325,6 +374,225 @@
return 0;
}
+
+/*---------------------------------- XFRM -----------------------------------*/
+
+#ifdef CONFIG_XFRM_OFFLOAD
+/**
+ * bond_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ *
+ * Hands the SA to the bond's current active slave and, when the slave
+ * accepts it, records the SA on the bond's ipsec_list.
+ *
+ * Return: 0 on success; -EINVAL if @xs has no device or the active slave
+ * cannot offload; -ENODEV if the bond has no active slave; -ENOMEM if the
+ * tracking entry cannot be allocated; otherwise the error returned by the
+ * slave's xdo_dev_state_add().
+ **/
+static int bond_ipsec_add_sa(struct xfrm_state *xs)
+{
+	struct net_device *bond_dev = xs->xso.dev;
+	struct net_device *real_dev;
+	struct bond_ipsec *entry;
+	struct bonding *bond;
+	struct slave *active;
+	int ret;
+
+	if (!bond_dev)
+		return -EINVAL;
+
+	rcu_read_lock();
+	bond = netdev_priv(bond_dev);
+	active = rcu_dereference(bond->curr_active_slave);
+	if (!active) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+	real_dev = active->dev;
+
+	if (!real_dev->xfrmdev_ops ||
+	    !real_dev->xfrmdev_ops->xdo_dev_state_add ||
+	    netif_is_bond_master(real_dev)) {
+		slave_warn(bond_dev, real_dev, "Slave does not support ipsec offload\n");
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* allocated inside the RCU read-side section, hence GFP_ATOMIC */
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* real_dev is set before the slave callback is invoked */
+	xs->xso.real_dev = real_dev;
+	ret = real_dev->xfrmdev_ops->xdo_dev_state_add(xs);
+	if (ret) {
+		kfree(entry);
+		goto unlock;
+	}
+
+	entry->xs = xs;
+	INIT_LIST_HEAD(&entry->list);
+	spin_lock_bh(&bond->ipsec_lock);
+	list_add(&entry->list, &bond->ipsec_list);
+	spin_unlock_bh(&bond->ipsec_lock);
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+/* Program every SA tracked on the bond's ipsec_list onto the current active
+ * slave. An SA the slave rejects stays on the list but gets its
+ * xso.real_dev cleared. If the active slave has no usable
+ * xdo_dev_state_add, a warning is logged only when the list is non-empty.
+ */
+static void bond_ipsec_add_sa_all(struct bonding *bond)
+{
+	struct net_device *bond_dev = bond->dev;
+	struct net_device *real_dev;
+	struct bond_ipsec *entry;
+	struct slave *active;
+
+	rcu_read_lock();
+	active = rcu_dereference(bond->curr_active_slave);
+	if (!active) {
+		rcu_read_unlock();
+		return;
+	}
+	real_dev = active->dev;
+
+	if (real_dev->xfrmdev_ops &&
+	    real_dev->xfrmdev_ops->xdo_dev_state_add &&
+	    !netif_is_bond_master(real_dev)) {
+		spin_lock_bh(&bond->ipsec_lock);
+		list_for_each_entry(entry, &bond->ipsec_list, list) {
+			entry->xs->xso.real_dev = real_dev;
+			if (!real_dev->xfrmdev_ops->xdo_dev_state_add(entry->xs))
+				continue;
+
+			slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
+			entry->xs->xso.real_dev = NULL;
+		}
+		spin_unlock_bh(&bond->ipsec_lock);
+	} else {
+		spin_lock_bh(&bond->ipsec_lock);
+		if (!list_empty(&bond->ipsec_list))
+			slave_warn(bond_dev, real_dev,
+				   "%s: no slave xdo_dev_state_add\n",
+				   __func__);
+		spin_unlock_bh(&bond->ipsec_lock);
+	}
+
+	rcu_read_unlock();
+}
+
+/**
+ * bond_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ *
+ * When the bond has an active slave and @xs is bound to a real device, the
+ * delete is forwarded to that slave's xdo_dev_state_delete(). The tracking
+ * entry for @xs is removed from the bond's ipsec_list in every case except
+ * when @xs has no device at all.
+ **/
+static void bond_ipsec_del_sa(struct xfrm_state *xs)
+{
+	struct net_device *bond_dev = xs->xso.dev;
+	struct bond_ipsec *entry;
+	struct bonding *bond;
+	struct slave *active;
+
+	if (!bond_dev)
+		return;
+
+	rcu_read_lock();
+	bond = netdev_priv(bond_dev);
+	active = rcu_dereference(bond->curr_active_slave);
+
+	if (active && xs->xso.real_dev) {
+		struct net_device *real_dev = active->dev;
+
+		WARN_ON(xs->xso.real_dev != real_dev);
+
+		if (real_dev->xfrmdev_ops &&
+		    real_dev->xfrmdev_ops->xdo_dev_state_delete &&
+		    !netif_is_bond_master(real_dev))
+			real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
+		else
+			slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__);
+	}
+
+	/* drop the bookkeeping entry whether or not the slave was told */
+	spin_lock_bh(&bond->ipsec_lock);
+	list_for_each_entry(entry, &bond->ipsec_list, list) {
+		if (entry->xs != xs)
+			continue;
+
+		list_del(&entry->list);
+		kfree(entry);
+		break;
+	}
+	spin_unlock_bh(&bond->ipsec_lock);
+	rcu_read_unlock();
+}
+
+/* Remove every offloaded SA on the bond's ipsec_list from the current
+ * active slave. List entries are kept; only xso.real_dev is cleared, and it
+ * is cleared even when the slave has no usable xdo_dev_state_delete.
+ * Entries whose xso.real_dev is already NULL are left untouched.
+ */
+static void bond_ipsec_del_sa_all(struct bonding *bond)
+{
+	struct net_device *bond_dev = bond->dev;
+	struct net_device *real_dev;
+	struct bond_ipsec *entry;
+	struct slave *active;
+
+	rcu_read_lock();
+	active = rcu_dereference(bond->curr_active_slave);
+	if (!active)
+		goto unlock;
+	real_dev = active->dev;
+
+	spin_lock_bh(&bond->ipsec_lock);
+	list_for_each_entry(entry, &bond->ipsec_list, list) {
+		struct xfrm_state *xs = entry->xs;
+
+		if (xs->xso.real_dev) {
+			if (real_dev->xfrmdev_ops &&
+			    real_dev->xfrmdev_ops->xdo_dev_state_delete &&
+			    !netif_is_bond_master(real_dev))
+				real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
+			else
+				slave_warn(bond_dev, real_dev,
+					   "%s: no slave xdo_dev_state_delete\n",
+					   __func__);
+
+			xs->xso.real_dev = NULL;
+		}
+	}
+	spin_unlock_bh(&bond->ipsec_lock);
+unlock:
+	rcu_read_unlock();
+}
+
+/**
+ * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ *
+ * Return: false when @xs has no device, the bond has no active slave, the
+ * bond is not in active-backup mode, @xs is not bound to a real device, or
+ * the active slave has no usable xdo_dev_offload_ok; otherwise the result
+ * of the active slave's xdo_dev_offload_ok().
+ **/
+static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+	struct net_device *bond_dev = xs->xso.dev;
+	struct net_device *real_dev;
+	struct slave *curr_active;
+	struct bonding *bond;
+	bool ok = false;
+
+	/* same guard as bond_ipsec_add_sa()/bond_ipsec_del_sa() */
+	if (!bond_dev)
+		return false;
+
+	bond = netdev_priv(bond_dev);
+	rcu_read_lock();
+	curr_active = rcu_dereference(bond->curr_active_slave);
+	/* the bond may have no active slave; do not dereference NULL */
+	if (!curr_active)
+		goto out;
+	real_dev = curr_active->dev;
+
+	if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
+		goto out;
+
+	if (!xs->xso.real_dev)
+		goto out;
+
+	if (!real_dev->xfrmdev_ops ||
+	    !real_dev->xfrmdev_ops->xdo_dev_offload_ok ||
+	    netif_is_bond_master(real_dev))
+		goto out;
+
+	ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs);
+out:
+	rcu_read_unlock();
+	return ok;
+}
+
+/* xfrm offload callback table implemented by the bonding driver: SA add,
+ * SA delete and the per-packet offload check are routed to the
+ * bond_ipsec_* handlers defined above.
+ */
+static const struct xfrmdev_ops bond_xfrmdev_ops = {
+ .xdo_dev_state_add = bond_ipsec_add_sa,
+ .xdo_dev_state_delete = bond_ipsec_del_sa,
+ .xdo_dev_offload_ok = bond_ipsec_offload_ok,
+};
+#endif /* CONFIG_XFRM_OFFLOAD */
/*------------------------------- Link status -------------------------------*/
@@ -559,12 +827,8 @@
dev_uc_unsync(slave_dev, bond_dev);
dev_mc_unsync(slave_dev, bond_dev);
- if (BOND_MODE(bond) == BOND_MODE_8023AD) {
- /* del lacpdu mc addr from mc list */
- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
- dev_mc_del(slave_dev, lacpdu_multicast);
- }
+ if (BOND_MODE(bond) == BOND_MODE_8023AD)
+ dev_mc_del(slave_dev, lacpdu_mcast_addr);
}
/*--------------------------- Active slave change ---------------------------*/
@@ -584,7 +848,8 @@
if (bond->dev->flags & IFF_ALLMULTI)
dev_set_allmulti(old_active->dev, -1);
- bond_hw_addr_flush(bond->dev, old_active->dev);
+ if (bond->dev->flags & IFF_UP)
+ bond_hw_addr_flush(bond->dev, old_active->dev);
}
if (new_active) {
@@ -595,10 +860,12 @@
if (bond->dev->flags & IFF_ALLMULTI)
dev_set_allmulti(new_active->dev, 1);
- netif_addr_lock_bh(bond->dev);
- dev_uc_sync(new_active->dev, bond->dev);
- dev_mc_sync(new_active->dev, bond->dev);
- netif_addr_unlock_bh(bond->dev);
+ if (bond->dev->flags & IFF_UP) {
+ netif_addr_lock_bh(bond->dev);
+ dev_uc_sync(new_active->dev, bond->dev);
+ dev_mc_sync(new_active->dev, bond->dev);
+ netif_addr_unlock_bh(bond->dev);
+ }
}
}
@@ -609,14 +876,21 @@
*
* Should be called with RTNL held.
*/
-static void bond_set_dev_addr(struct net_device *bond_dev,
- struct net_device *slave_dev)
+static int bond_set_dev_addr(struct net_device *bond_dev,
+ struct net_device *slave_dev)
{
- netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->name=%s slave_dev->addr_len=%d\n",
- bond_dev, slave_dev, slave_dev->name, slave_dev->addr_len);
+ int err;
+
+ slave_dbg(bond_dev, slave_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n",
+ bond_dev, slave_dev, slave_dev->addr_len);
+ err = dev_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL);
+ if (err)
+ return err;
+
memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
bond_dev->addr_assign_type = NET_ADDR_STOLEN;
call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
+ return 0;
}
static struct slave *bond_get_old_active(struct bonding *bond,
@@ -652,8 +926,12 @@
switch (bond->params.fail_over_mac) {
case BOND_FOM_ACTIVE:
- if (new_active)
- bond_set_dev_addr(bond->dev, new_active->dev);
+ if (new_active) {
+ rv = bond_set_dev_addr(bond->dev, new_active->dev);
+ if (rv)
+ slave_err(bond->dev, new_active->dev, "Error %d setting bond MAC from slave\n",
+ -rv);
+ }
break;
case BOND_FOM_FOLLOW:
/* if new_active && old_active, swap them
@@ -680,10 +958,10 @@
}
rv = dev_set_mac_address(new_active->dev,
- (struct sockaddr *)&ss);
+ (struct sockaddr *)&ss, NULL);
if (rv) {
- netdev_err(bond->dev, "Error %d setting MAC of slave %s\n",
- -rv, new_active->dev->name);
+ slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n",
+ -rv);
goto out;
}
@@ -695,10 +973,10 @@
ss.ss_family = old_active->dev->type;
rv = dev_set_mac_address(old_active->dev,
- (struct sockaddr *)&ss);
+ (struct sockaddr *)&ss, NULL);
if (rv)
- netdev_err(bond->dev, "Error %d setting MAC of slave %s\n",
- -rv, new_active->dev->name);
+ slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n",
+ -rv);
out:
break;
default:
@@ -783,6 +1061,8 @@
rcu_read_unlock();
if (!slave || !bond->send_peer_notif ||
+ bond->send_peer_notif %
+ max(1, bond->params.peer_notif_delay) != 0 ||
!netif_carrier_ok(bond->dev) ||
test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
return false;
@@ -796,7 +1076,7 @@
/**
* change_active_interface - change the active slave into the specified one
* @bond: our bonding struct
- * @new: the new slave to make the active one
+ * @new_active: the new slave to make the active one
*
* Set the new slave to the bond's settings and unset them on the old
* curr_active_slave.
@@ -819,14 +1099,17 @@
if (old_active == new_active)
return;
+#ifdef CONFIG_XFRM_OFFLOAD
+ bond_ipsec_del_sa_all(bond);
+#endif /* CONFIG_XFRM_OFFLOAD */
+
if (new_active) {
new_active->last_link_up = jiffies;
if (new_active->link == BOND_LINK_BACK) {
if (bond_uses_primary(bond)) {
- netdev_info(bond->dev, "making interface %s the new active one %d ms earlier\n",
- new_active->dev->name,
- (bond->params.updelay - new_active->delay) * bond->params.miimon);
+ slave_info(bond->dev, new_active->dev, "making interface the new active one %d ms earlier\n",
+ (bond->params.updelay - new_active->delay) * bond->params.miimon);
}
new_active->delay = 0;
@@ -840,8 +1123,7 @@
bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
} else {
if (bond_uses_primary(bond)) {
- netdev_info(bond->dev, "making interface %s the new active one\n",
- new_active->dev->name);
+ slave_info(bond->dev, new_active->dev, "making interface the new active one\n");
}
}
}
@@ -878,17 +1160,24 @@
if (netif_running(bond->dev)) {
bond->send_peer_notif =
- bond->params.num_peer_notif;
+ bond->params.num_peer_notif *
+ max(1, bond->params.peer_notif_delay);
should_notify_peers =
bond_should_notify_peers(bond);
}
call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
- if (should_notify_peers)
+ if (should_notify_peers) {
+ bond->send_peer_notif--;
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
+ }
}
}
+
+#ifdef CONFIG_XFRM_OFFLOAD
+ bond_ipsec_add_sa_all(bond);
+#endif /* CONFIG_XFRM_OFFLOAD */
/* resend IGMP joins since active slave has changed or
* all were sent on curr_active_slave.
@@ -929,7 +1218,7 @@
return;
if (netif_carrier_ok(bond->dev))
- netdev_info(bond->dev, "first active interface up!\n");
+ netdev_info(bond->dev, "active interface up!\n");
else
netdev_info(bond->dev, "now running without any active interface!\n");
}
@@ -963,7 +1252,8 @@
return;
slave->np = NULL;
- __netpoll_free_async(np);
+
+ __netpoll_free(np);
}
static void bond_poll_controller(struct net_device *bond_dev)
@@ -1066,12 +1356,20 @@
#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
+#define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
+ NETIF_F_ALL_TSO)
+
+
static void bond_compute_features(struct bonding *bond)
{
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
IFF_XMIT_DST_RELEASE_PERM;
netdev_features_t vlan_features = BOND_VLAN_FEATURES;
netdev_features_t enc_features = BOND_ENC_FEATURES;
+#ifdef CONFIG_XFRM_OFFLOAD
+ netdev_features_t xfrm_features = BOND_XFRM_FEATURES;
+#endif /* CONFIG_XFRM_OFFLOAD */
+ netdev_features_t mpls_features = BOND_MPLS_FEATURES;
struct net_device *bond_dev = bond->dev;
struct list_head *iter;
struct slave *slave;
@@ -1082,6 +1380,7 @@
if (!bond_has_slaves(bond))
goto done;
vlan_features &= NETIF_F_ALL_FOR_ALL;
+ mpls_features &= NETIF_F_ALL_FOR_ALL;
bond_for_each_slave(bond, slave, iter) {
vlan_features = netdev_increment_features(vlan_features,
@@ -1090,6 +1389,17 @@
enc_features = netdev_increment_features(enc_features,
slave->dev->hw_enc_features,
BOND_ENC_FEATURES);
+
+#ifdef CONFIG_XFRM_OFFLOAD
+ xfrm_features = netdev_increment_features(xfrm_features,
+ slave->dev->hw_enc_features,
+ BOND_XFRM_FEATURES);
+#endif /* CONFIG_XFRM_OFFLOAD */
+
+ mpls_features = netdev_increment_features(mpls_features,
+ slave->dev->mpls_features,
+ BOND_MPLS_FEATURES);
+
dst_release_flag &= slave->dev->priv_flags;
if (slave->dev->hard_header_len > max_hard_header_len)
max_hard_header_len = slave->dev->hard_header_len;
@@ -1105,6 +1415,10 @@
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX |
NETIF_F_GSO_UDP_L4;
+#ifdef CONFIG_XFRM_OFFLOAD
+ bond_dev->hw_enc_features |= xfrm_features;
+#endif /* CONFIG_XFRM_OFFLOAD */
+ bond_dev->mpls_features = mpls_features;
bond_dev->gso_max_segs = gso_max_segs;
netif_set_gso_max_size(bond_dev, gso_max_size);
@@ -1128,6 +1442,11 @@
memcpy(bond_dev->broadcast, slave_dev->broadcast,
slave_dev->addr_len);
+
+ if (slave_dev->flags & IFF_POINTOPOINT) {
+ bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+ bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
+ }
}
/* On bonding slaves other than the currently active slave, suppress
@@ -1194,7 +1513,7 @@
skb->dev = bond->dev;
if (BOND_MODE(bond) == BOND_MODE_ALB &&
- bond->dev->priv_flags & IFF_BRIDGE_PORT &&
+ netif_is_bridge_port(bond->dev) &&
skb->pkt_type == PACKET_HOST) {
if (unlikely(skb_cow_head(skb,
@@ -1388,15 +1707,14 @@
if (!bond->params.use_carrier &&
slave_dev->ethtool_ops->get_link == NULL &&
slave_ops->ndo_do_ioctl == NULL) {
- netdev_warn(bond_dev, "no link monitoring support for %s\n",
- slave_dev->name);
+ slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
}
/* already in-use? */
if (netdev_is_rx_handler_busy(slave_dev)) {
NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
- netdev_err(bond_dev,
- "Error: Device is in use and cannot be enslaved\n");
+ slave_err(bond_dev, slave_dev,
+ "Error: Device is in use and cannot be enslaved\n");
return -EBUSY;
}
@@ -1409,22 +1727,20 @@
/* vlan challenged mutual exclusion */
/* no need to lock since we're protected by rtnl_lock */
if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
- netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n",
- slave_dev->name);
+ slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n");
if (vlan_uses_dev(bond_dev)) {
NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
- netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
- slave_dev->name, bond_dev->name);
+ slave_err(bond_dev, slave_dev, "Error: cannot enslave VLAN challenged slave on VLAN enabled bond\n");
return -EPERM;
} else {
- netdev_warn(bond_dev, "enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n",
- slave_dev->name, slave_dev->name,
- bond_dev->name);
+ slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n");
}
} else {
- netdev_dbg(bond_dev, "%s is !NETIF_F_VLAN_CHALLENGED\n",
- slave_dev->name);
+ slave_dbg(bond_dev, slave_dev, "is !NETIF_F_VLAN_CHALLENGED\n");
}
+
+ if (slave_dev->features & NETIF_F_HW_ESP)
+ slave_dbg(bond_dev, slave_dev, "is esp-hw-offload capable\n");
/* Old ifenslave binaries are no longer supported. These can
* be identified with moderate accuracy by the state of the slave:
@@ -1433,8 +1749,7 @@
*/
if (slave_dev->flags & IFF_UP) {
NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
- netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
- slave_dev->name);
+ slave_err(bond_dev, slave_dev, "slave is up - this may be due to an out of date ifenslave\n");
return -EPERM;
}
@@ -1447,14 +1762,14 @@
*/
if (!bond_has_slaves(bond)) {
if (bond_dev->type != slave_dev->type) {
- netdev_dbg(bond_dev, "change device type from %d to %d\n",
- bond_dev->type, slave_dev->type);
+ slave_dbg(bond_dev, slave_dev, "change device type from %d to %d\n",
+ bond_dev->type, slave_dev->type);
res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
bond_dev);
res = notifier_to_errno(res);
if (res) {
- netdev_err(bond_dev, "refused to change device type\n");
+ slave_err(bond_dev, slave_dev, "refused to change device type\n");
return -EBUSY;
}
@@ -1474,31 +1789,31 @@
}
} else if (bond_dev->type != slave_dev->type) {
NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
- netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
- slave_dev->name, slave_dev->type, bond_dev->type);
+ slave_err(bond_dev, slave_dev, "ether type (%d) is different from other slaves (%d), can not enslave it\n",
+ slave_dev->type, bond_dev->type);
return -EINVAL;
}
if (slave_dev->type == ARPHRD_INFINIBAND &&
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
- netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
- slave_dev->type);
+ slave_warn(bond_dev, slave_dev, "Type (%d) supports only active-backup mode\n",
+ slave_dev->type);
res = -EOPNOTSUPP;
goto err_undo_flags;
}
if (!slave_ops->ndo_set_mac_address ||
slave_dev->type == ARPHRD_INFINIBAND) {
- netdev_warn(bond_dev, "The slave device specified does not support setting the MAC address\n");
+ slave_warn(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address\n");
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
if (!bond_has_slaves(bond)) {
bond->params.fail_over_mac = BOND_FOM_ACTIVE;
- netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n");
+ slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n");
} else {
NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
- netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
+ slave_err(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
res = -EOPNOTSUPP;
goto err_undo_flags;
}
@@ -1511,8 +1826,11 @@
* address to be the same as the slave's.
*/
if (!bond_has_slaves(bond) &&
- bond->dev->addr_assign_type == NET_ADDR_RANDOM)
- bond_set_dev_addr(bond->dev, slave_dev);
+ bond->dev->addr_assign_type == NET_ADDR_RANDOM) {
+ res = bond_set_dev_addr(bond->dev, slave_dev);
+ if (res)
+ goto err_undo_flags;
+ }
new_slave = bond_alloc_slave(bond, slave_dev);
if (!new_slave) {
@@ -1529,7 +1847,7 @@
new_slave->original_mtu = slave_dev->mtu;
res = dev_set_mtu(slave_dev, bond->dev->mtu);
if (res) {
- netdev_dbg(bond_dev, "Error %d calling dev_set_mtu\n", res);
+ slave_err(bond_dev, slave_dev, "Error %d calling dev_set_mtu\n", res);
goto err_free;
}
@@ -1547,9 +1865,10 @@
*/
memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
ss.ss_family = slave_dev->type;
- res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
+ res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss,
+ extack);
if (res) {
- netdev_dbg(bond_dev, "Error %d calling set_mac_address\n", res);
+ slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res);
goto err_restore_mtu;
}
}
@@ -1558,9 +1877,9 @@
slave_dev->flags |= IFF_SLAVE;
/* open the slave since the application closed it */
- res = dev_open(slave_dev);
+ res = dev_open(slave_dev, extack);
if (res) {
- netdev_dbg(bond_dev, "Opening slave %s failed\n", slave_dev->name);
+ slave_err(bond_dev, slave_dev, "Opening slave failed\n");
goto err_restore_mac;
}
@@ -1579,8 +1898,7 @@
res = vlan_vids_add_by_dev(slave_dev, bond_dev);
if (res) {
- netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n",
- slave_dev->name);
+ slave_err(bond_dev, slave_dev, "Couldn't add bond vlan ids\n");
goto err_close;
}
@@ -1610,12 +1928,10 @@
* supported); thus, we don't need to change
* the messages for netif_carrier.
*/
- netdev_warn(bond_dev, "MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n",
- slave_dev->name);
+ slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n");
} else if (link_reporting == -1) {
/* unable get link status using mii/ethtool */
- netdev_warn(bond_dev, "can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n",
- slave_dev->name);
+ slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n");
}
}
@@ -1649,9 +1965,9 @@
if (new_slave->link != BOND_LINK_DOWN)
new_slave->last_link_up = jiffies;
- netdev_dbg(bond_dev, "Initial state of slave_dev is BOND_LINK_%s\n",
- new_slave->link == BOND_LINK_DOWN ? "DOWN" :
- (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
+ slave_dbg(bond_dev, slave_dev, "Initial state of slave is BOND_LINK_%s\n",
+ new_slave->link == BOND_LINK_DOWN ? "DOWN" :
+ (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
if (bond_uses_primary(bond) && bond->params.primary[0]) {
/* if there is a primary slave, remember it */
@@ -1692,7 +2008,7 @@
bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
break;
default:
- netdev_dbg(bond_dev, "This slave is always active in trunk mode\n");
+ slave_dbg(bond_dev, slave_dev, "This slave is always active in trunk mode\n");
/* always active in trunk mode */
bond_set_active_slave(new_slave);
@@ -1711,7 +2027,7 @@
#ifdef CONFIG_NET_POLL_CONTROLLER
if (bond->dev->npinfo) {
if (slave_enable_netpoll(new_slave)) {
- netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n");
+ slave_info(bond_dev, slave_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n");
res = -EBUSY;
goto err_detach;
}
@@ -1724,23 +2040,21 @@
res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
new_slave);
if (res) {
- netdev_dbg(bond_dev, "Error %d calling netdev_rx_handler_register\n", res);
+ slave_dbg(bond_dev, slave_dev, "Error %d calling netdev_rx_handler_register\n", res);
goto err_detach;
}
res = bond_master_upper_dev_link(bond, new_slave, extack);
if (res) {
- netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
+ slave_dbg(bond_dev, slave_dev, "Error %d calling bond_master_upper_dev_link\n", res);
goto err_unregister;
}
res = bond_sysfs_slave_add(new_slave);
if (res) {
- netdev_dbg(bond_dev, "Error %d calling bond_sysfs_slave_add\n", res);
+ slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res);
goto err_upper_unlink;
}
-
- bond->nest_level = dev_get_nest_level(bond_dev) + 1;
/* If the mode uses primary, then the following is handled by
* bond_change_active_slave().
@@ -1763,16 +2077,14 @@
}
}
- netif_addr_lock_bh(bond_dev);
- dev_mc_sync_multiple(slave_dev, bond_dev);
- dev_uc_sync_multiple(slave_dev, bond_dev);
- netif_addr_unlock_bh(bond_dev);
+ if (bond_dev->flags & IFF_UP) {
+ netif_addr_lock_bh(bond_dev);
+ dev_mc_sync_multiple(slave_dev, bond_dev);
+ dev_uc_sync_multiple(slave_dev, bond_dev);
+ netif_addr_unlock_bh(bond_dev);
- if (BOND_MODE(bond) == BOND_MODE_8023AD) {
- /* add lacpdu mc addr to mc list */
- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
- dev_mc_add(slave_dev, lacpdu_multicast);
+ if (BOND_MODE(bond) == BOND_MODE_8023AD)
+ dev_mc_add(slave_dev, lacpdu_mcast_addr);
}
}
@@ -1790,10 +2102,9 @@
bond_update_slave_arr(bond, NULL);
- netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
- slave_dev->name,
- bond_is_active_slave(new_slave) ? "an active" : "a backup",
- new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
+ slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
+ bond_is_active_slave(new_slave) ? "an active" : "a backup",
+ new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
/* enslave is successful */
bond_queue_slave_event(new_slave);
@@ -1839,7 +2150,7 @@
bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr,
new_slave->dev->addr_len);
ss.ss_family = slave_dev->type;
- dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
+ dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL);
}
err_restore_mtu:
@@ -1889,8 +2200,7 @@
/* slave is not a slave or master is not master of this slave */
if (!(slave_dev->flags & IFF_SLAVE) ||
!netdev_has_upper_dev(slave_dev, bond_dev)) {
- netdev_dbg(bond_dev, "cannot release %s\n",
- slave_dev->name);
+ slave_dbg(bond_dev, slave_dev, "cannot release slave\n");
return -EINVAL;
}
@@ -1899,8 +2209,7 @@
slave = bond_get_slave_by_dev(bond, slave_dev);
if (!slave) {
/* not a slave of this bond */
- netdev_info(bond_dev, "%s not enslaved\n",
- slave_dev->name);
+ slave_info(bond_dev, slave_dev, "interface not enslaved\n");
unblock_netpoll_tx();
return -EINVAL;
}
@@ -1925,9 +2234,8 @@
if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, slave);
- netdev_info(bond_dev, "Releasing %s interface %s\n",
- bond_is_active_slave(slave) ? "active" : "backup",
- slave_dev->name);
+ slave_info(bond_dev, slave_dev, "Releasing %s interface\n",
+ bond_is_active_slave(slave) ? "active" : "backup");
oldcurrent = rcu_access_pointer(bond->curr_active_slave);
@@ -1937,9 +2245,8 @@
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) {
if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) &&
bond_has_slaves(bond))
- netdev_warn(bond_dev, "the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n",
- slave_dev->name, slave->perm_hwaddr,
- bond_dev->name, slave_dev->name);
+ slave_warn(bond_dev, slave_dev, "the permanent HWaddr of slave - %pM - is still in use by bond - set the HWaddr of slave to a different address to avoid conflicts\n",
+ slave->perm_hwaddr);
}
if (rtnl_dereference(bond->primary_slave) == slave)
@@ -1967,13 +2274,9 @@
bond_select_active_slave(bond);
}
- if (!bond_has_slaves(bond)) {
- bond_set_carrier(bond);
+ bond_set_carrier(bond);
+ if (!bond_has_slaves(bond))
eth_hw_addr_random(bond_dev);
- bond->nest_level = SINGLE_DEPTH_NESTING;
- } else {
- bond->nest_level = dev_get_nest_level(bond_dev) + 1;
- }
unblock_netpoll_tx();
synchronize_rcu();
@@ -1987,8 +2290,7 @@
bond_compute_features(bond);
if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
(old_features & NETIF_F_VLAN_CHALLENGED))
- netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n",
- slave_dev->name, bond_dev->name);
+ slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n");
vlan_vids_del_by_dev(slave_dev, bond_dev);
@@ -2010,7 +2312,8 @@
if (old_flags & IFF_ALLMULTI)
dev_set_allmulti(slave_dev, -1);
- bond_hw_addr_flush(bond_dev, slave_dev);
+ if (old_flags & IFF_UP)
+ bond_hw_addr_flush(bond_dev, slave_dev);
}
slave_disable_netpoll(slave);
@@ -2024,7 +2327,7 @@
bond_hw_addr_copy(ss.__data, slave->perm_hwaddr,
slave->dev->addr_len);
ss.ss_family = slave_dev->type;
- dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
+ dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL);
}
if (unregister)
@@ -2049,8 +2352,8 @@
/* First release a slave and then destroy the bond if no more slaves are left.
* Must be under rtnl_lock when this function is called.
*/
-static int bond_release_and_destroy(struct net_device *bond_dev,
- struct net_device *slave_dev)
+static int bond_release_and_destroy(struct net_device *bond_dev,
+ struct net_device *slave_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
int ret;
@@ -2059,8 +2362,7 @@
if (ret == 0 && !bond_has_slaves(bond) &&
bond_dev->reg_state != NETREG_UNREGISTERING) {
bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
- netdev_info(bond_dev, "Destroying bond %s\n",
- bond_dev->name);
+ netdev_info(bond_dev, "Destroying bond\n");
bond_remove_proc_entry(bond);
unregister_netdevice(bond_dev);
}
@@ -2096,12 +2398,21 @@
/* called with rcu_read_lock() */
static int bond_miimon_inspect(struct bonding *bond)
{
+ bool ignore_updelay = false;
int link_state, commit = 0;
struct list_head *iter;
struct slave *slave;
- bool ignore_updelay;
- ignore_updelay = !rcu_dereference(bond->curr_active_slave);
+ if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
+ ignore_updelay = !rcu_dereference(bond->curr_active_slave);
+ } else {
+ struct bond_up_slave *usable_slaves;
+
+ usable_slaves = rcu_dereference(bond->usable_slaves);
+
+ if (usable_slaves && usable_slaves->count == 0)
+ ignore_updelay = true;
+ }
bond_for_each_slave_rcu(bond, slave, iter) {
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
@@ -2117,24 +2428,22 @@
commit++;
slave->delay = bond->params.downdelay;
if (slave->delay) {
- netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
- (BOND_MODE(bond) ==
- BOND_MODE_ACTIVEBACKUP) ?
- (bond_is_active_slave(slave) ?
- "active " : "backup ") : "",
- slave->dev->name,
- bond->params.downdelay * bond->params.miimon);
+ slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
+ (BOND_MODE(bond) ==
+ BOND_MODE_ACTIVEBACKUP) ?
+ (bond_is_active_slave(slave) ?
+ "active " : "backup ") : "",
+ bond->params.downdelay * bond->params.miimon);
}
- /*FALLTHRU*/
+ fallthrough;
case BOND_LINK_FAIL:
if (link_state) {
/* recovered before downdelay expired */
bond_propose_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
- netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
- (bond->params.downdelay - slave->delay) *
- bond->params.miimon,
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
+ (bond->params.downdelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -2157,20 +2466,18 @@
slave->delay = bond->params.updelay;
if (slave->delay) {
- netdev_info(bond->dev, "link status up for interface %s, enabling it in %d ms\n",
- slave->dev->name,
- ignore_updelay ? 0 :
- bond->params.updelay *
- bond->params.miimon);
+ slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
+ ignore_updelay ? 0 :
+ bond->params.updelay *
+ bond->params.miimon);
}
- /*FALLTHRU*/
+ fallthrough;
case BOND_LINK_BACK:
if (!link_state) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
- netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
- (bond->params.updelay - slave->delay) *
- bond->params.miimon,
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
+ (bond->params.updelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -2235,9 +2542,8 @@
bond_needs_speed_duplex(bond)) {
slave->link = BOND_LINK_DOWN;
if (net_ratelimit())
- netdev_warn(bond->dev,
- "failed to get link speed/duplex for %s\n",
- slave->dev->name);
+ slave_warn(bond->dev, slave->dev,
+ "failed to get link speed/duplex\n");
continue;
}
bond_set_slave_link_state(slave, BOND_LINK_UP,
@@ -2253,10 +2559,9 @@
bond_set_active_slave(slave);
}
- netdev_info(bond->dev, "link status definitely up for interface %s, %u Mbps %s duplex\n",
- slave->dev->name,
- slave->speed == SPEED_UNKNOWN ? 0 : slave->speed,
- slave->duplex ? "full" : "half");
+ slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n",
+ slave->speed == SPEED_UNKNOWN ? 0 : slave->speed,
+ slave->duplex ? "full" : "half");
bond_miimon_link_change(bond, slave, BOND_LINK_UP);
@@ -2277,8 +2582,7 @@
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_NOW);
- netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n");
bond_miimon_link_change(bond, slave, BOND_LINK_DOWN);
@@ -2288,8 +2592,8 @@
continue;
default:
- netdev_err(bond->dev, "invalid new link %d on slave %s\n",
- slave->link_new_state, slave->dev->name);
+ slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n",
+ slave->link_new_state);
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
continue;
@@ -2316,6 +2620,7 @@
struct bonding *bond = container_of(work, struct bonding,
mii_work.work);
bool should_notify_peers = false;
+ bool commit;
unsigned long delay;
struct slave *slave;
struct list_head *iter;
@@ -2326,12 +2631,19 @@
goto re_arm;
rcu_read_lock();
-
should_notify_peers = bond_should_notify_peers(bond);
-
- if (bond_miimon_inspect(bond)) {
+ commit = !!bond_miimon_inspect(bond);
+ if (bond->send_peer_notif) {
rcu_read_unlock();
+ if (rtnl_trylock()) {
+ bond->send_peer_notif--;
+ rtnl_unlock();
+ }
+ } else {
+ rcu_read_unlock();
+ }
+ if (commit) {
/* Race avoidance with bond_close cancel of workqueue */
if (!rtnl_trylock()) {
delay = 1;
@@ -2345,8 +2657,7 @@
bond_miimon_commit(bond);
rtnl_unlock(); /* might sleep, hold no other locks */
- } else
- rcu_read_unlock();
+ }
re_arm:
if (bond->params.miimon)
@@ -2360,22 +2671,26 @@
}
}
-static int bond_upper_dev_walk(struct net_device *upper, void *data)
+static int bond_upper_dev_walk(struct net_device *upper,
+ struct netdev_nested_priv *priv)
{
- __be32 ip = *((__be32 *)data);
+ __be32 ip = *(__be32 *)priv->data; /* priv->data points at the __be32 address to match */
return ip == bond_confirm_addr(upper, 0, ip);
}
static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
{
+ struct netdev_nested_priv priv = {
+ .data = (void *)&ip,
+ };
bool ret = false;
if (ip == bond_confirm_addr(bond->dev, 0, ip))
return true;
rcu_read_lock();
- if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip))
+ if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &priv))
ret = true;
rcu_read_unlock();
@@ -2386,15 +2701,16 @@
* switches in VLAN mode (especially if ports are configured as
* "native" to a VLAN) might not pass non-tagged frames.
*/
-static void bond_arp_send(struct net_device *slave_dev, int arp_op,
- __be32 dest_ip, __be32 src_ip,
- struct bond_vlan_tag *tags)
+static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip,
+ __be32 src_ip, struct bond_vlan_tag *tags)
{
struct sk_buff *skb;
struct bond_vlan_tag *outer_tag = tags;
+ struct net_device *slave_dev = slave->dev;
+ struct net_device *bond_dev = slave->bond->dev;
- netdev_dbg(slave_dev, "arp %d on slave %s: dst %pI4 src %pI4\n",
- arp_op, slave_dev->name, &dest_ip, &src_ip);
+ slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n",
+ arp_op, &dest_ip, &src_ip);
skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
NULL, slave_dev->dev_addr, NULL);
@@ -2416,8 +2732,8 @@
continue;
}
- netdev_dbg(slave_dev, "inner tag: proto %X vid %X\n",
- ntohs(outer_tag->vlan_proto), tags->vlan_id);
+ slave_dbg(bond_dev, slave_dev, "inner tag: proto %X vid %X\n",
+ ntohs(outer_tag->vlan_proto), tags->vlan_id);
skb = vlan_insert_tag_set_proto(skb, tags->vlan_proto,
tags->vlan_id);
if (!skb) {
@@ -2429,8 +2745,8 @@
}
/* Set the outer tag */
if (outer_tag->vlan_id) {
- netdev_dbg(slave_dev, "outer tag: proto %X vid %X\n",
- ntohs(outer_tag->vlan_proto), outer_tag->vlan_id);
+ slave_dbg(bond_dev, slave_dev, "outer tag: proto %X vid %X\n",
+ ntohs(outer_tag->vlan_proto), outer_tag->vlan_id);
__vlan_hwaccel_put_tag(skb, outer_tag->vlan_proto,
outer_tag->vlan_id);
}
@@ -2487,7 +2803,8 @@
int i;
for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
- netdev_dbg(bond->dev, "basa: target %pI4\n", &targets[i]);
+ slave_dbg(bond->dev, slave->dev, "%s: target %pI4\n",
+ __func__, &targets[i]);
tags = NULL;
/* Find out through which dev should the packet go */
@@ -2501,7 +2818,7 @@
net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
bond->dev->name,
&targets[i]);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+ bond_arp_send(slave, ARPOP_REQUEST, targets[i],
0, tags);
continue;
}
@@ -2518,7 +2835,7 @@
goto found;
/* Not our device - skip */
- netdev_dbg(bond->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n",
+ slave_dbg(bond->dev, slave->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n",
&targets[i], rt->dst.dev ? rt->dst.dev->name : "NULL");
ip_rt_put(rt);
@@ -2527,8 +2844,7 @@
found:
addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
ip_rt_put(rt);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, tags);
+ bond_arp_send(slave, ARPOP_REQUEST, targets[i], addr, tags);
kfree(tags);
}
}
@@ -2538,15 +2854,15 @@
int i;
if (!sip || !bond_has_this_ip(bond, tip)) {
- netdev_dbg(bond->dev, "bva: sip %pI4 tip %pI4 not found\n",
- &sip, &tip);
+ slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 tip %pI4 not found\n",
+ __func__, &sip, &tip);
return;
}
i = bond_get_targets_ip(bond->params.arp_targets, sip);
if (i == -1) {
- netdev_dbg(bond->dev, "bva: sip %pI4 not found in targets\n",
- &sip);
+ slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 not found in targets\n",
+ __func__, &sip);
return;
}
slave->last_rx = jiffies;
@@ -2574,8 +2890,8 @@
alen = arp_hdr_len(bond->dev);
- netdev_dbg(bond->dev, "bond_arp_rcv: skb->dev %s\n",
- skb->dev->name);
+ slave_dbg(bond->dev, slave->dev, "%s: skb->dev %s\n",
+ __func__, skb->dev->name);
if (alen > skb_headlen(skb)) {
arp = kmalloc(alen, GFP_ATOMIC);
@@ -2599,10 +2915,10 @@
arp_ptr += 4 + bond->dev->addr_len;
memcpy(&tip, arp_ptr, 4);
- netdev_dbg(bond->dev, "bond_arp_rcv: %s/%d av %d sv %d sip %pI4 tip %pI4\n",
- slave->dev->name, bond_slave_state(slave),
- bond->params.arp_validate, slave_do_arp_validate(bond, slave),
- &sip, &tip);
+ slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI4 tip %pI4\n",
+ __func__, slave->dev->name, bond_slave_state(slave),
+ bond->params.arp_validate, slave_do_arp_validate(bond, slave),
+ &sip, &tip);
curr_active_slave = rcu_dereference(bond->curr_active_slave);
curr_arp_slave = rcu_dereference(bond->current_arp_slave);
@@ -2705,12 +3021,10 @@
* is closed.
*/
if (!oldcurrent) {
- netdev_info(bond->dev, "link status definitely up for interface %s\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "link status definitely up\n");
do_failover = 1;
} else {
- netdev_info(bond->dev, "interface %s is now up\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "interface is now up\n");
}
}
} else {
@@ -2729,8 +3043,7 @@
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- netdev_info(bond->dev, "interface %s is now down\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "interface is now down\n");
if (slave == oldcurrent)
do_failover = 1;
@@ -2883,8 +3196,7 @@
RCU_INIT_POINTER(bond->current_arp_slave, NULL);
}
- netdev_info(bond->dev, "link status definitely up for interface %s\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "link status definitely up\n");
if (!rtnl_dereference(bond->curr_active_slave) ||
slave == rtnl_dereference(bond->primary_slave))
@@ -2903,8 +3215,7 @@
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_NOW);
- netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n");
if (slave == rtnl_dereference(bond->curr_active_slave)) {
RCU_INIT_POINTER(bond->current_arp_slave, NULL);
@@ -2927,8 +3238,9 @@
continue;
default:
- netdev_err(bond->dev, "impossible: new_link %d on slave %s\n",
- slave->link_new_state, slave->dev->name);
+ slave_err(bond->dev, slave->dev,
+ "impossible: link_new_state %d on slave\n",
+ slave->link_new_state);
continue;
}
@@ -2997,8 +3309,7 @@
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_LATER);
- netdev_info(bond->dev, "backup interface %s is now down\n",
- slave->dev->name);
+ slave_info(bond->dev, slave->dev, "backup interface is now down\n");
}
if (slave == curr_arp_slave)
found = true;
@@ -3069,9 +3380,11 @@
if (!rtnl_trylock())
return;
- if (should_notify_peers)
+ if (should_notify_peers) {
+ bond->send_peer_notif--;
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
+ }
if (should_notify_rtnl) {
bond_slave_state_notify(bond);
bond_slave_link_notify(bond);
@@ -3110,18 +3423,19 @@
{
struct bonding *event_bond = netdev_priv(bond_dev);
+ netdev_dbg(bond_dev, "%s called\n", __func__);
+
switch (event) {
case NETDEV_CHANGENAME:
return bond_event_changename(event_bond);
case NETDEV_UNREGISTER:
bond_remove_proc_entry(event_bond);
+#ifdef CONFIG_XFRM_OFFLOAD
+ xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true);
+#endif /* CONFIG_XFRM_OFFLOAD */
break;
case NETDEV_REGISTER:
bond_create_proc_entry(event_bond);
- break;
- case NETDEV_NOTIFY_PEERS:
- if (event_bond->send_peer_notif)
- event_bond->send_peer_notif--;
break;
default:
break;
@@ -3141,11 +3455,16 @@
* before netdev_rx_handler_register is called in which case
* slave will be NULL
*/
- if (!slave)
+ if (!slave) {
+ netdev_dbg(slave_dev, "%s called on NULL slave\n", __func__);
return NOTIFY_DONE;
+ }
+
bond_dev = slave->bond->dev;
bond = slave->bond;
primary = rtnl_dereference(bond->primary_slave);
+
+ slave_dbg(bond_dev, slave_dev, "%s called\n", __func__);
switch (event) {
case NETDEV_UNREGISTER:
@@ -3173,7 +3492,7 @@
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_adapter_speed_duplex_changed(slave);
- /* Fallthrough */
+ fallthrough;
case NETDEV_DOWN:
/* Refresh slave-array if applicable!
* If the setup does not use miimon or arpmon (mode-specific!),
@@ -3223,7 +3542,11 @@
unblock_netpoll_tx();
break;
case NETDEV_FEAT_CHANGE:
- bond_compute_features(bond);
+ if (!bond->notifier_ctx) {
+ bond->notifier_ctx = true;
+ bond_compute_features(bond);
+ bond->notifier_ctx = false;
+ }
break;
case NETDEV_RESEND_IGMP:
/* Propagate to master device */
@@ -3248,7 +3571,8 @@
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
- netdev_dbg(event_dev, "event: %lx\n", event);
+ netdev_dbg(event_dev, "%s received %s\n",
+ __func__, netdev_cmd_to_name(event));
if (!(event_dev->priv_flags & IFF_BONDING))
return NOTIFY_DONE;
@@ -3256,16 +3580,13 @@
if (event_dev->flags & IFF_MASTER) {
int ret;
- netdev_dbg(event_dev, "IFF_MASTER\n");
ret = bond_master_netdev_event(event, event_dev);
if (ret != NOTIFY_DONE)
return ret;
}
- if (event_dev->flags & IFF_SLAVE) {
- netdev_dbg(event_dev, "IFF_SLAVE\n");
+ if (event_dev->flags & IFF_SLAVE)
return bond_slave_netdev_event(event, event_dev);
- }
return NOTIFY_DONE;
}
@@ -3287,39 +3608,78 @@
return 0;
}
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
+ int *noff, int *proto, bool l34)
+{
+ const struct ipv6hdr *iph6;
+ const struct iphdr *iph;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+ return false; /* cannot pull a full IPv4 header at *noff */
+ iph = (const struct iphdr *)(skb->data + *noff); /* header at *noff, not necessarily the outer one */
+ iph_to_flow_copy_v4addrs(fk, iph);
+ *noff += iph->ihl << 2; /* step over the IPv4 header: ihl * 4 bytes */
+ if (!ip_is_fragment(iph))
+ *proto = iph->protocol; /* fragments leave *proto as the caller set it */
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+ return false; /* cannot pull a full IPv6 header at *noff */
+ iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+ iph_to_flow_copy_v6addrs(fk, iph6);
+ *noff += sizeof(*iph6); /* fixed header only; nexthdr is used as-is */
+ *proto = iph6->nexthdr;
+ } else {
+ return false; /* neither IPv4 nor IPv6 */
+ }
+
+ if (l34 && *proto >= 0) /* ports only for layer3+4 and a non-negative *proto */
+ fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+
+ return true;
+}
+
/* Extract the appropriate headers based on bond's xmit policy */
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
struct flow_keys *fk)
{
- const struct ipv6hdr *iph6;
- const struct iphdr *iph;
+ bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
int noff, proto = -1;
- if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
- return skb_flow_dissect_flow_keys(skb, fk, 0);
+ if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) {
+ memset(fk, 0, sizeof(*fk)); /* keys the dissector does not write stay zero */
+ return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
+ fk, NULL, 0, 0, 0, 0);
+ }
fk->ports.ports = 0;
+ memset(&fk->icmp, 0, sizeof(fk->icmp)); /* icmp key stays zero unless filled in below */
noff = skb_network_offset(skb);
- if (skb->protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
- return false;
- iph = ip_hdr(skb);
- iph_to_flow_copy_v4addrs(fk, iph);
- noff += iph->ihl << 2;
- if (!ip_is_fragment(iph))
- proto = iph->protocol;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
- return false;
- iph6 = ipv6_hdr(skb);
- iph_to_flow_copy_v6addrs(fk, iph6);
- noff += sizeof(*iph6);
- proto = iph6->nexthdr;
- } else {
+ if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) /* IP header at the network offset */
return false;
+
+ /* ICMP error packets contain at least 8 bytes of the header
+ * of the packet which generated the error. Use this information
+ * to correlate ICMP error packets with the flow which
+ * generated the error.
+ */
+ if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
+ skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
+ skb_transport_offset(skb),
+ skb_headlen(skb));
+ if (proto == IPPROTO_ICMP) {
+ if (!icmp_is_err(fk->icmp.type))
+ return true; /* not an ICMP error: keep the keys gathered so far */
+
+ noff += sizeof(struct icmphdr); /* embedded packet header follows the ICMP header */
+ } else if (proto == IPPROTO_ICMPV6) {
+ if (!icmpv6_is_err(fk->icmp.type))
+ return true; /* not an ICMPv6 error: keep the keys gathered so far */
+
+ noff += sizeof(struct icmp6hdr); /* embedded packet header follows the ICMPv6 header */
+ }
+ return bond_flow_ip(skb, fk, &noff, &proto, l34); /* second pass at the embedded header; rewrites the address keys */
}
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
- fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
return true;
}
@@ -3346,10 +3706,14 @@
return bond_eth_hash(skb);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
- bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+ bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
hash = bond_eth_hash(skb);
- else
- hash = (__force u32)flow.ports.ports;
+ } else {
+ if (flow.icmp.id)
+ memcpy(&hash, &flow.icmp, sizeof(hash));
+ else
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ }
hash ^= (__force u32)flow_get_u32_dst(&flow) ^
(__force u32)flow_get_u32_src(&flow);
hash ^= (hash >> 16);
@@ -3424,6 +3788,9 @@
/* register to receive LACPDUs */
bond->recv_probe = bond_3ad_lacpdu_recv;
bond_3ad_initiate_agg_selection(bond, 1);
+
+ bond_for_each_slave(bond, slave, iter)
+ dev_mc_add(slave->dev, lacpdu_mcast_addr);
}
if (bond_mode_can_use_xmit_hash(bond))
@@ -3435,12 +3802,26 @@
static int bond_close(struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
bond_work_cancel_all(bond);
bond->send_peer_notif = 0;
if (bond_is_lb(bond))
bond_alb_deinitialize(bond);
bond->recv_probe = NULL;
+
+ if (bond_uses_primary(bond)) { /* in this case only the current active slave is flushed */
+ rcu_read_lock();
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (slave) /* there may be no active slave */
+ bond_hw_addr_flush(bond_dev, slave->dev); /* NOTE(review): presumably drops the bond's synced addresses from the slave - confirm */
+ rcu_read_unlock();
+ } else { /* otherwise flush on every slave */
+ struct list_head *iter;
+
+ bond_for_each_slave(bond, slave, iter)
+ bond_hw_addr_flush(bond_dev, slave->dev);
+ }
return 0;
}
@@ -3474,12 +3855,46 @@
}
}
-static int bond_get_nest_level(struct net_device *bond_dev)
+#ifdef CONFIG_LOCKDEP
+static int bond_get_lowest_level_rcu(struct net_device *dev)
{
- struct bonding *bond = netdev_priv(bond_dev);
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int cur = 0, max = 0;
- return bond->nest_level;
+ now = dev;
+ iter = &dev->adj_list.lower;
+ /* Iterative walk over dev's lower devices; max is the deepest stack depth seen. */
+ while (1) {
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev_rcu(now, &iter);
+ if (!ldev)
+ break; /* no further lower device at this level */
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now; /* push: remember where to resume after the descent */
+ iter_stack[cur++] = iter; /* no bound check: relies on depth <= MAX_NEST_DEV */
+ if (max <= cur)
+ max = cur; /* record the deepest level reached so far */
+ break; /* descend right away; this inner loop runs at most once */
+ }
+
+ if (!next) {
+ if (!cur)
+ return max; /* stack is empty and nothing is left to visit: done */
+ next = dev_stack[--cur]; /* pop: go back up one level */
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return max; /* not reached: the loop above only exits through its return */
}
+#endif /* CONFIG_LOCKDEP */
static void bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats)
@@ -3488,11 +3903,17 @@
struct rtnl_link_stats64 temp;
struct list_head *iter;
struct slave *slave;
+ int nest_level = 0;
- spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev));
- memcpy(stats, &bond->bond_stats, sizeof(*stats));
rcu_read_lock();
+#ifdef CONFIG_LOCKDEP
+ nest_level = bond_get_lowest_level_rcu(bond_dev);
+#endif
+
+ spin_lock_nested(&bond->stats_lock, nest_level);
+ memcpy(stats, &bond->bond_stats, sizeof(*stats));
+
bond_for_each_slave_rcu(bond, slave, iter) {
const struct rtnl_link_stats64 *new =
dev_get_stats(slave->dev, &temp);
@@ -3502,10 +3923,10 @@
/* save off the slave stats for the next run */
memcpy(&slave->slave_stats, new, sizeof(*new));
}
- rcu_read_unlock();
memcpy(&bond->bond_stats, stats, sizeof(*stats));
spin_unlock(&bond->stats_lock);
+ rcu_read_unlock();
}
static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
@@ -3530,7 +3951,7 @@
return -EINVAL;
mii->phy_id = 0;
- /* Fall Through */
+ fallthrough;
case SIOCGMIIREG:
/* We do this again just in case we were called by SIOCGMIIREG
* instead of SIOCGMIIPHY.
@@ -3582,12 +4003,11 @@
slave_dev = __dev_get_by_name(net, ifr->ifr_slave);
- netdev_dbg(bond_dev, "slave_dev=%p:\n", slave_dev);
+ slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev);
if (!slave_dev)
return -ENODEV;
- netdev_dbg(bond_dev, "slave_dev->name=%s:\n", slave_dev->name);
switch (cmd) {
case BOND_ENSLAVE_OLD:
case SIOCBONDENSLAVE:
@@ -3599,8 +4019,7 @@
break;
case BOND_SETHWADDR_OLD:
case SIOCBONDSETHWADDR:
- bond_set_dev_addr(bond_dev, slave_dev);
- res = 0;
+ res = bond_set_dev_addr(bond_dev, slave_dev);
break;
case BOND_CHANGE_ACTIVE_OLD:
case SIOCBONDCHANGEACTIVE:
@@ -3656,32 +4075,35 @@
const struct net_device_ops *slave_ops;
struct neigh_parms parms;
struct slave *slave;
- int ret;
+ int ret = 0;
- slave = bond_first_slave(bond);
+ rcu_read_lock();
+ slave = bond_first_slave_rcu(bond);
if (!slave)
- return 0;
+ goto out;
slave_ops = slave->dev->netdev_ops;
if (!slave_ops->ndo_neigh_setup)
- return 0;
+ goto out;
- parms.neigh_setup = NULL;
- parms.neigh_cleanup = NULL;
- ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
- if (ret)
- return ret;
-
- /* Assign slave's neigh_cleanup to neighbour in case cleanup is called
- * after the last slave has been detached. Assumes that all slaves
- * utilize the same neigh_cleanup (true at this writing as only user
- * is ipoib).
+ /* TODO: find another way [1] to implement this.
+ * Passing a zeroed structure is fragile,
+ * but at least we do not pass garbage.
+ *
+ * [1] One way would be that ndo_neigh_setup() never touch
+ * struct neigh_parms, but propagate the new neigh_setup()
+ * back to ___neigh_create() / neigh_parms_alloc()
*/
- n->parms->neigh_cleanup = parms.neigh_cleanup;
+ memset(&parms, 0, sizeof(parms));
+ ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
- if (!parms.neigh_setup)
- return 0;
+ if (ret)
+ goto out;
- return parms.neigh_setup(n);
+ if (parms.neigh_setup)
+ ret = parms.neigh_setup(n);
+out:
+ rcu_read_unlock();
+ return ret;
}
/* The bonding ndo_neigh_setup is called at init time beofre any
@@ -3713,7 +4135,7 @@
netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);
bond_for_each_slave(bond, slave, iter) {
- netdev_dbg(bond_dev, "s %p c_m %p\n",
+ slave_dbg(bond_dev, slave->dev, "s %p c_m %p\n",
slave, slave->dev->netdev_ops->ndo_change_mtu);
res = dev_set_mtu(slave->dev, new_mtu);
@@ -3727,8 +4149,8 @@
* means changing their mtu from timer context, which
* is probably not a good idea.
*/
- netdev_dbg(bond_dev, "err %d %s\n", res,
- slave->dev->name);
+ slave_dbg(bond_dev, slave->dev, "err %d setting mtu to %d\n",
+ res, new_mtu);
goto unwind;
}
}
@@ -3746,10 +4168,9 @@
break;
tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu);
- if (tmp_res) {
- netdev_dbg(bond_dev, "unwind err %d dev %s\n",
- tmp_res, rollback_slave->dev->name);
- }
+ if (tmp_res)
+ slave_dbg(bond_dev, rollback_slave->dev, "unwind err %d\n",
+ tmp_res);
}
return res;
@@ -3773,7 +4194,7 @@
return bond_alb_set_mac_address(bond_dev, addr);
- netdev_dbg(bond_dev, "bond=%p\n", bond);
+ netdev_dbg(bond_dev, "%s: bond=%p\n", __func__, bond);
/* If fail_over_mac is enabled, do nothing and return success.
* Returning an error causes ifenslave to fail.
@@ -3786,8 +4207,9 @@
return -EADDRNOTAVAIL;
bond_for_each_slave(bond, slave, iter) {
- netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name);
- res = dev_set_mac_address(slave->dev, addr);
+ slave_dbg(bond_dev, slave->dev, "%s: slave=%p\n",
+ __func__, slave);
+ res = dev_set_mac_address(slave->dev, addr, NULL);
if (res) {
/* TODO: consider downing the slave
* and retry ?
@@ -3795,7 +4217,8 @@
* breakage anyway until ARP finish
* updating, so...
*/
- netdev_dbg(bond_dev, "err %d %s\n", res, slave->dev->name);
+ slave_dbg(bond_dev, slave->dev, "%s: err %d\n",
+ __func__, res);
goto unwind;
}
}
@@ -3816,10 +4239,10 @@
break;
tmp_res = dev_set_mac_address(rollback_slave->dev,
- (struct sockaddr *)&tmp_ss);
+ (struct sockaddr *)&tmp_ss, NULL);
if (tmp_res) {
- netdev_dbg(bond_dev, "unwind err %d dev %s\n",
- tmp_res, rollback_slave->dev->name);
+ slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n",
+ __func__, tmp_res);
}
}
@@ -3827,16 +4250,15 @@
}
/**
- * bond_xmit_slave_id - transmit skb through slave with slave_id
+ * bond_get_slave_by_id - get xmit slave with slave_id
* @bond: bonding device that is transmitting
- * @skb: buffer to transmit
* @slave_id: slave id up to slave_cnt-1 through which to transmit
*
- * This function tries to transmit through slave with slave_id but in case
+ * This function tries to get slave with slave_id but in case
* it fails, it tries to find the first available slave for transmission.
- * The skb is consumed in all cases, thus the function is void.
*/
-static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
+static struct slave *bond_get_slave_by_id(struct bonding *bond,
+ int slave_id)
{
struct list_head *iter;
struct slave *slave;
@@ -3845,10 +4267,8 @@
/* Here we start from the slave with slave_id */
bond_for_each_slave_rcu(bond, slave, iter) {
if (--i < 0) {
- if (bond_slave_can_tx(slave)) {
- bond_dev_queue_xmit(bond, skb, slave->dev);
- return;
- }
+ if (bond_slave_can_tx(slave))
+ return slave;
}
}
@@ -3857,13 +4277,11 @@
bond_for_each_slave_rcu(bond, slave, iter) {
if (--i < 0)
break;
- if (bond_slave_can_tx(slave)) {
- bond_dev_queue_xmit(bond, skb, slave->dev);
- return;
- }
+ if (bond_slave_can_tx(slave))
+ return slave;
}
/* no slave that can tx has been found */
- bond_tx_drop(bond->dev, skb);
+ return NULL;
}
/**
@@ -3899,10 +4317,9 @@
return slave_id;
}
-static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
- struct net_device *bond_dev)
+static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond,
+ struct sk_buff *skb)
{
- struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
int slave_cnt;
u32 slave_id;
@@ -3924,22 +4341,37 @@
if (iph->protocol == IPPROTO_IGMP) {
slave = rcu_dereference(bond->curr_active_slave);
if (slave)
- bond_dev_queue_xmit(bond, skb, slave->dev);
- else
- bond_xmit_slave_id(bond, skb, 0);
- return NETDEV_TX_OK;
+ return slave;
+ return bond_get_slave_by_id(bond, 0);
}
}
non_igmp:
slave_cnt = READ_ONCE(bond->slave_cnt);
if (likely(slave_cnt)) {
- slave_id = bond_rr_gen_slave_id(bond);
- bond_xmit_slave_id(bond, skb, slave_id % slave_cnt);
- } else {
- bond_tx_drop(bond_dev, skb);
+ slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+ return bond_get_slave_by_id(bond, slave_id);
}
- return NETDEV_TX_OK;
+ return NULL;
+}
+
+static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
+ struct net_device *bond_dev)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+
+ slave = bond_xmit_roundrobin_slave_get(bond, skb); /* NULL when no slave can be picked */
+ if (likely(slave))
+ return bond_dev_queue_xmit(bond, skb, slave->dev);
+
+ return bond_tx_drop(bond_dev, skb); /* no slave found: hand the skb to bond_tx_drop() */
+}
+
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
+ struct sk_buff *skb)
+{
+ return rcu_dereference(bond->curr_active_slave); /* skb is not used here; result may be NULL */
}
/* In active-backup mode, we know that bond->curr_active_slave is always valid if
@@ -3951,13 +4383,11 @@
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- slave = rcu_dereference(bond->curr_active_slave);
+ slave = bond_xmit_activebackup_slave_get(bond, skb);
if (slave)
- bond_dev_queue_xmit(bond, skb, slave->dev);
- else
- bond_tx_drop(bond_dev, skb);
+ return bond_dev_queue_xmit(bond, skb, slave->dev);
- return NETDEV_TX_OK;
+ return bond_tx_drop(bond_dev, skb);
}
/* Use this to update slave_array when (a) it's not appropriate to update
@@ -3991,6 +4421,61 @@
bond_slave_arr_work_rearm(bond, 1);
}
+static void bond_skip_slave(struct bond_up_slave *slaves,
+ struct slave *skipslave)
+{
+ int idx;
+
+ /* Remove @skipslave from @slaves in place (a NULL array is a no-op).
+ * Rare situation where the caller has asked to skip a specific
+ * slave but allocation failed (most likely!). This is only
+ * possible when the call is initiated from __bond_release_one().
+ * In this situation, overwrite the skipslave entry with the last
+ * entry of the array and shrink the count, so that the xmit path
+ * cannot choose this to-be-skipped slave to send a packet out.
+ */
+ for (idx = 0; slaves && idx < slaves->count; idx++) {
+ if (skipslave == slaves->arr[idx]) {
+ slaves->arr[idx] =
+ slaves->arr[slaves->count - 1];
+ slaves->count--;
+ break; /* only the first match is removed */
+ }
+ }
+}
+
+static void bond_set_slave_arr(struct bonding *bond,
+ struct bond_up_slave *usable_slaves,
+ struct bond_up_slave *all_slaves)
+{
+ struct bond_up_slave *usable, *all;
+
+ usable = rtnl_dereference(bond->usable_slaves); /* previous array, fetched before it is replaced */
+ rcu_assign_pointer(bond->usable_slaves, usable_slaves); /* publish the replacement */
+ kfree_rcu(usable, rcu); /* NOTE(review): old pointer may be NULL here - confirm kfree_rcu() accepts that */
+
+ all = rtnl_dereference(bond->all_slaves); /* same swap for the all-slaves array */
+ rcu_assign_pointer(bond->all_slaves, all_slaves);
+ kfree_rcu(all, rcu); /* NOTE(review): same NULL consideration as above */
+}
+
+static void bond_reset_slave_arr(struct bonding *bond)
+{
+ struct bond_up_slave *usable, *all;
+
+ usable = rtnl_dereference(bond->usable_slaves);
+ if (usable) { /* nothing to do when no array is installed */
+ RCU_INIT_POINTER(bond->usable_slaves, NULL); /* clear the pointer before queueing the free */
+ kfree_rcu(usable, rcu);
+ }
+
+ all = rtnl_dereference(bond->all_slaves);
+ if (all) { /* same handling for the all-slaves array */
+ RCU_INIT_POINTER(bond->all_slaves, NULL);
+ kfree_rcu(all, rcu);
+ }
+}
+
/* Build the usable slaves array in control path for modes that use xmit-hash
* to determine the slave interface -
* (a) BOND_MODE_8023AD
@@ -4001,9 +4486,9 @@
*/
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
{
+ struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL;
struct slave *slave;
struct list_head *iter;
- struct bond_up_slave *new_arr, *old_arr;
int agg_id = 0;
int ret = 0;
@@ -4011,11 +4496,12 @@
WARN_ON(lockdep_is_held(&bond->mode_lock));
#endif
- new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
- GFP_KERNEL);
- if (!new_arr) {
+ usable_slaves = kzalloc(struct_size(usable_slaves, arr,
+ bond->slave_cnt), GFP_KERNEL);
+ all_slaves = kzalloc(struct_size(all_slaves, arr,
+ bond->slave_cnt), GFP_KERNEL);
+ if (!usable_slaves || !all_slaves) {
ret = -ENOMEM;
- pr_err("Failed to build slave-array.\n");
goto out;
}
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
@@ -4023,20 +4509,19 @@
if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("bond_3ad_get_active_agg_info failed\n");
- kfree_rcu(new_arr, rcu);
/* No active aggragator means it's not safe to use
* the previous array.
*/
- old_arr = rtnl_dereference(bond->slave_arr);
- if (old_arr) {
- RCU_INIT_POINTER(bond->slave_arr, NULL);
- kfree_rcu(old_arr, rcu);
- }
+ bond_reset_slave_arr(bond);
goto out;
}
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) {
+ if (skipslave == slave)
+ continue;
+
+ all_slaves->arr[all_slaves->count++] = slave;
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct aggregator *agg;
@@ -4046,43 +4531,43 @@
}
if (!bond_slave_can_tx(slave))
continue;
- if (skipslave == slave)
- continue;
- netdev_dbg(bond->dev,
- "Adding slave dev %s to tx hash array[%d]\n",
- slave->dev->name, new_arr->count);
+ slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n",
+ usable_slaves->count);
- new_arr->arr[new_arr->count++] = slave;
+ usable_slaves->arr[usable_slaves->count++] = slave;
}
- old_arr = rtnl_dereference(bond->slave_arr);
- rcu_assign_pointer(bond->slave_arr, new_arr);
- if (old_arr)
- kfree_rcu(old_arr, rcu);
+ bond_set_slave_arr(bond, usable_slaves, all_slaves);
+ return ret;
out:
if (ret != 0 && skipslave) {
- int idx;
-
- /* Rare situation where caller has asked to skip a specific
- * slave but allocation failed (most likely!). BTW this is
- * only possible when the call is initiated from
- * __bond_release_one(). In this situation; overwrite the
- * skipslave entry in the array with the last entry from the
- * array to avoid a situation where the xmit path may choose
- * this to-be-skipped slave to send a packet out.
- */
- old_arr = rtnl_dereference(bond->slave_arr);
- for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) {
- if (skipslave == old_arr->arr[idx]) {
- old_arr->arr[idx] =
- old_arr->arr[old_arr->count-1];
- old_arr->count--;
- break;
- }
- }
+ bond_skip_slave(rtnl_dereference(bond->all_slaves),
+ skipslave);
+ bond_skip_slave(rtnl_dereference(bond->usable_slaves),
+ skipslave);
}
+ kfree_rcu(all_slaves, rcu);
+ kfree_rcu(usable_slaves, rcu);
+
return ret;
+}
+
+static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
+ struct sk_buff *skb,
+ struct bond_up_slave *slaves)
+{
+ struct slave *slave;
+ unsigned int count;
+ u32 hash;
+
+ hash = bond_xmit_hash(bond, skb);
+ count = slaves ? READ_ONCE(slaves->count) : 0;
+ if (unlikely(!count))
+ return NULL;
+
+ slave = slaves->arr[hash % count];
+ return slave;
}
/* Use this Xmit function for 3AD as well as XOR modes. The current
@@ -4093,20 +4578,15 @@
struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
- struct slave *slave;
struct bond_up_slave *slaves;
- unsigned int count;
+ struct slave *slave;
- slaves = rcu_dereference(bond->slave_arr);
- count = slaves ? READ_ONCE(slaves->count) : 0;
- if (likely(count)) {
- slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
- bond_dev_queue_xmit(bond, skb, slave->dev);
- } else {
- bond_tx_drop(dev, skb);
- }
+ slaves = rcu_dereference(bond->usable_slaves);
+ slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves);
+ if (likely(slave))
+ return bond_dev_queue_xmit(bond, skb, slave->dev);
- return NETDEV_TX_OK;
+ return bond_tx_drop(dev, skb);
}
/* in broadcast mode, we send everything to all usable interfaces. */
@@ -4116,27 +4596,39 @@
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave = NULL;
struct list_head *iter;
+ bool xmit_suc = false;
+ bool skb_used = false;
bond_for_each_slave_rcu(bond, slave, iter) {
- if (bond_is_last_slave(bond, slave))
- break;
- if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *skb2;
+ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+ continue;
+
+ if (bond_is_last_slave(bond, slave)) {
+ skb2 = skb;
+ skb_used = true;
+ } else {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2) {
net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
bond_dev->name, __func__);
continue;
}
- bond_dev_queue_xmit(bond, skb2, slave->dev);
}
- }
- if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
- bond_dev_queue_xmit(bond, skb, slave->dev);
- else
- bond_tx_drop(bond_dev, skb);
- return NETDEV_TX_OK;
+ if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK)
+ xmit_suc = true;
+ }
+
+ if (!skb_used)
+ dev_kfree_skb_any(skb);
+
+ if (xmit_suc)
+ return NETDEV_TX_OK;
+
+ atomic_long_inc(&bond_dev->tx_dropped);
+ return NET_XMIT_DROP;
}
/*------------------------- Device initialization ---------------------------*/
@@ -4169,8 +4661,7 @@
static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev,
- select_queue_fallback_t fallback)
+ struct net_device *sb_dev)
{
/* This helper function exists to help dev_pick_tx get the correct
* destination queue. Using a helper function skips a call to
@@ -4188,6 +4679,48 @@
} while (txq >= dev->real_num_tx_queues);
}
return txq;
+}
+
+static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
+ struct sk_buff *skb,
+ bool all_slaves)
+{
+ struct bonding *bond = netdev_priv(master_dev);
+ struct bond_up_slave *slaves;
+ struct slave *slave = NULL;
+
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ slave = bond_xmit_roundrobin_slave_get(bond, skb);
+ break;
+ case BOND_MODE_ACTIVEBACKUP:
+ slave = bond_xmit_activebackup_slave_get(bond, skb);
+ break;
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ if (all_slaves)
+ slaves = rcu_dereference(bond->all_slaves);
+ else
+ slaves = rcu_dereference(bond->usable_slaves);
+ slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves);
+ break;
+ case BOND_MODE_BROADCAST:
+ break;
+ case BOND_MODE_ALB:
+ slave = bond_xmit_alb_slave_get(bond, skb);
+ break;
+ case BOND_MODE_TLB:
+ slave = bond_xmit_tlb_slave_get(bond, skb);
+ break;
+ default:
+ /* Should never happen, mode already checked */
+ WARN_ONCE(true, "Unknown bonding mode");
+ break;
+ }
+
+ if (slave)
+ return slave->dev;
+ return NULL;
}
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -4216,8 +4749,7 @@
/* Should never happen, mode already checked */
netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond));
WARN_ON_ONCE(1);
- bond_tx_drop(dev, skb);
- return NETDEV_TX_OK;
+ return bond_tx_drop(dev, skb);
}
}
@@ -4236,7 +4768,7 @@
if (bond_has_slaves(bond))
ret = __bond_start_xmit(skb, dev);
else
- bond_tx_drop(dev, skb);
+ ret = bond_tx_drop(dev, skb);
rcu_read_unlock();
return ret;
@@ -4291,7 +4823,6 @@
struct ethtool_drvinfo *drvinfo)
{
strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
- strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d",
BOND_ABI_VERSION);
}
@@ -4318,7 +4849,6 @@
.ndo_neigh_setup = bond_neigh_setup,
.ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
- .ndo_get_lock_subclass = bond_get_nest_level,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_setup = bond_netpoll_setup,
.ndo_netpoll_cleanup = bond_netpoll_cleanup,
@@ -4328,6 +4858,7 @@
.ndo_del_slave = bond_release,
.ndo_fix_features = bond_fix_features,
.ndo_features_check = passthru_features_check,
+ .ndo_get_xmit_slave = bond_xmit_get_slave,
};
static const struct device_type bond_type = {
@@ -4346,7 +4877,6 @@
struct bonding *bond = netdev_priv(bond_dev);
spin_lock_init(&bond->mode_lock);
- spin_lock_init(&bond->stats_lock);
bond->params = bonding_defaults;
/* Initialize pointers */
@@ -4368,6 +4898,13 @@
bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+#ifdef CONFIG_XFRM_OFFLOAD
+ /* set up xfrm device ops (only supported in active-backup right now) */
+ bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
+ INIT_LIST_HEAD(&bond->ipsec_list);
+ spin_lock_init(&bond->ipsec_lock);
+#endif /* CONFIG_XFRM_OFFLOAD */
+
/* don't acquire bond device's netif_tx_lock when transmitting */
bond_dev->features |= NETIF_F_LLTX;
@@ -4383,11 +4920,21 @@
bond_dev->hw_features = BOND_VLAN_FEATURES |
NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER;
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
+#ifdef CONFIG_XFRM_OFFLOAD
+ bond_dev->hw_features |= BOND_XFRM_FEATURES;
+#endif /* CONFIG_XFRM_OFFLOAD */
bond_dev->features |= bond_dev->hw_features;
bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+#ifdef CONFIG_XFRM_OFFLOAD
+ /* Disable XFRM features if this isn't an active-backup config */
+ if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
+ bond_dev->features &= ~BOND_XFRM_FEATURES;
+#endif /* CONFIG_XFRM_OFFLOAD */
}
/* Destroy a bonding device.
@@ -4396,9 +4943,9 @@
static void bond_uninit(struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
+ struct bond_up_slave *usable, *all;
struct list_head *iter;
struct slave *slave;
- struct bond_up_slave *arr;
bond_netpoll_cleanup(bond_dev);
@@ -4407,10 +4954,16 @@
__bond_release_one(bond_dev, slave->dev, true, true);
netdev_info(bond_dev, "Released all slaves\n");
- arr = rtnl_dereference(bond->slave_arr);
- if (arr) {
- RCU_INIT_POINTER(bond->slave_arr, NULL);
- kfree_rcu(arr, rcu);
+ usable = rtnl_dereference(bond->usable_slaves);
+ if (usable) {
+ RCU_INIT_POINTER(bond->usable_slaves, NULL);
+ kfree_rcu(usable, rcu);
+ }
+
+ all = rtnl_dereference(bond->all_slaves);
+ if (all) {
+ RCU_INIT_POINTER(bond->all_slaves, NULL);
+ kfree_rcu(all, rcu);
}
list_del(&bond->bond_list);
@@ -4769,6 +5322,7 @@
params->arp_all_targets = arp_all_targets_value;
params->updelay = updelay;
params->downdelay = downdelay;
+ params->peer_notif_delay = 0;
params->use_carrier = use_carrier;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
@@ -4817,7 +5371,9 @@
if (!bond->wq)
return -ENOMEM;
- bond->nest_level = SINGLE_DEPTH_NESTING;
+ bond->notifier_ctx = false;
+
+ spin_lock_init(&bond->stats_lock);
netdev_lockdep_set_classes(bond_dev);
list_add_tail(&bond->bond_list, &bn->dev_list);
@@ -4932,8 +5488,6 @@
int i;
int res;
- pr_info("%s", bond_version);
-
res = bond_check_params(&bonding_defaults);
if (res)
goto out;
@@ -4953,6 +5507,10 @@
if (res)
goto err;
}
+
+ skb_flow_dissector_init(&flow_keys_bonding,
+ flow_keys_bonding_keys,
+ ARRAY_SIZE(flow_keys_bonding_keys));
register_netdevice_notifier(&bond_netdev_notifier);
out:
@@ -4984,6 +5542,5 @@
module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
-MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
--
Gitblit v1.6.2