From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] net/mlx5: Rework LAG for RoCE and SR-IOV LAG modes
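
Rework the LAG code in lag.c (summary of the diff below):

* Move the LAG structs and mode flags out to lag.h; replace the single
  MLX5_LAG_FLAG_BONDED flag with distinct ROCE and SRIOV LAG modes plus
  a READY flag that is set once both PFs are registered.
* Replace the global lag_mutex with the lag_lock spinlock.
* Switch the LAG firmware commands to mlx5_cmd_exec_in()/_inout().
* Queue bond work on a dedicated single-threaded workqueue instead of
  the system workqueue.
* Register the netdev notifier against init_net only, and report
  unsupported bond configurations through extack.
* Hook up multipath LAG via mlx5_lag_mp_init()/mlx5_lag_mp_cleanup().
* Drop the mlx5_lag_allow()/mlx5_lag_forbid() admin state in favour of
  mlx5_lag_check_prereq() checked at bond time, plus mlx5_lag_update().
* mlx5_infer_tx_affinity_mapping() keeps the identity 1:1/2:2 mapping
  when both or neither port can carry traffic, and otherwise remaps
  both virtual ports to the single usable physical port.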
---
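Note: the reworked lag.c depends on the companion headers lag.h and
lag_mp.h, which are not part of this diff. Below is a minimal sketch of
what lag.h is assumed to provide; the names are taken from the
references in the diff, while the exact flag values and struct layout
are assumptions.

/* lag.h (sketch; flag values and field layout are assumptions) */
enum {
	MLX5_LAG_P1,
	MLX5_LAG_P2,
};

enum {
	MLX5_LAG_FLAG_ROCE      = 1 << 0,
	MLX5_LAG_FLAG_SRIOV     = 1 << 1,
	MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
	MLX5_LAG_FLAG_READY     = 1 << 3,
};

#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV | \
			     MLX5_LAG_FLAG_MULTIPATH)

struct lag_func {
	struct mlx5_core_dev *dev;
	struct net_device    *netdev;
};

/* Used for collection of netdev event info. */
struct lag_tracker {
	enum netdev_lag_tx_type            tx_type;
	struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
	bool                               is_bonded;
};

/* LAG data of a ConnectX card; serves both its physical functions. */
struct mlx5_lag {
	u8                      flags;
	u8                      v2p_map[MLX5_MAX_PORTS];
	struct lag_func         pf[MLX5_MAX_PORTS];
	struct lag_tracker      tracker;
	struct workqueue_struct *wq;
	struct delayed_work     bond_work;
	struct notifier_block   nb;
};

static inline struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
{
	return dev->priv.lag;
}

static inline bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
}

static inline bool mlx5_lag_is_ready(struct mlx5_lag *ldev)
{
	return ldev->flags & MLX5_LAG_FLAG_READY;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev);
int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker,
		      u8 flags);
void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker);

/* lag_mp.h (sketch; prototypes inferred from the call sites below) */
int mlx5_lag_mp_init(struct mlx5_lag *ldev);
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);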
kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c | 569 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 345 insertions(+), 224 deletions(-)
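
Note: mlx5_lag_is_roce(), mlx5_lag_is_sriov() and mlx5_lag_get_slave_port()
are exported for the IB side of the driver. A hypothetical consumer sketch
follows (the wrapper is illustrative and not part of this patch):

/* Map a bond slave netdev to the physical port currently carrying its
 * traffic; outside RoCE LAG, mlx5_lag_get_slave_port() returns 0.
 */
static u8 query_tx_port(struct mlx5_core_dev *mdev, struct net_device *slave)
{
	if (!mlx5_lag_is_roce(mdev))
		return 0;
	return mlx5_lag_get_slave_port(mdev, slave);
}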
diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 582b2f1..9fb3e5e 100644
--- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -34,51 +34,20 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
-
-enum {
- MLX5_LAG_FLAG_BONDED = 1 << 0,
-};
-
-struct lag_func {
- struct mlx5_core_dev *dev;
- struct net_device *netdev;
-};
-
-/* Used for collection of netdev event info. */
-struct lag_tracker {
- enum netdev_lag_tx_type tx_type;
- struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
- bool is_bonded;
-};
-
-/* LAG data of a ConnectX card.
- * It serves both its phys functions.
- */
-struct mlx5_lag {
- u8 flags;
- u8 v2p_map[MLX5_MAX_PORTS];
- struct lag_func pf[MLX5_MAX_PORTS];
- struct lag_tracker tracker;
- struct delayed_work bond_work;
- struct notifier_block nb;
-
- /* Admin state. Allow lag only if allowed is true
- * even if network conditions for lag were met
- */
- bool allowed;
-};
+#include "eswitch.h"
+#include "lag.h"
+#include "lag_mp.h"
/* General purpose, use for short periods of time.
* Beware of lock dependencies (preferably, no locks should be acquired
* under it).
*/
-static DEFINE_MUTEX(lag_mutex);
+static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
u8 remap_port2)
{
- u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
@@ -86,14 +55,13 @@
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, create_lag, in);
}
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
u8 remap_port2)
{
- u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
@@ -102,59 +70,31 @@
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
-
- MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_lag, in);
}
int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
- u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
- u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
-static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
- bool reset, void *out, int out_size)
-{
- u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
- MLX5_SET(query_cong_statistics_in, in, opcode,
- MLX5_CMD_OP_QUERY_CONG_STATISTICS);
- MLX5_SET(query_cong_statistics_in, in, clear, reset);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
-static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
-{
- return dev->priv.lag;
-}
-
-static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
- struct net_device *ndev)
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev)
{
int i;
@@ -162,120 +102,239 @@
if (ldev->pf[i].netdev == ndev)
return i;
- return -1;
+ return -ENOENT;
}
-static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+ return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
u8 *port1, u8 *port2)
{
+ bool p1en;
+ bool p2en;
+
+ p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
+ tracker->netdev_state[MLX5_LAG_P1].link_up;
+
+ p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
+ tracker->netdev_state[MLX5_LAG_P2].link_up;
+
*port1 = 1;
*port2 = 2;
- if (!tracker->netdev_state[0].tx_enabled ||
- !tracker->netdev_state[0].link_up) {
- *port1 = 2;
+ if ((!p1en && !p2en) || (p1en && p2en))
return;
- }
- if (!tracker->netdev_state[1].tx_enabled ||
- !tracker->netdev_state[1].link_up)
+ if (p1en)
*port2 = 1;
+ else
+ *port1 = 2;
}
-static void mlx5_activate_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
{
- struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ u8 v2p_port1, v2p_port2;
int err;
- ldev->flags |= MLX5_LAG_FLAG_BONDED;
+ mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
+ &v2p_port2);
- mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
- &ldev->v2p_map[1]);
+ if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
+ v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
+ ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
+ ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
- err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+ mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
+ ldev->v2p_map[MLX5_LAG_P1],
+ ldev->v2p_map[MLX5_LAG_P2]);
+
+ err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ }
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ int err;
+
+ mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
+ &ldev->v2p_map[MLX5_LAG_P2]);
+
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
+ ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
+ ldev->v2p_map[MLX5_LAG_P2]);
if (err)
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
}
-static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ u8 flags)
{
- struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
int err;
- ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+ err = mlx5_create_lag(ldev, tracker);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to activate RoCE LAG\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to activate VF LAG\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ return err;
+ }
- err = mlx5_cmd_destroy_lag(dev0);
- if (err)
- mlx5_core_err(dev0,
- "Failed to destroy LAG (%d)\n",
- err);
+ ldev->flags |= flags;
+ return 0;
+}
+
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
+ bool roce_lag = __mlx5_lag_is_roce(ldev);
+ int err;
+
+ ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to deactivate VF LAG; driver restart required\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ }
+
+ return err;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+ if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
+ return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
+ ldev->pf[MLX5_LAG_P2].dev);
+#else
+ return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
+ !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
+#endif
+}
+
+static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
- struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
- struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
- struct lag_tracker tracker;
- u8 v2p_port1, v2p_port2;
- int i, err;
- bool do_bond;
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ struct lag_tracker tracker = { };
+ bool do_bond, roce_lag;
+ int err;
- if (!dev0 || !dev1)
+ if (!mlx5_lag_is_ready(ldev))
return;
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
tracker = ldev->tracker;
- mutex_unlock(&lag_mutex);
+ spin_unlock(&lag_lock);
- do_bond = tracker.is_bonded && ldev->allowed;
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
- if (do_bond && !mlx5_lag_is_bonded(ldev)) {
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+ !mlx5_sriov_is_enabled(dev1);
- mlx5_activate_lag(ldev, &tracker);
+#ifdef CONFIG_MLX5_ESWITCH
+ roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+#endif
- mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_enable_roce(dev1);
- } else if (do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
- &v2p_port2);
+ if (roce_lag)
+ mlx5_lag_remove_ib_devices(ldev);
- if ((v2p_port1 != ldev->v2p_map[0]) ||
- (v2p_port2 != ldev->v2p_map[1])) {
- ldev->v2p_map[0] = v2p_port1;
- ldev->v2p_map[1] = v2p_port2;
+ err = mlx5_activate_lag(ldev, &tracker,
+ roce_lag ? MLX5_LAG_FLAG_ROCE :
+ MLX5_LAG_FLAG_SRIOV);
+ if (err) {
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
- err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
- if (err)
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
+ return;
}
- } else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_disable_roce(dev1);
- mlx5_deactivate_lag(ldev);
+ if (roce_lag) {
+ mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_enable_roce(dev1);
+ }
+ } else if (do_bond && __mlx5_lag_is_active(ldev)) {
+ mlx5_modify_lag(ldev, &tracker);
+ } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+ roce_lag = __mlx5_lag_is_roce(ldev);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- mlx5_add_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (roce_lag) {
+ mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_disable_roce(dev1);
+ }
+
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
+
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
}
}
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
- schedule_delayed_work(&ldev->bond_work, delay);
+ queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
static void mlx5_do_bond_work(struct work_struct *work)
@@ -303,9 +362,10 @@
{
struct net_device *upper = info->upper_dev, *ndev_tmp;
struct netdev_lag_upper_info *lag_upper_info = NULL;
- bool is_bonded;
+ bool is_bonded, is_in_lag, mode_supported;
int bond_status = 0;
int num_slaves = 0;
+ int changed = 0;
int idx;
if (!netif_is_lag_master(upper))
@@ -322,7 +382,7 @@
rcu_read_lock();
for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
- if (idx > -1)
+ if (idx >= 0)
bond_status |= (1 << idx);
num_slaves++;
@@ -339,19 +399,30 @@
/* Determine bonding status:
* A device is considered bonded if both its physical ports are slaves
* of the same lag master, and only them.
- * Lag mode must be activebackup or hash.
*/
- is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
- (bond_status == 0x3) &&
- ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
- (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
+ /* Lag mode must be activebackup or hash. */
+ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+
+ is_bonded = is_in_lag && mode_supported;
if (tracker->is_bonded != is_bonded) {
tracker->is_bonded = is_bonded;
- return 1;
+ changed = 1;
}
- return 0;
+ if (!is_in_lag)
+ return changed;
+
+ if (!mlx5_lag_is_ready(ldev))
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, PF is configured with more than 64 VFs");
+ else if (!mode_supported)
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, TX type isn't supported");
+
+ return changed;
}
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
@@ -366,7 +437,7 @@
return 0;
idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
- if (idx == -1)
+ if (idx < 0)
return 0;
/* This information is used to determine virtual to physical
@@ -389,13 +460,11 @@
struct mlx5_lag *ldev;
int changed = 0;
- if (!net_eq(dev_net(ndev), &init_net))
- return NOTIFY_DONE;
-
if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
return NOTIFY_DONE;
ldev = container_of(this, struct mlx5_lag, nb);
+
tracker = ldev->tracker;
switch (event) {
@@ -409,23 +478,14 @@
break;
}
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
ldev->tracker = tracker;
- mutex_unlock(&lag_mutex);
+ spin_unlock(&lag_lock);
if (changed)
mlx5_queue_bond_work(ldev, 0);
return NOTIFY_DONE;
-}
-
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
- if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
- (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
- return false;
- else
- return true;
}
static struct mlx5_lag *mlx5_lag_dev_alloc(void)
@@ -436,36 +496,43 @@
if (!ldev)
return NULL;
+ ldev->wq = create_singlethread_workqueue("mlx5_lag");
+ if (!ldev->wq) {
+ kfree(ldev);
+ return NULL;
+ }
+
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
- ldev->allowed = mlx5_lag_check_prereq(ldev);
return ldev;
}
static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
+ destroy_workqueue(ldev->wq);
kfree(ldev);
}
-static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
- struct mlx5_core_dev *dev,
- struct net_device *netdev)
+static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
{
unsigned int fn = PCI_FUNC(dev->pdev->devfn);
if (fn >= MLX5_MAX_PORTS)
- return;
+ return -EPERM;
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
ldev->pf[fn].dev = dev;
ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
dev->priv.lag = ldev;
- mutex_unlock(&lag_mutex);
+ spin_unlock(&lag_lock);
+
+ return fn;
}
static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
@@ -480,12 +547,11 @@
if (i == MLX5_MAX_PORTS)
return;
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
dev->priv.lag = NULL;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
- mutex_unlock(&lag_mutex);
+ spin_unlock(&lag_lock);
}
/* Must be called with intf_mutex held */
@@ -493,10 +559,11 @@
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
+ int i, err;
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
- (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+ MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
return;
tmp_dev = mlx5_get_next_phys_dev(dev);
@@ -511,15 +578,28 @@
}
}
- mlx5_lag_dev_add_pf(ldev, dev, netdev);
+ if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
+ return;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (!ldev->pf[i].dev)
+ break;
+
+ if (i >= MLX5_MAX_PORTS)
+ ldev->flags |= MLX5_LAG_FLAG_READY;
if (!ldev->nb.notifier_call) {
ldev->nb.notifier_call = mlx5_lag_netdev_event;
- if (register_netdevice_notifier(&ldev->nb)) {
+ if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
ldev->nb.notifier_call = NULL;
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
}
+
+ err = mlx5_lag_mp_init(ldev);
+ if (err)
+ mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+ err);
}
/* Must be called with intf_mutex held */
@@ -532,73 +612,83 @@
if (!ldev)
return;
- if (mlx5_lag_is_bonded(ldev))
+ if (__mlx5_lag_is_active(ldev))
mlx5_deactivate_lag(ldev);
mlx5_lag_dev_remove_pf(ldev, dev);
+
+ ldev->flags &= ~MLX5_LAG_FLAG_READY;
for (i = 0; i < MLX5_MAX_PORTS; i++)
if (ldev->pf[i].dev)
break;
if (i == MLX5_MAX_PORTS) {
- if (ldev->nb.notifier_call)
- unregister_netdevice_notifier(&ldev->nb);
+ if (ldev->nb.notifier_call) {
+ unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+ ldev->nb.notifier_call = NULL;
+ }
+ mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_dev_free(ldev);
}
}
+
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
+ spin_unlock(&lag_lock);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev);
- res = ldev && mlx5_lag_is_bonded(ldev);
- mutex_unlock(&lag_mutex);
+ res = ldev && __mlx5_lag_is_active(ldev);
+ spin_unlock(&lag_lock);
return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);
-static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
- int ret = 0;
- bool lag_active;
+ bool res;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+ spin_unlock(&lag_lock);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
+void mlx5_lag_update(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
mlx5_dev_list_lock();
-
ldev = mlx5_lag_dev_get(dev);
- if (!ldev) {
- ret = -ENODEV;
+ if (!ldev)
goto unlock;
- }
- lag_active = mlx5_lag_is_bonded(ldev);
- if (!mlx5_lag_check_prereq(ldev) && allow) {
- ret = -EINVAL;
- goto unlock;
- }
- if (ldev->allowed == allow)
- goto unlock;
- ldev->allowed = allow;
- if ((lag_active && !allow) || allow)
- mlx5_do_bond(ldev);
+
+ mlx5_do_bond(ldev);
+
unlock:
mlx5_dev_list_unlock();
- return ret;
-}
-
-int mlx5_lag_forbid(struct mlx5_core_dev *dev)
-{
- return mlx5_lag_set_state(dev, false);
-}
-
-int mlx5_lag_allow(struct mlx5_core_dev *dev)
-{
- return mlx5_lag_set_state(dev, true);
}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -606,27 +696,52 @@
struct net_device *ndev = NULL;
struct mlx5_lag *ldev;
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev);
- if (!(ldev && mlx5_lag_is_bonded(ldev)))
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
- ndev = ldev->tracker.netdev_state[0].tx_enabled ?
- ldev->pf[0].netdev : ldev->pf[1].netdev;
+ ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
+ ldev->pf[MLX5_LAG_P1].netdev :
+ ldev->pf[MLX5_LAG_P2].netdev;
} else {
- ndev = ldev->pf[0].netdev;
+ ndev = ldev->pf[MLX5_LAG_P1].netdev;
}
if (ndev)
dev_hold(ndev);
unlock:
- mutex_unlock(&lag_mutex);
+ spin_unlock(&lag_lock);
return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+ struct net_device *slave)
+{
+ struct mlx5_lag *ldev;
+ u8 port = 0;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev_get(dev);
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+ goto unlock;
+
+ if (ldev->pf[MLX5_LAG_P1].netdev == slave)
+ port = MLX5_LAG_P1;
+ else
+ port = MLX5_LAG_P2;
+
+ port = ldev->v2p_map[port];
+
+unlock:
+ spin_unlock(&lag_lock);
+ return port;
+}
+EXPORT_SYMBOL(mlx5_lag_get_slave_port);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
@@ -638,7 +753,8 @@
return true;
ldev = mlx5_lag_dev_get(dev);
- if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+ if (!ldev || !__mlx5_lag_is_roce(ldev) ||
+ ldev->pf[MLX5_LAG_P1].dev == dev)
return true;
/* If bonded, we do not add an IB device for PF1. */
@@ -663,28 +779,33 @@
memset(values, 0, sizeof(*values) * num_counters);
- mutex_lock(&lag_mutex);
+ spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev);
- if (ldev && mlx5_lag_is_bonded(ldev)) {
+ if (ldev && __mlx5_lag_is_roce(ldev)) {
num_ports = MLX5_MAX_PORTS;
- mdev[0] = ldev->pf[0].dev;
- mdev[1] = ldev->pf[1].dev;
+ mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
+ mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
} else {
num_ports = 1;
- mdev[0] = dev;
+ mdev[MLX5_LAG_P1] = dev;
}
+ spin_unlock(&lag_lock);
for (i = 0; i < num_ports; ++i) {
- ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
+ out);
if (ret)
- goto unlock;
+ goto free;
for (j = 0; j < num_counters; ++j)
values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
}
-unlock:
- mutex_unlock(&lag_mutex);
+free:
kvfree(out);
return ret;
}
--
Gitblit v1.6.2