2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
--- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -34,51 +34,20 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
-
-enum {
-	MLX5_LAG_FLAG_BONDED = 1 << 0,
-};
-
-struct lag_func {
-	struct mlx5_core_dev *dev;
-	struct net_device *netdev;
-};
-
-/* Used for collection of netdev event info. */
-struct lag_tracker {
-	enum netdev_lag_tx_type tx_type;
-	struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
-	bool is_bonded;
-};
-
-/* LAG data of a ConnectX card.
- * It serves both its phys functions.
- */
-struct mlx5_lag {
-	u8 flags;
-	u8 v2p_map[MLX5_MAX_PORTS];
-	struct lag_func pf[MLX5_MAX_PORTS];
-	struct lag_tracker tracker;
-	struct delayed_work bond_work;
-	struct notifier_block nb;
-
-	/* Admin state. Allow lag only if allowed is true
-	 * even if network conditions for lag were met
-	 */
-	bool allowed;
-};
+#include "eswitch.h"
+#include "lag.h"
+#include "lag_mp.h"
 
 /* General purpose, use for short periods of time.
  * Beware of lock dependencies (preferably, no locks should be acquired
  * under it).
  */
-static DEFINE_MUTEX(lag_mutex);
+static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 			       u8 remap_port2)
 {
-	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
 	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
 
 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
@@ -86,14 +55,13 @@
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, create_lag, in);
 }
 
 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 			       u8 remap_port2)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
 
 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
@@ -102,59 +70,31 @@
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
-
-	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_lag, in);
 }
 
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
 {
-	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
 
 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
 }
 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
 
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
 
 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
-static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-				       bool reset, void *out, int out_size)
-{
-	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
-	MLX5_SET(query_cong_statistics_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
-	MLX5_SET(query_cong_statistics_in, in, clear, reset);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
-static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
-{
-	return dev->priv.lag;
-}
-
-static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
-				       struct net_device *ndev)
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+				struct net_device *ndev)
 {
 	int i;
 
@@ -162,120 +102,239 @@
 		if (ldev->pf[i].netdev == ndev)
 			return i;
 
-	return -1;
+	return -ENOENT;
 }
 
-static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
 }
 
 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 					   u8 *port1, u8 *port2)
 {
+	bool p1en;
+	bool p2en;
+
+	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
+	       tracker->netdev_state[MLX5_LAG_P1].link_up;
+
+	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
+	       tracker->netdev_state[MLX5_LAG_P2].link_up;
+
 	*port1 = 1;
 	*port2 = 2;
-	if (!tracker->netdev_state[0].tx_enabled ||
-	    !tracker->netdev_state[0].link_up) {
-		*port1 = 2;
+	if ((!p1en && !p2en) || (p1en && p2en))
 		return;
-	}
 
-	if (!tracker->netdev_state[1].tx_enabled ||
-	    !tracker->netdev_state[1].link_up)
+	if (p1en)
 		*port2 = 1;
+	else
+		*port1 = 2;
 }
 
-static void mlx5_activate_lag(struct mlx5_lag *ldev,
-			      struct lag_tracker *tracker)
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+		     struct lag_tracker *tracker)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	u8 v2p_port1, v2p_port2;
 	int err;
 
-	ldev->flags |= MLX5_LAG_FLAG_BONDED;
+	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
+				       &v2p_port2);
 
-	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
-				       &ldev->v2p_map[1]);
+	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
+	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
+		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
+		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
 
-	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
+			       ldev->v2p_map[MLX5_LAG_P1],
+			       ldev->v2p_map[MLX5_LAG_P2]);
+
+		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+		if (err)
+			mlx5_core_err(dev0,
+				      "Failed to modify LAG (%d)\n",
+				      err);
+	}
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+			   struct lag_tracker *tracker)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	int err;
+
+	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
+				       &ldev->v2p_map[MLX5_LAG_P2]);
+
+	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
+		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+
+	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
+				  ldev->v2p_map[MLX5_LAG_P2]);
 	if (err)
 		mlx5_core_err(dev0,
 			      "Failed to create LAG (%d)\n",
 			      err);
+	return err;
 }
 
-static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+		      struct lag_tracker *tracker,
+		      u8 flags)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	int err;
 
-	ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+	err = mlx5_create_lag(ldev, tracker);
+	if (err) {
+		if (roce_lag) {
+			mlx5_core_err(dev0,
+				      "Failed to activate RoCE LAG\n");
+		} else {
+			mlx5_core_err(dev0,
+				      "Failed to activate VF LAG\n"
+				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+		}
+		return err;
+	}
 
-	err = mlx5_cmd_destroy_lag(dev0);
-	if (err)
-		mlx5_core_err(dev0,
-			      "Failed to destroy LAG (%d)\n",
-			      err);
+	ldev->flags |= flags;
+	return 0;
+}
+
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
+	bool roce_lag = __mlx5_lag_is_roce(ldev);
+	int err;
+
+	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+
+	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
+	if (err) {
+		if (roce_lag) {
+			mlx5_core_err(dev0,
+				      "Failed to deactivate RoCE LAG; driver restart required\n");
+		} else {
+			mlx5_core_err(dev0,
+				      "Failed to deactivate VF LAG; driver restart required\n"
+				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+		}
+	}
+
+	return err;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
+		return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
+				   ldev->pf[MLX5_LAG_P2].dev);
+#else
+	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
+		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
+#endif
+}
+
+static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
+{
+	int i;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].dev)
+			mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+						 MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
+{
+	int i;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].dev)
+			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+						    MLX5_INTERFACE_PROTOCOL_IB);
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
-	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
-	struct lag_tracker tracker;
-	u8 v2p_port1, v2p_port2;
-	int i, err;
-	bool do_bond;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	struct lag_tracker tracker = { };
+	bool do_bond, roce_lag;
+	int err;
 
-	if (!dev0 || !dev1)
+	if (!mlx5_lag_is_ready(ldev))
 		return;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	tracker = ldev->tracker;
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
-	do_bond = tracker.is_bonded && ldev->allowed;
+	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
 
-	if (do_bond && !mlx5_lag_is_bonded(ldev)) {
-		for (i = 0; i < MLX5_MAX_PORTS; i++)
-			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
-						    MLX5_INTERFACE_PROTOCOL_IB);
+	if (do_bond && !__mlx5_lag_is_active(ldev)) {
+		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+			   !mlx5_sriov_is_enabled(dev1);
 
-		mlx5_activate_lag(ldev, &tracker);
+#ifdef CONFIG_MLX5_ESWITCH
+		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+#endif
 
-		mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
-		mlx5_nic_vport_enable_roce(dev1);
-	} else if (do_bond && mlx5_lag_is_bonded(ldev)) {
-		mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
-					       &v2p_port2);
+		if (roce_lag)
+			mlx5_lag_remove_ib_devices(ldev);
 
-		if ((v2p_port1 != ldev->v2p_map[0]) ||
-		    (v2p_port2 != ldev->v2p_map[1])) {
-			ldev->v2p_map[0] = v2p_port1;
-			ldev->v2p_map[1] = v2p_port2;
+		err = mlx5_activate_lag(ldev, &tracker,
+					roce_lag ? MLX5_LAG_FLAG_ROCE :
+					MLX5_LAG_FLAG_SRIOV);
+		if (err) {
+			if (roce_lag)
+				mlx5_lag_add_ib_devices(ldev);
 
-			err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
-			if (err)
-				mlx5_core_err(dev0,
-					      "Failed to modify LAG (%d)\n",
-					      err);
+			return;
 		}
-	} else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
-		mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
-		mlx5_nic_vport_disable_roce(dev1);
 
-		mlx5_deactivate_lag(ldev);
+		if (roce_lag) {
+			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+			mlx5_nic_vport_enable_roce(dev1);
+		}
+	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
+		mlx5_modify_lag(ldev, &tracker);
+	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+		roce_lag = __mlx5_lag_is_roce(ldev);
 
-		for (i = 0; i < MLX5_MAX_PORTS; i++)
-			if (ldev->pf[i].dev)
-				mlx5_add_dev_by_protocol(ldev->pf[i].dev,
-							 MLX5_INTERFACE_PROTOCOL_IB);
+		if (roce_lag) {
+			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+			mlx5_nic_vport_disable_roce(dev1);
+		}
+
+		err = mlx5_deactivate_lag(ldev);
+		if (err)
+			return;
+
+		if (roce_lag)
+			mlx5_lag_add_ib_devices(ldev);
 	}
 }
 
 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
 {
-	schedule_delayed_work(&ldev->bond_work, delay);
+	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
 }
 
 static void mlx5_do_bond_work(struct work_struct *work)
@@ -303,9 +362,10 @@
 {
 	struct net_device *upper = info->upper_dev, *ndev_tmp;
 	struct netdev_lag_upper_info *lag_upper_info = NULL;
-	bool is_bonded;
+	bool is_bonded, is_in_lag, mode_supported;
 	int bond_status = 0;
 	int num_slaves = 0;
+	int changed = 0;
 	int idx;
 
 	if (!netif_is_lag_master(upper))
@@ -322,7 +382,7 @@
 	rcu_read_lock();
 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
-		if (idx > -1)
+		if (idx >= 0)
 			bond_status |= (1 << idx);
 
 		num_slaves++;
@@ -339,19 +399,30 @@
 	/* Determine bonding status:
 	 * A device is considered bonded if both its physical ports are slaves
 	 * of the same lag master, and only them.
-	 * Lag mode must be activebackup or hash.
 	 */
-	is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
-		    (bond_status == 0x3) &&
-		    ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
-		     (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
 
+	/* Lag mode must be activebackup or hash. */
+	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
+			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+
+	is_bonded = is_in_lag && mode_supported;
 	if (tracker->is_bonded != is_bonded) {
 		tracker->is_bonded = is_bonded;
-		return 1;
+		changed = 1;
 	}
 
-	return 0;
+	if (!is_in_lag)
+		return changed;
+
+	if (!mlx5_lag_is_ready(ldev))
+		NL_SET_ERR_MSG_MOD(info->info.extack,
+				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
+	else if (!mode_supported)
+		NL_SET_ERR_MSG_MOD(info->info.extack,
+				   "Can't activate LAG offload, TX type isn't supported");
+
+	return changed;
 }
 
 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
@@ -366,7 +437,7 @@
 		return 0;
 
 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
-	if (idx == -1)
+	if (idx < 0)
 		return 0;
 
 	/* This information is used to determine virtual to physical
@@ -389,13 +460,11 @@
 	struct mlx5_lag *ldev;
 	int changed = 0;
 
-	if (!net_eq(dev_net(ndev), &init_net))
-		return NOTIFY_DONE;
-
 	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
 		return NOTIFY_DONE;
 
 	ldev = container_of(this, struct mlx5_lag, nb);
+
 	tracker = ldev->tracker;
 
 	switch (event) {
@@ -409,23 +478,14 @@
 		break;
 	}
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev->tracker = tracker;
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	if (changed)
 		mlx5_queue_bond_work(ldev, 0);
 
 	return NOTIFY_DONE;
-}
-
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
-	if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
-	    (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
-		return false;
-	else
-		return true;
 }
 
 static struct mlx5_lag *mlx5_lag_dev_alloc(void)
@@ -436,36 +496,43 @@
 	if (!ldev)
 		return NULL;
 
+	ldev->wq = create_singlethread_workqueue("mlx5_lag");
+	if (!ldev->wq) {
+		kfree(ldev);
+		return NULL;
+	}
+
 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
-	ldev->allowed = mlx5_lag_check_prereq(ldev);
 
 	return ldev;
 }
 
 static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
 {
+	destroy_workqueue(ldev->wq);
 	kfree(ldev);
 }
 
-static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
-				struct mlx5_core_dev *dev,
-				struct net_device *netdev)
+static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+			       struct mlx5_core_dev *dev,
+			       struct net_device *netdev)
 {
 	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
 
 	if (fn >= MLX5_MAX_PORTS)
-		return;
+		return -EPERM;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev->pf[fn].dev = dev;
 	ldev->pf[fn].netdev = netdev;
 	ldev->tracker.netdev_state[fn].link_up = 0;
 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
 
-	ldev->allowed = mlx5_lag_check_prereq(ldev);
 	dev->priv.lag = ldev;
 
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
+
+	return fn;
 }
 
 static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
@@ -480,12 +547,11 @@
 	if (i == MLX5_MAX_PORTS)
 		return;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
 
 	dev->priv.lag = NULL;
-	ldev->allowed = mlx5_lag_check_prereq(ldev);
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 }
 
 /* Must be called with intf_mutex held */
@@ -493,10 +559,11 @@
 {
 	struct mlx5_lag *ldev = NULL;
 	struct mlx5_core_dev *tmp_dev;
+	int i, err;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
 	    !MLX5_CAP_GEN(dev, lag_master) ||
-	    (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
 		return;
 
 	tmp_dev = mlx5_get_next_phys_dev(dev);
@@ -511,15 +578,28 @@
 	}
 
-	mlx5_lag_dev_add_pf(ldev, dev, netdev);
+	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
+		return;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (!ldev->pf[i].dev)
+			break;
+
+	if (i >= MLX5_MAX_PORTS)
+		ldev->flags |= MLX5_LAG_FLAG_READY;
 
 	if (!ldev->nb.notifier_call) {
 		ldev->nb.notifier_call = mlx5_lag_netdev_event;
-		if (register_netdevice_notifier(&ldev->nb)) {
+		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
 			ldev->nb.notifier_call = NULL;
 			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
 		}
 	}
+
+	err = mlx5_lag_mp_init(ldev);
+	if (err)
+		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+			      err);
 }
 
 /* Must be called with intf_mutex held */
@@ -532,73 +612,83 @@
 	if (!ldev)
 		return;
 
-	if (mlx5_lag_is_bonded(ldev))
+	if (__mlx5_lag_is_active(ldev))
 		mlx5_deactivate_lag(ldev);
 
 	mlx5_lag_dev_remove_pf(ldev, dev);
+
+	ldev->flags &= ~MLX5_LAG_FLAG_READY;
 
 	for (i = 0; i < MLX5_MAX_PORTS; i++)
 		if (ldev->pf[i].dev)
 			break;
 
 	if (i == MLX5_MAX_PORTS) {
-		if (ldev->nb.notifier_call)
-			unregister_netdevice_notifier(&ldev->nb);
+		if (ldev->nb.notifier_call) {
+			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+			ldev->nb.notifier_call = NULL;
+		}
+		mlx5_lag_mp_cleanup(ldev);
 		cancel_delayed_work_sync(&ldev->bond_work);
 		mlx5_lag_dev_free(ldev);
 	}
 }
+
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	spin_lock(&lag_lock);
+	ldev = mlx5_lag_dev_get(dev);
+	res = ldev && __mlx5_lag_is_roce(ldev);
+	spin_unlock(&lag_lock);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
 
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
 	bool res;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
-	res = ldev && mlx5_lag_is_bonded(ldev);
-	mutex_unlock(&lag_mutex);
+	res = ldev && __mlx5_lag_is_active(ldev);
+	spin_unlock(&lag_lock);
 
 	return res;
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
-static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
-	int ret = 0;
-	bool lag_active;
+	bool res;
+
+	spin_lock(&lag_lock);
+	ldev = mlx5_lag_dev_get(dev);
+	res = ldev && __mlx5_lag_is_sriov(ldev);
+	spin_unlock(&lag_lock);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
+void mlx5_lag_update(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
 
 	mlx5_dev_list_lock();
-
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev) {
-		ret = -ENODEV;
+	if (!ldev)
 		goto unlock;
-	}
-	lag_active = mlx5_lag_is_bonded(ldev);
-	if (!mlx5_lag_check_prereq(ldev) && allow) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-	if (ldev->allowed == allow)
-		goto unlock;
-	ldev->allowed = allow;
-	if ((lag_active && !allow) || allow)
-		mlx5_do_bond(ldev);
+
+	mlx5_do_bond(ldev);
+
 unlock:
 	mlx5_dev_list_unlock();
-	return ret;
-}
-
-int mlx5_lag_forbid(struct mlx5_core_dev *dev)
-{
-	return mlx5_lag_set_state(dev, false);
-}
-
-int mlx5_lag_allow(struct mlx5_core_dev *dev)
-{
-	return mlx5_lag_set_state(dev, true);
 }
 
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -606,27 +696,52 @@
 	struct net_device *ndev = NULL;
 	struct mlx5_lag *ldev;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
 
-	if (!(ldev && mlx5_lag_is_bonded(ldev)))
+	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
 
 	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-		ndev = ldev->tracker.netdev_state[0].tx_enabled ?
-		       ldev->pf[0].netdev : ldev->pf[1].netdev;
+		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
+		       ldev->pf[MLX5_LAG_P1].netdev :
+		       ldev->pf[MLX5_LAG_P2].netdev;
 	} else {
-		ndev = ldev->pf[0].netdev;
+		ndev = ldev->pf[MLX5_LAG_P1].netdev;
 	}
 	if (ndev)
 		dev_hold(ndev);
 
 unlock:
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	return ndev;
 }
 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+			   struct net_device *slave)
+{
+	struct mlx5_lag *ldev;
+	u8 port = 0;
+
+	spin_lock(&lag_lock);
+	ldev = mlx5_lag_dev_get(dev);
+	if (!(ldev && __mlx5_lag_is_roce(ldev)))
+		goto unlock;
+
+	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
+		port = MLX5_LAG_P1;
+	else
+		port = MLX5_LAG_P2;
+
+	port = ldev->v2p_map[port];
+
+unlock:
+	spin_unlock(&lag_lock);
+	return port;
+}
+EXPORT_SYMBOL(mlx5_lag_get_slave_port);
 
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 {
@@ -638,7 +753,8 @@
 		return true;
 
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+	if (!ldev || !__mlx5_lag_is_roce(ldev) ||
+	    ldev->pf[MLX5_LAG_P1].dev == dev)
 		return true;
 
 	/* If bonded, we do not add an IB device for PF1. */
@@ -663,28 +779,33 @@
 
 	memset(values, 0, sizeof(*values) * num_counters);
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
-	if (ldev && mlx5_lag_is_bonded(ldev)) {
+	if (ldev && __mlx5_lag_is_roce(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
-		mdev[0] = ldev->pf[0].dev;
-		mdev[1] = ldev->pf[1].dev;
+		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
+		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
 	} else {
 		num_ports = 1;
-		mdev[0] = dev;
+		mdev[MLX5_LAG_P1] = dev;
 	}
+	spin_unlock(&lag_lock);
 
 	for (i = 0; i < num_ports; ++i) {
-		ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
+
+		MLX5_SET(query_cong_statistics_in, in, opcode,
+			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
+					  out);
		if (ret)
-			goto unlock;
+			goto free;
 
 		for (j = 0; j < num_counters; ++j)
 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
 	}
 
-unlock:
-	mutex_unlock(&lag_mutex);
+free:
 	kvfree(out);
 	return ret;
 }
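
Note on the command-interface conversion that recurs throughout this patch: the old code built both an input and a zeroed output mailbox and passed explicit sizes to mlx5_cmd_exec(), while the new code declares only the input mailbox and calls mlx5_cmd_exec_in() with the command name. The sketch below contrasts the two styles using the destroy_lag command taken from the hunks above; the function names are illustrative only, and the snippet assumes the same mlx5 driver headers that lag.c already includes.

/* Illustrative sketch, not part of the patch. destroy_lag_old_style() and
 * destroy_lag_new_style() are hypothetical names used only to contrast the
 * two call styles seen in the diff above.
 */

/* Old style (removed by this patch): the caller also declares a zeroed
 * output mailbox and passes explicit buffer sizes to mlx5_cmd_exec().
 */
static int destroy_lag_old_style(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

/* New style (used throughout this patch): only the input mailbox is
 * declared by the caller; mlx5_cmd_exec_in() takes the command name and
 * the input buffer, as in mlx5_deactivate_lag() above.
 */
static int destroy_lag_new_style(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);

	return mlx5_cmd_exec_in(dev, destroy_lag, in);
}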