forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
....@@ -31,6 +31,7 @@
3131 */
3232
3333 #include <net/flow_dissector.h>
34
+#include <net/flow_offload.h>
3435 #include <net/sch_generic.h>
3536 #include <net/pkt_cls.h>
3637 #include <net/tc_act/tc_gact.h>
....@@ -38,76 +39,286 @@
3839 #include <linux/mlx5/fs.h>
3940 #include <linux/mlx5/device.h>
4041 #include <linux/rhashtable.h>
41
-#include <net/switchdev.h>
42
+#include <linux/refcount.h>
43
+#include <linux/completion.h>
4244 #include <net/tc_act/tc_mirred.h>
4345 #include <net/tc_act/tc_vlan.h>
4446 #include <net/tc_act/tc_tunnel_key.h>
4547 #include <net/tc_act/tc_pedit.h>
4648 #include <net/tc_act/tc_csum.h>
47
-#include <net/vxlan.h>
49
+#include <net/tc_act/tc_mpls.h>
4850 #include <net/arp.h>
51
+#include <net/ipv6_stubs.h>
52
+#include <net/bareudp.h>
53
+#include <net/bonding.h>
4954 #include "en.h"
5055 #include "en_rep.h"
56
+#include "en/rep/tc.h"
57
+#include "en/rep/neigh.h"
5158 #include "en_tc.h"
5259 #include "eswitch.h"
53
-#include "lib/vxlan.h"
5460 #include "fs_core.h"
5561 #include "en/port.h"
62
+#include "en/tc_tun.h"
63
+#include "en/mapping.h"
64
+#include "en/tc_ct.h"
65
+#include "en/mod_hdr.h"
66
+#include "lib/devcom.h"
67
+#include "lib/geneve.h"
68
+#include "lib/fs_chains.h"
69
+#include "diag/en_tc_tracepoint.h"
70
+#include <asm/div64.h>
5671
57
-struct mlx5_nic_flow_attr {
58
- u32 action;
59
- u32 flow_tag;
60
- u32 mod_hdr_id;
61
- u32 hairpin_tirn;
62
- u8 match_level;
63
- struct mlx5_flow_table *hairpin_ft;
64
-};
65
-
66
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
72
+#define nic_chains(priv) ((priv)->fs.tc.chains)
73
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
74
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
6775
6876 enum {
69
- MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
70
- MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
71
- MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
72
- MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
73
- MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
74
- MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
75
- MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
77
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
78
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
79
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
80
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
81
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
82
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
83
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
84
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
85
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
86
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
87
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
88
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
89
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
90
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
7691 };
7792
7893 #define MLX5E_TC_MAX_SPLITS 1
94
+
95
+/* Helper struct for accessing a struct containing list_head array.
96
+ * Containing struct
97
+ * |- Helper array
98
+ * [0] Helper item 0
99
+ * |- list_head item 0
100
+ * |- index (0)
101
+ * [1] Helper item 1
102
+ * |- list_head item 1
103
+ * |- index (1)
104
+ * To access the containing struct from one of the list_head items:
105
+ * 1. Get the helper item from the list_head item using
106
+ * helper item =
107
+ * container_of(list_head item, helper struct type, list_head field)
108
+ * 2. Get the containing struct from the helper item and its index in the array:
109
+ * containing struct =
110
+ * container_of(helper item, containing struct type, helper field[index])
111
+ */
112
+struct encap_flow_item {
113
+ struct mlx5e_encap_entry *e; /* attached encap instance */
114
+ struct list_head list;
115
+ int index;
116
+};
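
The comment block above describes how the containing struct is recovered from one list_head element of an embedded helper array. A minimal userspace sketch of that two-step container_of() pattern follows; the struct and variable names are illustrative only (they are not part of the patch), and the variable array index inside container_of() relies on the GCC/Clang __builtin_offsetof extension, just as the kernel usage does.

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct list_head { struct list_head *next, *prev; };

    struct helper_item {                /* mirrors encap_flow_item */
            struct list_head list;
            int index;
    };

    struct containing {                 /* mirrors mlx5e_tc_flow   */
            int id;
            struct helper_item items[2];
    };

    int main(void)
    {
            struct containing c = { .id = 42, .items = { { .index = 0 }, { .index = 1 } } };
            struct list_head *lh = &c.items[1].list;   /* all we start with */

            /* step 1: list_head item -> helper item */
            struct helper_item *hi = container_of(lh, struct helper_item, list);
            /* step 2: helper item + its index -> containing struct */
            struct containing *back = container_of(hi, struct containing, items[hi->index]);

            printf("recovered id=%d via index=%d\n", back->id, hi->index);
            return 0;
    }
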
79117
80118 struct mlx5e_tc_flow {
81119 struct rhash_head node;
82120 struct mlx5e_priv *priv;
83121 u64 cookie;
84
- u8 flags;
122
+ unsigned long flags;
85123 struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
86
- struct list_head encap; /* flows sharing the same encap ID */
87
- struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
124
+
125
+ /* flows sharing the same reformat object - currently mpls decap */
126
+ struct list_head l3_to_l2_reformat;
127
+ struct mlx5e_decap_entry *decap_reformat;
128
+
129
+ /* Flow can be associated with multiple encap IDs.
130
+ * The number of encaps is bounded by the number of supported
131
+ * destinations.
132
+ */
133
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
134
+ struct mlx5e_tc_flow *peer_flow;
135
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
136
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
88137 struct list_head hairpin; /* flows sharing the same hairpin */
89
- union {
90
- struct mlx5_esw_flow_attr esw_attr[0];
91
- struct mlx5_nic_flow_attr nic_attr[0];
92
- };
138
+ struct list_head peer; /* flows with peer flow */
139
+ struct list_head unready; /* flows not ready to be offloaded (e.g. due to missing route) */
140
+ struct net_device *orig_dev; /* netdev adding flow first */
141
+ int tmp_efi_index;
142
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
143
+ refcount_t refcnt;
144
+ struct rcu_head rcu_head;
145
+ struct completion init_done;
146
+ int tunnel_id; /* the mapped tunnel id of this flow */
147
+ struct mlx5_flow_attr *attr;
93148 };
94149
95150 struct mlx5e_tc_flow_parse_attr {
96
- struct ip_tunnel_info tun_info;
151
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
152
+ struct net_device *filter_dev;
97153 struct mlx5_flow_spec spec;
98
- int num_mod_hdr_actions;
99
- int max_mod_hdr_actions;
100
- void *mod_hdr_actions;
101
- int mirred_ifindex;
102
-};
103
-
104
-enum {
105
- MLX5_HEADER_TYPE_VXLAN = 0x0,
106
- MLX5_HEADER_TYPE_NVGRE = 0x1,
154
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
155
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
156
+ struct ethhdr eth;
107157 };
108158
109159 #define MLX5E_TC_TABLE_NUM_GROUPS 4
110
-#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
160
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
161
+
162
+struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
163
+ [CHAIN_TO_REG] = {
164
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
165
+ .moffset = 0,
166
+ .mlen = 2,
167
+ },
168
+ [TUNNEL_TO_REG] = {
169
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
170
+ .moffset = 1,
171
+ .mlen = 3,
172
+ .soffset = MLX5_BYTE_OFF(fte_match_param,
173
+ misc_parameters_2.metadata_reg_c_1),
174
+ },
175
+ [ZONE_TO_REG] = zone_to_reg_ct,
176
+ [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
177
+ [CTSTATE_TO_REG] = ctstate_to_reg_ct,
178
+ [MARK_TO_REG] = mark_to_reg_ct,
179
+ [LABELS_TO_REG] = labels_to_reg_ct,
180
+ [FTEID_TO_REG] = fteid_to_reg_ct,
181
+ /* For NIC rules we store the restore metadata directly
182
+ * into reg_b that is passed to SW since we don't
183
+ * jump between steering domains.
184
+ */
185
+ [NIC_CHAIN_TO_REG] = {
186
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
187
+ .moffset = 0,
188
+ .mlen = 2,
189
+ },
190
+ [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
191
+};
192
+
193
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
194
+
195
+void
196
+mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
197
+ enum mlx5e_tc_attr_to_reg type,
198
+ u32 data,
199
+ u32 mask)
200
+{
201
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
202
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
203
+ void *headers_c = spec->match_criteria;
204
+ void *headers_v = spec->match_value;
205
+ void *fmask, *fval;
206
+
207
+ fmask = headers_c + soffset;
208
+ fval = headers_v + soffset;
209
+
210
+ mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
211
+ data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));
212
+
213
+ memcpy(fmask, &mask, match_len);
214
+ memcpy(fval, &data, match_len);
215
+
216
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
217
+}
218
+
219
+void
220
+mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
221
+ enum mlx5e_tc_attr_to_reg type,
222
+ u32 *data,
223
+ u32 *mask)
224
+{
225
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
226
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
227
+ void *headers_c = spec->match_criteria;
228
+ void *headers_v = spec->match_value;
229
+ void *fmask, *fval;
230
+
231
+ fmask = headers_c + soffset;
232
+ fval = headers_v + soffset;
233
+
234
+ memcpy(mask, fmask, match_len);
235
+ memcpy(data, fval, match_len);
236
+
237
+ *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8))));
238
+ *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8))));
239
+}
240
+
241
+int
242
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
243
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
244
+ enum mlx5_flow_namespace_type ns,
245
+ enum mlx5e_tc_attr_to_reg type,
246
+ u32 data)
247
+{
248
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
249
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
250
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
251
+ char *modact;
252
+ int err;
253
+
254
+ err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
255
+ if (err)
256
+ return err;
257
+
258
+ modact = mod_hdr_acts->actions +
259
+ (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
260
+
261
+ /* Firmware has a 5-bit length field and 0 means 32 bits */
262
+ if (mlen == 4)
263
+ mlen = 0;
264
+
265
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
266
+ MLX5_SET(set_action_in, modact, field, mfield);
267
+ MLX5_SET(set_action_in, modact, offset, moffset * 8);
268
+ MLX5_SET(set_action_in, modact, length, mlen * 8);
269
+ MLX5_SET(set_action_in, modact, data, data);
270
+ mod_hdr_acts->num_actions++;
271
+
272
+ return 0;
273
+}
274
+
275
+#define esw_offloads_mode(esw) (mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)
276
+
277
+static struct mlx5_tc_ct_priv *
278
+get_ct_priv(struct mlx5e_priv *priv)
279
+{
280
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
281
+ struct mlx5_rep_uplink_priv *uplink_priv;
282
+ struct mlx5e_rep_priv *uplink_rpriv;
283
+
284
+ if (esw_offloads_mode(esw)) {
285
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
286
+ uplink_priv = &uplink_rpriv->uplink_priv;
287
+
288
+ return uplink_priv->ct_priv;
289
+ }
290
+
291
+ return priv->fs.tc.ct;
292
+}
293
+
294
+struct mlx5_flow_handle *
295
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
296
+ struct mlx5_flow_spec *spec,
297
+ struct mlx5_flow_attr *attr)
298
+{
299
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
300
+
301
+ if (esw_offloads_mode(esw))
302
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
303
+
304
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
305
+}
306
+
307
+void
308
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
309
+ struct mlx5_flow_handle *rule,
310
+ struct mlx5_flow_attr *attr)
311
+{
312
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
313
+
314
+ if (esw_offloads_mode(esw)) {
315
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
316
+
317
+ return;
318
+ }
319
+
320
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
321
+}
111322
112323 struct mlx5e_hairpin {
113324 struct mlx5_hairpin *pair;
....@@ -127,157 +338,180 @@
127338 /* a node of a hash table which keeps all the hairpin entries */
128339 struct hlist_node hairpin_hlist;
129340
341
+ /* protects flows list */
342
+ spinlock_t flows_lock;
130343 /* flows sharing the same hairpin */
131344 struct list_head flows;
345
+ /* hpe's that were not fully initialized when dead peer update event
346
+ * function traversed them.
347
+ */
348
+ struct list_head dead_peer_wait_list;
132349
133350 u16 peer_vhca_id;
134351 u8 prio;
135352 struct mlx5e_hairpin *hp;
353
+ refcount_t refcnt;
354
+ struct completion res_ready;
136355 };
137356
138
-struct mod_hdr_key {
139
- int num_actions;
140
- void *actions;
141
-};
357
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
358
+ struct mlx5e_tc_flow *flow);
142359
143
-struct mlx5e_mod_hdr_entry {
144
- /* a node of a hash table which keeps all the mod_hdr entries */
145
- struct hlist_node mod_hdr_hlist;
146
-
147
- /* flows sharing the same mod_hdr entry */
148
- struct list_head flows;
149
-
150
- struct mod_hdr_key key;
151
-
152
- u32 mod_hdr_id;
153
-};
154
-
155
-#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
156
-
157
-static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
360
+static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
158361 {
159
- return jhash(key->actions,
160
- key->num_actions * MLX5_MH_ACT_SZ, 0);
362
+ if (!flow || !refcount_inc_not_zero(&flow->refcnt))
363
+ return ERR_PTR(-EINVAL);
364
+ return flow;
161365 }
162366
163
-static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
164
- struct mod_hdr_key *b)
367
+static void mlx5e_flow_put(struct mlx5e_priv *priv,
368
+ struct mlx5e_tc_flow *flow)
165369 {
166
- if (a->num_actions != b->num_actions)
167
- return 1;
370
+ if (refcount_dec_and_test(&flow->refcnt)) {
371
+ mlx5e_tc_del_flow(priv, flow);
372
+ kfree_rcu(flow, rcu_head);
373
+ }
374
+}
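
mlx5e_flow_get()/mlx5e_flow_put() above follow the common refcount_inc_not_zero()/refcount_dec_and_test() lifetime pattern: a lookup may only take a reference while the count is still non-zero, and the last put releases the object (here deferred with kfree_rcu()). A hedged userspace sketch of the same idea using C11 atomics, with plain free() standing in for the RCU-deferred free and all names illustrative:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct obj {
            atomic_int refcnt;
            /* ... payload ... */
    };

    /* take a reference only if the object is still live (refcnt > 0) */
    static bool obj_get(struct obj *o)
    {
            int c = atomic_load(&o->refcnt);

            while (c != 0)
                    if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
                            return true;
            return false;           /* object already on its way to being freed */
    }

    /* drop a reference; the last put releases the object */
    static void obj_put(struct obj *o)
    {
            if (atomic_fetch_sub(&o->refcnt, 1) == 1)
                    free(o);        /* the kernel code defers this via kfree_rcu() */
    }

    int main(void)
    {
            struct obj *o = calloc(1, sizeof(*o));

            atomic_store(&o->refcnt, 1);    /* creation reference            */
            if (obj_get(o))                 /* lookup takes a 2nd reference  */
                    obj_put(o);
            obj_put(o);                     /* last put frees                */
            return 0;
    }
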
168375
169
- return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
376
+static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
377
+{
378
+ /* Complete all memory stores before setting bit. */
379
+ smp_mb__before_atomic();
380
+ set_bit(flag, &flow->flags);
381
+}
382
+
383
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
384
+
385
+static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
386
+ unsigned long flag)
387
+{
388
+ /* test_and_set_bit() provides all necessary barriers */
389
+ return test_and_set_bit(flag, &flow->flags);
390
+}
391
+
392
+#define flow_flag_test_and_set(flow, flag) \
393
+ __flow_flag_test_and_set(flow, \
394
+ MLX5E_TC_FLOW_FLAG_##flag)
395
+
396
+static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
397
+{
398
+ /* Complete all memory stores before clearing bit. */
399
+ smp_mb__before_atomic();
400
+ clear_bit(flag, &flow->flags);
401
+}
402
+
403
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
404
+ MLX5E_TC_FLOW_FLAG_##flag)
405
+
406
+static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
407
+{
408
+ bool ret = test_bit(flag, &flow->flags);
409
+
410
+ /* Read fields of flow structure only after checking flags. */
411
+ smp_mb__after_atomic();
412
+ return ret;
413
+}
414
+
415
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
416
+ MLX5E_TC_FLOW_FLAG_##flag)
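
The barrier comments in the flag helpers above describe a publish/consume protocol: a writer fills in the flow fields and only then sets the flag (smp_mb__before_atomic() followed by set_bit()), while readers test the flag first and only then read the fields (test_bit() followed by smp_mb__after_atomic()). A small userspace analogue using C11 release/acquire ordering in place of the explicit kernel barriers; all names here are illustrative, not taken from the patch:

    #include <stdatomic.h>
    #include <stdio.h>

    struct flow {
            int rule;                       /* payload published by the writer */
            atomic_ulong flags;
    };

    #define FLAG_OFFLOADED (1UL << 0)

    static void writer(struct flow *f)
    {
            f->rule = 123;                  /* 1. fill in the fields           */
            atomic_fetch_or_explicit(&f->flags, FLAG_OFFLOADED,
                                     memory_order_release);  /* 2. publish    */
    }

    static void reader(struct flow *f)
    {
            if (atomic_load_explicit(&f->flags, memory_order_acquire) & FLAG_OFFLOADED)
                    printf("rule=%d\n", f->rule);  /* 3. safe to read payload */
    }

    int main(void)
    {
            struct flow f = { .flags = 0 };

            writer(&f);
            reader(&f);
            return 0;
    }
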
417
+
418
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
419
+{
420
+ return flow_flag_test(flow, ESWITCH);
421
+}
422
+
423
+static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
424
+{
425
+ return flow_flag_test(flow, FT);
426
+}
427
+
428
+static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
429
+{
430
+ return flow_flag_test(flow, OFFLOADED);
431
+}
432
+
433
+static int get_flow_name_space(struct mlx5e_tc_flow *flow)
434
+{
435
+ return mlx5e_is_eswitch_flow(flow) ?
436
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
437
+}
438
+
439
+static struct mod_hdr_tbl *
440
+get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
441
+{
442
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
443
+
444
+ return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
445
+ &esw->offloads.mod_hdr :
446
+ &priv->fs.tc.mod_hdr;
170447 }
171448
172449 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
173450 struct mlx5e_tc_flow *flow,
174451 struct mlx5e_tc_flow_parse_attr *parse_attr)
175452 {
176
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
177
- int num_actions, actions_size, namespace, err;
178
- struct mlx5e_mod_hdr_entry *mh;
179
- struct mod_hdr_key key;
180
- bool found = false;
181
- u32 hash_key;
453
+ struct mlx5_modify_hdr *modify_hdr;
454
+ struct mlx5e_mod_hdr_handle *mh;
182455
183
- num_actions = parse_attr->num_mod_hdr_actions;
184
- actions_size = MLX5_MH_ACT_SZ * num_actions;
456
+ mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
457
+ get_flow_name_space(flow),
458
+ &parse_attr->mod_hdr_acts);
459
+ if (IS_ERR(mh))
460
+ return PTR_ERR(mh);
185461
186
- key.actions = parse_attr->mod_hdr_actions;
187
- key.num_actions = num_actions;
188
-
189
- hash_key = hash_mod_hdr_info(&key);
190
-
191
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
192
- namespace = MLX5_FLOW_NAMESPACE_FDB;
193
- hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
194
- mod_hdr_hlist, hash_key) {
195
- if (!cmp_mod_hdr_info(&mh->key, &key)) {
196
- found = true;
197
- break;
198
- }
199
- }
200
- } else {
201
- namespace = MLX5_FLOW_NAMESPACE_KERNEL;
202
- hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
203
- mod_hdr_hlist, hash_key) {
204
- if (!cmp_mod_hdr_info(&mh->key, &key)) {
205
- found = true;
206
- break;
207
- }
208
- }
209
- }
210
-
211
- if (found)
212
- goto attach_flow;
213
-
214
- mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
215
- if (!mh)
216
- return -ENOMEM;
217
-
218
- mh->key.actions = (void *)mh + sizeof(*mh);
219
- memcpy(mh->key.actions, key.actions, actions_size);
220
- mh->key.num_actions = num_actions;
221
- INIT_LIST_HEAD(&mh->flows);
222
-
223
- err = mlx5_modify_header_alloc(priv->mdev, namespace,
224
- mh->key.num_actions,
225
- mh->key.actions,
226
- &mh->mod_hdr_id);
227
- if (err)
228
- goto out_err;
229
-
230
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
231
- hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
232
- else
233
- hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
234
-
235
-attach_flow:
236
- list_add(&flow->mod_hdr, &mh->flows);
237
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
238
- flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
239
- else
240
- flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
462
+ modify_hdr = mlx5e_mod_hdr_get(mh);
463
+ flow->attr->modify_hdr = modify_hdr;
464
+ flow->mh = mh;
241465
242466 return 0;
243
-
244
-out_err:
245
- kfree(mh);
246
- return err;
247467 }
248468
249469 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
250470 struct mlx5e_tc_flow *flow)
251471 {
252
- struct list_head *next = flow->mod_hdr.next;
472
+ /* flow wasn't fully initialized */
473
+ if (!flow->mh)
474
+ return;
253475
254
- list_del(&flow->mod_hdr);
255
-
256
- if (list_empty(next)) {
257
- struct mlx5e_mod_hdr_entry *mh;
258
-
259
- mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
260
-
261
- mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
262
- hash_del(&mh->mod_hdr_hlist);
263
- kfree(mh);
264
- }
476
+ mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
477
+ flow->mh);
478
+ flow->mh = NULL;
265479 }
266480
267481 static
268482 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
269483 {
484
+ struct mlx5_core_dev *mdev;
270485 struct net_device *netdev;
271486 struct mlx5e_priv *priv;
272487
273
- netdev = __dev_get_by_index(net, ifindex);
488
+ netdev = dev_get_by_index(net, ifindex);
489
+ if (!netdev)
490
+ return ERR_PTR(-ENODEV);
491
+
274492 priv = netdev_priv(netdev);
275
- return priv->mdev;
493
+ mdev = priv->mdev;
494
+ dev_put(netdev);
495
+
496
+ /* Mirred tc action holds a refcount on the ifindex net_device (see
497
+ * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
498
+ * after dev_put(netdev), while we're in the context of adding a tc flow.
499
+ *
500
+ * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
501
+ * stored in a hairpin object, which exists until all flows, that refer to it, get
502
+ * removed.
503
+ *
504
+ * On the other hand, after a hairpin object has been created, the peer net_device may
505
+ * be removed/unbound while there are still some hairpin flows that are using it. This
506
+ * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
507
+ * NETDEV_UNREGISTER event of the peer net_device.
508
+ */
509
+ return mdev;
276510 }
277511
278512 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
279513 {
280
- u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
514
+ u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
281515 void *tirc;
282516 int err;
283517
....@@ -291,7 +525,7 @@
291525 MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
292526 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
293527
294
- err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
528
+ err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
295529 if (err)
296530 goto create_tir_err;
297531
....@@ -320,7 +554,7 @@
320554
321555 for (i = 0; i < sz; i++) {
322556 ix = i;
323
- if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
557
+ if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
324558 ix = mlx5e_bits_invert(i, ilog2(sz));
325559 ix = indirection_rqt[ix];
326560 rqn = hp->pair->rqn[ix];
....@@ -364,16 +598,18 @@
364598 void *tirc;
365599
366600 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
601
+ struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
602
+
367603 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
368604 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
369605
370606 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
371607 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
372608 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
373
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
609
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
374610
375611 err = mlx5_core_create_tir(hp->func_mdev, in,
376
- MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
612
+ &hp->indir_tirn[tt]);
377613 if (err) {
378614 mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
379615 goto err_destroy_tirs;
....@@ -408,7 +644,7 @@
408644 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
409645 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
410646
411
- ft_attr->max_fte = MLX5E_NUM_TT;
647
+ ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
412648 ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
413649 ft_attr->prio = MLX5E_TC_PRIO;
414650 }
....@@ -469,6 +705,10 @@
469705
470706 func_mdev = priv->mdev;
471707 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
708
+ if (IS_ERR(peer_mdev)) {
709
+ err = PTR_ERR(peer_mdev);
710
+ goto create_pair_err;
711
+ }
472712
473713 pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
474714 if (IS_ERR(pair)) {
....@@ -523,17 +763,40 @@
523763
524764 hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
525765 hairpin_hlist, hash_key) {
526
- if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
766
+ if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
767
+ refcount_inc(&hpe->refcnt);
527768 return hpe;
769
+ }
528770 }
529771
530772 return NULL;
531773 }
532774
775
+static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
776
+ struct mlx5e_hairpin_entry *hpe)
777
+{
778
+ /* no more hairpin flows for us, release the hairpin pair */
779
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
780
+ return;
781
+ hash_del(&hpe->hairpin_hlist);
782
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
783
+
784
+ if (!IS_ERR_OR_NULL(hpe->hp)) {
785
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
786
+ dev_name(hpe->hp->pair->peer_mdev->device));
787
+
788
+ mlx5e_hairpin_destroy(hpe->hp);
789
+ }
790
+
791
+ WARN_ON(!list_empty(&hpe->flows));
792
+ kfree(hpe);
793
+}
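
mlx5e_hairpin_put() above relies on refcount_dec_and_mutex_lock(): ordinary puts just decrement, but the final put returns with hairpin_tbl_lock held, so dropping the last reference and unlinking the entry from the hash table happen atomically with respect to concurrent mlx5e_hairpin_get() lookups. A rough userspace approximation of that helper (pthread mutex plus C11 atomics, illustrative only):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    /* Returns false after a non-final decrement (lock not taken).
     * Returns true when the count hit zero, with *lock held; the caller
     * then unlinks and frees the object and finally unlocks.
     */
    bool dec_and_mutex_lock(atomic_int *refcnt, pthread_mutex_t *lock)
    {
            int c = atomic_load(refcnt);

            /* fast path: not the last reference, just decrement */
            while (c > 1)
                    if (atomic_compare_exchange_weak(refcnt, &c, c - 1))
                            return false;

            /* possibly the last reference: serialize against lookups first */
            pthread_mutex_lock(lock);
            if (atomic_fetch_sub(refcnt, 1) != 1) {
                    pthread_mutex_unlock(lock);
                    return false;
            }
            return true;
    }
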
794
+
533795 #define UNKNOWN_MATCH_PRIO 8
534796
535797 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
536
- struct mlx5_flow_spec *spec, u8 *match_prio)
798
+ struct mlx5_flow_spec *spec, u8 *match_prio,
799
+ struct netlink_ext_ack *extack)
537800 {
538801 void *headers_c, *headers_v;
539802 u8 prio_val, prio_mask = 0;
....@@ -541,8 +804,8 @@
541804
542805 #ifdef CONFIG_MLX5_CORE_EN_DCB
543806 if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
544
- netdev_warn(priv->netdev,
545
- "only PCP trust state supported for hairpin\n");
807
+ NL_SET_ERR_MSG_MOD(extack,
808
+ "only PCP trust state supported for hairpin");
546809 return -EOPNOTSUPP;
547810 }
548811 #endif
....@@ -558,8 +821,8 @@
558821 if (!vlan_present || !prio_mask) {
559822 prio_val = UNKNOWN_MATCH_PRIO;
560823 } else if (prio_mask != 0x7) {
561
- netdev_warn(priv->netdev,
562
- "masked priority match not supported for hairpin\n");
824
+ NL_SET_ERR_MSG_MOD(extack,
825
+ "masked priority match not supported for hairpin");
563826 return -EOPNOTSUPP;
564827 }
565828
....@@ -569,9 +832,10 @@
569832
570833 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
571834 struct mlx5e_tc_flow *flow,
572
- struct mlx5e_tc_flow_parse_attr *parse_attr)
835
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
836
+ struct netlink_ext_ack *extack)
573837 {
574
- int peer_ifindex = parse_attr->mirred_ifindex;
838
+ int peer_ifindex = parse_attr->mirred_ifindex[0];
575839 struct mlx5_hairpin_params params;
576840 struct mlx5_core_dev *peer_mdev;
577841 struct mlx5e_hairpin_entry *hpe;
....@@ -583,26 +847,52 @@
583847 int err;
584848
585849 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
850
+ if (IS_ERR(peer_mdev)) {
851
+ NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
852
+ return PTR_ERR(peer_mdev);
853
+ }
854
+
586855 if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
587
- netdev_warn(priv->netdev, "hairpin is not supported\n");
856
+ NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
588857 return -EOPNOTSUPP;
589858 }
590859
591860 peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
592
- err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio);
861
+ err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
862
+ extack);
593863 if (err)
594864 return err;
865
+
866
+ mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
595867 hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
596
- if (hpe)
868
+ if (hpe) {
869
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
870
+ wait_for_completion(&hpe->res_ready);
871
+
872
+ if (IS_ERR(hpe->hp)) {
873
+ err = -EREMOTEIO;
874
+ goto out_err;
875
+ }
597876 goto attach_flow;
877
+ }
598878
599879 hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
600
- if (!hpe)
880
+ if (!hpe) {
881
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
601882 return -ENOMEM;
883
+ }
602884
885
+ spin_lock_init(&hpe->flows_lock);
603886 INIT_LIST_HEAD(&hpe->flows);
887
+ INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
604888 hpe->peer_vhca_id = peer_id;
605889 hpe->prio = match_prio;
890
+ refcount_set(&hpe->refcnt, 1);
891
+ init_completion(&hpe->res_ready);
892
+
893
+ hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
894
+ hash_hairpin_info(peer_id, match_prio));
895
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
606896
607897 params.log_data_size = 15;
608898 params.log_data_size = min_t(u8, params.log_data_size,
....@@ -624,376 +914,789 @@
624914 params.num_channels = link_speed64;
625915
626916 hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
917
+ hpe->hp = hp;
918
+ complete_all(&hpe->res_ready);
627919 if (IS_ERR(hp)) {
628920 err = PTR_ERR(hp);
629
- goto create_hairpin_err;
921
+ goto out_err;
630922 }
631923
632924 netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
633
- hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name,
925
+ hp->tirn, hp->pair->rqn[0],
926
+ dev_name(hp->pair->peer_mdev->device),
634927 hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
635
-
636
- hpe->hp = hp;
637
- hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
638
- hash_hairpin_info(peer_id, match_prio));
639928
640929 attach_flow:
641930 if (hpe->hp->num_channels > 1) {
642
- flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
643
- flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
931
+ flow_flag_set(flow, HAIRPIN_RSS);
932
+ flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
644933 } else {
645
- flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
934
+ flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
646935 }
936
+
937
+ flow->hpe = hpe;
938
+ spin_lock(&hpe->flows_lock);
647939 list_add(&flow->hairpin, &hpe->flows);
940
+ spin_unlock(&hpe->flows_lock);
648941
649942 return 0;
650943
651
-create_hairpin_err:
652
- kfree(hpe);
944
+out_err:
945
+ mlx5e_hairpin_put(priv, hpe);
653946 return err;
654947 }
655948
656949 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
657950 struct mlx5e_tc_flow *flow)
658951 {
659
- struct list_head *next = flow->hairpin.next;
952
+ /* flow wasn't fully initialized */
953
+ if (!flow->hpe)
954
+ return;
660955
956
+ spin_lock(&flow->hpe->flows_lock);
661957 list_del(&flow->hairpin);
958
+ spin_unlock(&flow->hpe->flows_lock);
662959
663
- /* no more hairpin flows for us, release the hairpin pair */
664
- if (list_empty(next)) {
665
- struct mlx5e_hairpin_entry *hpe;
666
-
667
- hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
668
-
669
- netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
670
- hpe->hp->pair->peer_mdev->priv.name);
671
-
672
- mlx5e_hairpin_destroy(hpe->hp);
673
- hash_del(&hpe->hairpin_hlist);
674
- kfree(hpe);
675
- }
960
+ mlx5e_hairpin_put(priv, flow->hpe);
961
+ flow->hpe = NULL;
676962 }
677963
678
-static struct mlx5_flow_handle *
679
-mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
680
- struct mlx5e_tc_flow_parse_attr *parse_attr,
681
- struct mlx5e_tc_flow *flow)
964
+struct mlx5_flow_handle *
965
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
966
+ struct mlx5_flow_spec *spec,
967
+ struct mlx5_flow_attr *attr)
682968 {
683
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
684
- struct mlx5_core_dev *dev = priv->mdev;
969
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
970
+ struct mlx5_fs_chains *nic_chains = nic_chains(priv);
971
+ struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
972
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
685973 struct mlx5_flow_destination dest[2] = {};
686974 struct mlx5_flow_act flow_act = {
687975 .action = attr->action,
688
- .has_flow_tag = true,
689
- .flow_tag = attr->flow_tag,
690
- .encap_id = 0,
976
+ .flags = FLOW_ACT_NO_APPEND,
691977 };
692
- struct mlx5_fc *counter = NULL;
693978 struct mlx5_flow_handle *rule;
694
- bool table_created = false;
695
- int err, dest_ix = 0;
979
+ struct mlx5_flow_table *ft;
980
+ int dest_ix = 0;
696981
697
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
698
- err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
699
- if (err) {
700
- rule = ERR_PTR(err);
701
- goto err_add_hairpin_flow;
702
- }
703
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
704
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
705
- dest[dest_ix].ft = attr->hairpin_ft;
706
- } else {
707
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
708
- dest[dest_ix].tir_num = attr->hairpin_tirn;
709
- }
982
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
983
+ flow_context->flow_tag = nic_attr->flow_tag;
984
+
985
+ if (attr->dest_ft) {
986
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
987
+ dest[dest_ix].ft = attr->dest_ft;
988
+ dest_ix++;
989
+ } else if (nic_attr->hairpin_ft) {
990
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
991
+ dest[dest_ix].ft = nic_attr->hairpin_ft;
992
+ dest_ix++;
993
+ } else if (nic_attr->hairpin_tirn) {
994
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
995
+ dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
710996 dest_ix++;
711997 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
712998 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
713
- dest[dest_ix].ft = priv->fs.vlan.ft.t;
999
+ if (attr->dest_chain) {
1000
+ dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1001
+ attr->dest_chain, 1,
1002
+ MLX5E_TC_FT_LEVEL);
1003
+ if (IS_ERR(dest[dest_ix].ft))
1004
+ return ERR_CAST(dest[dest_ix].ft);
1005
+ } else {
1006
+ dest[dest_ix].ft = priv->fs.vlan.ft.t;
1007
+ }
7141008 dest_ix++;
1009
+ }
1010
+
1011
+ if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1012
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1013
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1014
+
1015
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1016
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1017
+ dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1018
+ dest_ix++;
1019
+ }
1020
+
1021
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1022
+ flow_act.modify_hdr = attr->modify_hdr;
1023
+
1024
+ mutex_lock(&tc->t_lock);
1025
+ if (IS_ERR_OR_NULL(tc->t)) {
1026
+ /* Create the root table here if it doesn't exist yet */
1027
+ tc->t =
1028
+ mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1029
+
1030
+ if (IS_ERR(tc->t)) {
1031
+ mutex_unlock(&tc->t_lock);
1032
+ netdev_err(priv->netdev,
1033
+ "Failed to create tc offload table\n");
1034
+ rule = ERR_CAST(priv->fs.tc.t);
1035
+ goto err_ft_get;
1036
+ }
1037
+ }
1038
+ mutex_unlock(&tc->t_lock);
1039
+
1040
+ if (attr->chain || attr->prio)
1041
+ ft = mlx5_chains_get_table(nic_chains,
1042
+ attr->chain, attr->prio,
1043
+ MLX5E_TC_FT_LEVEL);
1044
+ else
1045
+ ft = attr->ft;
1046
+
1047
+ if (IS_ERR(ft)) {
1048
+ rule = ERR_CAST(ft);
1049
+ goto err_ft_get;
1050
+ }
1051
+
1052
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
1053
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1054
+
1055
+ rule = mlx5_add_flow_rules(ft, spec,
1056
+ &flow_act, dest, dest_ix);
1057
+ if (IS_ERR(rule))
1058
+ goto err_rule;
1059
+
1060
+ return rule;
1061
+
1062
+err_rule:
1063
+ if (attr->chain || attr->prio)
1064
+ mlx5_chains_put_table(nic_chains,
1065
+ attr->chain, attr->prio,
1066
+ MLX5E_TC_FT_LEVEL);
1067
+err_ft_get:
1068
+ if (attr->dest_chain)
1069
+ mlx5_chains_put_table(nic_chains,
1070
+ attr->dest_chain, 1,
1071
+ MLX5E_TC_FT_LEVEL);
1072
+
1073
+ return ERR_CAST(rule);
1074
+}
1075
+
1076
+static int
1077
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1078
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
1079
+ struct mlx5e_tc_flow *flow,
1080
+ struct netlink_ext_ack *extack)
1081
+{
1082
+ struct mlx5_flow_attr *attr = flow->attr;
1083
+ struct mlx5_core_dev *dev = priv->mdev;
1084
+ struct mlx5_fc *counter = NULL;
1085
+ int err;
1086
+
1087
+ if (flow_flag_test(flow, HAIRPIN)) {
1088
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1089
+ if (err)
1090
+ return err;
7151091 }
7161092
7171093 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
7181094 counter = mlx5_fc_create(dev, true);
719
- if (IS_ERR(counter)) {
720
- rule = ERR_CAST(counter);
721
- goto err_fc_create;
722
- }
723
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
724
- dest[dest_ix].counter = counter;
725
- dest_ix++;
1095
+ if (IS_ERR(counter))
1096
+ return PTR_ERR(counter);
1097
+
1098
+ attr->counter = counter;
7261099 }
7271100
7281101 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
7291102 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
730
- flow_act.modify_id = attr->mod_hdr_id;
731
- kfree(parse_attr->mod_hdr_actions);
732
- if (err) {
733
- rule = ERR_PTR(err);
734
- goto err_create_mod_hdr_id;
735
- }
1103
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1104
+ if (err)
1105
+ return err;
7361106 }
7371107
738
- if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
739
- int tc_grp_size, tc_tbl_size;
740
- u32 max_flow_counter;
1108
+ if (flow_flag_test(flow, CT))
1109
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
1110
+ attr, &parse_attr->mod_hdr_acts);
1111
+ else
1112
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1113
+ attr);
7411114
742
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
743
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
1115
+ return PTR_ERR_OR_ZERO(flow->rule[0]);
1116
+}
7441117
745
- tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
1118
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1119
+ struct mlx5_flow_handle *rule,
1120
+ struct mlx5_flow_attr *attr)
1121
+{
1122
+ struct mlx5_fs_chains *nic_chains = nic_chains(priv);
7461123
747
- tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
748
- BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
1124
+ mlx5_del_flow_rules(rule);
7491125
750
- priv->fs.tc.t =
751
- mlx5_create_auto_grouped_flow_table(priv->fs.ns,
752
- MLX5E_TC_PRIO,
753
- tc_tbl_size,
754
- MLX5E_TC_TABLE_NUM_GROUPS,
755
- MLX5E_TC_FT_LEVEL, 0);
756
- if (IS_ERR(priv->fs.tc.t)) {
757
- netdev_err(priv->netdev,
758
- "Failed to create tc offload table\n");
759
- rule = ERR_CAST(priv->fs.tc.t);
760
- goto err_create_ft;
761
- }
1126
+ if (attr->chain || attr->prio)
1127
+ mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1128
+ MLX5E_TC_FT_LEVEL);
7621129
763
- table_created = true;
764
- }
765
-
766
- if (attr->match_level != MLX5_MATCH_NONE)
767
- parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
768
-
769
- rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
770
- &flow_act, dest, dest_ix);
771
-
772
- if (IS_ERR(rule))
773
- goto err_add_rule;
774
-
775
- return rule;
776
-
777
-err_add_rule:
778
- if (table_created) {
779
- mlx5_destroy_flow_table(priv->fs.tc.t);
780
- priv->fs.tc.t = NULL;
781
- }
782
-err_create_ft:
783
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
784
- mlx5e_detach_mod_hdr(priv, flow);
785
-err_create_mod_hdr_id:
786
- mlx5_fc_destroy(dev, counter);
787
-err_fc_create:
788
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
789
- mlx5e_hairpin_flow_del(priv, flow);
790
-err_add_hairpin_flow:
791
- return rule;
1130
+ if (attr->dest_chain)
1131
+ mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1132
+ MLX5E_TC_FT_LEVEL);
7921133 }
7931134
7941135 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
7951136 struct mlx5e_tc_flow *flow)
7961137 {
797
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
798
- struct mlx5_fc *counter = NULL;
1138
+ struct mlx5_flow_attr *attr = flow->attr;
1139
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
7991140
800
- counter = mlx5_flow_rule_counter(flow->rule[0]);
801
- mlx5_del_flow_rules(flow->rule[0]);
802
- mlx5_fc_destroy(priv->mdev, counter);
1141
+ flow_flag_clear(flow, OFFLOADED);
8031142
804
- if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
805
- mlx5_destroy_flow_table(priv->fs.tc.t);
1143
+ if (flow_flag_test(flow, CT))
1144
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1145
+ else if (!IS_ERR_OR_NULL(flow->rule[0]))
1146
+ mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1147
+
1148
+ /* Remove root table if no rules are left to avoid
1149
+ * extra steering hops.
1150
+ */
1151
+ mutex_lock(&priv->fs.tc.t_lock);
1152
+ if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1153
+ !IS_ERR_OR_NULL(tc->t)) {
1154
+ mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
8061155 priv->fs.tc.t = NULL;
8071156 }
1157
+ mutex_unlock(&priv->fs.tc.t_lock);
1158
+
1159
+ kvfree(attr->parse_attr);
8081160
8091161 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
8101162 mlx5e_detach_mod_hdr(priv, flow);
8111163
812
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
1164
+ mlx5_fc_destroy(priv->mdev, attr->counter);
1165
+
1166
+ if (flow_flag_test(flow, HAIRPIN))
8131167 mlx5e_hairpin_flow_del(priv, flow);
1168
+
1169
+ kfree(flow->attr);
8141170 }
8151171
8161172 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
817
- struct mlx5e_tc_flow *flow);
1173
+ struct mlx5e_tc_flow *flow, int out_index);
8181174
8191175 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
820
- struct ip_tunnel_info *tun_info,
1176
+ struct mlx5e_tc_flow *flow,
8211177 struct net_device *mirred_dev,
1178
+ int out_index,
1179
+ struct netlink_ext_ack *extack,
8221180 struct net_device **encap_dev,
823
- struct mlx5e_tc_flow *flow);
1181
+ bool *encap_valid);
1182
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
1183
+ struct mlx5e_tc_flow *flow,
1184
+ struct netlink_ext_ack *extack);
1185
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1186
+ struct mlx5e_tc_flow *flow);
8241187
8251188 static struct mlx5_flow_handle *
1189
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1190
+ struct mlx5e_tc_flow *flow,
1191
+ struct mlx5_flow_spec *spec,
1192
+ struct mlx5_flow_attr *attr)
1193
+{
1194
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1195
+ struct mlx5_flow_handle *rule;
1196
+
1197
+ if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1198
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1199
+
1200
+ if (flow_flag_test(flow, CT)) {
1201
+ mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1202
+
1203
+ return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
1204
+ flow, spec, attr,
1205
+ mod_hdr_acts);
1206
+ }
1207
+
1208
+ rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1209
+ if (IS_ERR(rule))
1210
+ return rule;
1211
+
1212
+ if (attr->esw_attr->split_count) {
1213
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1214
+ if (IS_ERR(flow->rule[1])) {
1215
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1216
+ return flow->rule[1];
1217
+ }
1218
+ }
1219
+
1220
+ return rule;
1221
+}
1222
+
1223
+static void
1224
+mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1225
+ struct mlx5e_tc_flow *flow,
1226
+ struct mlx5_flow_attr *attr)
1227
+{
1228
+ flow_flag_clear(flow, OFFLOADED);
1229
+
1230
+ if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1231
+ goto offload_rule_0;
1232
+
1233
+ if (flow_flag_test(flow, CT)) {
1234
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1235
+ return;
1236
+ }
1237
+
1238
+ if (attr->esw_attr->split_count)
1239
+ mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1240
+
1241
+offload_rule_0:
1242
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1243
+}
1244
+
1245
+static struct mlx5_flow_handle *
1246
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1247
+ struct mlx5e_tc_flow *flow,
1248
+ struct mlx5_flow_spec *spec)
1249
+{
1250
+ struct mlx5_flow_attr *slow_attr;
1251
+ struct mlx5_flow_handle *rule;
1252
+
1253
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1254
+ if (!slow_attr)
1255
+ return ERR_PTR(-ENOMEM);
1256
+
1257
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1258
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1259
+ slow_attr->esw_attr->split_count = 0;
1260
+ slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1261
+
1262
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1263
+ if (!IS_ERR(rule))
1264
+ flow_flag_set(flow, SLOW);
1265
+
1266
+ kfree(slow_attr);
1267
+
1268
+ return rule;
1269
+}
1270
+
1271
+static void
1272
+mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1273
+ struct mlx5e_tc_flow *flow)
1274
+{
1275
+ struct mlx5_flow_attr *slow_attr;
1276
+
1277
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1278
+ if (!slow_attr) {
1279
+ mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1280
+ return;
1281
+ }
1282
+
1283
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1284
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1285
+ slow_attr->esw_attr->split_count = 0;
1286
+ slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1287
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1288
+ flow_flag_clear(flow, SLOW);
1289
+ kfree(slow_attr);
1290
+}
1291
+
1292
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1293
+ * function.
1294
+ */
1295
+static void unready_flow_add(struct mlx5e_tc_flow *flow,
1296
+ struct list_head *unready_flows)
1297
+{
1298
+ flow_flag_set(flow, NOT_READY);
1299
+ list_add_tail(&flow->unready, unready_flows);
1300
+}
1301
+
1302
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1303
+ * function.
1304
+ */
1305
+static void unready_flow_del(struct mlx5e_tc_flow *flow)
1306
+{
1307
+ list_del(&flow->unready);
1308
+ flow_flag_clear(flow, NOT_READY);
1309
+}
1310
+
1311
+static void add_unready_flow(struct mlx5e_tc_flow *flow)
1312
+{
1313
+ struct mlx5_rep_uplink_priv *uplink_priv;
1314
+ struct mlx5e_rep_priv *rpriv;
1315
+ struct mlx5_eswitch *esw;
1316
+
1317
+ esw = flow->priv->mdev->priv.eswitch;
1318
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1319
+ uplink_priv = &rpriv->uplink_priv;
1320
+
1321
+ mutex_lock(&uplink_priv->unready_flows_lock);
1322
+ unready_flow_add(flow, &uplink_priv->unready_flows);
1323
+ mutex_unlock(&uplink_priv->unready_flows_lock);
1324
+}
1325
+
1326
+static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1327
+{
1328
+ struct mlx5_rep_uplink_priv *uplink_priv;
1329
+ struct mlx5e_rep_priv *rpriv;
1330
+ struct mlx5_eswitch *esw;
1331
+
1332
+ esw = flow->priv->mdev->priv.eswitch;
1333
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1334
+ uplink_priv = &rpriv->uplink_priv;
1335
+
1336
+ mutex_lock(&uplink_priv->unready_flows_lock);
1337
+ unready_flow_del(flow);
1338
+ mutex_unlock(&uplink_priv->unready_flows_lock);
1339
+}
1340
+
1341
+static int
8261342 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
827
- struct mlx5e_tc_flow_parse_attr *parse_attr,
828
- struct mlx5e_tc_flow *flow)
1343
+ struct mlx5e_tc_flow *flow,
1344
+ struct netlink_ext_ack *extack)
8291345 {
8301346 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
831
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
8321347 struct net_device *out_dev, *encap_dev = NULL;
833
- struct mlx5_flow_handle *rule = NULL;
1348
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
1349
+ struct mlx5_flow_attr *attr = flow->attr;
1350
+ struct mlx5_esw_flow_attr *esw_attr;
1351
+ struct mlx5_fc *counter = NULL;
8341352 struct mlx5e_rep_priv *rpriv;
8351353 struct mlx5e_priv *out_priv;
836
- int err;
1354
+ bool encap_valid = true;
1355
+ u32 max_prio, max_chain;
1356
+ int err = 0;
1357
+ int out_index;
8371358
838
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
1359
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
1360
+ NL_SET_ERR_MSG_MOD(extack,
1361
+ "E-switch priorities unsupported, upgrade FW");
1362
+ return -EOPNOTSUPP;
1363
+ }
1364
+
1365
+ /* We check chain range only for tc flows.
1366
+ * For ft flows, we checked attr->chain was originally 0 and set it to
1367
+ * FDB_FT_CHAIN which is outside tc range.
1368
+ * See mlx5e_rep_setup_ft_cb().
1369
+ */
1370
+ max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1371
+ if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1372
+ NL_SET_ERR_MSG_MOD(extack,
1373
+ "Requested chain is out of supported range");
1374
+ return -EOPNOTSUPP;
1375
+ }
1376
+
1377
+ max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1378
+ if (attr->prio > max_prio) {
1379
+ NL_SET_ERR_MSG_MOD(extack,
1380
+ "Requested priority is out of supported range");
1381
+ return -EOPNOTSUPP;
1382
+ }
1383
+
1384
+ if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1385
+ err = mlx5e_attach_decap(priv, flow, extack);
1386
+ if (err)
1387
+ return err;
1388
+ }
1389
+
1390
+ parse_attr = attr->parse_attr;
1391
+ esw_attr = attr->esw_attr;
1392
+
1393
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1394
+ int mirred_ifindex;
1395
+
1396
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1397
+ continue;
1398
+
1399
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
8391400 out_dev = __dev_get_by_index(dev_net(priv->netdev),
840
- attr->parse_attr->mirred_ifindex);
841
- err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
842
- out_dev, &encap_dev, flow);
843
- if (err) {
844
- rule = ERR_PTR(err);
845
- if (err != -EAGAIN)
846
- goto err_attach_encap;
847
- }
1401
+ mirred_ifindex);
1402
+ err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1403
+ extack, &encap_dev, &encap_valid);
1404
+ if (err)
1405
+ return err;
1406
+
8481407 out_priv = netdev_priv(encap_dev);
8491408 rpriv = out_priv->ppriv;
850
- attr->out_rep[attr->out_count] = rpriv->rep;
851
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
1409
+ esw_attr->dests[out_index].rep = rpriv->rep;
1410
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
8521411 }
8531412
8541413 err = mlx5_eswitch_add_vlan_action(esw, attr);
855
- if (err) {
856
- rule = ERR_PTR(err);
857
- goto err_add_vlan;
858
- }
1414
+ if (err)
1415
+ return err;
8591416
860
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1417
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1418
+ !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
8611419 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
862
- kfree(parse_attr->mod_hdr_actions);
863
- if (err) {
864
- rule = ERR_PTR(err);
865
- goto err_mod_hdr;
866
- }
1420
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1421
+ if (err)
1422
+ return err;
8671423 }
8681424
869
- /* we get here if (1) there's no error (rule being null) or when
870
- * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
1425
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1426
+ counter = mlx5_fc_create(esw_attr->counter_dev, true);
1427
+ if (IS_ERR(counter))
1428
+ return PTR_ERR(counter);
1429
+
1430
+ attr->counter = counter;
1431
+ }
1432
+
1433
+ /* we get here if one of the following takes place:
1434
+ * (1) there's no error
1435
+ * (2) there's an encap action and we don't have valid neigh
8711436 */
872
- if (rule != ERR_PTR(-EAGAIN)) {
873
- rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
874
- if (IS_ERR(rule))
875
- goto err_add_rule;
1437
+ if (!encap_valid)
1438
+ flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1439
+ else
1440
+ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
8761441
877
- if (attr->mirror_count) {
878
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
879
- if (IS_ERR(flow->rule[1]))
880
- goto err_fwd_rule;
881
- }
882
- }
883
- return rule;
1442
+ if (IS_ERR(flow->rule[0]))
1443
+ return PTR_ERR(flow->rule[0]);
1444
+ else
1445
+ flow_flag_set(flow, OFFLOADED);
8841446
885
-err_fwd_rule:
886
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
887
- rule = flow->rule[1];
888
-err_add_rule:
889
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
890
- mlx5e_detach_mod_hdr(priv, flow);
891
-err_mod_hdr:
892
- mlx5_eswitch_del_vlan_action(esw, attr);
893
-err_add_vlan:
894
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
895
- mlx5e_detach_encap(priv, flow);
896
-err_attach_encap:
897
- return rule;
1447
+ return 0;
1448
+}
1449
+
1450
+static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1451
+{
1452
+ struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1453
+ void *headers_v = MLX5_ADDR_OF(fte_match_param,
1454
+ spec->match_value,
1455
+ misc_parameters_3);
1456
+ u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1457
+ headers_v,
1458
+ geneve_tlv_option_0_data);
1459
+
1460
+ return !!geneve_tlv_opt_0_data;
8981461 }
8991462
9001463 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
9011464 struct mlx5e_tc_flow *flow)
9021465 {
9031466 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
904
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1467
+ struct mlx5_flow_attr *attr = flow->attr;
1468
+ int out_index;
9051469
906
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
907
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
908
- if (attr->mirror_count)
909
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
910
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1470
+ mlx5e_put_flow_tunnel_id(flow);
1471
+
1472
+ if (flow_flag_test(flow, NOT_READY))
1473
+ remove_unready_flow(flow);
1474
+
1475
+ if (mlx5e_is_offloaded_flow(flow)) {
1476
+ if (flow_flag_test(flow, SLOW))
1477
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
1478
+ else
1479
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
9111480 }
1481
+
1482
+ if (mlx5_flow_has_geneve_opt(flow))
1483
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
9121484
9131485 mlx5_eswitch_del_vlan_action(esw, attr);
9141486
915
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
916
- mlx5e_detach_encap(priv, flow);
917
- kvfree(attr->parse_attr);
918
- }
1487
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1488
+ if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1489
+ mlx5e_detach_encap(priv, flow, out_index);
1490
+ kfree(attr->parse_attr->tun_info[out_index]);
1491
+ }
1492
+ kvfree(attr->parse_attr);
1493
+
1494
+ mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
9191495
9201496 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
9211497 mlx5e_detach_mod_hdr(priv, flow);
1498
+
1499
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1500
+ mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
1501
+
1502
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
1503
+ mlx5e_detach_decap(priv, flow);
1504
+
1505
+ kfree(flow->attr);
9221506 }
9231507
9241508 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
925
- struct mlx5e_encap_entry *e)
1509
+ struct mlx5e_encap_entry *e,
1510
+ struct list_head *flow_list)
9261511 {
9271512 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
9281513 struct mlx5_esw_flow_attr *esw_attr;
1514
+ struct mlx5_flow_handle *rule;
1515
+ struct mlx5_flow_attr *attr;
1516
+ struct mlx5_flow_spec *spec;
9291517 struct mlx5e_tc_flow *flow;
9301518 int err;
9311519
932
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
933
- e->encap_size, e->encap_header,
934
- &e->encap_id);
935
- if (err) {
936
- mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
937
- err);
1520
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1521
+ e->reformat_type,
1522
+ e->encap_size, e->encap_header,
1523
+ MLX5_FLOW_NAMESPACE_FDB);
1524
+ if (IS_ERR(e->pkt_reformat)) {
1525
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
1526
+ PTR_ERR(e->pkt_reformat));
9381527 return;
9391528 }
9401529 e->flags |= MLX5_ENCAP_ENTRY_VALID;
9411530 mlx5e_rep_queue_neigh_stats_work(priv);
9421531
943
- list_for_each_entry(flow, &e->flows, encap) {
944
- esw_attr = flow->esw_attr;
945
- esw_attr->encap_id = e->encap_id;
946
- flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
947
- if (IS_ERR(flow->rule[0])) {
948
- err = PTR_ERR(flow->rule[0]);
1532
+ list_for_each_entry(flow, flow_list, tmp_list) {
1533
+ bool all_flow_encaps_valid = true;
1534
+ int i;
1535
+
1536
+ if (!mlx5e_is_offloaded_flow(flow))
1537
+ continue;
1538
+ attr = flow->attr;
1539
+ esw_attr = attr->esw_attr;
1540
+ spec = &attr->parse_attr->spec;
1541
+
1542
+ esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1543
+ esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1544
+ /* Flow can be associated with multiple encap entries.
1545
+ * Before offloading the flow verify that all of them have
1546
+ * a valid neighbour.
1547
+ */
1548
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1549
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1550
+ continue;
1551
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1552
+ all_flow_encaps_valid = false;
1553
+ break;
1554
+ }
1555
+ }
1556
+ /* Do not offload flows with unresolved neighbors */
1557
+ if (!all_flow_encaps_valid)
1558
+ continue;
1559
+ /* update from slow path rule to encap rule */
1560
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1561
+ if (IS_ERR(rule)) {
1562
+ err = PTR_ERR(rule);
9491563 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
9501564 err);
9511565 continue;
9521566 }
9531567
954
- if (esw_attr->mirror_count) {
955
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
956
- if (IS_ERR(flow->rule[1])) {
957
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
958
- err = PTR_ERR(flow->rule[1]);
959
- mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
960
- err);
961
- continue;
962
- }
963
- }
964
-
965
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
1568
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
1569
+ flow->rule[0] = rule;
1570
+ /* was unset when slow path rule removed */
1571
+ flow_flag_set(flow, OFFLOADED);
9661572 }
9671573 }
9681574
9691575 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
970
- struct mlx5e_encap_entry *e)
1576
+ struct mlx5e_encap_entry *e,
1577
+ struct list_head *flow_list)
9711578 {
9721579 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1580
+ struct mlx5_esw_flow_attr *esw_attr;
1581
+ struct mlx5_flow_handle *rule;
1582
+ struct mlx5_flow_attr *attr;
1583
+ struct mlx5_flow_spec *spec;
1584
+ struct mlx5e_tc_flow *flow;
1585
+ int err;
1586
+
1587
+ list_for_each_entry(flow, flow_list, tmp_list) {
1588
+ if (!mlx5e_is_offloaded_flow(flow))
1589
+ continue;
1590
+ attr = flow->attr;
1591
+ esw_attr = attr->esw_attr;
1592
+ spec = &attr->parse_attr->spec;
1593
+
1594
+ /* update from encap rule to slow path rule */
1595
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1596
+ /* mark the flow's encap dest as non-valid */
1597
+ esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1598
+
1599
+ if (IS_ERR(rule)) {
1600
+ err = PTR_ERR(rule);
1601
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1602
+ err);
1603
+ continue;
1604
+ }
1605
+
1606
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1607
+ flow->rule[0] = rule;
1608
+ /* was unset when fast path rule removed */
1609
+ flow_flag_set(flow, OFFLOADED);
1610
+ }
1611
+
1612
+ /* we know that the encap is valid */
1613
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1614
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1615
+}
1616
+
1617
+static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1618
+{
1619
+ return flow->attr->counter;
1620
+}
1621
+
1622
+/* Takes reference to all flows attached to encap and adds the flows to
1623
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1624
+ */
1625
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1626
+{
1627
+ struct encap_flow_item *efi;
9731628 struct mlx5e_tc_flow *flow;
9741629
975
- list_for_each_entry(flow, &e->flows, encap) {
976
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
977
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1630
+ list_for_each_entry(efi, &e->flows, list) {
1631
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1632
+ if (IS_ERR(mlx5e_flow_get(flow)))
1633
+ continue;
1634
+ wait_for_completion(&flow->init_done);
9781635
979
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
980
- if (attr->mirror_count)
981
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
982
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
983
- }
1636
+ flow->tmp_efi_index = efi->index;
1637
+ list_add(&flow->tmp_list, flow_list);
1638
+ }
1639
+}
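The loop in mlx5e_take_all_encap_flows() recovers the owning mlx5e_tc_flow from each encap_flow_item by applying container_of() to the per-index element of the flow's encaps[] array. A minimal userspace sketch of that pattern follows; the struct names, MAX_ENCAPS, and the reduced container_of() here are illustrative stand-ins, not the driver's definitions (the variable array index inside offsetof() relies on the same GCC/Clang extension the kernel code uses):

#include <stdio.h>
#include <stddef.h>

#define MAX_ENCAPS 2

/* container_of as in the kernel, reduced to what this sketch needs */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct item {			/* stand-in for struct encap_flow_item */
	int index;
};

struct flow {			/* stand-in for struct mlx5e_tc_flow */
	int id;
	struct item encaps[MAX_ENCAPS];
};

int main(void)
{
	struct flow f = { .id = 7 };
	struct item *efi;
	struct flow *owner;
	int i;

	for (i = 0; i < MAX_ENCAPS; i++)
		f.encaps[i].index = i;

	/* given only a pointer to one array element, recover the flow */
	efi = &f.encaps[1];
	owner = container_of(efi, struct flow, encaps[efi->index]);
	printf("flow id %d via encaps[%d]\n", owner->id, efi->index);
	return 0;
}

Storing the element index inside the item is what makes the reverse lookup possible, since offsetof() needs to know which array slot the pointer refers to.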
1640
+
1641
+/* Iterate over tmp_list of flows attached to flow_list head. */
1642
+void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1643
+{
1644
+ struct mlx5e_tc_flow *flow, *tmp;
1645
+
1646
+ list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1647
+ mlx5e_flow_put(priv, flow);
1648
+}
1649
+
1650
+static struct mlx5e_encap_entry *
1651
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1652
+ struct mlx5e_encap_entry *e)
1653
+{
1654
+ struct mlx5e_encap_entry *next = NULL;
1655
+
1656
+retry:
1657
+ rcu_read_lock();
1658
+
1659
+ /* find encap with non-zero reference counter value */
1660
+ for (next = e ?
1661
+ list_next_or_null_rcu(&nhe->encap_list,
1662
+ &e->encap_list,
1663
+ struct mlx5e_encap_entry,
1664
+ encap_list) :
1665
+ list_first_or_null_rcu(&nhe->encap_list,
1666
+ struct mlx5e_encap_entry,
1667
+ encap_list);
1668
+ next;
1669
+ next = list_next_or_null_rcu(&nhe->encap_list,
1670
+ &next->encap_list,
1671
+ struct mlx5e_encap_entry,
1672
+ encap_list))
1673
+ if (mlx5e_encap_take(next))
1674
+ break;
1675
+
1676
+ rcu_read_unlock();
1677
+
1678
+ /* release starting encap */
1679
+ if (e)
1680
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
1681
+ if (!next)
1682
+ return next;
1683
+
1684
+ /* wait for encap to be fully initialized */
1685
+ wait_for_completion(&next->res_ready);
1686
+ /* continue searching if encap entry is not in valid state after completion */
1687
+ if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1688
+ e = next;
1689
+ goto retry;
9841690 }
9851691
986
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
987
- e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
988
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
989
- }
1692
+ return next;
9901693 }
9911694
9921695 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
9931696 {
9941697 struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1698
+ struct mlx5e_encap_entry *e = NULL;
9951699 struct mlx5e_tc_flow *flow;
996
- struct mlx5e_encap_entry *e;
9971700 struct mlx5_fc *counter;
9981701 struct neigh_table *tbl;
9991702 bool neigh_used = false;
....@@ -1004,17 +1707,31 @@
10041707 tbl = &arp_tbl;
10051708 #if IS_ENABLED(CONFIG_IPV6)
10061709 else if (m_neigh->family == AF_INET6)
1007
- tbl = &nd_tbl;
1710
+ tbl = ipv6_stub->nd_tbl;
10081711 #endif
10091712 else
10101713 return;
10111714
1012
- list_for_each_entry(e, &nhe->encap_list, encap_list) {
1013
- if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
1014
- continue;
1015
- list_for_each_entry(flow, &e->flows, encap) {
1016
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
1017
- counter = mlx5_flow_rule_counter(flow->rule[0]);
1715
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
1716
+ * next one.
1717
+ */
1718
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1719
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1720
+ struct encap_flow_item *efi, *tmp;
1721
+ struct mlx5_eswitch *esw;
1722
+ LIST_HEAD(flow_list);
1723
+
1724
+ esw = priv->mdev->priv.eswitch;
1725
+ mutex_lock(&esw->offloads.encap_tbl_lock);
1726
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1727
+ flow = container_of(efi, struct mlx5e_tc_flow,
1728
+ encaps[efi->index]);
1729
+ if (IS_ERR(mlx5e_flow_get(flow)))
1730
+ continue;
1731
+ list_add(&flow->tmp_list, &flow_list);
1732
+
1733
+ if (mlx5e_is_offloaded_flow(flow)) {
1734
+ counter = mlx5e_tc_get_counter(flow);
10181735 lastuse = mlx5_fc_query_lastuse(counter);
10191736 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
10201737 neigh_used = true;
....@@ -1022,9 +1739,17 @@
10221739 }
10231740 }
10241741 }
1025
- if (neigh_used)
1742
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1743
+
1744
+ mlx5e_put_encap_flow_list(priv, &flow_list);
1745
+ if (neigh_used) {
1746
+ /* release current encap before breaking the loop */
1747
+ mlx5e_encap_put(priv, e);
10261748 break;
1749
+ }
10271750 }
1751
+
1752
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
10281753
10291754 if (neigh_used) {
10301755 nhe->reported_lastuse = jiffies;
....@@ -1041,213 +1766,512 @@
10411766 }
10421767 }
10431768
1044
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1045
- struct mlx5e_tc_flow *flow)
1769
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
10461770 {
1047
- struct list_head *next = flow->encap.next;
1771
+ WARN_ON(!list_empty(&e->flows));
10481772
1049
- list_del(&flow->encap);
1050
- if (list_empty(next)) {
1051
- struct mlx5e_encap_entry *e;
1052
-
1053
- e = list_entry(next, struct mlx5e_encap_entry, flows);
1773
+ if (e->compl_result > 0) {
10541774 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
10551775
10561776 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1057
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
1058
-
1059
- hash_del_rcu(&e->encap_hlist);
1060
- kfree(e->encap_header);
1061
- kfree(e);
1777
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
10621778 }
1779
+
1780
+ kfree(e->tun_info);
1781
+ kfree(e->encap_header);
1782
+ kfree_rcu(e, rcu);
1783
+}
1784
+
1785
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
1786
+ struct mlx5e_decap_entry *d)
1787
+{
1788
+ WARN_ON(!list_empty(&d->flows));
1789
+
1790
+ if (!d->compl_result)
1791
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
1792
+
1793
+ kfree_rcu(d, rcu);
1794
+}
1795
+
1796
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1797
+{
1798
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1799
+
1800
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1801
+ return;
1802
+ hash_del_rcu(&e->encap_hlist);
1803
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1804
+
1805
+ mlx5e_encap_dealloc(priv, e);
1806
+}
1807
+
1808
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
1809
+{
1810
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1811
+
1812
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
1813
+ return;
1814
+ hash_del_rcu(&d->hlist);
1815
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
1816
+
1817
+ mlx5e_decap_dealloc(priv, d);
1818
+}
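mlx5e_encap_put() and mlx5e_decap_put() lean on refcount_dec_and_mutex_lock(): the reference is dropped, and only when it reaches zero is the table lock taken (and held on return) so the entry can be unhashed before it is freed. Below is a rough userspace analogue of that contract using pthreads; the dec_and_mutex_lock() helper and the entry/table names are illustrative simplifications, not the kernel API, and unlike the kernel helper this version takes the lock around every decrement:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int refcnt;		/* the kernel uses refcount_t; plain int here */
	bool hashed;
};

static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;

/* Return true with the lock held iff the count dropped to zero;
 * otherwise just decrement and return false.
 */
static bool dec_and_mutex_lock(int *ref, pthread_mutex_t *lock)
{
	pthread_mutex_lock(lock);
	if (--(*ref) != 0) {
		pthread_mutex_unlock(lock);
		return false;
	}
	return true;
}

static void entry_put(struct entry *e)
{
	if (!dec_and_mutex_lock(&e->refcnt, &tbl_lock))
		return;
	e->hashed = false;	/* stand-in for hash_del_rcu() */
	pthread_mutex_unlock(&tbl_lock);
	printf("last reference dropped, freeing entry\n");
	free(e);
}

int main(void)
{
	struct entry *e = calloc(1, sizeof(*e));

	e->refcnt = 2;		/* two users hold the entry */
	e->hashed = true;
	entry_put(e);		/* count 2 -> 1, nothing freed */
	entry_put(e);		/* count 1 -> 0, unhash and free */
	return 0;
}

The point of the combined operation is that the final decrement and the removal from the hash table appear atomic to anyone looking the entry up under the same lock.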
1819
+
1820
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1821
+ struct mlx5e_tc_flow *flow, int out_index)
1822
+{
1823
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1824
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1825
+
1826
+ /* flow wasn't fully initialized */
1827
+ if (!e)
1828
+ return;
1829
+
1830
+ mutex_lock(&esw->offloads.encap_tbl_lock);
1831
+ list_del(&flow->encaps[out_index].list);
1832
+ flow->encaps[out_index].e = NULL;
1833
+ if (!refcount_dec_and_test(&e->refcnt)) {
1834
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1835
+ return;
1836
+ }
1837
+ hash_del_rcu(&e->encap_hlist);
1838
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1839
+
1840
+ mlx5e_encap_dealloc(priv, e);
1841
+}
1842
+
1843
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1844
+ struct mlx5e_tc_flow *flow)
1845
+{
1846
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1847
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
1848
+
1849
+ if (!d)
1850
+ return;
1851
+
1852
+ mutex_lock(&esw->offloads.decap_tbl_lock);
1853
+ list_del(&flow->l3_to_l2_reformat);
1854
+ flow->decap_reformat = NULL;
1855
+
1856
+ if (!refcount_dec_and_test(&d->refcnt)) {
1857
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
1858
+ return;
1859
+ }
1860
+ hash_del_rcu(&d->hlist);
1861
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
1862
+
1863
+ mlx5e_decap_dealloc(priv, d);
1864
+}
1865
+
1866
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1867
+{
1868
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1869
+
1870
+ if (!flow_flag_test(flow, ESWITCH) ||
1871
+ !flow_flag_test(flow, DUP))
1872
+ return;
1873
+
1874
+ mutex_lock(&esw->offloads.peer_mutex);
1875
+ list_del(&flow->peer);
1876
+ mutex_unlock(&esw->offloads.peer_mutex);
1877
+
1878
+ flow_flag_clear(flow, DUP);
1879
+
1880
+ if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1881
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1882
+ kfree(flow->peer_flow);
1883
+ }
1884
+
1885
+ flow->peer_flow = NULL;
1886
+}
1887
+
1888
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1889
+{
1890
+ struct mlx5_core_dev *dev = flow->priv->mdev;
1891
+ struct mlx5_devcom *devcom = dev->priv.devcom;
1892
+ struct mlx5_eswitch *peer_esw;
1893
+
1894
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1895
+ if (!peer_esw)
1896
+ return;
1897
+
1898
+ __mlx5e_tc_del_fdb_peer_flow(flow);
1899
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
10631900 }
10641901
10651902 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
10661903 struct mlx5e_tc_flow *flow)
10671904 {
1068
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
1905
+ if (mlx5e_is_eswitch_flow(flow)) {
1906
+ mlx5e_tc_del_fdb_peer_flow(flow);
10691907 mlx5e_tc_del_fdb_flow(priv, flow);
1070
- else
1908
+ } else {
10711909 mlx5e_tc_del_nic_flow(priv, flow);
1072
-}
1073
-
1074
-static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
1075
- struct tc_cls_flower_offload *f)
1076
-{
1077
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1078
- outer_headers);
1079
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1080
- outer_headers);
1081
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1082
- misc_parameters);
1083
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1084
- misc_parameters);
1085
-
1086
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
1087
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
1088
-
1089
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
1090
- struct flow_dissector_key_keyid *key =
1091
- skb_flow_dissector_target(f->dissector,
1092
- FLOW_DISSECTOR_KEY_ENC_KEYID,
1093
- f->key);
1094
- struct flow_dissector_key_keyid *mask =
1095
- skb_flow_dissector_target(f->dissector,
1096
- FLOW_DISSECTOR_KEY_ENC_KEYID,
1097
- f->mask);
1098
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
1099
- be32_to_cpu(mask->keyid));
1100
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
1101
- be32_to_cpu(key->keyid));
11021910 }
11031911 }
11041912
1105
-static int parse_tunnel_attr(struct mlx5e_priv *priv,
1106
- struct mlx5_flow_spec *spec,
1107
- struct tc_cls_flower_offload *f)
1913
+static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
11081914 {
1109
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1110
- outer_headers);
1111
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1112
- outer_headers);
1915
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1916
+ struct flow_action *flow_action = &rule->action;
1917
+ const struct flow_action_entry *act;
1918
+ int i;
11131919
1114
- struct flow_dissector_key_control *enc_control =
1115
- skb_flow_dissector_target(f->dissector,
1116
- FLOW_DISSECTOR_KEY_ENC_CONTROL,
1117
- f->key);
1920
+ flow_action_for_each(i, act, flow_action) {
1921
+ switch (act->id) {
1922
+ case FLOW_ACTION_GOTO:
1923
+ return true;
1924
+ default:
1925
+ continue;
1926
+ }
1927
+ }
11181928
1119
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
1120
- struct flow_dissector_key_ports *key =
1121
- skb_flow_dissector_target(f->dissector,
1122
- FLOW_DISSECTOR_KEY_ENC_PORTS,
1123
- f->key);
1124
- struct flow_dissector_key_ports *mask =
1125
- skb_flow_dissector_target(f->dissector,
1126
- FLOW_DISSECTOR_KEY_ENC_PORTS,
1127
- f->mask);
1929
+ return false;
1930
+}
11281931
1129
- /* Full udp dst port must be given */
1130
- if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
1131
- goto vxlan_match_offload_err;
1932
+static int
1933
+enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1934
+ struct flow_dissector_key_enc_opts *opts,
1935
+ struct netlink_ext_ack *extack,
1936
+ bool *dont_care)
1937
+{
1938
+ struct geneve_opt *opt;
1939
+ int off = 0;
11321940
1133
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
1134
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
1135
- parse_vxlan_attr(spec, f);
1136
- else {
1137
- netdev_warn(priv->netdev,
1138
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
1139
- return -EOPNOTSUPP;
1941
+ *dont_care = true;
1942
+
1943
+ while (opts->len > off) {
1944
+ opt = (struct geneve_opt *)&opts->data[off];
1945
+
1946
+ if (!(*dont_care) || opt->opt_class || opt->type ||
1947
+ memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1948
+ *dont_care = false;
1949
+
1950
+ if (opt->opt_class != htons(U16_MAX) ||
1951
+ opt->type != U8_MAX) {
1952
+ NL_SET_ERR_MSG(extack,
1953
+ "Partial match of tunnel options in chain > 0 isn't supported");
1954
+ netdev_warn(priv->netdev,
1955
+ "Partial match of tunnel options in chain > 0 isn't supported");
1956
+ return -EOPNOTSUPP;
1957
+ }
11401958 }
11411959
1142
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1143
- udp_dport, ntohs(mask->dst));
1144
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1145
- udp_dport, ntohs(key->dst));
1146
-
1147
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1148
- udp_sport, ntohs(mask->src));
1149
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1150
- udp_sport, ntohs(key->src));
1151
- } else { /* udp dst port must be given */
1152
-vxlan_match_offload_err:
1153
- netdev_warn(priv->netdev,
1154
- "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
1155
- return -EOPNOTSUPP;
1960
+ off += sizeof(struct geneve_opt) + opt->length * 4;
11561961 }
1157
-
1158
- if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1159
- struct flow_dissector_key_ipv4_addrs *key =
1160
- skb_flow_dissector_target(f->dissector,
1161
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1162
- f->key);
1163
- struct flow_dissector_key_ipv4_addrs *mask =
1164
- skb_flow_dissector_target(f->dissector,
1165
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1166
- f->mask);
1167
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1168
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
1169
- ntohl(mask->src));
1170
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1171
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
1172
- ntohl(key->src));
1173
-
1174
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1175
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1176
- ntohl(mask->dst));
1177
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1178
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1179
- ntohl(key->dst));
1180
-
1181
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
1182
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
1183
- } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1184
- struct flow_dissector_key_ipv6_addrs *key =
1185
- skb_flow_dissector_target(f->dissector,
1186
- FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1187
- f->key);
1188
- struct flow_dissector_key_ipv6_addrs *mask =
1189
- skb_flow_dissector_target(f->dissector,
1190
- FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1191
- f->mask);
1192
-
1193
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1194
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
1195
- &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1196
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1197
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
1198
- &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1199
-
1200
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1201
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1202
- &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1203
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1204
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1205
- &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1206
-
1207
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
1208
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
1209
- }
1210
-
1211
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
1212
- struct flow_dissector_key_ip *key =
1213
- skb_flow_dissector_target(f->dissector,
1214
- FLOW_DISSECTOR_KEY_ENC_IP,
1215
- f->key);
1216
- struct flow_dissector_key_ip *mask =
1217
- skb_flow_dissector_target(f->dissector,
1218
- FLOW_DISSECTOR_KEY_ENC_IP,
1219
- f->mask);
1220
-
1221
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
1222
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
1223
-
1224
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
1225
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
1226
-
1227
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
1228
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
1229
- }
1230
-
1231
- /* Enforce DMAC when offloading incoming tunneled flows.
1232
- * Flow counters require a match on the DMAC.
1233
- */
1234
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
1235
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
1236
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1237
- dmac_47_16), priv->netdev->dev_addr);
1238
-
1239
- /* let software handle IP fragments */
1240
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
1241
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
12421962
12431963 return 0;
12441964 }
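enc_opts_is_dont_care_or_full_match() walks the raw option buffer as a sequence of variable-length TLVs: each struct geneve_opt header is followed by opt->length four-byte data words, so the cursor advances by sizeof(struct geneve_opt) + opt->length * 4. A small userspace sketch of that walk follows; the trimmed struct layout is an assumption for illustration only (the real definition in include/net/geneve.h also carries the option data and uses network byte order):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Trimmed TLV header for the sketch: class, type, and a length counted
 * in 4-byte data words that follow the header.
 */
struct geneve_opt {
	uint16_t opt_class;
	uint8_t  type;
	uint8_t  length;	/* data words, each 4 bytes */
};

int main(void)
{
	uint8_t buf[64] = {0};
	struct geneve_opt o1 = { .opt_class = 0x0102, .type = 0x80, .length = 1 };
	struct geneve_opt o2 = { .opt_class = 0x0304, .type = 0x20, .length = 2 };
	int len = 0, off = 0;

	/* lay out two options back to back: header, then length * 4 data bytes */
	memcpy(&buf[len], &o1, sizeof(o1)); len += sizeof(o1) + o1.length * 4;
	memcpy(&buf[len], &o2, sizeof(o2)); len += sizeof(o2) + o2.length * 4;

	/* the same walk the driver performs (it casts in place; copy out
	 * here to keep the sketch portable)
	 */
	while (len > off) {
		struct geneve_opt opt;

		memcpy(&opt, &buf[off], sizeof(opt));
		printf("class 0x%04x type 0x%02x data %d bytes\n",
		       opt.opt_class, opt.type, opt.length * 4);
		off += sizeof(struct geneve_opt) + opt.length * 4;
	}
	return 0;
}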
12451965
1246
-static int __parse_cls_flower(struct mlx5e_priv *priv,
1247
- struct mlx5_flow_spec *spec,
1248
- struct tc_cls_flower_offload *f,
1249
- u8 *match_level)
1966
+#define COPY_DISSECTOR(rule, diss_key, dst)\
1967
+({ \
1968
+ struct flow_rule *__rule = (rule);\
1969
+ typeof(dst) __dst = dst;\
1970
+\
1971
+ memcpy(__dst,\
1972
+ skb_flow_dissector_target(__rule->match.dissector,\
1973
+ diss_key,\
1974
+ __rule->match.key),\
1975
+ sizeof(*__dst));\
1976
+})
1977
+
1978
+static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1979
+ struct mlx5e_tc_flow *flow,
1980
+ struct flow_cls_offload *f,
1981
+ struct net_device *filter_dev)
12501982 {
1983
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1984
+ struct netlink_ext_ack *extack = f->common.extack;
1985
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1986
+ struct flow_match_enc_opts enc_opts_match;
1987
+ struct tunnel_match_enc_opts tun_enc_opts;
1988
+ struct mlx5_rep_uplink_priv *uplink_priv;
1989
+ struct mlx5_flow_attr *attr = flow->attr;
1990
+ struct mlx5e_rep_priv *uplink_rpriv;
1991
+ struct tunnel_match_key tunnel_key;
1992
+ bool enc_opts_is_dont_care = true;
1993
+ u32 tun_id, enc_opts_id = 0;
1994
+ struct mlx5_eswitch *esw;
1995
+ u32 value, mask;
1996
+ int err;
1997
+
1998
+ esw = priv->mdev->priv.eswitch;
1999
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2000
+ uplink_priv = &uplink_rpriv->uplink_priv;
2001
+
2002
+ memset(&tunnel_key, 0, sizeof(tunnel_key));
2003
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2004
+ &tunnel_key.enc_control);
2005
+ if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2006
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2007
+ &tunnel_key.enc_ipv4);
2008
+ else
2009
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2010
+ &tunnel_key.enc_ipv6);
2011
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2012
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2013
+ &tunnel_key.enc_tp);
2014
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2015
+ &tunnel_key.enc_key_id);
2016
+ tunnel_key.filter_ifindex = filter_dev->ifindex;
2017
+
2018
+ err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2019
+ if (err)
2020
+ return err;
2021
+
2022
+ flow_rule_match_enc_opts(rule, &enc_opts_match);
2023
+ err = enc_opts_is_dont_care_or_full_match(priv,
2024
+ enc_opts_match.mask,
2025
+ extack,
2026
+ &enc_opts_is_dont_care);
2027
+ if (err)
2028
+ goto err_enc_opts;
2029
+
2030
+ if (!enc_opts_is_dont_care) {
2031
+ memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2032
+ memcpy(&tun_enc_opts.key, enc_opts_match.key,
2033
+ sizeof(*enc_opts_match.key));
2034
+ memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2035
+ sizeof(*enc_opts_match.mask));
2036
+
2037
+ err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2038
+ &tun_enc_opts, &enc_opts_id);
2039
+ if (err)
2040
+ goto err_enc_opts;
2041
+ }
2042
+
2043
+ value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2044
+ mask = enc_opts_id ? TUNNEL_ID_MASK :
2045
+ (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2046
+
2047
+ if (attr->chain) {
2048
+ mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2049
+ TUNNEL_TO_REG, value, mask);
2050
+ } else {
2051
+ mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2052
+ err = mlx5e_tc_match_to_reg_set(priv->mdev,
2053
+ mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2054
+ TUNNEL_TO_REG, value);
2055
+ if (err)
2056
+ goto err_set;
2057
+
2058
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2059
+ }
2060
+
2061
+ flow->tunnel_id = value;
2062
+ return 0;
2063
+
2064
+err_set:
2065
+ if (enc_opts_id)
2066
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2067
+ enc_opts_id);
2068
+err_enc_opts:
2069
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2070
+ return err;
2071
+}
2072
+
2073
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2074
+{
2075
+ u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
2076
+ u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
2077
+ struct mlx5_rep_uplink_priv *uplink_priv;
2078
+ struct mlx5e_rep_priv *uplink_rpriv;
2079
+ struct mlx5_eswitch *esw;
2080
+
2081
+ esw = flow->priv->mdev->priv.eswitch;
2082
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2083
+ uplink_priv = &uplink_rpriv->uplink_priv;
2084
+
2085
+ if (tun_id)
2086
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2087
+ if (enc_opts_id)
2088
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2089
+ enc_opts_id);
2090
+}
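The two mapping ids allocated in mlx5e_get_flow_tunnel_id() are packed into a single register value: the tunnel id occupies the high bits and the options id the low ENC_OPTS_BITS bits, and mlx5e_put_flow_tunnel_id() recovers both by shifting and masking. A toy illustration of that packing follows; the 12-bit width and the macro values below are assumptions for the sketch, not the driver's actual constants:

#include <stdint.h>
#include <stdio.h>

/* Assumed widths, for illustration only */
#define ENC_OPTS_BITS		12
#define ENC_OPTS_BITS_MASK	((1u << ENC_OPTS_BITS) - 1)

int main(void)
{
	uint32_t tun_id = 5, enc_opts_id = 3;

	/* pack: tunnel id in the high bits, options id in the low bits */
	uint32_t value = tun_id << ENC_OPTS_BITS | enc_opts_id;

	/* unpack, as done when the flow is torn down */
	uint32_t got_opts = value & ENC_OPTS_BITS_MASK;
	uint32_t got_tun  = value >> ENC_OPTS_BITS;

	printf("value 0x%x -> tun_id %u enc_opts_id %u\n",
	       value, got_tun, got_opts);
	return 0;
}

Keeping enc_opts_id in its own low-order field is also what lets the match mask exclude those bits when no option match was requested.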
2091
+
2092
+u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
2093
+{
2094
+ return flow->tunnel_id;
2095
+}
2096
+
2097
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2098
+ struct flow_match_basic *match, bool outer,
2099
+ void *headers_c, void *headers_v)
2100
+{
2101
+ bool ip_version_cap;
2102
+
2103
+ ip_version_cap = outer ?
2104
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2105
+ ft_field_support.outer_ip_version) :
2106
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2107
+ ft_field_support.inner_ip_version);
2108
+
2109
+ if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2110
+ (match->key->n_proto == htons(ETH_P_IP) ||
2111
+ match->key->n_proto == htons(ETH_P_IPV6))) {
2112
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2113
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2114
+ match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2115
+ } else {
2116
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2117
+ ntohs(match->mask->n_proto));
2118
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2119
+ ntohs(match->key->n_proto));
2120
+ }
2121
+}
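mlx5e_tc_set_ethertype() prefers matching the parsed ip_version field when the device supports it and the filter fully masks n_proto as IPv4 or IPv6; otherwise it falls back to an exact ethertype match. A condensed sketch of just that decision follows; the boolean cap flag and host-order constants are simplified stand-ins, not the firmware capability layout or the byte-order handling of the real code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ETH_P_IP	0x0800
#define ETH_P_IPV6	0x86DD

/* Decide how the ethertype would be expressed in the flow spec. */
static void set_ethertype(bool ip_version_cap, uint16_t proto, uint16_t mask)
{
	if (ip_version_cap && mask == 0xFFFF &&
	    (proto == ETH_P_IP || proto == ETH_P_IPV6))
		printf("match ip_version == %d\n", proto == ETH_P_IP ? 4 : 6);
	else
		printf("match ethertype == 0x%04x (mask 0x%04x)\n", proto, mask);
}

int main(void)
{
	set_ethertype(true,  ETH_P_IPV6, 0xFFFF);	/* -> ip_version 6 */
	set_ethertype(false, ETH_P_IP,   0xFFFF);	/* -> ethertype 0x0800 */
	set_ethertype(true,  0x8847,     0xFFFF);	/* MPLS -> ethertype match */
	return 0;
}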
2122
+
2123
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
2124
+ struct mlx5e_tc_flow *flow,
2125
+ struct mlx5_flow_spec *spec,
2126
+ struct flow_cls_offload *f,
2127
+ struct net_device *filter_dev,
2128
+ u8 *match_level,
2129
+ bool *match_inner)
2130
+{
2131
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2132
+ struct netlink_ext_ack *extack = f->common.extack;
2133
+ bool needs_mapping, sets_mapping;
2134
+ int err;
2135
+
2136
+ if (!mlx5e_is_eswitch_flow(flow))
2137
+ return -EOPNOTSUPP;
2138
+
2139
+ needs_mapping = !!flow->attr->chain;
2140
+ sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
2141
+ *match_inner = !needs_mapping;
2142
+
2143
+ if ((needs_mapping || sets_mapping) &&
2144
+ !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2145
+ NL_SET_ERR_MSG(extack,
2146
+ "Chains on tunnel devices isn't supported without register loopback support");
2147
+ netdev_warn(priv->netdev,
2148
+ "Chains on tunnel devices isn't supported without register loopback support");
2149
+ return -EOPNOTSUPP;
2150
+ }
2151
+
2152
+ if (!flow->attr->chain) {
2153
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2154
+ match_level);
2155
+ if (err) {
2156
+ NL_SET_ERR_MSG_MOD(extack,
2157
+ "Failed to parse tunnel attributes");
2158
+ netdev_warn(priv->netdev,
2159
+ "Failed to parse tunnel attributes");
2160
+ return err;
2161
+ }
2162
+
2163
+ /* With mpls over udp we decapsulate using packet reformat
2164
+ * object
2165
+ */
2166
+ if (!netif_is_bareudp(filter_dev))
2167
+ flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2168
+ }
2169
+
2170
+ if (!needs_mapping && !sets_mapping)
2171
+ return 0;
2172
+
2173
+ return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2174
+}
2175
+
2176
+static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2177
+{
2178
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2179
+ inner_headers);
2180
+}
2181
+
2182
+static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2183
+{
2184
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2185
+ inner_headers);
2186
+}
2187
+
2188
+static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2189
+{
2190
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2191
+ outer_headers);
2192
+}
2193
+
2194
+static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2195
+{
2196
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2197
+ outer_headers);
2198
+}
2199
+
2200
+static void *get_match_headers_value(u32 flags,
2201
+ struct mlx5_flow_spec *spec)
2202
+{
2203
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2204
+ get_match_inner_headers_value(spec) :
2205
+ get_match_outer_headers_value(spec);
2206
+}
2207
+
2208
+static void *get_match_headers_criteria(u32 flags,
2209
+ struct mlx5_flow_spec *spec)
2210
+{
2211
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2212
+ get_match_inner_headers_criteria(spec) :
2213
+ get_match_outer_headers_criteria(spec);
2214
+}
2215
+
2216
+static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2217
+ struct flow_cls_offload *f)
2218
+{
2219
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2220
+ struct netlink_ext_ack *extack = f->common.extack;
2221
+ struct net_device *ingress_dev;
2222
+ struct flow_match_meta match;
2223
+
2224
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2225
+ return 0;
2226
+
2227
+ flow_rule_match_meta(rule, &match);
2228
+ if (!match.mask->ingress_ifindex)
2229
+ return 0;
2230
+
2231
+ if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2232
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2233
+ return -EOPNOTSUPP;
2234
+ }
2235
+
2236
+ ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2237
+ match.key->ingress_ifindex);
2238
+ if (!ingress_dev) {
2239
+ NL_SET_ERR_MSG_MOD(extack,
2240
+ "Can't find the ingress port to match on");
2241
+ return -ENOENT;
2242
+ }
2243
+
2244
+ if (ingress_dev != filter_dev) {
2245
+ NL_SET_ERR_MSG_MOD(extack,
2246
+ "Can't match on the ingress filter port");
2247
+ return -EOPNOTSUPP;
2248
+ }
2249
+
2250
+ return 0;
2251
+}
2252
+
2253
+static bool skip_key_basic(struct net_device *filter_dev,
2254
+ struct flow_cls_offload *f)
2255
+{
2256
+ /* When doing mpls over udp decap, the user needs to provide
2257
+ * MPLS_UC as the protocol in order to be able to match on mpls
2258
+ * label fields. However, the actual ethertype is IP so we want to
2259
+ * avoid matching on this, otherwise we'll fail the match.
2260
+ */
2261
+ if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2262
+ return true;
2263
+
2264
+ return false;
2265
+}
2266
+
2267
+static int __parse_cls_flower(struct mlx5e_priv *priv,
2268
+ struct mlx5e_tc_flow *flow,
2269
+ struct mlx5_flow_spec *spec,
2270
+ struct flow_cls_offload *f,
2271
+ struct net_device *filter_dev,
2272
+ u8 *inner_match_level, u8 *outer_match_level)
2273
+{
2274
+ struct netlink_ext_ack *extack = f->common.extack;
12512275 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
12522276 outer_headers);
12532277 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
....@@ -1256,13 +2280,20 @@
12562280 misc_parameters);
12572281 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
12582282 misc_parameters);
2283
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2284
+ struct flow_dissector *dissector = rule->match.dissector;
2285
+ enum fs_flow_table_type fs_type;
12592286 u16 addr_type = 0;
12602287 u8 ip_proto = 0;
2288
+ u8 *match_level;
2289
+ int err;
12612290
1262
- *match_level = MLX5_MATCH_NONE;
2291
+ fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2292
+ match_level = outer_match_level;
12632293
1264
- if (f->dissector->used_keys &
1265
- ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2294
+ if (dissector->used_keys &
2295
+ ~(BIT(FLOW_DISSECTOR_KEY_META) |
2296
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
12662297 BIT(FLOW_DISSECTOR_KEY_BASIC) |
12672298 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
12682299 BIT(FLOW_DISSECTOR_KEY_VLAN) |
....@@ -1277,69 +2308,72 @@
12772308 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
12782309 BIT(FLOW_DISSECTOR_KEY_TCP) |
12792310 BIT(FLOW_DISSECTOR_KEY_IP) |
1280
- BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
1281
- netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
1282
- f->dissector->used_keys);
2311
+ BIT(FLOW_DISSECTOR_KEY_CT) |
2312
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2313
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2314
+ BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2315
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2316
+ netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2317
+ dissector->used_keys);
12832318 return -EOPNOTSUPP;
12842319 }
12852320
1286
- if ((dissector_uses_key(f->dissector,
1287
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
1288
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
1289
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
1290
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
1291
- struct flow_dissector_key_control *key =
1292
- skb_flow_dissector_target(f->dissector,
1293
- FLOW_DISSECTOR_KEY_ENC_CONTROL,
1294
- f->key);
1295
- switch (key->addr_type) {
1296
- case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1297
- case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1298
- if (parse_tunnel_attr(priv, spec, f))
1299
- return -EOPNOTSUPP;
1300
- break;
1301
- default:
1302
- return -EOPNOTSUPP;
1303
- }
2321
+ if (mlx5e_get_tc_tun(filter_dev)) {
2322
+ bool match_inner = false;
13042323
1305
- /* In decap flow, header pointers should point to the inner
1306
- * headers, outer header were already set by parse_tunnel_attr
1307
- */
1308
- headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1309
- inner_headers);
1310
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1311
- inner_headers);
2324
+ err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2325
+ outer_match_level, &match_inner);
2326
+ if (err)
2327
+ return err;
2328
+
2329
+ if (match_inner) {
2330
+ /* header pointers should point to the inner headers
2331
+ * if the packet was decapsulated already.
2332
+ * outer headers are set by parse_tunnel_attr.
2333
+ */
2334
+ match_level = inner_match_level;
2335
+ headers_c = get_match_inner_headers_criteria(spec);
2336
+ headers_v = get_match_inner_headers_value(spec);
2337
+ }
13122338 }
13132339
1314
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
1315
- struct flow_dissector_key_basic *key =
1316
- skb_flow_dissector_target(f->dissector,
1317
- FLOW_DISSECTOR_KEY_BASIC,
1318
- f->key);
1319
- struct flow_dissector_key_basic *mask =
1320
- skb_flow_dissector_target(f->dissector,
1321
- FLOW_DISSECTOR_KEY_BASIC,
1322
- f->mask);
1323
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1324
- ntohs(mask->n_proto));
1325
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1326
- ntohs(key->n_proto));
2340
+ err = mlx5e_flower_parse_meta(filter_dev, f);
2341
+ if (err)
2342
+ return err;
13272343
1328
- if (mask->n_proto)
2344
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2345
+ !skip_key_basic(filter_dev, f)) {
2346
+ struct flow_match_basic match;
2347
+
2348
+ flow_rule_match_basic(rule, &match);
2349
+ mlx5e_tc_set_ethertype(priv->mdev, &match,
2350
+ match_level == outer_match_level,
2351
+ headers_c, headers_v);
2352
+
2353
+ if (match.mask->n_proto)
13292354 *match_level = MLX5_MATCH_L2;
13302355 }
2356
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2357
+ is_vlan_dev(filter_dev)) {
2358
+ struct flow_dissector_key_vlan filter_dev_mask;
2359
+ struct flow_dissector_key_vlan filter_dev_key;
2360
+ struct flow_match_vlan match;
13312361
1332
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
1333
- struct flow_dissector_key_vlan *key =
1334
- skb_flow_dissector_target(f->dissector,
1335
- FLOW_DISSECTOR_KEY_VLAN,
1336
- f->key);
1337
- struct flow_dissector_key_vlan *mask =
1338
- skb_flow_dissector_target(f->dissector,
1339
- FLOW_DISSECTOR_KEY_VLAN,
1340
- f->mask);
1341
- if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
1342
- if (key->vlan_tpid == htons(ETH_P_8021AD)) {
2362
+ if (is_vlan_dev(filter_dev)) {
2363
+ match.key = &filter_dev_key;
2364
+ match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2365
+ match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2366
+ match.key->vlan_priority = 0;
2367
+ match.mask = &filter_dev_mask;
2368
+ memset(match.mask, 0xff, sizeof(*match.mask));
2369
+ match.mask->vlan_priority = 0;
2370
+ } else {
2371
+ flow_rule_match_vlan(rule, &match);
2372
+ }
2373
+ if (match.mask->vlan_id ||
2374
+ match.mask->vlan_priority ||
2375
+ match.mask->vlan_tpid) {
2376
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
13432377 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
13442378 svlan_tag, 1);
13452379 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
....@@ -1351,31 +2385,53 @@
13512385 cvlan_tag, 1);
13522386 }
13532387
1354
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
1355
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
2388
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2389
+ match.mask->vlan_id);
2390
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2391
+ match.key->vlan_id);
13562392
1357
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
1358
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
2393
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2394
+ match.mask->vlan_priority);
2395
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2396
+ match.key->vlan_priority);
13592397
13602398 *match_level = MLX5_MATCH_L2;
2399
+
2400
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2401
+ match.mask->vlan_eth_type &&
2402
+ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2403
+ ft_field_support.outer_second_vid,
2404
+ fs_type)) {
2405
+ MLX5_SET(fte_match_set_misc, misc_c,
2406
+ outer_second_cvlan_tag, 1);
2407
+ spec->match_criteria_enable |=
2408
+ MLX5_MATCH_MISC_PARAMETERS;
2409
+ }
13612410 }
13622411 } else if (*match_level != MLX5_MATCH_NONE) {
1363
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
2412
+ /* cvlan_tag enabled in match criteria and
2413
+ * disabled in match value means both S & C tags
2414
+ * don't exist (untagged of both)
2415
+ */
13642416 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
13652417 *match_level = MLX5_MATCH_L2;
13662418 }
13672419
1368
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
1369
- struct flow_dissector_key_vlan *key =
1370
- skb_flow_dissector_target(f->dissector,
1371
- FLOW_DISSECTOR_KEY_CVLAN,
1372
- f->key);
1373
- struct flow_dissector_key_vlan *mask =
1374
- skb_flow_dissector_target(f->dissector,
1375
- FLOW_DISSECTOR_KEY_CVLAN,
1376
- f->mask);
1377
- if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
1378
- if (key->vlan_tpid == htons(ETH_P_8021AD)) {
2420
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2421
+ struct flow_match_vlan match;
2422
+
2423
+ flow_rule_match_cvlan(rule, &match);
2424
+ if (match.mask->vlan_id ||
2425
+ match.mask->vlan_priority ||
2426
+ match.mask->vlan_tpid) {
2427
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2428
+ fs_type)) {
2429
+ NL_SET_ERR_MSG_MOD(extack,
2430
+ "Matching on CVLAN is not supported");
2431
+ return -EOPNOTSUPP;
2432
+ }
2433
+
2434
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
13792435 MLX5_SET(fte_match_set_misc, misc_c,
13802436 outer_second_svlan_tag, 1);
13812437 MLX5_SET(fte_match_set_misc, misc_v,
....@@ -1388,69 +2444,59 @@
13882444 }
13892445
13902446 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
1391
- mask->vlan_id);
2447
+ match.mask->vlan_id);
13922448 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
1393
- key->vlan_id);
2449
+ match.key->vlan_id);
13942450 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
1395
- mask->vlan_priority);
2451
+ match.mask->vlan_priority);
13962452 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
1397
- key->vlan_priority);
2453
+ match.key->vlan_priority);
13982454
13992455 *match_level = MLX5_MATCH_L2;
2456
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
14002457 }
14012458 }
14022459
1403
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
1404
- struct flow_dissector_key_eth_addrs *key =
1405
- skb_flow_dissector_target(f->dissector,
1406
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
1407
- f->key);
1408
- struct flow_dissector_key_eth_addrs *mask =
1409
- skb_flow_dissector_target(f->dissector,
1410
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
1411
- f->mask);
2460
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2461
+ struct flow_match_eth_addrs match;
14122462
2463
+ flow_rule_match_eth_addrs(rule, &match);
14132464 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14142465 dmac_47_16),
1415
- mask->dst);
2466
+ match.mask->dst);
14162467 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
14172468 dmac_47_16),
1418
- key->dst);
2469
+ match.key->dst);
14192470
14202471 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14212472 smac_47_16),
1422
- mask->src);
2473
+ match.mask->src);
14232474 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
14242475 smac_47_16),
1425
- key->src);
2476
+ match.key->src);
14262477
1427
- if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
2478
+ if (!is_zero_ether_addr(match.mask->src) ||
2479
+ !is_zero_ether_addr(match.mask->dst))
14282480 *match_level = MLX5_MATCH_L2;
14292481 }
14302482
1431
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
1432
- struct flow_dissector_key_control *key =
1433
- skb_flow_dissector_target(f->dissector,
1434
- FLOW_DISSECTOR_KEY_CONTROL,
1435
- f->key);
2483
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2484
+ struct flow_match_control match;
14362485
1437
- struct flow_dissector_key_control *mask =
1438
- skb_flow_dissector_target(f->dissector,
1439
- FLOW_DISSECTOR_KEY_CONTROL,
1440
- f->mask);
1441
- addr_type = key->addr_type;
2486
+ flow_rule_match_control(rule, &match);
2487
+ addr_type = match.key->addr_type;
14422488
14432489 /* the HW doesn't support frag first/later */
1444
- if (mask->flags & FLOW_DIS_FIRST_FRAG)
2490
+ if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
14452491 return -EOPNOTSUPP;
14462492
1447
- if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
2493
+ if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
14482494 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
14492495 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
1450
- key->flags & FLOW_DIS_IS_FRAGMENT);
2496
+ match.key->flags & FLOW_DIS_IS_FRAGMENT);
14512497
14522498 /* the HW doesn't need L3 inline to match on frag=no */
1453
- if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
2499
+ if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
14542500 *match_level = MLX5_MATCH_L2;
14552501 /* *** L2 attributes parsing up to here *** */
14562502 else
....@@ -1458,172 +2504,159 @@
14582504 }
14592505 }
14602506
1461
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
1462
- struct flow_dissector_key_basic *key =
1463
- skb_flow_dissector_target(f->dissector,
1464
- FLOW_DISSECTOR_KEY_BASIC,
1465
- f->key);
1466
- struct flow_dissector_key_basic *mask =
1467
- skb_flow_dissector_target(f->dissector,
1468
- FLOW_DISSECTOR_KEY_BASIC,
1469
- f->mask);
1470
- ip_proto = key->ip_proto;
2507
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2508
+ struct flow_match_basic match;
2509
+
2510
+ flow_rule_match_basic(rule, &match);
2511
+ ip_proto = match.key->ip_proto;
14712512
14722513 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
1473
- mask->ip_proto);
2514
+ match.mask->ip_proto);
14742515 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
1475
- key->ip_proto);
2516
+ match.key->ip_proto);
14762517
1477
- if (mask->ip_proto)
2518
+ if (match.mask->ip_proto)
14782519 *match_level = MLX5_MATCH_L3;
14792520 }
14802521
14812522 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1482
- struct flow_dissector_key_ipv4_addrs *key =
1483
- skb_flow_dissector_target(f->dissector,
1484
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1485
- f->key);
1486
- struct flow_dissector_key_ipv4_addrs *mask =
1487
- skb_flow_dissector_target(f->dissector,
1488
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1489
- f->mask);
2523
+ struct flow_match_ipv4_addrs match;
14902524
2525
+ flow_rule_match_ipv4_addrs(rule, &match);
14912526 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14922527 src_ipv4_src_ipv6.ipv4_layout.ipv4),
1493
- &mask->src, sizeof(mask->src));
2528
+ &match.mask->src, sizeof(match.mask->src));
14942529 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
14952530 src_ipv4_src_ipv6.ipv4_layout.ipv4),
1496
- &key->src, sizeof(key->src));
2531
+ &match.key->src, sizeof(match.key->src));
14972532 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14982533 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1499
- &mask->dst, sizeof(mask->dst));
2534
+ &match.mask->dst, sizeof(match.mask->dst));
15002535 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
15012536 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1502
- &key->dst, sizeof(key->dst));
2537
+ &match.key->dst, sizeof(match.key->dst));
15032538
1504
- if (mask->src || mask->dst)
2539
+ if (match.mask->src || match.mask->dst)
15052540 *match_level = MLX5_MATCH_L3;
15062541 }
15072542
15082543 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1509
- struct flow_dissector_key_ipv6_addrs *key =
1510
- skb_flow_dissector_target(f->dissector,
1511
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1512
- f->key);
1513
- struct flow_dissector_key_ipv6_addrs *mask =
1514
- skb_flow_dissector_target(f->dissector,
1515
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1516
- f->mask);
2544
+ struct flow_match_ipv6_addrs match;
15172545
2546
+ flow_rule_match_ipv6_addrs(rule, &match);
15182547 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
15192548 src_ipv4_src_ipv6.ipv6_layout.ipv6),
1520
- &mask->src, sizeof(mask->src));
2549
+ &match.mask->src, sizeof(match.mask->src));
15212550 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
15222551 src_ipv4_src_ipv6.ipv6_layout.ipv6),
1523
- &key->src, sizeof(key->src));
2552
+ &match.key->src, sizeof(match.key->src));
15242553
15252554 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
15262555 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1527
- &mask->dst, sizeof(mask->dst));
2556
+ &match.mask->dst, sizeof(match.mask->dst));
15282557 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
15292558 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1530
- &key->dst, sizeof(key->dst));
2559
+ &match.key->dst, sizeof(match.key->dst));
15312560
1532
- if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
1533
- ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
2561
+ if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2562
+ ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
15342563 *match_level = MLX5_MATCH_L3;
15352564 }
15362565
1537
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
1538
- struct flow_dissector_key_ip *key =
1539
- skb_flow_dissector_target(f->dissector,
1540
- FLOW_DISSECTOR_KEY_IP,
1541
- f->key);
1542
- struct flow_dissector_key_ip *mask =
1543
- skb_flow_dissector_target(f->dissector,
1544
- FLOW_DISSECTOR_KEY_IP,
1545
- f->mask);
2566
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2567
+ struct flow_match_ip match;
15462568
1547
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
1548
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
2569
+ flow_rule_match_ip(rule, &match);
2570
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2571
+ match.mask->tos & 0x3);
2572
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2573
+ match.key->tos & 0x3);
15492574
1550
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
1551
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
2575
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2576
+ match.mask->tos >> 2);
2577
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2578
+ match.key->tos >> 2);
15522579
1553
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
1554
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
2580
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2581
+ match.mask->ttl);
2582
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2583
+ match.key->ttl);
15552584
1556
- if (mask->ttl &&
2585
+ if (match.mask->ttl &&
15572586 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
1558
- ft_field_support.outer_ipv4_ttl))
2587
+ ft_field_support.outer_ipv4_ttl)) {
2588
+ NL_SET_ERR_MSG_MOD(extack,
2589
+ "Matching on TTL is not supported");
15592590 return -EOPNOTSUPP;
2591
+ }
15602592
1561
- if (mask->tos || mask->ttl)
2593
+ if (match.mask->tos || match.mask->ttl)
15622594 *match_level = MLX5_MATCH_L3;
15632595 }
15642596
15652597 /* *** L3 attributes parsing up to here *** */
15662598
1567
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
1568
- struct flow_dissector_key_ports *key =
1569
- skb_flow_dissector_target(f->dissector,
1570
- FLOW_DISSECTOR_KEY_PORTS,
1571
- f->key);
1572
- struct flow_dissector_key_ports *mask =
1573
- skb_flow_dissector_target(f->dissector,
1574
- FLOW_DISSECTOR_KEY_PORTS,
1575
- f->mask);
2599
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2600
+ struct flow_match_ports match;
2601
+
2602
+ flow_rule_match_ports(rule, &match);
15762603 switch (ip_proto) {
15772604 case IPPROTO_TCP:
15782605 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1579
- tcp_sport, ntohs(mask->src));
2606
+ tcp_sport, ntohs(match.mask->src));
15802607 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1581
- tcp_sport, ntohs(key->src));
2608
+ tcp_sport, ntohs(match.key->src));
15822609
15832610 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1584
- tcp_dport, ntohs(mask->dst));
2611
+ tcp_dport, ntohs(match.mask->dst));
15852612 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1586
- tcp_dport, ntohs(key->dst));
2613
+ tcp_dport, ntohs(match.key->dst));
15872614 break;
15882615
15892616 case IPPROTO_UDP:
15902617 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1591
- udp_sport, ntohs(mask->src));
2618
+ udp_sport, ntohs(match.mask->src));
15922619 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1593
- udp_sport, ntohs(key->src));
2620
+ udp_sport, ntohs(match.key->src));
15942621
15952622 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1596
- udp_dport, ntohs(mask->dst));
2623
+ udp_dport, ntohs(match.mask->dst));
15972624 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1598
- udp_dport, ntohs(key->dst));
2625
+ udp_dport, ntohs(match.key->dst));
15992626 break;
16002627 default:
2628
+ NL_SET_ERR_MSG_MOD(extack,
2629
+ "Only UDP and TCP transports are supported for L4 matching");
16012630 netdev_err(priv->netdev,
16022631 "Only UDP and TCP transport are supported\n");
16032632 return -EINVAL;
16042633 }
16052634
1606
- if (mask->src || mask->dst)
2635
+ if (match.mask->src || match.mask->dst)
16072636 *match_level = MLX5_MATCH_L4;
16082637 }
16092638
1610
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
1611
- struct flow_dissector_key_tcp *key =
1612
- skb_flow_dissector_target(f->dissector,
1613
- FLOW_DISSECTOR_KEY_TCP,
1614
- f->key);
1615
- struct flow_dissector_key_tcp *mask =
1616
- skb_flow_dissector_target(f->dissector,
1617
- FLOW_DISSECTOR_KEY_TCP,
1618
- f->mask);
2639
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2640
+ struct flow_match_tcp match;
16192641
2642
+ flow_rule_match_tcp(rule, &match);
16202643 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
1621
- ntohs(mask->flags));
2644
+ ntohs(match.mask->flags));
16222645 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
1623
- ntohs(key->flags));
2646
+ ntohs(match.key->flags));
16242647
1625
- if (mask->flags)
2648
+ if (match.mask->flags)
16262649 *match_level = MLX5_MATCH_L4;
2650
+ }
2651
+
2652
+ /* Currently supported only for MPLS over UDP */
2653
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2654
+ !netif_is_bareudp(filter_dev)) {
2655
+ NL_SET_ERR_MSG_MOD(extack,
2656
+ "Matching on MPLS is supported only for MPLS over UDP");
2657
+ netdev_err(priv->netdev,
2658
+ "Matching on MPLS is supported only for MPLS over UDP\n");
2659
+ return -EOPNOTSUPP;
16272660 }
16282661
16292662 return 0;
....@@ -1632,66 +2665,80 @@
16322665 static int parse_cls_flower(struct mlx5e_priv *priv,
16332666 struct mlx5e_tc_flow *flow,
16342667 struct mlx5_flow_spec *spec,
1635
- struct tc_cls_flower_offload *f)
2668
+ struct flow_cls_offload *f,
2669
+ struct net_device *filter_dev)
16362670 {
2671
+ u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2672
+ struct netlink_ext_ack *extack = f->common.extack;
16372673 struct mlx5_core_dev *dev = priv->mdev;
16382674 struct mlx5_eswitch *esw = dev->priv.eswitch;
16392675 struct mlx5e_rep_priv *rpriv = priv->ppriv;
16402676 struct mlx5_eswitch_rep *rep;
1641
- u8 match_level;
2677
+ bool is_eswitch_flow;
16422678 int err;
16432679
1644
- err = __parse_cls_flower(priv, spec, f, &match_level);
2680
+ inner_match_level = MLX5_MATCH_NONE;
2681
+ outer_match_level = MLX5_MATCH_NONE;
16452682
1646
- if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
2683
+ err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2684
+ &inner_match_level, &outer_match_level);
2685
+ non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2686
+ outer_match_level : inner_match_level;
2687
+
2688
+ is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2689
+ if (!err && is_eswitch_flow) {
16472690 rep = rpriv->rep;
1648
- if (rep->vport != FDB_UPLINK_VPORT &&
2691
+ if (rep->vport != MLX5_VPORT_UPLINK &&
16492692 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
1650
- esw->offloads.inline_mode < match_level)) {
2693
+ esw->offloads.inline_mode < non_tunnel_match_level)) {
2694
+ NL_SET_ERR_MSG_MOD(extack,
2695
+ "Flow is not offloaded due to min inline setting");
16512696 netdev_warn(priv->netdev,
16522697 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
1653
- match_level, esw->offloads.inline_mode);
2698
+ non_tunnel_match_level, esw->offloads.inline_mode);
16542699 return -EOPNOTSUPP;
16552700 }
16562701 }
16572702
1658
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
1659
- flow->esw_attr->match_level = match_level;
1660
- else
1661
- flow->nic_attr->match_level = match_level;
2703
+ flow->attr->inner_match_level = inner_match_level;
2704
+ flow->attr->outer_match_level = outer_match_level;
2705
+
16622706
16632707 return err;
16642708 }
16652709
16662710 struct pedit_headers {
16672711 struct ethhdr eth;
2712
+ struct vlan_hdr vlan;
16682713 struct iphdr ip4;
16692714 struct ipv6hdr ip6;
16702715 struct tcphdr tcp;
16712716 struct udphdr udp;
16722717 };
16732718
2719
+struct pedit_headers_action {
2720
+ struct pedit_headers vals;
2721
+ struct pedit_headers masks;
2722
+ u32 pedits;
2723
+};
2724
+
16742725 static int pedit_header_offsets[] = {
1675
- [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
1676
- [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
1677
- [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
1678
- [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
1679
- [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2726
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2727
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2728
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2729
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2730
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
16802731 };
16812732
16822733 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
16832734
16842735 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
1685
- struct pedit_headers *masks,
1686
- struct pedit_headers *vals)
2736
+ struct pedit_headers_action *hdrs)
16872737 {
16882738 u32 *curr_pmask, *curr_pval;
16892739
1690
- if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
1691
- goto out_err;
1692
-
1693
- curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
1694
- curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset);
2740
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2741
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
16952742
16962743 if (*curr_pmask & mask) /* disallow acting twice on the same location */
16972744 goto out_err;
....@@ -1707,74 +2754,138 @@
17072754
17082755 struct mlx5_fields {
17092756 u8 field;
1710
- u8 size;
2757
+ u8 field_bsize;
2758
+ u32 field_mask;
17112759 u32 offset;
2760
+ u32 match_offset;
17122761 };
17132762
1714
-#define OFFLOAD(fw_field, size, field, off) \
1715
- {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
2763
+#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2764
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2765
+ offsetof(struct pedit_headers, field) + (off), \
2766
+ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2767
+
2768
+/* masked values are the same and there are no rewrites that do not have a
2769
+ * match.
2770
+ */
2771
+#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2772
+ type matchmaskx = *(type *)(matchmaskp); \
2773
+ type matchvalx = *(type *)(matchvalp); \
2774
+ type maskx = *(type *)(maskp); \
2775
+ type valx = *(type *)(valp); \
2776
+ \
2777
+ (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2778
+ matchmaskx)); \
2779
+})
2780
+
2781
+static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2782
+ void *matchmaskp, u8 bsize)
2783
+{
2784
+ bool same = false;
2785
+
2786
+ switch (bsize) {
2787
+ case 8:
2788
+ same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2789
+ break;
2790
+ case 16:
2791
+ same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2792
+ break;
2793
+ case 32:
2794
+ same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2795
+ break;
2796
+ }
2797
+
2798
+ return same;
2799
+}
17162800
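
The SAME_VAL_MASK()/cmp_val_mask() helpers above let the driver skip a header rewrite when the flow match already pins the field to the value being written: every rewritten bit must also be matched on, and the masked values must agree. A minimal user-space sketch of that predicate, using hypothetical port values that are not taken from the driver:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* A rewrite is treated as redundant when every rewritten bit is also
 * matched on (mask is a subset of matchmask) and the masked values agree.
 */
static bool same_val_mask16(uint16_t val, uint16_t mask,
                            uint16_t matchval, uint16_t matchmask)
{
        return (val & mask) == (matchval & matchmask) &&
               !(mask & (mask ^ matchmask));
}

int main(void)
{
        /* pedit sets dport 80 while the rule already matches dport 80 */
        printf("%d\n", same_val_mask16(80, 0xffff, 80, 0xffff)); /* 1: skip */
        /* same value, but only the low byte is matched: keep the rewrite */
        printf("%d\n", same_val_mask16(80, 0xffff, 80, 0x00ff)); /* 0: keep */
        return 0;
}
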
17172801 static struct mlx5_fields fields[] = {
1718
- OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
1719
- OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0),
1720
- OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
1721
- OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0),
1722
- OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0),
2802
+ OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2803
+ OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2804
+ OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2805
+ OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2806
+ OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype),
2807
+ OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
17232808
1724
- OFFLOAD(IP_TTL, 1, ip4.ttl, 0),
1725
- OFFLOAD(SIPV4, 4, ip4.saddr, 0),
1726
- OFFLOAD(DIPV4, 4, ip4.daddr, 0),
2809
+ OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp),
2810
+ OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit),
2811
+ OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2812
+ OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
17272813
1728
- OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
1729
- OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0),
1730
- OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0),
1731
- OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0),
1732
- OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
1733
- OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0),
1734
- OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0),
1735
- OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0),
1736
- OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
2814
+ OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2815
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2816
+ OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2817
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2818
+ OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2819
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2820
+ OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2821
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2822
+ OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2823
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2824
+ OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2825
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2826
+ OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2827
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2828
+ OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2829
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2830
+ OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2831
+ OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
17372832
1738
- OFFLOAD(TCP_SPORT, 2, tcp.source, 0),
1739
- OFFLOAD(TCP_DPORT, 2, tcp.dest, 0),
1740
- OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
2833
+ OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
2834
+ OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
2835
+ /* in linux tcphdr tcp_flags is 8 bits long */
2836
+ OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
17412837
1742
- OFFLOAD(UDP_SPORT, 2, udp.source, 0),
1743
- OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
2838
+ OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2839
+ OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
17442840 };
17452841
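
The TCP_FLAGS row in the table above reaches the 8-bit flags field through tcp.ack_seq at byte offset 5, since struct tcphdr exposes no byte-addressable flags member: ack_seq starts at byte 8 of the TCP header, so ack_seq + 5 is byte 13, the byte carrying FIN through CWR. A quick user-space check of that arithmetic, using the uapi struct tcphdr rather than anything driver-specific:

#include <linux/tcp.h>   /* uapi struct tcphdr: source, dest, seq, ack_seq, ... */
#include <stddef.h>
#include <stdio.h>

int main(void)
{
        /* byte offset of the TCP flags byte within the TCP header */
        printf("%zu\n", offsetof(struct tcphdr, ack_seq) + 5);   /* 13 */
        return 0;
}
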
1746
-/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
1747
- * max from the SW pedit action. On success, attr->num_mod_hdr_actions
1748
- * says how many HW actions were actually parsed.
1749
- */
1750
-static int offload_pedit_fields(struct pedit_headers *masks,
1751
- struct pedit_headers *vals,
1752
- struct mlx5e_tc_flow_parse_attr *parse_attr)
2842
+static unsigned long mask_to_le(unsigned long mask, int size)
17532843 {
1754
- struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
1755
- int i, action_size, nactions, max_actions, first, last, next_z;
1756
- void *s_masks_p, *a_masks_p, *vals_p;
1757
- struct mlx5_fields *f;
1758
- u8 cmd, field_bsize;
1759
- u32 s_mask, a_mask;
1760
- unsigned long mask;
17612844 __be32 mask_be32;
17622845 __be16 mask_be16;
1763
- void *action;
17642846
1765
- set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
1766
- add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
1767
- set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
1768
- add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
2847
+ if (size == 32) {
2848
+ mask_be32 = (__force __be32)(mask);
2849
+ mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2850
+ } else if (size == 16) {
2851
+ mask_be32 = (__force __be32)(mask);
2852
+ mask_be16 = *(__be16 *)&mask_be32;
2853
+ mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2854
+ }
17692855
1770
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
1771
- action = parse_attr->mod_hdr_actions +
1772
- parse_attr->num_mod_hdr_actions * action_size;
2856
+ return mask;
2857
+}
2858
+static int offload_pedit_fields(struct mlx5e_priv *priv,
2859
+ int namespace,
2860
+ struct pedit_headers_action *hdrs,
2861
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
2862
+ u32 *action_flags,
2863
+ struct netlink_ext_ack *extack)
2864
+{
2865
+ struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2866
+ int i, action_size, first, last, next_z;
2867
+ void *headers_c, *headers_v, *action, *vals_p;
2868
+ u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2869
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
2870
+ struct mlx5_fields *f;
2871
+ unsigned long mask, field_mask;
2872
+ int err;
2873
+ u8 cmd;
17732874
1774
- max_actions = parse_attr->max_mod_hdr_actions;
1775
- nactions = parse_attr->num_mod_hdr_actions;
2875
+ mod_acts = &parse_attr->mod_hdr_acts;
2876
+ headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2877
+ headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2878
+
2879
+ set_masks = &hdrs[0].masks;
2880
+ add_masks = &hdrs[1].masks;
2881
+ set_vals = &hdrs[0].vals;
2882
+ add_vals = &hdrs[1].vals;
2883
+
2884
+ action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
17762885
17772886 for (i = 0; i < ARRAY_SIZE(fields); i++) {
2887
+ bool skip;
2888
+
17782889 f = &fields[i];
17792890 /* avoid seeing bits set from previous iterations */
17802891 s_mask = 0;
....@@ -1783,157 +2894,254 @@
17832894 s_masks_p = (void *)set_masks + f->offset;
17842895 a_masks_p = (void *)add_masks + f->offset;
17852896
1786
- memcpy(&s_mask, s_masks_p, f->size);
1787
- memcpy(&a_mask, a_masks_p, f->size);
2897
+ s_mask = *s_masks_p & f->field_mask;
2898
+ a_mask = *a_masks_p & f->field_mask;
17882899
17892900 if (!s_mask && !a_mask) /* nothing to offload here */
17902901 continue;
17912902
17922903 if (s_mask && a_mask) {
2904
+ NL_SET_ERR_MSG_MOD(extack,
2905
+ "can't set and add to the same HW field");
17932906 printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
17942907 return -EOPNOTSUPP;
17952908 }
17962909
1797
- if (nactions == max_actions) {
1798
- printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
1799
- return -EOPNOTSUPP;
1800
- }
1801
-
2910
+ skip = false;
18022911 if (s_mask) {
2912
+ void *match_mask = headers_c + f->match_offset;
2913
+ void *match_val = headers_v + f->match_offset;
2914
+
18032915 cmd = MLX5_ACTION_TYPE_SET;
18042916 mask = s_mask;
18052917 vals_p = (void *)set_vals + f->offset;
2918
+ /* don't rewrite if we have a match on the same value */
2919
+ if (cmp_val_mask(vals_p, s_masks_p, match_val,
2920
+ match_mask, f->field_bsize))
2921
+ skip = true;
18062922 /* clear to denote we consumed this field */
1807
- memset(s_masks_p, 0, f->size);
2923
+ *s_masks_p &= ~f->field_mask;
18082924 } else {
18092925 cmd = MLX5_ACTION_TYPE_ADD;
18102926 mask = a_mask;
18112927 vals_p = (void *)add_vals + f->offset;
2928
+ /* add 0 is no change */
2929
+ if ((*(u32 *)vals_p & f->field_mask) == 0)
2930
+ skip = true;
18122931 /* clear to denote we consumed this field */
1813
- memset(a_masks_p, 0, f->size);
2932
+ *a_masks_p &= ~f->field_mask;
18142933 }
2934
+ if (skip)
2935
+ continue;
18152936
1816
- field_bsize = f->size * BITS_PER_BYTE;
2937
+ mask = mask_to_le(mask, f->field_bsize);
18172938
1818
- if (field_bsize == 32) {
1819
- mask_be32 = *(__be32 *)&mask;
1820
- mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
1821
- } else if (field_bsize == 16) {
1822
- mask_be16 = *(__be16 *)&mask;
1823
- mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
1824
- }
1825
-
1826
- first = find_first_bit(&mask, field_bsize);
1827
- next_z = find_next_zero_bit(&mask, field_bsize, first);
1828
- last = find_last_bit(&mask, field_bsize);
2939
+ first = find_first_bit(&mask, f->field_bsize);
2940
+ next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2941
+ last = find_last_bit(&mask, f->field_bsize);
18292942 if (first < next_z && next_z < last) {
2943
+ NL_SET_ERR_MSG_MOD(extack,
2944
+ "rewrite of few sub-fields isn't supported");
18302945 printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
18312946 mask);
18322947 return -EOPNOTSUPP;
18332948 }
18342949
2950
+ err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2951
+ if (err) {
2952
+ NL_SET_ERR_MSG_MOD(extack,
2953
+ "too many pedit actions, can't offload");
2954
+ mlx5_core_warn(priv->mdev,
2955
+ "mlx5: parsed %d pedit actions, can't do more\n",
2956
+ mod_acts->num_actions);
2957
+ return err;
2958
+ }
2959
+
2960
+ action = mod_acts->actions +
2961
+ (mod_acts->num_actions * action_size);
18352962 MLX5_SET(set_action_in, action, action_type, cmd);
18362963 MLX5_SET(set_action_in, action, field, f->field);
18372964
18382965 if (cmd == MLX5_ACTION_TYPE_SET) {
1839
- MLX5_SET(set_action_in, action, offset, first);
2966
+ int start;
2967
+
2968
+ field_mask = mask_to_le(f->field_mask, f->field_bsize);
2969
+
2970
+ /* if field is bit sized it can start not from first bit */
2971
+ start = find_first_bit(&field_mask, f->field_bsize);
2972
+
2973
+ MLX5_SET(set_action_in, action, offset, first - start);
18402974 /* length is num of bits to be written, zero means length of 32 */
18412975 MLX5_SET(set_action_in, action, length, (last - first + 1));
18422976 }
18432977
1844
- if (field_bsize == 32)
2978
+ if (f->field_bsize == 32)
18452979 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
1846
- else if (field_bsize == 16)
2980
+ else if (f->field_bsize == 16)
18472981 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
1848
- else if (field_bsize == 8)
2982
+ else if (f->field_bsize == 8)
18492983 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
18502984
1851
- action += action_size;
1852
- nactions++;
2985
+ ++mod_acts->num_actions;
18532986 }
18542987
1855
- parse_attr->num_mod_hdr_actions = nactions;
18562988 return 0;
18572989 }
18582990
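
For a SET command, offload_pedit_fields() above converts the pedit mask into the device's (offset, length, data) triple: offset is where the rewritten bits start relative to the start of the sub-field, length is the number of contiguous bits, and data is the value shifted down to bit 0 (non-contiguous masks are rejected via find_next_zero_bit(), omitted here). A standalone sketch of that arithmetic for a hypothetical DSCP rewrite (field mask 0xfc inside the IPv4 tos byte), using ffs()/clz() in place of the kernel bit helpers:

#include <stdint.h>
#include <stdio.h>
#include <strings.h>            /* ffs() */

/* 0-indexed first/last set bit of a non-zero 32-bit mask, standing in for
 * find_first_bit()/find_last_bit() in this sketch.
 */
static int first_bit(uint32_t m) { return ffs(m) - 1; }
static int last_bit(uint32_t m)  { return 31 - __builtin_clz(m); }

int main(void)
{
        /* hypothetical pedit: write DSCP 46 into ip4.tos (mask 0xfc, val 0xb8) */
        uint32_t field_mask = 0xfc, mask = 0xfc, val = 0xb8;
        int start = first_bit(field_mask);      /* sub-field starts at bit 2 */
        int first = first_bit(mask);
        int last  = last_bit(mask);

        printf("offset=%d length=%d data=0x%x\n",
               first - start, last - first + 1, val >> first);
        /* offset=0 length=6 data=0x2e (DSCP 46) */
        return 0;
}
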
1859
-static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
1860
- const struct tc_action *a, int namespace,
1861
- struct mlx5e_tc_flow_parse_attr *parse_attr)
2991
+static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2992
+ int namespace)
18622993 {
1863
- int nkeys, action_size, max_actions;
1864
-
1865
- nkeys = tcf_pedit_nkeys(a);
1866
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
1867
-
18682994 if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
1869
- max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
2995
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
18702996 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
1871
- max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
2997
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2998
+}
18722999
1873
- /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
1874
- max_actions = min(max_actions, nkeys * 16);
3000
+int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
3001
+ int namespace,
3002
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
3003
+{
3004
+ int action_size, new_num_actions, max_hw_actions;
3005
+ size_t new_sz, old_sz;
3006
+ void *ret;
18753007
1876
- parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
1877
- if (!parse_attr->mod_hdr_actions)
3008
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
3009
+ return 0;
3010
+
3011
+ action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
3012
+
3013
+ max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
3014
+ namespace);
3015
+ new_num_actions = min(max_hw_actions,
3016
+ mod_hdr_acts->actions ?
3017
+ mod_hdr_acts->max_actions * 2 : 1);
3018
+ if (mod_hdr_acts->max_actions == new_num_actions)
3019
+ return -ENOSPC;
3020
+
3021
+ new_sz = action_size * new_num_actions;
3022
+ old_sz = mod_hdr_acts->max_actions * action_size;
3023
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
3024
+ if (!ret)
18783025 return -ENOMEM;
18793026
1880
- parse_attr->max_mod_hdr_actions = max_actions;
3027
+ memset(ret + old_sz, 0, new_sz - old_sz);
3028
+ mod_hdr_acts->actions = ret;
3029
+ mod_hdr_acts->max_actions = new_num_actions;
3030
+
18813031 return 0;
3032
+}
3033
+
3034
+void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
3035
+{
3036
+ kfree(mod_hdr_acts->actions);
3037
+ mod_hdr_acts->actions = NULL;
3038
+ mod_hdr_acts->num_actions = 0;
3039
+ mod_hdr_acts->max_actions = 0;
18823040 }
18833041
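
alloc_mod_hdr_actions() above grows the modify-header action array geometrically, starting at one entry and doubling on each call until the device's max_modify_header_actions capability is hit, at which point it returns -ENOSPC; dealloc_mod_hdr_actions() simply frees and resets it. A user-space sketch of the same growth policy, with a placeholder capability value and element size:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct acts { void *buf; int num, max; };

/* Make room for one more element: double max (starting from 1) up to cap,
 * zero the new tail, and fail once the cap has already been reached.
 */
static int acts_reserve_one(struct acts *a, int cap, size_t elem_sz)
{
        int new_max;
        void *p;

        if (a->num < a->max)
                return 0;                       /* room already */
        new_max = a->buf ? a->max * 2 : 1;
        if (new_max > cap)
                new_max = cap;
        if (new_max == a->max)
                return -1;                      /* cap reached, like -ENOSPC */
        p = realloc(a->buf, new_max * elem_sz);
        if (!p)
                return -1;
        memset((char *)p + a->max * elem_sz, 0, (new_max - a->max) * elem_sz);
        a->buf = p;
        a->max = new_max;
        return 0;
}

int main(void)
{
        struct acts a = { 0 };
        int cap = 8;                            /* placeholder HW cap */

        while (!acts_reserve_one(&a, cap, 64))
                a.num++;
        printf("stopped at %d actions\n", a.num);       /* 8 */
        free(a.buf);
        return 0;
}
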
18843042 static const struct pedit_headers zero_masks = {};
18853043
1886
-static int parse_tc_pedit_action(struct mlx5e_priv *priv,
1887
- const struct tc_action *a, int namespace,
1888
- struct mlx5e_tc_flow_parse_attr *parse_attr)
3044
+static int
3045
+parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
3046
+ const struct flow_action_entry *act, int namespace,
3047
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3048
+ struct pedit_headers_action *hdrs,
3049
+ struct netlink_ext_ack *extack)
18893050 {
1890
- struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
1891
- int nkeys, i, err = -EOPNOTSUPP;
3051
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
3052
+ int err = -EOPNOTSUPP;
18923053 u32 mask, val, offset;
1893
- u8 cmd, htype;
3054
+ u8 htype;
18943055
1895
- nkeys = tcf_pedit_nkeys(a);
3056
+ htype = act->mangle.htype;
3057
+ err = -EOPNOTSUPP; /* can't be all optimistic */
18963058
1897
- memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
1898
- memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
1899
-
1900
- for (i = 0; i < nkeys; i++) {
1901
- htype = tcf_pedit_htype(a, i);
1902
- cmd = tcf_pedit_cmd(a, i);
1903
- err = -EOPNOTSUPP; /* can't be all optimistic */
1904
-
1905
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
1906
- netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
1907
- goto out_err;
1908
- }
1909
-
1910
- if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
1911
- netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
1912
- goto out_err;
1913
- }
1914
-
1915
- mask = tcf_pedit_mask(a, i);
1916
- val = tcf_pedit_val(a, i);
1917
- offset = tcf_pedit_offset(a, i);
1918
-
1919
- err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
1920
- if (err)
1921
- goto out_err;
3059
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
3060
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
3061
+ goto out_err;
19223062 }
19233063
1924
- if (!parse_attr->mod_hdr_actions) {
1925
- err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
1926
- if (err)
1927
- goto out_err;
3064
+ if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
3065
+ NL_SET_ERR_MSG_MOD(extack,
3066
+ "The pedit offload action is not supported");
3067
+ goto out_err;
19283068 }
19293069
1930
- err = offload_pedit_fields(masks, vals, parse_attr);
3070
+ mask = act->mangle.mask;
3071
+ val = act->mangle.val;
3072
+ offset = act->mangle.offset;
3073
+
3074
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
3075
+ if (err)
3076
+ goto out_err;
3077
+
3078
+ hdrs[cmd].pedits++;
3079
+
3080
+ return 0;
3081
+out_err:
3082
+ return err;
3083
+}
3084
+
3085
+static int
3086
+parse_pedit_to_reformat(struct mlx5e_priv *priv,
3087
+ const struct flow_action_entry *act,
3088
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3089
+ struct netlink_ext_ack *extack)
3090
+{
3091
+ u32 mask, val, offset;
3092
+ u32 *p;
3093
+
3094
+ if (act->id != FLOW_ACTION_MANGLE)
3095
+ return -EOPNOTSUPP;
3096
+
3097
+ if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
3098
+ NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
3099
+ return -EOPNOTSUPP;
3100
+ }
3101
+
3102
+ mask = ~act->mangle.mask;
3103
+ val = act->mangle.val;
3104
+ offset = act->mangle.offset;
3105
+ p = (u32 *)&parse_attr->eth;
3106
+ *(p + (offset >> 2)) |= (val & mask);
3107
+
3108
+ return 0;
3109
+}
3110
+
3111
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
3112
+ const struct flow_action_entry *act, int namespace,
3113
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3114
+ struct pedit_headers_action *hdrs,
3115
+ struct mlx5e_tc_flow *flow,
3116
+ struct netlink_ext_ack *extack)
3117
+{
3118
+ if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3119
+ return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3120
+
3121
+ return parse_pedit_to_modify_hdr(priv, act, namespace,
3122
+ parse_attr, hdrs, extack);
3123
+}
3124
+
3125
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3126
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3127
+ struct pedit_headers_action *hdrs,
3128
+ u32 *action_flags,
3129
+ struct netlink_ext_ack *extack)
3130
+{
3131
+ struct pedit_headers *cmd_masks;
3132
+ int err;
3133
+ u8 cmd;
3134
+
3135
+ err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3136
+ action_flags, extack);
19313137 if (err < 0)
19323138 goto out_dealloc_parsed_actions;
19333139
19343140 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
1935
- cmd_masks = &masks[cmd];
3141
+ cmd_masks = &hdrs[cmd].masks;
19363142 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3143
+ NL_SET_ERR_MSG_MOD(extack,
3144
+ "attempt to offload an unsupported field");
19373145 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
19383146 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
19393147 16, 1, cmd_masks, sizeof(zero_masks), true);
....@@ -1945,24 +3153,30 @@
19453153 return 0;
19463154
19473155 out_dealloc_parsed_actions:
1948
- kfree(parse_attr->mod_hdr_actions);
1949
-out_err:
3156
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
19503157 return err;
19513158 }
19523159
1953
-static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
3160
+static bool csum_offload_supported(struct mlx5e_priv *priv,
3161
+ u32 action,
3162
+ u32 update_flags,
3163
+ struct netlink_ext_ack *extack)
19543164 {
19553165 u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
19563166 TCA_CSUM_UPDATE_FLAG_UDP;
19573167
19583168 /* The HW recalcs checksums only if re-writing headers */
19593169 if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3170
+ NL_SET_ERR_MSG_MOD(extack,
3171
+ "TC csum action is only offloaded with pedit");
19603172 netdev_warn(priv->netdev,
19613173 "TC csum action is only offloaded with pedit\n");
19623174 return false;
19633175 }
19643176
19653177 if (update_flags & ~prot_flags) {
3178
+ NL_SET_ERR_MSG_MOD(extack,
3179
+ "can't offload TC csum action for some header/s");
19663180 netdev_warn(priv->netdev,
19673181 "can't offload TC csum action for some header/s - flags %#x\n",
19683182 update_flags);
....@@ -1972,46 +3186,162 @@
19723186 return true;
19733187 }
19743188
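
csum_offload_supported() above accepts a TC csum action only alongside a header rewrite (the hardware recomputes checksums as a side effect of MOD_HDR) and only for the IPv4-header, TCP and UDP update flags; any other bit in update_flags rejects the flow. A small sketch of that subset test using the uapi flag definitions, with a hypothetical update_flags value:

#include <linux/tc_act/tc_csum.h>       /* TCA_CSUM_UPDATE_FLAG_* */
#include <stdio.h>

int main(void)
{
        unsigned int prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR |
                                  TCA_CSUM_UPDATE_FLAG_TCP |
                                  TCA_CSUM_UPDATE_FLAG_UDP;
        unsigned int update_flags = TCA_CSUM_UPDATE_FLAG_TCP;   /* sample */

        /* any flag outside prot_flags makes the action non-offloadable */
        printf("%s\n", (update_flags & ~prot_flags) ? "reject" : "offload");
        return 0;
}
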
1975
-static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
1976
- struct tcf_exts *exts)
3189
+struct ip_ttl_word {
3190
+ __u8 ttl;
3191
+ __u8 protocol;
3192
+ __sum16 check;
3193
+};
3194
+
3195
+struct ipv6_hoplimit_word {
3196
+ __be16 payload_len;
3197
+ __u8 nexthdr;
3198
+ __u8 hop_limit;
3199
+};
3200
+
3201
+static int is_action_keys_supported(const struct flow_action_entry *act,
3202
+ bool ct_flow, bool *modify_ip_header,
3203
+ bool *modify_tuple,
3204
+ struct netlink_ext_ack *extack)
19773205 {
1978
- const struct tc_action *a;
1979
- bool modify_ip_header;
1980
- LIST_HEAD(actions);
1981
- u8 htype, ip_proto;
3206
+ u32 mask, offset;
3207
+ u8 htype;
3208
+
3209
+ htype = act->mangle.htype;
3210
+ offset = act->mangle.offset;
3211
+ mask = ~act->mangle.mask;
3212
+ /* For IPv4 & IPv6 header check 4 byte word,
3213
+ * to determine that modified fields
3214
+ * are NOT ttl & hop_limit only.
3215
+ */
3216
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3217
+ struct ip_ttl_word *ttl_word =
3218
+ (struct ip_ttl_word *)&mask;
3219
+
3220
+ if (offset != offsetof(struct iphdr, ttl) ||
3221
+ ttl_word->protocol ||
3222
+ ttl_word->check) {
3223
+ *modify_ip_header = true;
3224
+ }
3225
+
3226
+ if (offset >= offsetof(struct iphdr, saddr))
3227
+ *modify_tuple = true;
3228
+
3229
+ if (ct_flow && *modify_tuple) {
3230
+ NL_SET_ERR_MSG_MOD(extack,
3231
+ "can't offload re-write of ipv4 address with action ct");
3232
+ return -EOPNOTSUPP;
3233
+ }
3234
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3235
+ struct ipv6_hoplimit_word *hoplimit_word =
3236
+ (struct ipv6_hoplimit_word *)&mask;
3237
+
3238
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
3239
+ hoplimit_word->payload_len ||
3240
+ hoplimit_word->nexthdr) {
3241
+ *modify_ip_header = true;
3242
+ }
3243
+
3244
+ if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3245
+ *modify_tuple = true;
3246
+
3247
+ if (ct_flow && *modify_tuple) {
3248
+ NL_SET_ERR_MSG_MOD(extack,
3249
+ "can't offload re-write of ipv6 address with action ct");
3250
+ return -EOPNOTSUPP;
3251
+ }
3252
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3253
+ htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3254
+ *modify_tuple = true;
3255
+ if (ct_flow) {
3256
+ NL_SET_ERR_MSG_MOD(extack,
3257
+ "can't offload re-write of transport header ports with action ct");
3258
+ return -EOPNOTSUPP;
3259
+ }
3260
+ }
3261
+
3262
+ return 0;
3263
+}
3264
+
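
is_action_keys_supported() above distinguishes a pure TTL rewrite from a general IPv4 header rewrite by overlaying the three fields that share the 32-bit word starting at offsetof(struct iphdr, ttl) — ttl, protocol and check — onto the inverted pedit mask: if the protocol or checksum bytes are touched, or the pedit does not start at the ttl offset, the action counts as modifying the IP header. A byte-order-agnostic user-space sketch of that test, where the literal offset 8 stands in for offsetof(struct iphdr, ttl):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ip_ttl_word {
        uint8_t  ttl;
        uint8_t  protocol;
        uint16_t check;
};

static bool touches_more_than_ttl(uint32_t offset, uint32_t mangle_mask,
                                  uint32_t ttl_offset)
{
        uint32_t mask = ~mangle_mask;   /* bits the pedit actually rewrites */
        struct ip_ttl_word w;

        memcpy(&w, &mask, sizeof(w));
        return offset != ttl_offset || w.protocol || w.check;
}

int main(void)
{
        union { uint32_t word; uint8_t b[4]; } m = { .word = 0 };

        m.b[offsetof(struct ip_ttl_word, ttl)] = 0xff;       /* only TTL   */
        printf("%d\n", touches_more_than_ttl(8, ~m.word, 8));        /* 0 */

        m.b[offsetof(struct ip_ttl_word, protocol)] = 0xff;  /* + protocol */
        printf("%d\n", touches_more_than_ttl(8, ~m.word, 8));        /* 1 */
        return 0;
}
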
3265
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3266
+ bool ct_flow, struct netlink_ext_ack *extack,
3267
+ struct mlx5e_priv *priv,
3268
+ struct mlx5_flow_spec *spec)
3269
+{
3270
+ if (!modify_tuple || ct_clear)
3271
+ return true;
3272
+
3273
+ if (ct_flow) {
3274
+ NL_SET_ERR_MSG_MOD(extack,
3275
+ "can't offload tuple modification with non-clear ct()");
3276
+ netdev_info(priv->netdev,
3277
+ "can't offload tuple modification with non-clear ct()");
3278
+ return false;
3279
+ }
3280
+
3281
+ /* Add ct_state=-trk match so it will be offloaded for non ct flows
3282
+ * (or after clear action), as otherwise, since the tuple is changed,
3283
+ * we can't restore ct state
3284
+ */
3285
+ if (mlx5_tc_ct_add_no_trk_match(spec)) {
3286
+ NL_SET_ERR_MSG_MOD(extack,
3287
+ "can't offload tuple modification with ct matches and no ct(clear) action");
3288
+ netdev_info(priv->netdev,
3289
+ "can't offload tuple modification with ct matches and no ct(clear) action");
3290
+ return false;
3291
+ }
3292
+
3293
+ return true;
3294
+}
3295
+
3296
+static bool modify_header_match_supported(struct mlx5e_priv *priv,
3297
+ struct mlx5_flow_spec *spec,
3298
+ struct flow_action *flow_action,
3299
+ u32 actions, bool ct_flow,
3300
+ bool ct_clear,
3301
+ struct netlink_ext_ack *extack)
3302
+{
3303
+ const struct flow_action_entry *act;
3304
+ bool modify_ip_header, modify_tuple;
3305
+ void *headers_c;
19823306 void *headers_v;
19833307 u16 ethertype;
1984
- int nkeys, i;
3308
+ u8 ip_proto;
3309
+ int i, err;
19853310
1986
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
3311
+ headers_c = get_match_headers_criteria(actions, spec);
3312
+ headers_v = get_match_headers_value(actions, spec);
19873313 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
19883314
19893315 /* for non-IP we only re-write MACs, so we're okay */
1990
- if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3316
+ if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3317
+ ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
19913318 goto out_ok;
19923319
19933320 modify_ip_header = false;
1994
- tcf_exts_for_each_action(i, a, exts) {
1995
- int k;
1996
-
1997
- if (!is_tcf_pedit(a))
3321
+ modify_tuple = false;
3322
+ flow_action_for_each(i, act, flow_action) {
3323
+ if (act->id != FLOW_ACTION_MANGLE &&
3324
+ act->id != FLOW_ACTION_ADD)
19983325 continue;
19993326
2000
- nkeys = tcf_pedit_nkeys(a);
2001
- for (k = 0; k < nkeys; k++) {
2002
- htype = tcf_pedit_htype(a, k);
2003
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
2004
- htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
2005
- modify_ip_header = true;
2006
- break;
2007
- }
2008
- }
3327
+ err = is_action_keys_supported(act, ct_flow,
3328
+ &modify_ip_header,
3329
+ &modify_tuple, extack);
3330
+ if (err)
3331
+ return err;
20093332 }
3333
+
3334
+ if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3335
+ priv, spec))
3336
+ return false;
20103337
20113338 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
20123339 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
20133340 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
2014
- pr_info("can't offload re-write of ip proto %d\n", ip_proto);
3341
+ NL_SET_ERR_MSG_MOD(extack,
3342
+ "can't offload re-write of non TCP/UDP");
3343
+ netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3344
+ ip_proto);
20153345 return false;
20163346 }
20173347
....@@ -2020,25 +3350,42 @@
20203350 }
20213351
20223352 static bool actions_match_supported(struct mlx5e_priv *priv,
2023
- struct tcf_exts *exts,
3353
+ struct flow_action *flow_action,
20243354 struct mlx5e_tc_flow_parse_attr *parse_attr,
2025
- struct mlx5e_tc_flow *flow)
3355
+ struct mlx5e_tc_flow *flow,
3356
+ struct netlink_ext_ack *extack)
20263357 {
3358
+ bool ct_flow = false, ct_clear = false;
20273359 u32 actions;
20283360
2029
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
2030
- actions = flow->esw_attr->action;
2031
- else
2032
- actions = flow->nic_attr->action;
3361
+ ct_clear = flow->attr->ct_attr.ct_action &
3362
+ TCA_CT_ACT_CLEAR;
3363
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3364
+ actions = flow->attr->action;
20333365
2034
- if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
2035
- !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
2036
- return false;
3366
+ if (mlx5e_is_eswitch_flow(flow)) {
3367
+ if (flow->attr->esw_attr->split_count && ct_flow) {
3368
+ /* All registers used by ct are cleared when using
3369
+ * split rules.
3370
+ */
3371
+ NL_SET_ERR_MSG_MOD(extack,
3372
+ "Can't offload mirroring with action ct");
3373
+ return false;
3374
+ }
3375
+ }
20373376
20383377 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2039
- return modify_header_match_supported(&parse_attr->spec, exts);
3378
+ return modify_header_match_supported(priv, &parse_attr->spec,
3379
+ flow_action, actions,
3380
+ ct_flow, ct_clear,
3381
+ extack);
20403382
20413383 return true;
3384
+}
3385
+
3386
+static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3387
+{
3388
+ return priv->mdev == peer_priv->mdev;
20423389 }
20433390
20443391 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
....@@ -2049,148 +3396,308 @@
20493396 fmdev = priv->mdev;
20503397 pmdev = peer_priv->mdev;
20513398
2052
- mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
2053
- mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
3399
+ fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3400
+ psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
20543401
20553402 return (fsystem_guid == psystem_guid);
20563403 }
20573404
2058
-static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
2059
- struct mlx5e_tc_flow_parse_attr *parse_attr,
2060
- struct mlx5e_tc_flow *flow)
3405
+static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3406
+ const struct flow_action_entry *act,
3407
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3408
+ struct pedit_headers_action *hdrs,
3409
+ u32 *action, struct netlink_ext_ack *extack)
20613410 {
2062
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
2063
- const struct tc_action *a;
2064
- LIST_HEAD(actions);
3411
+ u16 mask16 = VLAN_VID_MASK;
3412
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3413
+ const struct flow_action_entry pedit_act = {
3414
+ .id = FLOW_ACTION_MANGLE,
3415
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3416
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3417
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3418
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3419
+ };
3420
+ u8 match_prio_mask, match_prio_val;
3421
+ void *headers_c, *headers_v;
3422
+ int err;
3423
+
3424
+ headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3425
+ headers_v = get_match_headers_value(*action, &parse_attr->spec);
3426
+
3427
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3428
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3429
+ NL_SET_ERR_MSG_MOD(extack,
3430
+ "VLAN rewrite action must have VLAN protocol match");
3431
+ return -EOPNOTSUPP;
3432
+ }
3433
+
3434
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3435
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3436
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3437
+ NL_SET_ERR_MSG_MOD(extack,
3438
+ "Changing VLAN prio is not supported");
3439
+ return -EOPNOTSUPP;
3440
+ }
3441
+
3442
+ err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3443
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3444
+
3445
+ return err;
3446
+}
3447
+
3448
+static int
3449
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3450
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3451
+ struct pedit_headers_action *hdrs,
3452
+ u32 *action, struct netlink_ext_ack *extack)
3453
+{
3454
+ const struct flow_action_entry prio_tag_act = {
3455
+ .vlan.vid = 0,
3456
+ .vlan.prio =
3457
+ MLX5_GET(fte_match_set_lyr_2_4,
3458
+ get_match_headers_value(*action,
3459
+ &parse_attr->spec),
3460
+ first_prio) &
3461
+ MLX5_GET(fte_match_set_lyr_2_4,
3462
+ get_match_headers_criteria(*action,
3463
+ &parse_attr->spec),
3464
+ first_prio),
3465
+ };
3466
+
3467
+ return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3468
+ &prio_tag_act, parse_attr, hdrs, action,
3469
+ extack);
3470
+}
3471
+
3472
+static int validate_goto_chain(struct mlx5e_priv *priv,
3473
+ struct mlx5e_tc_flow *flow,
3474
+ const struct flow_action_entry *act,
3475
+ u32 actions,
3476
+ struct netlink_ext_ack *extack)
3477
+{
3478
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
3479
+ struct mlx5_flow_attr *attr = flow->attr;
3480
+ bool ft_flow = mlx5e_is_ft_flow(flow);
3481
+ u32 dest_chain = act->chain_index;
3482
+ struct mlx5_fs_chains *chains;
3483
+ struct mlx5_eswitch *esw;
3484
+ u32 reformat_and_fwd;
3485
+ u32 max_chain;
3486
+
3487
+ esw = priv->mdev->priv.eswitch;
3488
+ chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3489
+ max_chain = mlx5_chains_get_chain_range(chains);
3490
+ reformat_and_fwd = is_esw ?
3491
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3492
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3493
+
3494
+ if (ft_flow) {
3495
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3496
+ return -EOPNOTSUPP;
3497
+ }
3498
+
3499
+ if (!mlx5_chains_backwards_supported(chains) &&
3500
+ dest_chain <= attr->chain) {
3501
+ NL_SET_ERR_MSG_MOD(extack,
3502
+ "Goto lower numbered chain isn't supported");
3503
+ return -EOPNOTSUPP;
3504
+ }
3505
+
3506
+ if (dest_chain > max_chain) {
3507
+ NL_SET_ERR_MSG_MOD(extack,
3508
+ "Requested destination chain is out of supported range");
3509
+ return -EOPNOTSUPP;
3510
+ }
3511
+
3512
+ if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3513
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3514
+ !reformat_and_fwd) {
3515
+ NL_SET_ERR_MSG_MOD(extack,
3516
+ "Goto chain is not allowed if action has reformat or decap");
3517
+ return -EOPNOTSUPP;
3518
+ }
3519
+
3520
+ return 0;
3521
+}
3522
+
3523
+static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3524
+ struct flow_action *flow_action,
3525
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3526
+ struct mlx5e_tc_flow *flow,
3527
+ struct netlink_ext_ack *extack)
3528
+{
3529
+ struct mlx5_flow_attr *attr = flow->attr;
3530
+ struct pedit_headers_action hdrs[2] = {};
3531
+ const struct flow_action_entry *act;
3532
+ struct mlx5_nic_flow_attr *nic_attr;
20653533 u32 action = 0;
20663534 int err, i;
20673535
2068
- if (!tcf_exts_has_actions(exts))
3536
+ if (!flow_action_has_entries(flow_action))
20693537 return -EINVAL;
20703538
2071
- attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3539
+ if (!flow_action_hw_stats_check(flow_action, extack,
3540
+ FLOW_ACTION_HW_STATS_DELAYED_BIT))
3541
+ return -EOPNOTSUPP;
20723542
2073
- tcf_exts_for_each_action(i, a, exts) {
2074
- if (is_tcf_gact_shot(a)) {
3543
+ nic_attr = attr->nic_attr;
3544
+
3545
+ nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3546
+
3547
+ flow_action_for_each(i, act, flow_action) {
3548
+ switch (act->id) {
3549
+ case FLOW_ACTION_ACCEPT:
3550
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3551
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
3552
+ break;
3553
+ case FLOW_ACTION_DROP:
20753554 action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
20763555 if (MLX5_CAP_FLOWTABLE(priv->mdev,
20773556 flow_table_properties_nic_receive.flow_counter))
20783557 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2079
- continue;
2080
- }
2081
-
2082
- if (is_tcf_pedit(a)) {
2083
- err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
2084
- parse_attr);
3558
+ break;
3559
+ case FLOW_ACTION_MANGLE:
3560
+ case FLOW_ACTION_ADD:
3561
+ err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3562
+ parse_attr, hdrs, NULL, extack);
20853563 if (err)
20863564 return err;
20873565
2088
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
2089
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2090
- continue;
2091
- }
3566
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3567
+ break;
3568
+ case FLOW_ACTION_VLAN_MANGLE:
3569
+ err = add_vlan_rewrite_action(priv,
3570
+ MLX5_FLOW_NAMESPACE_KERNEL,
3571
+ act, parse_attr, hdrs,
3572
+ &action, extack);
3573
+ if (err)
3574
+ return err;
20923575
2093
- if (is_tcf_csum(a)) {
3576
+ break;
3577
+ case FLOW_ACTION_CSUM:
20943578 if (csum_offload_supported(priv, action,
2095
- tcf_csum_update_flags(a)))
2096
- continue;
3579
+ act->csum_flags,
3580
+ extack))
3581
+ break;
20973582
20983583 return -EOPNOTSUPP;
2099
- }
2100
-
2101
- if (is_tcf_mirred_egress_redirect(a)) {
2102
- struct net_device *peer_dev = tcf_mirred_dev(a);
3584
+ case FLOW_ACTION_REDIRECT: {
3585
+ struct net_device *peer_dev = act->dev;
21033586
21043587 if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
21053588 same_hw_devs(priv, netdev_priv(peer_dev))) {
2106
- parse_attr->mirred_ifindex = peer_dev->ifindex;
2107
- flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
3589
+ parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3590
+ flow_flag_set(flow, HAIRPIN);
21083591 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
21093592 MLX5_FLOW_CONTEXT_ACTION_COUNT;
21103593 } else {
3594
+ NL_SET_ERR_MSG_MOD(extack,
3595
+ "device is not on same HW, can't offload");
21113596 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
21123597 peer_dev->name);
21133598 return -EINVAL;
21143599 }
2115
- continue;
2116
- }
2117
-
2118
- if (is_tcf_skbedit_mark(a)) {
2119
- u32 mark = tcf_skbedit_mark(a);
3600
+ }
3601
+ break;
3602
+ case FLOW_ACTION_MARK: {
3603
+ u32 mark = act->mark;
21203604
21213605 if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
2122
- netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
2123
- mark);
3606
+ NL_SET_ERR_MSG_MOD(extack,
3607
+ "Bad flow mark - only 16 bit is supported");
21243608 return -EINVAL;
21253609 }
21263610
2127
- attr->flow_tag = mark;
3611
+ nic_attr->flow_tag = mark;
21283612 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2129
- continue;
2130
- }
3613
+ }
3614
+ break;
3615
+ case FLOW_ACTION_GOTO:
3616
+ err = validate_goto_chain(priv, flow, act, action,
3617
+ extack);
3618
+ if (err)
3619
+ return err;
21313620
2132
- return -EINVAL;
3621
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3622
+ attr->dest_chain = act->chain_index;
3623
+ break;
3624
+ case FLOW_ACTION_CT:
3625
+ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3626
+ if (err)
3627
+ return err;
3628
+
3629
+ flow_flag_set(flow, CT);
3630
+ break;
3631
+ default:
3632
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3633
+ return -EOPNOTSUPP;
3634
+ }
3635
+ }
3636
+
3637
+ if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3638
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3639
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3640
+ parse_attr, hdrs, &action, extack);
3641
+ if (err)
3642
+ return err;
3643
+ /* in case all pedit actions are skipped, remove the MOD_HDR
3644
+ * flag.
3645
+ */
3646
+ if (parse_attr->mod_hdr_acts.num_actions == 0) {
3647
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3648
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3649
+ }
21333650 }
21343651
21353652 attr->action = action;
2136
- if (!actions_match_supported(priv, exts, parse_attr, flow))
3653
+
3654
+ if (attr->dest_chain) {
3655
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3656
+ NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3657
+ return -EOPNOTSUPP;
3658
+ }
3659
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3660
+ }
3661
+
3662
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3663
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3664
+
3665
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
21373666 return -EOPNOTSUPP;
21383667
21393668 return 0;
21403669 }
21413670
2142
-static inline int cmp_encap_info(struct ip_tunnel_key *a,
2143
- struct ip_tunnel_key *b)
3671
+struct encap_key {
3672
+ const struct ip_tunnel_key *ip_tun_key;
3673
+ struct mlx5e_tc_tunnel *tc_tunnel;
3674
+};
3675
+
3676
+static inline int cmp_encap_info(struct encap_key *a,
3677
+ struct encap_key *b)
21443678 {
2145
- return memcmp(a, b, sizeof(*a));
3679
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3680
+ a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
21463681 }
21473682
2148
-static inline int hash_encap_info(struct ip_tunnel_key *key)
3683
+static inline int cmp_decap_info(struct mlx5e_decap_key *a,
3684
+ struct mlx5e_decap_key *b)
21493685 {
2150
- return jhash(key, sizeof(*key), 0);
3686
+ return memcmp(&a->key, &b->key, sizeof(b->key));
21513687 }
21523688
2153
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
2154
- struct net_device *mirred_dev,
2155
- struct net_device **out_dev,
2156
- struct flowi4 *fl4,
2157
- struct neighbour **out_n,
2158
- u8 *out_ttl)
3689
+static inline int hash_encap_info(struct encap_key *key)
21593690 {
2160
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2161
- struct mlx5e_rep_priv *uplink_rpriv;
2162
- struct rtable *rt;
2163
- struct neighbour *n = NULL;
2164
-
2165
-#if IS_ENABLED(CONFIG_INET)
2166
- int ret;
2167
-
2168
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
2169
- ret = PTR_ERR_OR_ZERO(rt);
2170
- if (ret)
2171
- return ret;
2172
-#else
2173
- return -EOPNOTSUPP;
2174
-#endif
2175
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2176
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
2177
- if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
2178
- *out_dev = uplink_rpriv->netdev;
2179
- else
2180
- *out_dev = rt->dst.dev;
2181
-
2182
- if (!(*out_ttl))
2183
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
2184
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
2185
- ip_rt_put(rt);
2186
- if (!n)
2187
- return -ENOMEM;
2188
-
2189
- *out_n = n;
2190
- return 0;
3691
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3692
+ key->tc_tunnel->tunnel_type);
21913693 }
21923694
2193
-static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
3695
+static inline int hash_decap_info(struct mlx5e_decap_key *key)
3696
+{
3697
+ return jhash(&key->key, sizeof(key->key), 0);
3698
+}
3699
+
3700
+static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
21943701 struct net_device *peer_netdev)
21953702 {
21963703 struct mlx5e_priv *peer_priv;
....@@ -2198,423 +3705,284 @@
21983705 peer_priv = netdev_priv(peer_netdev);
21993706
22003707 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
2201
- (priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
2202
- same_hw_devs(priv, peer_priv) &&
2203
- MLX5_VPORT_MANAGER(peer_priv->mdev) &&
2204
- (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
3708
+ mlx5e_eswitch_vf_rep(priv->netdev) &&
3709
+ mlx5e_eswitch_vf_rep(peer_netdev) &&
3710
+ same_hw_devs(priv, peer_priv));
22053711 }
22063712
2207
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
2208
- struct net_device *mirred_dev,
2209
- struct net_device **out_dev,
2210
- struct flowi6 *fl6,
2211
- struct neighbour **out_n,
2212
- u8 *out_ttl)
3713
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
22133714 {
2214
- struct neighbour *n = NULL;
2215
- struct dst_entry *dst;
2216
-
2217
-#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
2218
- struct mlx5e_rep_priv *uplink_rpriv;
2219
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2220
-
2221
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
2222
- NULL);
2223
- if (IS_ERR(dst))
2224
- return PTR_ERR(dst);
2225
-
2226
- if (!(*out_ttl))
2227
- *out_ttl = ip6_dst_hoplimit(dst);
2228
-
2229
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2230
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
2231
- if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
2232
- *out_dev = uplink_rpriv->netdev;
2233
- else
2234
- *out_dev = dst->dev;
2235
-#else
2236
- return -EOPNOTSUPP;
2237
-#endif
2238
-
2239
- n = dst_neigh_lookup(dst, &fl6->daddr);
2240
- dst_release(dst);
2241
- if (!n)
2242
- return -ENOMEM;
2243
-
2244
- *out_n = n;
2245
- return 0;
3715
+ return refcount_inc_not_zero(&e->refcnt);
22463716 }
22473717
2248
-static void gen_vxlan_header_ipv4(struct net_device *out_dev,
2249
- char buf[], int encap_size,
2250
- unsigned char h_dest[ETH_ALEN],
2251
- u8 tos, u8 ttl,
2252
- __be32 daddr,
2253
- __be32 saddr,
2254
- __be16 udp_dst_port,
2255
- __be32 vx_vni)
3718
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
22563719 {
2257
- struct ethhdr *eth = (struct ethhdr *)buf;
2258
- struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
2259
- struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
2260
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
2261
-
2262
- memset(buf, 0, encap_size);
2263
-
2264
- ether_addr_copy(eth->h_dest, h_dest);
2265
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
2266
- eth->h_proto = htons(ETH_P_IP);
2267
-
2268
- ip->daddr = daddr;
2269
- ip->saddr = saddr;
2270
-
2271
- ip->tos = tos;
2272
- ip->ttl = ttl;
2273
- ip->protocol = IPPROTO_UDP;
2274
- ip->version = 0x4;
2275
- ip->ihl = 0x5;
2276
-
2277
- udp->dest = udp_dst_port;
2278
- vxh->vx_flags = VXLAN_HF_VNI;
2279
- vxh->vx_vni = vxlan_vni_field(vx_vni);
3720
+ return refcount_inc_not_zero(&e->refcnt);
22803721 }
22813722
2282
-static void gen_vxlan_header_ipv6(struct net_device *out_dev,
2283
- char buf[], int encap_size,
2284
- unsigned char h_dest[ETH_ALEN],
2285
- u8 tos, u8 ttl,
2286
- struct in6_addr *daddr,
2287
- struct in6_addr *saddr,
2288
- __be16 udp_dst_port,
2289
- __be32 vx_vni)
2290
-{
2291
- struct ethhdr *eth = (struct ethhdr *)buf;
2292
- struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
2293
- struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
2294
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
2295
-
2296
- memset(buf, 0, encap_size);
2297
-
2298
- ether_addr_copy(eth->h_dest, h_dest);
2299
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
2300
- eth->h_proto = htons(ETH_P_IPV6);
2301
-
2302
- ip6_flow_hdr(ip6h, tos, 0);
2303
- /* the HW fills up ipv6 payload len */
2304
- ip6h->nexthdr = IPPROTO_UDP;
2305
- ip6h->hop_limit = ttl;
2306
- ip6h->daddr = *daddr;
2307
- ip6h->saddr = *saddr;
2308
-
2309
- udp->dest = udp_dst_port;
2310
- vxh->vx_flags = VXLAN_HF_VNI;
2311
- vxh->vx_vni = vxlan_vni_field(vx_vni);
2312
-}
2313
-
2314
-static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
2315
- struct net_device *mirred_dev,
2316
- struct mlx5e_encap_entry *e)
2317
-{
2318
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
2319
- int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
2320
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
2321
- struct net_device *out_dev;
2322
- struct neighbour *n = NULL;
2323
- struct flowi4 fl4 = {};
2324
- u8 nud_state, tos, ttl;
2325
- char *encap_header;
2326
- int err;
2327
-
2328
- if (max_encap_size < ipv4_encap_size) {
2329
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
2330
- ipv4_encap_size, max_encap_size);
2331
- return -EOPNOTSUPP;
2332
- }
2333
-
2334
- encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
2335
- if (!encap_header)
2336
- return -ENOMEM;
2337
-
2338
- switch (e->tunnel_type) {
2339
- case MLX5_HEADER_TYPE_VXLAN:
2340
- fl4.flowi4_proto = IPPROTO_UDP;
2341
- fl4.fl4_dport = tun_key->tp_dst;
2342
- break;
2343
- default:
2344
- err = -EOPNOTSUPP;
2345
- goto free_encap;
2346
- }
2347
-
2348
- tos = tun_key->tos;
2349
- ttl = tun_key->ttl;
2350
-
2351
- fl4.flowi4_tos = tun_key->tos;
2352
- fl4.daddr = tun_key->u.ipv4.dst;
2353
- fl4.saddr = tun_key->u.ipv4.src;
2354
-
2355
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
2356
- &fl4, &n, &ttl);
2357
- if (err)
2358
- goto free_encap;
2359
-
2360
- /* used by mlx5e_detach_encap to lookup a neigh hash table
2361
- * entry in the neigh hash table when a user deletes a rule
2362
- */
2363
- e->m_neigh.dev = n->dev;
2364
- e->m_neigh.family = n->ops->family;
2365
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
2366
- e->out_dev = out_dev;
2367
-
2368
- /* It's importent to add the neigh to the hash table before checking
2369
- * the neigh validity state. So if we'll get a notification, in case the
2370
- * neigh changes it's validity state, we would find the relevant neigh
2371
- * in the hash.
2372
- */
2373
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
2374
- if (err)
2375
- goto free_encap;
2376
-
2377
- read_lock_bh(&n->lock);
2378
- nud_state = n->nud_state;
2379
- ether_addr_copy(e->h_dest, n->ha);
2380
- read_unlock_bh(&n->lock);
2381
-
2382
- switch (e->tunnel_type) {
2383
- case MLX5_HEADER_TYPE_VXLAN:
2384
- gen_vxlan_header_ipv4(out_dev, encap_header,
2385
- ipv4_encap_size, e->h_dest, tos, ttl,
2386
- fl4.daddr,
2387
- fl4.saddr, tun_key->tp_dst,
2388
- tunnel_id_to_key32(tun_key->tun_id));
2389
- break;
2390
- default:
2391
- err = -EOPNOTSUPP;
2392
- goto destroy_neigh_entry;
2393
- }
2394
- e->encap_size = ipv4_encap_size;
2395
- e->encap_header = encap_header;
2396
-
2397
- if (!(nud_state & NUD_VALID)) {
2398
- neigh_event_send(n, NULL);
2399
- err = -EAGAIN;
2400
- goto out;
2401
- }
2402
-
2403
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
2404
- ipv4_encap_size, encap_header, &e->encap_id);
2405
- if (err)
2406
- goto destroy_neigh_entry;
2407
-
2408
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
2409
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
2410
- neigh_release(n);
2411
- return err;
2412
-
2413
-destroy_neigh_entry:
2414
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
2415
-free_encap:
2416
- kfree(encap_header);
2417
-out:
2418
- if (n)
2419
- neigh_release(n);
2420
- return err;
2421
-}
2422
-
2423
-static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
2424
- struct net_device *mirred_dev,
2425
- struct mlx5e_encap_entry *e)
2426
-{
2427
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
2428
- int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
2429
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
2430
- struct net_device *out_dev = NULL;
2431
- struct neighbour *n = NULL;
2432
- struct flowi6 fl6 = {};
2433
- u8 nud_state, tos, ttl;
2434
- char *encap_header;
2435
- int err;
2436
-
2437
- if (max_encap_size < ipv6_encap_size) {
2438
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
2439
- ipv6_encap_size, max_encap_size);
2440
- return -EOPNOTSUPP;
2441
- }
2442
-
2443
- encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
2444
- if (!encap_header)
2445
- return -ENOMEM;
2446
-
2447
- switch (e->tunnel_type) {
2448
- case MLX5_HEADER_TYPE_VXLAN:
2449
- fl6.flowi6_proto = IPPROTO_UDP;
2450
- fl6.fl6_dport = tun_key->tp_dst;
2451
- break;
2452
- default:
2453
- err = -EOPNOTSUPP;
2454
- goto free_encap;
2455
- }
2456
-
2457
- tos = tun_key->tos;
2458
- ttl = tun_key->ttl;
2459
-
2460
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
2461
- fl6.daddr = tun_key->u.ipv6.dst;
2462
- fl6.saddr = tun_key->u.ipv6.src;
2463
-
2464
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
2465
- &fl6, &n, &ttl);
2466
- if (err)
2467
- goto free_encap;
2468
-
2469
- /* used by mlx5e_detach_encap to lookup a neigh hash table
2470
- * entry in the neigh hash table when a user deletes a rule
2471
- */
2472
- e->m_neigh.dev = n->dev;
2473
- e->m_neigh.family = n->ops->family;
2474
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
2475
- e->out_dev = out_dev;
2476
-
2477
- /* It's importent to add the neigh to the hash table before checking
2478
- * the neigh validity state. So if we'll get a notification, in case the
2479
- * neigh changes it's validity state, we would find the relevant neigh
2480
- * in the hash.
2481
- */
2482
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
2483
- if (err)
2484
- goto free_encap;
2485
-
2486
- read_lock_bh(&n->lock);
2487
- nud_state = n->nud_state;
2488
- ether_addr_copy(e->h_dest, n->ha);
2489
- read_unlock_bh(&n->lock);
2490
-
2491
- switch (e->tunnel_type) {
2492
- case MLX5_HEADER_TYPE_VXLAN:
2493
- gen_vxlan_header_ipv6(out_dev, encap_header,
2494
- ipv6_encap_size, e->h_dest, tos, ttl,
2495
- &fl6.daddr,
2496
- &fl6.saddr, tun_key->tp_dst,
2497
- tunnel_id_to_key32(tun_key->tun_id));
2498
- break;
2499
- default:
2500
- err = -EOPNOTSUPP;
2501
- goto destroy_neigh_entry;
2502
- }
2503
-
2504
- e->encap_size = ipv6_encap_size;
2505
- e->encap_header = encap_header;
2506
-
2507
- if (!(nud_state & NUD_VALID)) {
2508
- neigh_event_send(n, NULL);
2509
- err = -EAGAIN;
2510
- goto out;
2511
- }
2512
-
2513
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
2514
- ipv6_encap_size, encap_header, &e->encap_id);
2515
- if (err)
2516
- goto destroy_neigh_entry;
2517
-
2518
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
2519
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
2520
- neigh_release(n);
2521
- return err;
2522
-
2523
-destroy_neigh_entry:
2524
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
2525
-free_encap:
2526
- kfree(encap_header);
2527
-out:
2528
- if (n)
2529
- neigh_release(n);
2530
- return err;
2531
-}
2532
-
2533
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
2534
- struct ip_tunnel_info *tun_info,
2535
- struct net_device *mirred_dev,
2536
- struct net_device **encap_dev,
2537
- struct mlx5e_tc_flow *flow)
3723
+static struct mlx5e_encap_entry *
3724
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3725
+ uintptr_t hash_key)
25383726 {
25393727 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2540
- unsigned short family = ip_tunnel_info_af(tun_info);
2541
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
2542
- struct ip_tunnel_key *key = &tun_info->key;
25433728 struct mlx5e_encap_entry *e;
2544
- int tunnel_type, err = 0;
2545
- uintptr_t hash_key;
2546
- bool found = false;
2547
-
2548
- /* udp dst port must be set */
2549
- if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
2550
- goto vxlan_encap_offload_err;
2551
-
2552
- /* setting udp src port isn't supported */
2553
- if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
2554
-vxlan_encap_offload_err:
2555
- netdev_warn(priv->netdev,
2556
- "must set udp dst port and not set udp src port\n");
2557
- return -EOPNOTSUPP;
2558
- }
2559
-
2560
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
2561
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
2562
- tunnel_type = MLX5_HEADER_TYPE_VXLAN;
2563
- } else {
2564
- netdev_warn(priv->netdev,
2565
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
2566
- return -EOPNOTSUPP;
2567
- }
2568
-
2569
- hash_key = hash_encap_info(key);
3729
+ struct encap_key e_key;
25703730
25713731 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
25723732 encap_hlist, hash_key) {
2573
- if (!cmp_encap_info(&e->tun_info.key, key)) {
2574
- found = true;
2575
- break;
2576
- }
3733
+ e_key.ip_tun_key = &e->tun_info->key;
3734
+ e_key.tc_tunnel = e->tunnel;
3735
+ if (!cmp_encap_info(&e_key, key) &&
3736
+ mlx5e_encap_take(e))
3737
+ return e;
25773738 }
25783739
3740
+ return NULL;
3741
+}
3742
+
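
mlx5e_encap_get() above lets many flows share one encapsulation entry: entries are hashed by the tunnel key plus tunnel type (hash_encap_info()), and a hit is only reused if mlx5e_encap_take() can still take a reference, i.e. the refcount has not already dropped to zero on its way to being freed. A condensed user-space analogue with a hypothetical key type, a linear table instead of the driver's hash table, and a plain counter standing in for refcount_inc_not_zero() under encap_tbl_lock:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct key   { uint32_t dst_ip; uint16_t dport; uint16_t tunnel_type; };
struct entry { struct key key; unsigned int refcnt; };

/* Only reuse an entry that still has users; refcnt == 0 means it is
 * already being torn down.
 */
static bool take(struct entry *e)
{
        if (!e->refcnt)
                return false;
        e->refcnt++;
        return true;
}

static struct entry *get(struct entry *tbl, int n, const struct key *k)
{
        for (int i = 0; i < n; i++)
                if (!memcmp(&tbl[i].key, k, sizeof(*k)) && take(&tbl[i]))
                        return &tbl[i];
        return NULL;
}

int main(void)
{
        struct entry tbl[] = {
                { { 0x0a000001, 4789, 1 }, 1 },   /* live VXLAN-like entry */
                { { 0x0a000002, 4789, 1 }, 0 },   /* entry being freed     */
        };
        struct key k1 = { 0x0a000001, 4789, 1 };
        struct key k2 = { 0x0a000002, 4789, 1 };

        printf("%s\n", get(tbl, 2, &k1) ? "reuse" : "new"); /* reuse */
        printf("%s\n", get(tbl, 2, &k2) ? "reuse" : "new"); /* new   */
        return 0;
}
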
3743
+static struct mlx5e_decap_entry *
3744
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
3745
+ uintptr_t hash_key)
3746
+{
3747
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3748
+ struct mlx5e_decap_key r_key;
3749
+ struct mlx5e_decap_entry *e;
3750
+
3751
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
3752
+ hlist, hash_key) {
3753
+ r_key = e->key;
3754
+ if (!cmp_decap_info(&r_key, key) &&
3755
+ mlx5e_decap_take(e))
3756
+ return e;
3757
+ }
3758
+ return NULL;
3759
+}
3760
+
3761
+static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3762
+{
3763
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3764
+
3765
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
3766
+}
3767
+
3768
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3769
+ struct mlx5e_tc_flow *flow,
3770
+ int out_index,
3771
+ struct mlx5e_encap_entry *e,
3772
+ struct netlink_ext_ack *extack)
3773
+{
3774
+ int i;
3775
+
3776
+ for (i = 0; i < out_index; i++) {
3777
+ if (flow->encaps[i].e != e)
3778
+ continue;
3779
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3780
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
3781
+ return true;
3782
+ }
3783
+
3784
+ return false;
3785
+}
3786
+
3787
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3788
+ struct mlx5e_tc_flow *flow,
3789
+ struct net_device *mirred_dev,
3790
+ int out_index,
3791
+ struct netlink_ext_ack *extack,
3792
+ struct net_device **encap_dev,
3793
+ bool *encap_valid)
3794
+{
3795
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3796
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
3797
+ struct mlx5_flow_attr *attr = flow->attr;
3798
+ const struct ip_tunnel_info *tun_info;
3799
+ struct encap_key key;
3800
+ struct mlx5e_encap_entry *e;
3801
+ unsigned short family;
3802
+ uintptr_t hash_key;
3803
+ int err = 0;
3804
+
3805
+ parse_attr = attr->parse_attr;
3806
+ tun_info = parse_attr->tun_info[out_index];
3807
+ family = ip_tunnel_info_af(tun_info);
3808
+ key.ip_tun_key = &tun_info->key;
3809
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3810
+ if (!key.tc_tunnel) {
3811
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3812
+ return -EOPNOTSUPP;
3813
+ }
3814
+
3815
+ hash_key = hash_encap_info(&key);
3816
+
3817
+ mutex_lock(&esw->offloads.encap_tbl_lock);
3818
+ e = mlx5e_encap_get(priv, &key, hash_key);
3819
+
25793820 /* must verify if encap is valid or not */
2580
- if (found)
3821
+ if (e) {
3822
+ /* Check that entry was not already attached to this flow */
3823
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3824
+ err = -EOPNOTSUPP;
3825
+ goto out_err;
3826
+ }
3827
+
3828
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
3829
+ wait_for_completion(&e->res_ready);
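+ /* The entry owner completes res_ready once the encap headers have
+ * been created; a negative compl_result means creation failed.
+ */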
3830
+
3831
+ /* Protect against concurrent neigh update. */
3832
+ mutex_lock(&esw->offloads.encap_tbl_lock);
3833
+ if (e->compl_result < 0) {
3834
+ err = -EREMOTEIO;
3835
+ goto out_err;
3836
+ }
25813837 goto attach_flow;
3838
+ }
25823839
25833840 e = kzalloc(sizeof(*e), GFP_KERNEL);
2584
- if (!e)
2585
- return -ENOMEM;
3841
+ if (!e) {
3842
+ err = -ENOMEM;
3843
+ goto out_err;
3844
+ }
25863845
2587
- e->tun_info = *tun_info;
2588
- e->tunnel_type = tunnel_type;
3846
+ refcount_set(&e->refcnt, 1);
3847
+ init_completion(&e->res_ready);
3848
+
3849
+ tun_info = dup_tun_info(tun_info);
3850
+ if (!tun_info) {
3851
+ err = -ENOMEM;
3852
+ goto out_err_init;
3853
+ }
3854
+ e->tun_info = tun_info;
3855
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3856
+ if (err)
3857
+ goto out_err_init;
3858
+
25893859 INIT_LIST_HEAD(&e->flows);
3860
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3861
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
25903862
25913863 if (family == AF_INET)
2592
- err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
3864
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
25933865 else if (family == AF_INET6)
2594
- err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
3866
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
25953867
2596
- if (err && err != -EAGAIN)
3868
+ /* Protect against concurrent neigh update. */
3869
+ mutex_lock(&esw->offloads.encap_tbl_lock);
3870
+ complete_all(&e->res_ready);
3871
+ if (err) {
3872
+ e->compl_result = err;
25973873 goto out_err;
2598
-
2599
- hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3874
+ }
3875
+ e->compl_result = 1;
26003876
26013877 attach_flow:
2602
- list_add(&flow->encap, &e->flows);
3878
+ flow->encaps[out_index].e = e;
3879
+ list_add(&flow->encaps[out_index].list, &e->flows);
3880
+ flow->encaps[out_index].index = out_index;
26033881 *encap_dev = e->out_dev;
2604
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
2605
- attr->encap_id = e->encap_id;
2606
- else
2607
- err = -EAGAIN;
3882
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3883
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3884
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3885
+ *encap_valid = true;
3886
+ } else {
3887
+ *encap_valid = false;
3888
+ }
3889
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
26083890
26093891 return err;
26103892
26113893 out_err:
3894
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
3895
+ if (e)
3896
+ mlx5e_encap_put(priv, e);
3897
+ return err;
3898
+
3899
+out_err_init:
3900
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
3901
+ kfree(tun_info);
26123902 kfree(e);
26133903 return err;
26143904 }
26153905
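+/* Allocate (or reuse) an L3-to-L2 pkt_reformat that rebuilds the Ethernet
+ * header stored in parse_attr->eth. Entries are hashed in
+ * esw->offloads.decap_tbl and shared between flows by refcount.
+ */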
3906
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
3907
+ struct mlx5e_tc_flow *flow,
3908
+ struct netlink_ext_ack *extack)
3909
+{
3910
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3911
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3912
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
3913
+ struct mlx5e_decap_entry *d;
3914
+ struct mlx5e_decap_key key;
3915
+ uintptr_t hash_key;
3916
+ int err = 0;
3917
+
3918
+ parse_attr = flow->attr->parse_attr;
3919
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
3920
+ NL_SET_ERR_MSG_MOD(extack,
3921
+ "encap header larger than max supported");
3922
+ return -EOPNOTSUPP;
3923
+ }
3924
+
3925
+ key.key = parse_attr->eth;
3926
+ hash_key = hash_decap_info(&key);
3927
+ mutex_lock(&esw->offloads.decap_tbl_lock);
3928
+ d = mlx5e_decap_get(priv, &key, hash_key);
3929
+ if (d) {
3930
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3931
+ wait_for_completion(&d->res_ready);
3932
+ mutex_lock(&esw->offloads.decap_tbl_lock);
3933
+ if (d->compl_result) {
3934
+ err = -EREMOTEIO;
3935
+ goto out_free;
3936
+ }
3937
+ goto found;
3938
+ }
3939
+
3940
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
3941
+ if (!d) {
3942
+ err = -ENOMEM;
3943
+ goto out_err;
3944
+ }
3945
+
3946
+ d->key = key;
3947
+ refcount_set(&d->refcnt, 1);
3948
+ init_completion(&d->res_ready);
3949
+ INIT_LIST_HEAD(&d->flows);
3950
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
3951
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3952
+
3953
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
3954
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
3955
+ sizeof(parse_attr->eth),
3956
+ &parse_attr->eth,
3957
+ MLX5_FLOW_NAMESPACE_FDB);
3958
+ if (IS_ERR(d->pkt_reformat)) {
3959
+ err = PTR_ERR(d->pkt_reformat);
3960
+ d->compl_result = err;
3961
+ }
3962
+ mutex_lock(&esw->offloads.decap_tbl_lock);
3963
+ complete_all(&d->res_ready);
3964
+ if (err)
3965
+ goto out_free;
3966
+
3967
+found:
3968
+ flow->decap_reformat = d;
3969
+ attr->decap_pkt_reformat = d->pkt_reformat;
3970
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
3971
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3972
+ return 0;
3973
+
3974
+out_free:
3975
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3976
+ mlx5e_decap_put(priv, d);
3977
+ return err;
3978
+
3979
+out_err:
3980
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3981
+ return err;
3982
+}
3983
+
26163984 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
2617
- const struct tc_action *a,
3985
+ const struct flow_action_entry *act,
26183986 struct mlx5_esw_flow_attr *attr,
26193987 u32 *action)
26203988 {
....@@ -2623,7 +3991,8 @@
26233991 if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
26243992 return -EOPNOTSUPP;
26253993
2626
- if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
3994
+ switch (act->id) {
3995
+ case FLOW_ACTION_VLAN_POP:
26273996 if (vlan_idx) {
26283997 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
26293998 MLX5_FS_VLAN_DEPTH))
....@@ -2633,10 +4002,11 @@
26334002 } else {
26344003 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
26354004 }
2636
- } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
2637
- attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
2638
- attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
2639
- attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
4005
+ break;
4006
+ case FLOW_ACTION_VLAN_PUSH:
4007
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
4008
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
4009
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
26404010 if (!attr->vlan_proto[vlan_idx])
26414011 attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
26424012
....@@ -2648,14 +4018,15 @@
26484018 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
26494019 } else {
26504020 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
2651
- (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
2652
- tcf_vlan_push_prio(a)))
4021
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
4022
+ act->vlan.prio))
26534023 return -EOPNOTSUPP;
26544024
26554025 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
26564026 }
2657
- } else { /* action is TCA_VLAN_ACT_MODIFY */
2658
- return -EOPNOTSUPP;
4027
+ break;
4028
+ default:
4029
+ return -EINVAL;
26594030 }
26604031
26614032 attr->total_vlan = vlan_idx + 1;
....@@ -2663,121 +4034,516 @@
26634034 return 0;
26644035 }
26654036
2666
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
2667
- struct mlx5e_tc_flow_parse_attr *parse_attr,
2668
- struct mlx5e_tc_flow *flow)
4037
+static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
4038
+ struct net_device *out_dev)
26694039 {
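+ /* Resolve the FDB destination: a bond on top of the uplink maps to the
+ * uplink itself, and a LAG master maps to its active slave when that
+ * slave is a rep on the same switch (otherwise NULL).
+ */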
2670
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
2671
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
2672
- struct ip_tunnel_info *info = NULL;
2673
- const struct tc_action *a;
2674
- LIST_HEAD(actions);
2675
- bool encap = false;
2676
- u32 action = 0;
2677
- int err, i;
4040
+ struct net_device *fdb_out_dev = out_dev;
4041
+ struct net_device *uplink_upper;
26784042
2679
- if (!tcf_exts_has_actions(exts))
4043
+ rcu_read_lock();
4044
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
4045
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
4046
+ uplink_upper == out_dev) {
4047
+ fdb_out_dev = uplink_dev;
4048
+ } else if (netif_is_lag_master(out_dev)) {
4049
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
4050
+ if (fdb_out_dev &&
4051
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
4052
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
4053
+ fdb_out_dev = NULL;
4054
+ }
4055
+ rcu_read_unlock();
4056
+ return fdb_out_dev;
4057
+}
4058
+
4059
+static int add_vlan_push_action(struct mlx5e_priv *priv,
4060
+ struct mlx5_flow_attr *attr,
4061
+ struct net_device **out_dev,
4062
+ u32 *action)
4063
+{
4064
+ struct net_device *vlan_dev = *out_dev;
4065
+ struct flow_action_entry vlan_act = {
4066
+ .id = FLOW_ACTION_VLAN_PUSH,
4067
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
4068
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
4069
+ .vlan.prio = 0,
4070
+ };
4071
+ int err;
4072
+
4073
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4074
+ if (err)
4075
+ return err;
4076
+
4077
+ rcu_read_lock();
4078
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
4079
+ rcu_read_unlock();
4080
+ if (!*out_dev)
4081
+ return -ENODEV;
4082
+
4083
+ if (is_vlan_dev(*out_dev))
4084
+ err = add_vlan_push_action(priv, attr, out_dev, action);
4085
+
4086
+ return err;
4087
+}
4088
+
4089
+static int add_vlan_pop_action(struct mlx5e_priv *priv,
4090
+ struct mlx5_flow_attr *attr,
4091
+ u32 *action)
4092
+{
4093
+ struct flow_action_entry vlan_act = {
4094
+ .id = FLOW_ACTION_VLAN_POP,
4095
+ };
4096
+ int nest_level, err = 0;
4097
+
4098
+ nest_level = attr->parse_attr->filter_dev->lower_level -
4099
+ priv->netdev->lower_level;
4100
+ while (nest_level--) {
4101
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4102
+ if (err)
4103
+ return err;
4104
+ }
4105
+
4106
+ return err;
4107
+}
4108
+
4109
+static bool same_hw_reps(struct mlx5e_priv *priv,
4110
+ struct net_device *peer_netdev)
4111
+{
4112
+ struct mlx5e_priv *peer_priv;
4113
+
4114
+ peer_priv = netdev_priv(peer_netdev);
4115
+
4116
+ return mlx5e_eswitch_rep(priv->netdev) &&
4117
+ mlx5e_eswitch_rep(peer_netdev) &&
4118
+ same_hw_devs(priv, peer_priv);
4119
+}
4120
+
4121
+static bool is_lag_dev(struct mlx5e_priv *priv,
4122
+ struct net_device *peer_netdev)
4123
+{
4124
+ return ((mlx5_lag_is_sriov(priv->mdev) ||
4125
+ mlx5_lag_is_multipath(priv->mdev)) &&
4126
+ same_hw_reps(priv, peer_netdev));
4127
+}
4128
+
4129
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4130
+ struct net_device *out_dev)
4131
+{
4132
+ if (is_merged_eswitch_vfs(priv, out_dev))
4133
+ return true;
4134
+
4135
+ if (is_lag_dev(priv, out_dev))
4136
+ return true;
4137
+
4138
+ return mlx5e_eswitch_rep(out_dev) &&
4139
+ same_port_devs(priv, netdev_priv(out_dev));
4140
+}
4141
+
4142
+static bool is_duplicated_output_device(struct net_device *dev,
4143
+ struct net_device *out_dev,
4144
+ int *ifindexes, int if_count,
4145
+ struct netlink_ext_ack *extack)
4146
+{
4147
+ int i;
4148
+
4149
+ for (i = 0; i < if_count; i++) {
4150
+ if (ifindexes[i] == out_dev->ifindex) {
4151
+ NL_SET_ERR_MSG_MOD(extack,
4152
+ "can't duplicate output to same device");
4153
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
4154
+ out_dev->name);
4155
+ return true;
4156
+ }
4157
+ }
4158
+
4159
+ return false;
4160
+}
4161
+
4162
+static int verify_uplink_forwarding(struct mlx5e_priv *priv,
4163
+ struct mlx5e_tc_flow *flow,
4164
+ struct net_device *out_dev,
4165
+ struct netlink_ext_ack *extack)
4166
+{
4167
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4168
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4169
+ struct mlx5e_rep_priv *rep_priv;
4170
+
4171
+ /* Forwarding non encapsulated traffic between
4172
+ * uplink ports is allowed only if
4173
+ * termination_table_raw_traffic cap is set.
4174
+ *
4175
+ * The input vport was stored in attr->in_rep.
4176
+ * In LAG case, *priv* is the private data of
4177
+ * uplink, which may not be the input vport.
4178
+ */
4179
+ rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
4180
+
4181
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
4182
+ mlx5e_eswitch_uplink_rep(out_dev)))
4183
+ return 0;
4184
+
4185
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
4186
+ termination_table_raw_traffic)) {
4187
+ NL_SET_ERR_MSG_MOD(extack,
4188
+ "devices are both uplink, can't offload forwarding");
4189
+ pr_err("devices %s %s are both uplink, can't offload forwarding\n",
4190
+ priv->netdev->name, out_dev->name);
4191
+ return -EOPNOTSUPP;
4192
+ } else if (out_dev != rep_priv->netdev) {
4193
+ NL_SET_ERR_MSG_MOD(extack,
4194
+ "devices are not the same uplink, can't offload forwarding");
4195
+ pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
4196
+ priv->netdev->name, out_dev->name);
4197
+ return -EOPNOTSUPP;
4198
+ }
4199
+ return 0;
4200
+}
4201
+
4202
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
4203
+ struct flow_action *flow_action,
4204
+ struct mlx5e_tc_flow *flow,
4205
+ struct netlink_ext_ack *extack,
4206
+ struct net_device *filter_dev)
4207
+{
4208
+ struct pedit_headers_action hdrs[2] = {};
4209
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4210
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4211
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
4212
+ const struct ip_tunnel_info *info = NULL;
4213
+ struct mlx5_flow_attr *attr = flow->attr;
4214
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
4215
+ bool ft_flow = mlx5e_is_ft_flow(flow);
4216
+ const struct flow_action_entry *act;
4217
+ struct mlx5_esw_flow_attr *esw_attr;
4218
+ bool encap = false, decap = false;
4219
+ u32 action = attr->action;
4220
+ int err, i, if_count = 0;
4221
+ bool mpls_push = false;
4222
+
4223
+ if (!flow_action_has_entries(flow_action))
26804224 return -EINVAL;
26814225
2682
- attr->in_rep = rpriv->rep;
2683
- attr->in_mdev = priv->mdev;
4226
+ if (!flow_action_hw_stats_check(flow_action, extack,
4227
+ FLOW_ACTION_HW_STATS_DELAYED_BIT))
4228
+ return -EOPNOTSUPP;
26844229
2685
- tcf_exts_for_each_action(i, a, exts) {
2686
- if (is_tcf_gact_shot(a)) {
4230
+ esw_attr = attr->esw_attr;
4231
+ parse_attr = attr->parse_attr;
4232
+
4233
+ flow_action_for_each(i, act, flow_action) {
4234
+ switch (act->id) {
4235
+ case FLOW_ACTION_DROP:
26874236 action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
26884237 MLX5_FLOW_CONTEXT_ACTION_COUNT;
2689
- continue;
2690
- }
2691
-
2692
- if (is_tcf_pedit(a)) {
2693
- err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
2694
- parse_attr);
2695
- if (err)
2696
- return err;
2697
-
2698
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2699
- attr->mirror_count = attr->out_count;
2700
- continue;
2701
- }
2702
-
2703
- if (is_tcf_csum(a)) {
2704
- if (csum_offload_supported(priv, action,
2705
- tcf_csum_update_flags(a)))
2706
- continue;
2707
-
2708
- return -EOPNOTSUPP;
2709
- }
2710
-
2711
- if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) {
2712
- struct mlx5e_priv *out_priv;
2713
- struct net_device *out_dev;
2714
-
2715
- out_dev = tcf_mirred_dev(a);
2716
-
2717
- if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
2718
- pr_err("can't support more than %d output ports, can't offload forwarding\n",
2719
- attr->out_count);
4238
+ break;
4239
+ case FLOW_ACTION_TRAP:
4240
+ if (!flow_offload_has_one_action(flow_action)) {
4241
+ NL_SET_ERR_MSG_MOD(extack,
4242
+ "action trap is supported as a sole action only");
4243
+ return -EOPNOTSUPP;
4244
+ }
4245
+ action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4246
+ MLX5_FLOW_CONTEXT_ACTION_COUNT);
4247
+ attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
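+ /* The slow path attribute steers trapped packets through the slow
+ * path table, so they are delivered to software.
+ */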
4248
+ break;
4249
+ case FLOW_ACTION_MPLS_PUSH:
4250
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
4251
+ reformat_l2_to_l3_tunnel) ||
4252
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
4253
+ NL_SET_ERR_MSG_MOD(extack,
4254
+ "mpls push is supported only for mpls_uc protocol");
4255
+ return -EOPNOTSUPP;
4256
+ }
4257
+ mpls_push = true;
4258
+ break;
4259
+ case FLOW_ACTION_MPLS_POP:
4260
+ /* we only support mpls pop if it is the first action
4261
+ * and the filter net device is bareudp. Subsequent
4262
+ * actions can be pedit and the last can be mirred
4263
+ * egress redirect.
4264
+ */
4265
+ if (i) {
4266
+ NL_SET_ERR_MSG_MOD(extack,
4267
+ "mpls pop supported only as first action");
4268
+ return -EOPNOTSUPP;
4269
+ }
4270
+ if (!netif_is_bareudp(filter_dev)) {
4271
+ NL_SET_ERR_MSG_MOD(extack,
4272
+ "mpls pop supported only on bareudp devices");
27204273 return -EOPNOTSUPP;
27214274 }
27224275
2723
- if (switchdev_port_same_parent_id(priv->netdev,
2724
- out_dev) ||
2725
- is_merged_eswitch_dev(priv, out_dev)) {
2726
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2727
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
2728
- out_priv = netdev_priv(out_dev);
2729
- rpriv = out_priv->ppriv;
2730
- attr->out_rep[attr->out_count] = rpriv->rep;
2731
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
2732
- } else if (encap) {
2733
- parse_attr->mirred_ifindex = out_dev->ifindex;
2734
- parse_attr->tun_info = *info;
2735
- attr->parse_attr = parse_attr;
2736
- action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
2737
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2738
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
2739
- /* attr->out_rep is resolved when we handle encap */
2740
- } else {
2741
- pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
2742
- priv->netdev->name, out_dev->name);
4276
+ parse_attr->eth.h_proto = act->mpls_pop.proto;
4277
+ action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
4278
+ flow_flag_set(flow, L3_TO_L2_DECAP);
4279
+ break;
4280
+ case FLOW_ACTION_MANGLE:
4281
+ case FLOW_ACTION_ADD:
4282
+ err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
4283
+ parse_attr, hdrs, flow, extack);
4284
+ if (err)
4285
+ return err;
4286
+
4287
+ if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
4288
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4289
+ esw_attr->split_count = esw_attr->out_count;
4290
+ }
4291
+ break;
4292
+ case FLOW_ACTION_CSUM:
4293
+ if (csum_offload_supported(priv, action,
4294
+ act->csum_flags, extack))
4295
+ break;
4296
+
4297
+ return -EOPNOTSUPP;
4298
+ case FLOW_ACTION_REDIRECT:
4299
+ case FLOW_ACTION_MIRRED: {
4300
+ struct mlx5e_priv *out_priv;
4301
+ struct net_device *out_dev;
4302
+
4303
+ out_dev = act->dev;
4304
+ if (!out_dev) {
4305
+ /* out_dev is NULL when filters with
4306
+ * non-existing mirred device are replayed to
4307
+ * the driver.
4308
+ */
27434309 return -EINVAL;
27444310 }
2745
- continue;
2746
- }
27474311
2748
- if (is_tcf_tunnel_set(a)) {
2749
- info = tcf_tunnel_info(a);
4312
+ if (mpls_push && !netif_is_bareudp(out_dev)) {
4313
+ NL_SET_ERR_MSG_MOD(extack,
4314
+ "mpls is supported only through a bareudp device");
4315
+ return -EOPNOTSUPP;
4316
+ }
4317
+
4318
+ if (ft_flow && out_dev == priv->netdev) {
4319
+ /* Ignore forward to self rules generated
4320
+ * by adding both mlx5 devs to the flow table
4321
+ * block on a normal nft offload setup.
4322
+ */
4323
+ return -EOPNOTSUPP;
4324
+ }
4325
+
4326
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
4327
+ NL_SET_ERR_MSG_MOD(extack,
4328
+ "can't support more output ports, can't offload forwarding");
4329
+ netdev_warn(priv->netdev,
4330
+ "can't support more than %d output ports, can't offload forwarding\n",
4331
+ esw_attr->out_count);
4332
+ return -EOPNOTSUPP;
4333
+ }
4334
+
4335
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4336
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
4337
+ if (encap) {
4338
+ parse_attr->mirred_ifindex[esw_attr->out_count] =
4339
+ out_dev->ifindex;
4340
+ parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
4341
+ if (!parse_attr->tun_info[esw_attr->out_count])
4342
+ return -ENOMEM;
4343
+ encap = false;
4344
+ esw_attr->dests[esw_attr->out_count].flags |=
4345
+ MLX5_ESW_DEST_ENCAP;
4346
+ esw_attr->out_count++;
4347
+ /* attr->dests[].rep is resolved when we
4348
+ * handle encap
4349
+ */
4350
+ } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4351
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4352
+ struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4353
+
4354
+ if (is_duplicated_output_device(priv->netdev,
4355
+ out_dev,
4356
+ ifindexes,
4357
+ if_count,
4358
+ extack))
4359
+ return -EOPNOTSUPP;
4360
+
4361
+ ifindexes[if_count] = out_dev->ifindex;
4362
+ if_count++;
4363
+
4364
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4365
+ if (!out_dev)
4366
+ return -ENODEV;
4367
+
4368
+ if (is_vlan_dev(out_dev)) {
4369
+ err = add_vlan_push_action(priv, attr,
4370
+ &out_dev,
4371
+ &action);
4372
+ if (err)
4373
+ return err;
4374
+ }
4375
+
4376
+ if (is_vlan_dev(parse_attr->filter_dev)) {
4377
+ err = add_vlan_pop_action(priv, attr,
4378
+ &action);
4379
+ if (err)
4380
+ return err;
4381
+ }
4382
+
4383
+ err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4384
+ if (err)
4385
+ return err;
4386
+
4387
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4388
+ NL_SET_ERR_MSG_MOD(extack,
4389
+ "devices are not on same switch HW, can't offload forwarding");
4390
+ return -EOPNOTSUPP;
4391
+ }
4392
+
4393
+ out_priv = netdev_priv(out_dev);
4394
+ rpriv = out_priv->ppriv;
4395
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
4396
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
4397
+ esw_attr->out_count++;
4398
+ } else if (parse_attr->filter_dev != priv->netdev) {
4399
+ /* All mlx5 devices are called to configure
4400
+ * high level device filters. Therefore, the
4401
+ * *attempt* to install a filter on an invalid
4402
+ * eswitch should not trigger an explicit error
4403
+ */
4404
+ return -EINVAL;
4405
+ } else {
4406
+ NL_SET_ERR_MSG_MOD(extack,
4407
+ "devices are not on same switch HW, can't offload forwarding");
4408
+ netdev_warn(priv->netdev,
4409
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
4410
+ priv->netdev->name,
4411
+ out_dev->name);
4412
+ return -EINVAL;
4413
+ }
4414
+ }
4415
+ break;
4416
+ case FLOW_ACTION_TUNNEL_ENCAP:
4417
+ info = act->tunnel;
27504418 if (info)
27514419 encap = true;
27524420 else
27534421 return -EOPNOTSUPP;
2754
- attr->mirror_count = attr->out_count;
2755
- continue;
2756
- }
27574422
2758
- if (is_tcf_vlan(a)) {
2759
- err = parse_tc_vlan_action(priv, a, attr, &action);
2760
-
4423
+ break;
4424
+ case FLOW_ACTION_VLAN_PUSH:
4425
+ case FLOW_ACTION_VLAN_POP:
4426
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
4427
+ (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4428
+ /* Replace vlan pop+push with vlan modify */
4429
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4430
+ err = add_vlan_rewrite_action(priv,
4431
+ MLX5_FLOW_NAMESPACE_FDB,
4432
+ act, parse_attr, hdrs,
4433
+ &action, extack);
4434
+ } else {
4435
+ err = parse_tc_vlan_action(priv, act, esw_attr, &action);
4436
+ }
27614437 if (err)
27624438 return err;
27634439
2764
- attr->mirror_count = attr->out_count;
2765
- continue;
2766
- }
4440
+ esw_attr->split_count = esw_attr->out_count;
4441
+ break;
4442
+ case FLOW_ACTION_VLAN_MANGLE:
4443
+ err = add_vlan_rewrite_action(priv,
4444
+ MLX5_FLOW_NAMESPACE_FDB,
4445
+ act, parse_attr, hdrs,
4446
+ &action, extack);
4447
+ if (err)
4448
+ return err;
27674449
2768
- if (is_tcf_tunnel_release(a)) {
2769
- action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2770
- continue;
2771
- }
4450
+ esw_attr->split_count = esw_attr->out_count;
4451
+ break;
4452
+ case FLOW_ACTION_TUNNEL_DECAP:
4453
+ decap = true;
4454
+ break;
4455
+ case FLOW_ACTION_GOTO:
4456
+ err = validate_goto_chain(priv, flow, act, action,
4457
+ extack);
4458
+ if (err)
4459
+ return err;
27724460
2773
- return -EINVAL;
4461
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4462
+ attr->dest_chain = act->chain_index;
4463
+ break;
4464
+ case FLOW_ACTION_CT:
4465
+ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
4466
+ if (err)
4467
+ return err;
4468
+
4469
+ flow_flag_set(flow, CT);
4470
+ break;
4471
+ default:
4472
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4473
+ return -EOPNOTSUPP;
4474
+ }
4475
+ }
4476
+
4477
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4478
+ action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4479
+ /* For prio tag mode, replace the vlan pop with a vlan prio
4480
+ * tag rewrite.
4481
+ */
4482
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4483
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4484
+ &action, extack);
4485
+ if (err)
4486
+ return err;
4487
+ }
4488
+
4489
+ if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4490
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4491
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4492
+ parse_attr, hdrs, &action, extack);
4493
+ if (err)
4494
+ return err;
4495
+ /* in case all pedit actions are skipped, remove the MOD_HDR
4496
+ * flag. we might have set split_count either by pedit or
4497
+ * pop/push. if there is no pop/push either, reset it too.
4498
+ */
4499
+ if (parse_attr->mod_hdr_acts.num_actions == 0) {
4500
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4501
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4502
+ if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4503
+ (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4504
+ esw_attr->split_count = 0;
4505
+ }
27744506 }
27754507
27764508 attr->action = action;
2777
- if (!actions_match_supported(priv, exts, parse_attr, flow))
4509
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
27784510 return -EOPNOTSUPP;
27794511
2780
- if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4512
+ if (attr->dest_chain) {
4513
+ if (decap) {
4514
+ /* It can be supported if we'll create a mapping for
4515
+ * the tunnel device only (without tunnel), and set
4516
+ * this tunnel id with this decap flow.
4517
+ *
4518
+ * On restore (miss), we'll just set this saved tunnel
4519
+ * device.
4520
+ */
4521
+
4522
+ NL_SET_ERR_MSG(extack,
4523
+ "Decap with goto isn't supported");
4524
+ netdev_warn(priv->netdev,
4525
+ "Decap with goto isn't supported");
4526
+ return -EOPNOTSUPP;
4527
+ }
4528
+
4529
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
4530
+ NL_SET_ERR_MSG_MOD(extack,
4531
+ "Mirroring goto chain rules isn't supported");
4532
+ return -EOPNOTSUPP;
4533
+ }
4534
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4535
+ }
4536
+
4537
+ if (!(attr->action &
4538
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4539
+ NL_SET_ERR_MSG_MOD(extack,
4540
+ "Rule must have at least one forward/drop action");
4541
+ return -EOPNOTSUPP;
4542
+ }
4543
+
4544
+ if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4545
+ NL_SET_ERR_MSG_MOD(extack,
4546
+ "current firmware doesn't support split rule for port mirroring");
27814547 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
27824548 return -EOPNOTSUPP;
27834549 }
....@@ -2785,14 +4551,21 @@
27854551 return 0;
27864552 }
27874553
2788
-static void get_flags(int flags, u8 *flow_flags)
4554
+static void get_flags(int flags, unsigned long *flow_flags)
27894555 {
2790
- u8 __flow_flags = 0;
4556
+ unsigned long __flow_flags = 0;
27914557
2792
- if (flags & MLX5E_TC_INGRESS)
2793
- __flow_flags |= MLX5E_TC_FLOW_INGRESS;
2794
- if (flags & MLX5E_TC_EGRESS)
2795
- __flow_flags |= MLX5E_TC_FLOW_EGRESS;
4558
+ if (flags & MLX5_TC_FLAG(INGRESS))
4559
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4560
+ if (flags & MLX5_TC_FLAG(EGRESS))
4561
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4562
+
4563
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4564
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4565
+ if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4566
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4567
+ if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4568
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
27964569
27974570 *flow_flags = __flow_flags;
27984571 }
....@@ -2804,161 +4577,658 @@
28044577 .automatic_shrinking = true,
28054578 };
28064579
2807
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
4580
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4581
+ unsigned long flags)
28084582 {
28094583 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
28104584 struct mlx5e_rep_priv *uplink_rpriv;
28114585
2812
- if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
4586
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
28134587 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2814
- return &uplink_rpriv->tc_ht;
2815
- } else
4588
+ return &uplink_rpriv->uplink_priv.tc_ht;
4589
+ } else /* NIC offload */
28164590 return &priv->fs.tc.ht;
28174591 }
28184592
2819
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
2820
- struct tc_cls_flower_offload *f, int flags)
4593
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
28214594 {
2822
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2823
- struct mlx5e_tc_flow_parse_attr *parse_attr;
2824
- struct rhashtable *tc_ht = get_tc_ht(priv);
2825
- struct mlx5e_tc_flow *flow;
2826
- int attr_size, err = 0;
2827
- u8 flow_flags = 0;
4595
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4596
+ struct mlx5_flow_attr *attr = flow->attr;
4597
+ bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4598
+ flow_flag_test(flow, INGRESS);
4599
+ bool act_is_encap = !!(attr->action &
4600
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4601
+ bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4602
+ MLX5_DEVCOM_ESW_OFFLOADS);
28284603
2829
- get_flags(flags, &flow_flags);
4604
+ if (!esw_paired)
4605
+ return false;
28304606
2831
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
2832
- if (flow) {
2833
- netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
2834
- return 0;
2835
- }
2836
-
2837
- if (esw && esw->mode == SRIOV_OFFLOADS) {
2838
- flow_flags |= MLX5E_TC_FLOW_ESWITCH;
2839
- attr_size = sizeof(struct mlx5_esw_flow_attr);
2840
- } else {
2841
- flow_flags |= MLX5E_TC_FLOW_NIC;
2842
- attr_size = sizeof(struct mlx5_nic_flow_attr);
2843
- }
2844
-
2845
- flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
2846
- parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
2847
- if (!parse_attr || !flow) {
2848
- err = -ENOMEM;
2849
- goto err_free;
2850
- }
2851
-
2852
- flow->cookie = f->cookie;
2853
- flow->flags = flow_flags;
2854
- flow->priv = priv;
2855
-
2856
- err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
2857
- if (err < 0)
2858
- goto err_free;
2859
-
2860
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
2861
- err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
2862
- if (err < 0)
2863
- goto err_free;
2864
- flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
2865
- } else {
2866
- err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
2867
- if (err < 0)
2868
- goto err_free;
2869
- flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
2870
- }
2871
-
2872
- if (IS_ERR(flow->rule[0])) {
2873
- err = PTR_ERR(flow->rule[0]);
2874
- if (err != -EAGAIN)
2875
- goto err_free;
2876
- }
2877
-
2878
- if (err != -EAGAIN)
2879
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
2880
-
2881
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
2882
- !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
2883
- kvfree(parse_attr);
2884
-
2885
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
2886
- if (err) {
2887
- mlx5e_tc_del_flow(priv, flow);
2888
- kfree(flow);
2889
- }
2890
-
2891
- return err;
2892
-
2893
-err_free:
2894
- kvfree(parse_attr);
2895
- kfree(flow);
2896
- return err;
2897
-}
2898
-
2899
-#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
2900
-#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
2901
-
2902
-static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
2903
-{
2904
- if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
4607
+ if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4608
+ mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4609
+ (is_rep_ingress || act_is_encap))
29054610 return true;
29064611
29074612 return false;
29084613 }
29094614
2910
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
2911
- struct tc_cls_flower_offload *f, int flags)
4615
+struct mlx5_flow_attr *
4616
+mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
29124617 {
2913
- struct rhashtable *tc_ht = get_tc_ht(priv);
4618
+ u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
4619
+ sizeof(struct mlx5_esw_flow_attr) :
4620
+ sizeof(struct mlx5_nic_flow_attr);
4621
+ struct mlx5_flow_attr *attr;
4622
+
4623
+ return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4624
+}
4625
+
4626
+static int
4627
+mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4628
+ struct flow_cls_offload *f, unsigned long flow_flags,
4629
+ struct mlx5e_tc_flow_parse_attr **__parse_attr,
4630
+ struct mlx5e_tc_flow **__flow)
4631
+{
4632
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4633
+ struct mlx5_flow_attr *attr;
29144634 struct mlx5e_tc_flow *flow;
4635
+ int err = -ENOMEM;
4636
+ int out_index;
29154637
2916
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
2917
- if (!flow || !same_flow_direction(flow, flags))
2918
- return -EINVAL;
4638
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4639
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4640
+ if (!parse_attr || !flow)
4641
+ goto err_free;
29194642
2920
- rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4643
+ flow->flags = flow_flags;
4644
+ flow->cookie = f->cookie;
4645
+ flow->priv = priv;
29214646
2922
- mlx5e_tc_del_flow(priv, flow);
4647
+ attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4648
+ if (!attr)
4649
+ goto err_free;
29234650
4651
+ flow->attr = attr;
4652
+
4653
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4654
+ INIT_LIST_HEAD(&flow->encaps[out_index].list);
4655
+ INIT_LIST_HEAD(&flow->hairpin);
4656
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4657
+ refcount_set(&flow->refcnt, 1);
4658
+ init_completion(&flow->init_done);
4659
+
4660
+ *__flow = flow;
4661
+ *__parse_attr = parse_attr;
4662
+
4663
+ return 0;
4664
+
4665
+err_free:
29244666 kfree(flow);
4667
+ kvfree(parse_attr);
4668
+ return err;
4669
+}
4670
+
4671
+static void
4672
+mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4673
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
4674
+ struct flow_cls_offload *f)
4675
+{
4676
+ attr->parse_attr = parse_attr;
4677
+ attr->chain = f->common.chain_index;
4678
+ attr->prio = f->common.prio;
4679
+}
4680
+
4681
+static void
4682
+mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4683
+ struct mlx5e_priv *priv,
4684
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
4685
+ struct flow_cls_offload *f,
4686
+ struct mlx5_eswitch_rep *in_rep,
4687
+ struct mlx5_core_dev *in_mdev)
4688
+{
4689
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4690
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4691
+
4692
+ mlx5e_flow_attr_init(attr, parse_attr, f);
4693
+
4694
+ esw_attr->in_rep = in_rep;
4695
+ esw_attr->in_mdev = in_mdev;
4696
+
4697
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4698
+ MLX5_COUNTER_SOURCE_ESWITCH)
4699
+ esw_attr->counter_dev = in_mdev;
4700
+ else
4701
+ esw_attr->counter_dev = priv->mdev;
4702
+}
4703
+
4704
+static struct mlx5e_tc_flow *
4705
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4706
+ struct flow_cls_offload *f,
4707
+ unsigned long flow_flags,
4708
+ struct net_device *filter_dev,
4709
+ struct mlx5_eswitch_rep *in_rep,
4710
+ struct mlx5_core_dev *in_mdev)
4711
+{
4712
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4713
+ struct netlink_ext_ack *extack = f->common.extack;
4714
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4715
+ struct mlx5e_tc_flow *flow;
4716
+ int attr_size, err;
4717
+
4718
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4719
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
4720
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4721
+ &parse_attr, &flow);
4722
+ if (err)
4723
+ goto out;
4724
+
4725
+ parse_attr->filter_dev = filter_dev;
4726
+ mlx5e_flow_esw_attr_init(flow->attr,
4727
+ priv, parse_attr,
4728
+ f, in_rep, in_mdev);
4729
+
4730
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4731
+ f, filter_dev);
4732
+ if (err)
4733
+ goto err_free;
4734
+
4735
+ /* actions validation depends on parsing the ct matches first */
4736
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4737
+ &flow->attr->ct_attr, extack);
4738
+ if (err)
4739
+ goto err_free;
4740
+
4741
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4742
+ if (err)
4743
+ goto err_free;
4744
+
4745
+ err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4746
+ complete_all(&flow->init_done);
4747
+ if (err) {
4748
+ if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4749
+ goto err_free;
4750
+
4751
+ add_unready_flow(flow);
4752
+ }
4753
+
4754
+ return flow;
4755
+
4756
+err_free:
4757
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4758
+ mlx5e_flow_put(priv, flow);
4759
+out:
4760
+ return ERR_PTR(err);
4761
+}
4762
+
4763
+static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4764
+ struct mlx5e_tc_flow *flow,
4765
+ unsigned long flow_flags)
4766
+{
4767
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
4768
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4769
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4770
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4771
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4772
+ struct mlx5e_rep_priv *peer_urpriv;
4773
+ struct mlx5e_tc_flow *peer_flow;
4774
+ struct mlx5_core_dev *in_mdev;
4775
+ int err = 0;
4776
+
4777
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4778
+ if (!peer_esw)
4779
+ return -ENODEV;
4780
+
4781
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4782
+ peer_priv = netdev_priv(peer_urpriv->netdev);
4783
+
4784
+ /* in_mdev is assigned the mdev that the packet originated from.
4785
+ * So packets redirected to uplink use the same mdev as the
4786
+ * original flow and packets redirected from uplink use the
4787
+ * peer mdev.
4788
+ */
4789
+ if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4790
+ in_mdev = peer_priv->mdev;
4791
+ else
4792
+ in_mdev = priv->mdev;
4793
+
4794
+ parse_attr = flow->attr->parse_attr;
4795
+ peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4796
+ parse_attr->filter_dev,
4797
+ attr->in_rep, in_mdev);
4798
+ if (IS_ERR(peer_flow)) {
4799
+ err = PTR_ERR(peer_flow);
4800
+ goto out;
4801
+ }
4802
+
4803
+ flow->peer_flow = peer_flow;
4804
+ flow_flag_set(flow, DUP);
4805
+ mutex_lock(&esw->offloads.peer_mutex);
4806
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4807
+ mutex_unlock(&esw->offloads.peer_mutex);
4808
+
4809
+out:
4810
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4811
+ return err;
4812
+}
4813
+
4814
+static int
4815
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4816
+ struct flow_cls_offload *f,
4817
+ unsigned long flow_flags,
4818
+ struct net_device *filter_dev,
4819
+ struct mlx5e_tc_flow **__flow)
4820
+{
4821
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
4822
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4823
+ struct mlx5_core_dev *in_mdev = priv->mdev;
4824
+ struct mlx5e_tc_flow *flow;
4825
+ int err;
4826
+
4827
+ flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4828
+ in_mdev);
4829
+ if (IS_ERR(flow))
4830
+ return PTR_ERR(flow);
4831
+
4832
+ if (is_peer_flow_needed(flow)) {
4833
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4834
+ if (err) {
4835
+ mlx5e_tc_del_fdb_flow(priv, flow);
4836
+ goto out;
4837
+ }
4838
+ }
4839
+
4840
+ *__flow = flow;
4841
+
4842
+ return 0;
4843
+
4844
+out:
4845
+ return err;
4846
+}
4847
+
4848
+static int
4849
+mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4850
+ struct flow_cls_offload *f,
4851
+ unsigned long flow_flags,
4852
+ struct net_device *filter_dev,
4853
+ struct mlx5e_tc_flow **__flow)
4854
+{
4855
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4856
+ struct netlink_ext_ack *extack = f->common.extack;
4857
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4858
+ struct mlx5e_tc_flow *flow;
4859
+ int attr_size, err;
4860
+
4861
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4862
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4863
+ return -EOPNOTSUPP;
4864
+ } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4865
+ return -EOPNOTSUPP;
4866
+ }
4867
+
4868
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4869
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
4870
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4871
+ &parse_attr, &flow);
4872
+ if (err)
4873
+ goto out;
4874
+
4875
+ parse_attr->filter_dev = filter_dev;
4876
+ mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4877
+
4878
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4879
+ f, filter_dev);
4880
+ if (err)
4881
+ goto err_free;
4882
+
4883
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4884
+ &flow->attr->ct_attr, extack);
4885
+ if (err)
4886
+ goto err_free;
4887
+
4888
+ err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4889
+ if (err)
4890
+ goto err_free;
4891
+
4892
+ err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4893
+ if (err)
4894
+ goto err_free;
4895
+
4896
+ flow_flag_set(flow, OFFLOADED);
4897
+ *__flow = flow;
4898
+
4899
+ return 0;
4900
+
4901
+err_free:
4902
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4903
+ mlx5e_flow_put(priv, flow);
4904
+out:
4905
+ return err;
4906
+}
4907
+
4908
+static int
4909
+mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4910
+ struct flow_cls_offload *f,
4911
+ unsigned long flags,
4912
+ struct net_device *filter_dev,
4913
+ struct mlx5e_tc_flow **flow)
4914
+{
4915
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4916
+ unsigned long flow_flags;
4917
+ int err;
4918
+
4919
+ get_flags(flags, &flow_flags);
4920
+
4921
+ if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4922
+ return -EOPNOTSUPP;
4923
+
4924
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4925
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4926
+ filter_dev, flow);
4927
+ else
4928
+ err = mlx5e_add_nic_flow(priv, f, flow_flags,
4929
+ filter_dev, flow);
4930
+
4931
+ return err;
4932
+}
4933
+
4934
+static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4935
+ struct mlx5e_rep_priv *rpriv)
4936
+{
4937
+ /* An offloaded flow rule is allowed to be duplicated on a non-uplink representor
4938
+ * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
4939
+ * function is called from NIC mode.
4940
+ */
4941
+ return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4942
+}
4943
+
4944
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4945
+ struct flow_cls_offload *f, unsigned long flags)
4946
+{
4947
+ struct netlink_ext_ack *extack = f->common.extack;
4948
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4949
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
4950
+ struct mlx5e_tc_flow *flow;
4951
+ int err = 0;
4952
+
4953
+ rcu_read_lock();
4954
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4955
+ if (flow) {
4956
+ /* Same flow rule offloaded to non-uplink representor sharing tc block,
4957
+ * just return 0.
4958
+ */
4959
+ if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4960
+ goto rcu_unlock;
4961
+
4962
+ NL_SET_ERR_MSG_MOD(extack,
4963
+ "flow cookie already exists, ignoring");
4964
+ netdev_warn_once(priv->netdev,
4965
+ "flow cookie %lx already exists, ignoring\n",
4966
+ f->cookie);
4967
+ err = -EEXIST;
4968
+ goto rcu_unlock;
4969
+ }
4970
+rcu_unlock:
4971
+ rcu_read_unlock();
4972
+ if (flow)
4973
+ goto out;
4974
+
4975
+ trace_mlx5e_configure_flower(f);
4976
+ err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4977
+ if (err)
4978
+ goto out;
4979
+
4980
+ /* Flow rule offloaded to non-uplink representor sharing tc block,
4981
+ * set the flow's owner dev.
4982
+ */
4983
+ if (is_flow_rule_duplicate_allowed(dev, rpriv))
4984
+ flow->orig_dev = dev;
4985
+
4986
+ err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4987
+ if (err)
4988
+ goto err_free;
4989
+
4990
+ return 0;
4991
+
4992
+err_free:
4993
+ mlx5e_flow_put(priv, flow);
4994
+out:
4995
+ return err;
4996
+}
4997
+
4998
+static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4999
+{
5000
+ bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
5001
+ bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
5002
+
5003
+ return flow_flag_test(flow, INGRESS) == dir_ingress &&
5004
+ flow_flag_test(flow, EGRESS) == dir_egress;
5005
+}
5006
+
5007
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
5008
+ struct flow_cls_offload *f, unsigned long flags)
5009
+{
5010
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5011
+ struct mlx5e_tc_flow *flow;
5012
+ int err;
5013
+
5014
+ rcu_read_lock();
5015
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
5016
+ if (!flow || !same_flow_direction(flow, flags)) {
5017
+ err = -EINVAL;
5018
+ goto errout;
5019
+ }
5020
+
5021
+ /* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_DELETED flag
5022
+ * set.
5023
+ */
5024
+ if (flow_flag_test_and_set(flow, DELETED)) {
5025
+ err = -EINVAL;
5026
+ goto errout;
5027
+ }
5028
+ rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
5029
+ rcu_read_unlock();
5030
+
5031
+ trace_mlx5e_delete_flower(f);
5032
+ mlx5e_flow_put(priv, flow);
5033
+
5034
+ return 0;
5035
+
5036
+errout:
5037
+ rcu_read_unlock();
5038
+ return err;
5039
+}
5040
+
5041
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
5042
+ struct flow_cls_offload *f, unsigned long flags)
5043
+{
5044
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
5045
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5046
+ struct mlx5_eswitch *peer_esw;
5047
+ struct mlx5e_tc_flow *flow;
5048
+ struct mlx5_fc *counter;
5049
+ u64 lastuse = 0;
5050
+ u64 packets = 0;
5051
+ u64 bytes = 0;
5052
+ int err = 0;
5053
+
5054
+ rcu_read_lock();
5055
+ flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
5056
+ tc_ht_params));
5057
+ rcu_read_unlock();
5058
+ if (IS_ERR(flow))
5059
+ return PTR_ERR(flow);
5060
+
5061
+ if (!same_flow_direction(flow, flags)) {
5062
+ err = -EINVAL;
5063
+ goto errout;
5064
+ }
5065
+
5066
+ if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
5067
+ counter = mlx5e_tc_get_counter(flow);
5068
+ if (!counter)
5069
+ goto errout;
5070
+
5071
+ mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
5072
+ }
5073
+
5074
+ /* Under multipath it's possible for one rule to be currently
5075
+ * un-offloaded while the other rule is offloaded.
5076
+ */
5077
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5078
+ if (!peer_esw)
5079
+ goto out;
5080
+
5081
+ if (flow_flag_test(flow, DUP) &&
5082
+ flow_flag_test(flow->peer_flow, OFFLOADED)) {
5083
+ u64 bytes2;
5084
+ u64 packets2;
5085
+ u64 lastuse2;
5086
+
5087
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
5088
+ if (!counter)
5089
+ goto no_peer_counter;
5090
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
5091
+
5092
+ bytes += bytes2;
5093
+ packets += packets2;
5094
+ lastuse = max_t(u64, lastuse, lastuse2);
5095
+ }
5096
+
5097
+no_peer_counter:
5098
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5099
+out:
5100
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
5101
+ FLOW_ACTION_HW_STATS_DELAYED);
5102
+ trace_mlx5e_stats_flower(f);
5103
+errout:
5104
+ mlx5e_flow_put(priv, flow);
5105
+ return err;
5106
+}
5107
+
5108
+static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
5109
+ struct netlink_ext_ack *extack)
5110
+{
5111
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
5112
+ struct mlx5_eswitch *esw;
5113
+ u32 rate_mbps = 0;
5114
+ u16 vport_num;
5115
+ int err;
5116
+
5117
+ vport_num = rpriv->rep->vport;
5118
+ if (vport_num >= MLX5_VPORT_ECPF) {
5119
+ NL_SET_ERR_MSG_MOD(extack,
5120
+ "Ingress rate limit is supported only for Eswitch ports connected to VFs");
5121
+ return -EOPNOTSUPP;
5122
+ }
5123
+
5124
+ esw = priv->mdev->priv.eswitch;
5125
+ /* rate is given in bytes/sec.
5126
+ * First convert to bits/sec and then round to the nearest mbit/secs.
5127
+ * mbit means million bits.
5128
+ * Moreover, if rate is non zero we choose to configure to a minimum of
5129
+ * 1 mbit/sec.
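+ * For example, 1250000 bytes/sec (10000000 bits/sec) is configured
+ * as 10 mbit/sec.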
5130
+ */
5131
+ if (rate) {
5132
+ rate = (rate * BITS_PER_BYTE) + 500000;
5133
+ do_div(rate, 1000000);
+ rate_mbps = max_t(u32, rate, 1);
5134
+ }
5135
+
5136
+ err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
5137
+ if (err)
5138
+ NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5139
+
5140
+ return err;
5141
+}
5142
+
5143
+static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5144
+ struct flow_action *flow_action,
5145
+ struct netlink_ext_ack *extack)
5146
+{
5147
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
5148
+ const struct flow_action_entry *act;
5149
+ int err;
5150
+ int i;
5151
+
5152
+ if (!flow_action_has_entries(flow_action)) {
5153
+ NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5154
+ return -EINVAL;
5155
+ }
5156
+
5157
+ if (!flow_offload_has_one_action(flow_action)) {
5158
+ NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
5159
+ return -EOPNOTSUPP;
5160
+ }
5161
+
5162
+ if (!flow_action_basic_hw_stats_check(flow_action, extack))
5163
+ return -EOPNOTSUPP;
5164
+
5165
+ flow_action_for_each(i, act, flow_action) {
5166
+ switch (act->id) {
5167
+ case FLOW_ACTION_POLICE:
5168
+ err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5169
+ if (err)
5170
+ return err;
5171
+
5172
+ rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
5173
+ break;
5174
+ default:
5175
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5176
+ return -EOPNOTSUPP;
5177
+ }
5178
+ }
29255179
29265180 return 0;
29275181 }
29285182
2929
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
2930
- struct tc_cls_flower_offload *f, int flags)
5183
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5184
+ struct tc_cls_matchall_offload *ma)
29315185 {
2932
- struct rhashtable *tc_ht = get_tc_ht(priv);
2933
- struct mlx5e_tc_flow *flow;
2934
- struct mlx5_fc *counter;
2935
- u64 bytes;
2936
- u64 packets;
2937
- u64 lastuse;
5186
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5187
+ struct netlink_ext_ack *extack = ma->common.extack;
29385188
2939
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
2940
- if (!flow || !same_flow_direction(flow, flags))
5189
+ if (!mlx5_esw_qos_enabled(esw)) {
5190
+ NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
5191
+ return -EOPNOTSUPP;
5192
+ }
5193
+
5194
+ if (ma->common.prio != 1) {
5195
+ NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
29415196 return -EINVAL;
5197
+ }
29425198
2943
- if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
2944
- return 0;
5199
+ return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5200
+}
29455201
2946
- counter = mlx5_flow_rule_counter(flow->rule[0]);
2947
- if (!counter)
2948
- return 0;
5202
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5203
+ struct tc_cls_matchall_offload *ma)
5204
+{
5205
+ struct netlink_ext_ack *extack = ma->common.extack;
29495206
2950
- mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
5207
+ return apply_police_params(priv, 0, extack);
5208
+}
29515209
2952
- tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
5210
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5211
+ struct tc_cls_matchall_offload *ma)
5212
+{
5213
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
5214
+ struct rtnl_link_stats64 cur_stats;
5215
+ u64 dbytes;
5216
+ u64 dpkts;
29535217
2954
- return 0;
5218
+ cur_stats = priv->stats.vf_vport;
5219
+ dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5220
+ dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5221
+ rpriv->prev_vf_vport_stats = cur_stats;
5222
+ flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5223
+ FLOW_ACTION_HW_STATS_DELAYED);
29555224 }
29565225
29575226 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
29585227 struct mlx5e_priv *peer_priv)
29595228 {
29605229 struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
2961
- struct mlx5e_hairpin_entry *hpe;
5230
+ struct mlx5e_hairpin_entry *hpe, *tmp;
5231
+ LIST_HEAD(init_wait_list);
29625232 u16 peer_vhca_id;
29635233 int bkt;
29645234
....@@ -2967,9 +5237,18 @@
29675237
29685238 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
29695239
2970
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
2971
- if (hpe->peer_vhca_id == peer_vhca_id)
2972
- hpe->hp->pair->peer_gone = true;
5240
+ mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
5241
+ hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
5242
+ if (refcount_inc_not_zero(&hpe->refcnt))
5243
+ list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5244
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
5245
+
5246
+ list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5247
+ wait_for_completion(&hpe->res_ready);
5248
+ if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5249
+ mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
5250
+
5251
+ mlx5e_hairpin_put(priv, hpe);
29735252 }
29745253 }
29755254
....@@ -3000,24 +5279,79 @@
30005279 return NOTIFY_DONE;
30015280 }
30025281
5282
+static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
5283
+{
5284
+ int tc_grp_size, tc_tbl_size;
5285
+ u32 max_flow_counter;
5286
+
5287
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
5288
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
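+ /* Each group is sized by the number of available flow counters
+ * (capped at MLX5E_TC_TABLE_MAX_GROUP_SIZE), and the whole table is
+ * clamped to the device's maximum flow table size.
+ */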
5289
+
5290
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
5291
+
5292
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
5293
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
5294
+
5295
+ return tc_tbl_size;
5296
+}
5297
+
30035298 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
30045299 {
30055300 struct mlx5e_tc_table *tc = &priv->fs.tc;
5301
+ struct mlx5_core_dev *dev = priv->mdev;
5302
+ struct mlx5_chains_attr attr = {};
30065303 int err;
30075304
3008
- hash_init(tc->mod_hdr_tbl);
5305
+ mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5306
+ mutex_init(&tc->t_lock);
5307
+ mutex_init(&tc->hairpin_tbl_lock);
30095308 hash_init(tc->hairpin_tbl);
30105309
30115310 err = rhashtable_init(&tc->ht, &tc_ht_params);
30125311 if (err)
30135312 return err;
30145313
3015
- tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
3016
- if (register_netdevice_notifier(&tc->netdevice_nb)) {
3017
- tc->netdevice_nb.notifier_call = NULL;
3018
- mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5314
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
5315
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5316
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5317
+ attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5318
+ }
5319
+ attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5320
+ attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5321
+ attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5322
+ attr.default_ft = priv->fs.vlan.ft.t;
5323
+
5324
+ tc->chains = mlx5_chains_create(dev, &attr);
5325
+ if (IS_ERR(tc->chains)) {
5326
+ err = PTR_ERR(tc->chains);
5327
+ goto err_chains;
30195328 }
30205329
5330
+ tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
5331
+ MLX5_FLOW_NAMESPACE_KERNEL);
5332
+ if (IS_ERR(tc->ct)) {
5333
+ err = PTR_ERR(tc->ct);
5334
+ goto err_ct;
5335
+ }
5336
+
5337
+ tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5338
+ err = register_netdevice_notifier_dev_net(priv->netdev,
5339
+ &tc->netdevice_nb,
5340
+ &tc->netdevice_nn);
5341
+ if (err) {
5342
+ tc->netdevice_nb.notifier_call = NULL;
5343
+ mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5344
+ goto err_reg;
5345
+ }
5346
+
5347
+ return 0;
5348
+
5349
+err_reg:
5350
+ mlx5_tc_ct_clean(tc->ct);
5351
+err_ct:
5352
+ mlx5_chains_destroy(tc->chains);
5353
+err_chains:
5354
+ rhashtable_destroy(&tc->ht);
30215355 return err;
30225356 }
30235357
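mlx5e_tc_nic_init() above acquires its resources in a fixed order (mod-hdr table and locks, rhashtable, chains, CT, netdev notifier), and the goto ladder at the end unwinds them in exactly the reverse order, each label undoing only what was already set up when the failure hit. A minimal sketch of that unwind shape; setup_a/b/c and undo_a/b are placeholders, not driver functions.

#include <stdio.h>

static int setup_a(void) { puts("a up");   return 0; }
static int setup_b(void) { puts("b up");   return 0; }
static int setup_c(void) { puts("c fail"); return -1; } /* simulated failure */
static void undo_a(void) { puts("a down"); }
static void undo_b(void) { puts("b down"); }

static int demo_init(void)
{
        int err;

        err = setup_a();
        if (err)
                return err;

        err = setup_b();
        if (err)
                goto err_b;

        err = setup_c();
        if (err)
                goto err_c;

        return 0;

err_c:
        undo_b();       /* undo in reverse order of setup */
err_b:
        undo_a();
        return err;
}

int main(void)
{
        return demo_init() ? 1 : 0;
}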
....@@ -3035,29 +5369,194 @@
30355369 struct mlx5e_tc_table *tc = &priv->fs.tc;
30365370
30375371 if (tc->netdevice_nb.notifier_call)
3038
- unregister_netdevice_notifier(&tc->netdevice_nb);
5372
+ unregister_netdevice_notifier_dev_net(priv->netdev,
5373
+ &tc->netdevice_nb,
5374
+ &tc->netdevice_nn);
5375
+
5376
+ mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5377
+ mutex_destroy(&tc->hairpin_tbl_lock);
30395378
30405379 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
30415380
30425381 if (!IS_ERR_OR_NULL(tc->t)) {
3043
- mlx5_destroy_flow_table(tc->t);
5382
+ mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
30445383 tc->t = NULL;
30455384 }
5385
+ mutex_destroy(&tc->t_lock);
5386
+
5387
+ mlx5_tc_ct_clean(tc->ct);
5388
+ mlx5_chains_destroy(tc->chains);
30465389 }
30475390
30485391 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
30495392 {
3050
- return rhashtable_init(tc_ht, &tc_ht_params);
5393
+ const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5394
+ struct mlx5_rep_uplink_priv *uplink_priv;
5395
+ struct mlx5e_rep_priv *rpriv;
5396
+ struct mapping_ctx *mapping;
5397
+ struct mlx5_eswitch *esw;
5398
+ struct mlx5e_priv *priv;
5399
+ int err = 0;
5400
+
5401
+ uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5402
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5403
+ priv = netdev_priv(rpriv->netdev);
5404
+ esw = priv->mdev->priv.eswitch;
5405
+
5406
+ uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5407
+ esw_chains(esw),
5408
+ &esw->offloads.mod_hdr,
5409
+ MLX5_FLOW_NAMESPACE_FDB);
5410
+ if (IS_ERR(uplink_priv->ct_priv))
5411
+ goto err_ct;
5412
+
5413
+ mapping = mapping_create(sizeof(struct tunnel_match_key),
5414
+ TUNNEL_INFO_BITS_MASK, true);
5415
+ if (IS_ERR(mapping)) {
5416
+ err = PTR_ERR(mapping);
5417
+ goto err_tun_mapping;
5418
+ }
5419
+ uplink_priv->tunnel_mapping = mapping;
5420
+
5421
+ mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
5422
+ if (IS_ERR(mapping)) {
5423
+ err = PTR_ERR(mapping);
5424
+ goto err_enc_opts_mapping;
5425
+ }
5426
+ uplink_priv->tunnel_enc_opts_mapping = mapping;
5427
+
5428
+ err = rhashtable_init(tc_ht, &tc_ht_params);
5429
+ if (err)
5430
+ goto err_ht_init;
5431
+
5432
+ return err;
5433
+
5434
+err_ht_init:
5435
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5436
+err_enc_opts_mapping:
5437
+ mapping_destroy(uplink_priv->tunnel_mapping);
5438
+err_tun_mapping:
5439
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
5440
+err_ct:
5441
+ netdev_warn(priv->netdev,
5442
+ "Failed to initialize tc (eswitch), err: %d", err);
5443
+ return err;
30515444 }
30525445
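mlx5e_tc_esw_init() above creates two mapping contexts with mapping_create(): one for full tunnel match keys and one for encap options, so that only a compact id has to travel through the limited register/metadata bits and the original key can be looked up again on the receive path. A toy sketch of such a key-to-id mapping; the demo_mapping structure, sizes, and id scheme are invented for illustration and say nothing about the driver's actual implementation.

#include <string.h>
#include <stdio.h>

#define KEY_LEN         8
#define MAX_ENTRIES     16

/* Toy mapping context: hands out small sequential ids for fixed-size keys
 * and returns the existing id when the same key is added again. */
struct demo_mapping {
        unsigned char keys[MAX_ENTRIES][KEY_LEN];
        int used;
};

static int demo_map_add(struct demo_mapping *m, const void *key, int *id)
{
        int i;

        for (i = 0; i < m->used; i++) {
                if (!memcmp(m->keys[i], key, KEY_LEN)) {
                        *id = i + 1;    /* ids start at 1; 0 means "no mapping" */
                        return 0;
                }
        }
        if (m->used == MAX_ENTRIES)
                return -1;
        memcpy(m->keys[m->used++], key, KEY_LEN);
        *id = m->used;
        return 0;
}

int main(void)
{
        struct demo_mapping m = { .used = 0 };
        unsigned char k1[KEY_LEN] = "tunkey1", k2[KEY_LEN] = "tunkey2";
        int id;

        demo_map_add(&m, k1, &id);
        printf("k1 -> %d\n", id);
        demo_map_add(&m, k2, &id);
        printf("k2 -> %d\n", id);
        demo_map_add(&m, k1, &id);
        printf("k1 again -> %d\n", id);
        return 0;
}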
30535446 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
30545447 {
5448
+ struct mlx5_rep_uplink_priv *uplink_priv;
5449
+
30555450 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5451
+
5452
+ uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5453
+
5454
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5455
+ mapping_destroy(uplink_priv->tunnel_mapping);
5456
+
5457
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
30565458 }
30575459
3058
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
5460
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
30595461 {
3060
- struct rhashtable *tc_ht = get_tc_ht(priv);
5462
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
30615463
30625464 return atomic_read(&tc_ht->nelems);
30635465 }
5466
+
5467
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5468
+{
5469
+ struct mlx5e_tc_flow *flow, *tmp;
5470
+
5471
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5472
+ __mlx5e_tc_del_fdb_peer_flow(flow);
5473
+}
5474
+
5475
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5476
+{
5477
+ struct mlx5_rep_uplink_priv *rpriv =
5478
+ container_of(work, struct mlx5_rep_uplink_priv,
5479
+ reoffload_flows_work);
5480
+ struct mlx5e_tc_flow *flow, *tmp;
5481
+
5482
+ mutex_lock(&rpriv->unready_flows_lock);
5483
+ list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5484
+ if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5485
+ unready_flow_del(flow);
5486
+ }
5487
+ mutex_unlock(&rpriv->unready_flows_lock);
5488
+}
5489
+
5490
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5491
+ struct flow_cls_offload *cls_flower,
5492
+ unsigned long flags)
5493
+{
5494
+ switch (cls_flower->command) {
5495
+ case FLOW_CLS_REPLACE:
5496
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5497
+ flags);
5498
+ case FLOW_CLS_DESTROY:
5499
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5500
+ flags);
5501
+ case FLOW_CLS_STATS:
5502
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5503
+ flags);
5504
+ default:
5505
+ return -EOPNOTSUPP;
5506
+ }
5507
+}
5508
+
5509
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5510
+ void *cb_priv)
5511
+{
5512
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
5513
+ struct mlx5e_priv *priv = cb_priv;
5514
+
5515
+ switch (type) {
5516
+ case TC_SETUP_CLSFLOWER:
5517
+ return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5518
+ default:
5519
+ return -EOPNOTSUPP;
5520
+ }
5521
+}
5522
+
5523
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5524
+ struct sk_buff *skb)
5525
+{
5526
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5527
+ u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5528
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
5529
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
5530
+ struct tc_skb_ext *tc_skb_ext;
5531
+ int err;
5532
+
5533
+ reg_b = be32_to_cpu(cqe->ft_metadata);
5534
+
5535
+ chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5536
+
5537
+ err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
5538
+ if (err) {
5539
+ netdev_dbg(priv->netdev,
5540
+ "Couldn't find chain for chain tag: %d, err: %d\n",
5541
+ chain_tag, err);
5542
+ return false;
5543
+ }
5544
+
5545
+ if (chain) {
5546
+ tc_skb_ext = tc_skb_ext_alloc(skb);
5547
+ if (WARN_ON(!tc_skb_ext))
5548
+ return false;
5549
+
5550
+ tc_skb_ext->chain = chain;
5551
+
5552
+ zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
5553
+ ZONE_RESTORE_MAX;
5554
+
5555
+ if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5556
+ zone_restore_id))
5557
+ return false;
5558
+ }
5559
+#endif /* CONFIG_NET_TC_SKB_EXT */
5560
+
5561
+ return true;
5562
+}
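mlx5e_tc_update_skb() recovers software state from the CQE metadata: the low bits of reg_b, masked with MLX5E_TC_TABLE_CHAIN_TAG_MASK, are translated back to a chain id via mlx5_get_chain_for_tag(), and a higher field, shifted by the NIC_ZONE_RESTORE register mapping and masked with ZONE_RESTORE_MAX, gives the conntrack zone to restore. A small sketch of that bit-field decode; the DEMO_* mask, shift, and widths are illustrative and do not match the driver's real register layout.

#include <stdint.h>
#include <stdio.h>

/* Illustrative layout: low 16 bits = chain tag, next 8 bits = zone id. */
#define DEMO_CHAIN_TAG_MASK     0x0000ffffu
#define DEMO_ZONE_SHIFT         16
#define DEMO_ZONE_MAX           0xffu

struct demo_restore {
        uint32_t chain_tag;
        uint32_t zone_id;
};

static struct demo_restore demo_decode(uint32_t reg_b)
{
        struct demo_restore r = {
                .chain_tag = reg_b & DEMO_CHAIN_TAG_MASK,
                .zone_id   = (reg_b >> DEMO_ZONE_SHIFT) & DEMO_ZONE_MAX,
        };
        return r;
}

int main(void)
{
        struct demo_restore r = demo_decode(0x002a0007);

        printf("chain_tag=%u zone=%u\n", r.chain_tag, r.zone_id);
        return 0;
}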