forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-04 1543e317f1da31b75942316931e8f491a8920811
kernel/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
....@@ -31,6 +31,7 @@
3131 */
3232
3333 #include <net/flow_dissector.h>
34
+#include <net/flow_offload.h>
3435 #include <net/sch_generic.h>
3536 #include <net/pkt_cls.h>
3637 #include <net/tc_act/tc_gact.h>
....@@ -38,76 +39,286 @@
3839 #include <linux/mlx5/fs.h>
3940 #include <linux/mlx5/device.h>
4041 #include <linux/rhashtable.h>
41
-#include <net/switchdev.h>
42
+#include <linux/refcount.h>
43
+#include <linux/completion.h>
4244 #include <net/tc_act/tc_mirred.h>
4345 #include <net/tc_act/tc_vlan.h>
4446 #include <net/tc_act/tc_tunnel_key.h>
4547 #include <net/tc_act/tc_pedit.h>
4648 #include <net/tc_act/tc_csum.h>
47
-#include <net/vxlan.h>
49
+#include <net/tc_act/tc_mpls.h>
4850 #include <net/arp.h>
51
+#include <net/ipv6_stubs.h>
52
+#include <net/bareudp.h>
53
+#include <net/bonding.h>
4954 #include "en.h"
5055 #include "en_rep.h"
56
+#include "en/rep/tc.h"
57
+#include "en/rep/neigh.h"
5158 #include "en_tc.h"
5259 #include "eswitch.h"
53
-#include "lib/vxlan.h"
5460 #include "fs_core.h"
5561 #include "en/port.h"
62
+#include "en/tc_tun.h"
63
+#include "en/mapping.h"
64
+#include "en/tc_ct.h"
65
+#include "en/mod_hdr.h"
66
+#include "lib/devcom.h"
67
+#include "lib/geneve.h"
68
+#include "lib/fs_chains.h"
69
+#include "diag/en_tc_tracepoint.h"
70
+#include <asm/div64.h>
5671
57
-struct mlx5_nic_flow_attr {
58
- u32 action;
59
- u32 flow_tag;
60
- u32 mod_hdr_id;
61
- u32 hairpin_tirn;
62
- u8 match_level;
63
- struct mlx5_flow_table *hairpin_ft;
64
-};
65
-
66
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
72
+#define nic_chains(priv) ((priv)->fs.tc.chains)
73
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
74
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
6775
6876 enum {
69
- MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
70
- MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
71
- MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
72
- MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
73
- MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
74
- MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
75
- MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
77
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
78
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
79
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
80
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
81
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
82
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
83
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
84
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
85
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
86
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
87
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
88
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
89
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
90
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
7691 };
7792
7893 #define MLX5E_TC_MAX_SPLITS 1
94
+
95
+/* Helper struct for accessing a struct containing list_head array.
96
+ * Containing struct
97
+ * |- Helper array
98
+ * [0] Helper item 0
99
+ * |- list_head item 0
100
+ * |- index (0)
101
+ * [1] Helper item 1
102
+ * |- list_head item 1
103
+ * |- index (1)
104
+ * To access the containing struct from one of the list_head items:
105
+ * 1. Get the helper item from the list_head item using
106
+ * helper item =
107
+ * container_of(list_head item, helper struct type, list_head field)
108
+ * 2. Get the containing struct from the helper item and its index in the array:
109
+ * containing struct =
110
+ * container_of(helper item, containing struct type, helper field[index])
111
+ */
112
+struct encap_flow_item {
113
+ struct mlx5e_encap_entry *e; /* attached encap instance */
114
+ struct list_head list;
115
+ int index;
116
+};
79117
80118 struct mlx5e_tc_flow {
81119 struct rhash_head node;
82120 struct mlx5e_priv *priv;
83121 u64 cookie;
84
- u8 flags;
122
+ unsigned long flags;
85123 struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
86
- struct list_head encap; /* flows sharing the same encap ID */
87
- struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
124
+
125
+ /* flows sharing the same reformat object - currently mpls decap */
126
+ struct list_head l3_to_l2_reformat;
127
+ struct mlx5e_decap_entry *decap_reformat;
128
+
129
+ /* Flow can be associated with multiple encap IDs.
130
+ * The number of encaps is bounded by the number of supported
131
+ * destinations.
132
+ */
133
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
134
+ struct mlx5e_tc_flow *peer_flow;
135
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
136
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
88137 struct list_head hairpin; /* flows sharing the same hairpin */
89
- union {
90
- struct mlx5_esw_flow_attr esw_attr[0];
91
- struct mlx5_nic_flow_attr nic_attr[0];
92
- };
138
+ struct list_head peer; /* flows with peer flow */
139
+ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
140
+ struct net_device *orig_dev; /* netdev adding flow first */
141
+ int tmp_efi_index;
142
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
143
+ refcount_t refcnt;
144
+ struct rcu_head rcu_head;
145
+ struct completion init_done;
146
+ int tunnel_id; /* the mapped tunnel id of this flow */
147
+ struct mlx5_flow_attr *attr;
93148 };
94149
95150 struct mlx5e_tc_flow_parse_attr {
96
- struct ip_tunnel_info tun_info;
151
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
152
+ struct net_device *filter_dev;
97153 struct mlx5_flow_spec spec;
98
- int num_mod_hdr_actions;
99
- int max_mod_hdr_actions;
100
- void *mod_hdr_actions;
101
- int mirred_ifindex;
102
-};
103
-
104
-enum {
105
- MLX5_HEADER_TYPE_VXLAN = 0x0,
106
- MLX5_HEADER_TYPE_NVGRE = 0x1,
154
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
155
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
156
+ struct ethhdr eth;
107157 };
108158
109159 #define MLX5E_TC_TABLE_NUM_GROUPS 4
110
-#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
160
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
161
+
162
+struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
163
+ [CHAIN_TO_REG] = {
164
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
165
+ .moffset = 0,
166
+ .mlen = 2,
167
+ },
168
+ [TUNNEL_TO_REG] = {
169
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
170
+ .moffset = 1,
171
+ .mlen = 3,
172
+ .soffset = MLX5_BYTE_OFF(fte_match_param,
173
+ misc_parameters_2.metadata_reg_c_1),
174
+ },
175
+ [ZONE_TO_REG] = zone_to_reg_ct,
176
+ [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
177
+ [CTSTATE_TO_REG] = ctstate_to_reg_ct,
178
+ [MARK_TO_REG] = mark_to_reg_ct,
179
+ [LABELS_TO_REG] = labels_to_reg_ct,
180
+ [FTEID_TO_REG] = fteid_to_reg_ct,
181
+ /* For NIC rules we store the restore metadata directly
182
+ * into reg_b that is passed to SW since we don't
183
+ * jump between steering domains.
184
+ */
185
+ [NIC_CHAIN_TO_REG] = {
186
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
187
+ .moffset = 0,
188
+ .mlen = 2,
189
+ },
190
+ [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
191
+};
192
+
193
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
194
+
195
+void
196
+mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
197
+ enum mlx5e_tc_attr_to_reg type,
198
+ u32 data,
199
+ u32 mask)
200
+{
201
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
202
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
203
+ void *headers_c = spec->match_criteria;
204
+ void *headers_v = spec->match_value;
205
+ void *fmask, *fval;
206
+
207
+ fmask = headers_c + soffset;
208
+ fval = headers_v + soffset;
209
+
210
+ mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
211
+ data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));
212
+
213
+ memcpy(fmask, &mask, match_len);
214
+ memcpy(fval, &data, match_len);
215
+
216
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
217
+}
218
+
219
+void
220
+mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
221
+ enum mlx5e_tc_attr_to_reg type,
222
+ u32 *data,
223
+ u32 *mask)
224
+{
225
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
226
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
227
+ void *headers_c = spec->match_criteria;
228
+ void *headers_v = spec->match_value;
229
+ void *fmask, *fval;
230
+
231
+ fmask = headers_c + soffset;
232
+ fval = headers_v + soffset;
233
+
234
+ memcpy(mask, fmask, match_len);
235
+ memcpy(data, fval, match_len);
236
+
237
+ *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8))));
238
+ *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8))));
239
+}
240
+
241
+int
242
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
243
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
244
+ enum mlx5_flow_namespace_type ns,
245
+ enum mlx5e_tc_attr_to_reg type,
246
+ u32 data)
247
+{
248
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
249
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
250
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
251
+ char *modact;
252
+ int err;
253
+
254
+ err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
255
+ if (err)
256
+ return err;
257
+
258
+ modact = mod_hdr_acts->actions +
259
+ (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
260
+
261
+ /* Firmware has 5bit length field and 0 means 32bits */
262
+ if (mlen == 4)
263
+ mlen = 0;
264
+
265
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
266
+ MLX5_SET(set_action_in, modact, field, mfield);
267
+ MLX5_SET(set_action_in, modact, offset, moffset * 8);
268
+ MLX5_SET(set_action_in, modact, length, mlen * 8);
269
+ MLX5_SET(set_action_in, modact, data, data);
270
+ mod_hdr_acts->num_actions++;
271
+
272
+ return 0;
273
+}
274
+
275
+#define esw_offloads_mode(esw) (mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)
276
+
277
+static struct mlx5_tc_ct_priv *
278
+get_ct_priv(struct mlx5e_priv *priv)
279
+{
280
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
281
+ struct mlx5_rep_uplink_priv *uplink_priv;
282
+ struct mlx5e_rep_priv *uplink_rpriv;
283
+
284
+ if (esw_offloads_mode(esw)) {
285
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
286
+ uplink_priv = &uplink_rpriv->uplink_priv;
287
+
288
+ return uplink_priv->ct_priv;
289
+ }
290
+
291
+ return priv->fs.tc.ct;
292
+}
293
+
294
+struct mlx5_flow_handle *
295
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
296
+ struct mlx5_flow_spec *spec,
297
+ struct mlx5_flow_attr *attr)
298
+{
299
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
300
+
301
+ if (esw_offloads_mode(esw))
302
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
303
+
304
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
305
+}
306
+
307
+void
308
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
309
+ struct mlx5_flow_handle *rule,
310
+ struct mlx5_flow_attr *attr)
311
+{
312
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
313
+
314
+ if (esw_offloads_mode(esw)) {
315
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
316
+
317
+ return;
318
+ }
319
+
320
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
321
+}
111322
112323 struct mlx5e_hairpin {
113324 struct mlx5_hairpin *pair;
....@@ -127,157 +338,180 @@
127338 /* a node of a hash table which keeps all the hairpin entries */
128339 struct hlist_node hairpin_hlist;
129340
341
+ /* protects flows list */
342
+ spinlock_t flows_lock;
130343 /* flows sharing the same hairpin */
131344 struct list_head flows;
345
+ /* hpe's that were not fully initialized when dead peer update event
346
+ * function traversed them.
347
+ */
348
+ struct list_head dead_peer_wait_list;
132349
133350 u16 peer_vhca_id;
134351 u8 prio;
135352 struct mlx5e_hairpin *hp;
353
+ refcount_t refcnt;
354
+ struct completion res_ready;
136355 };
137356
138
-struct mod_hdr_key {
139
- int num_actions;
140
- void *actions;
141
-};
357
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
358
+ struct mlx5e_tc_flow *flow);
142359
143
-struct mlx5e_mod_hdr_entry {
144
- /* a node of a hash table which keeps all the mod_hdr entries */
145
- struct hlist_node mod_hdr_hlist;
146
-
147
- /* flows sharing the same mod_hdr entry */
148
- struct list_head flows;
149
-
150
- struct mod_hdr_key key;
151
-
152
- u32 mod_hdr_id;
153
-};
154
-
155
-#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
156
-
157
-static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
360
+static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
158361 {
159
- return jhash(key->actions,
160
- key->num_actions * MLX5_MH_ACT_SZ, 0);
362
+ if (!flow || !refcount_inc_not_zero(&flow->refcnt))
363
+ return ERR_PTR(-EINVAL);
364
+ return flow;
161365 }
162366
163
-static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
164
- struct mod_hdr_key *b)
367
+static void mlx5e_flow_put(struct mlx5e_priv *priv,
368
+ struct mlx5e_tc_flow *flow)
165369 {
166
- if (a->num_actions != b->num_actions)
167
- return 1;
370
+ if (refcount_dec_and_test(&flow->refcnt)) {
371
+ mlx5e_tc_del_flow(priv, flow);
372
+ kfree_rcu(flow, rcu_head);
373
+ }
374
+}
168375
169
- return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
376
+static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
377
+{
378
+ /* Complete all memory stores before setting bit. */
379
+ smp_mb__before_atomic();
380
+ set_bit(flag, &flow->flags);
381
+}
382
+
383
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
384
+
385
+static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
386
+ unsigned long flag)
387
+{
388
+ /* test_and_set_bit() provides all necessary barriers */
389
+ return test_and_set_bit(flag, &flow->flags);
390
+}
391
+
392
+#define flow_flag_test_and_set(flow, flag) \
393
+ __flow_flag_test_and_set(flow, \
394
+ MLX5E_TC_FLOW_FLAG_##flag)
395
+
396
+static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
397
+{
398
+ /* Complete all memory stores before clearing bit. */
399
+ smp_mb__before_atomic();
400
+ clear_bit(flag, &flow->flags);
401
+}
402
+
403
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
404
+ MLX5E_TC_FLOW_FLAG_##flag)
405
+
406
+static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
407
+{
408
+ bool ret = test_bit(flag, &flow->flags);
409
+
410
+ /* Read fields of flow structure only after checking flags. */
411
+ smp_mb__after_atomic();
412
+ return ret;
413
+}
414
+
415
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
416
+ MLX5E_TC_FLOW_FLAG_##flag)
417
+
418
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
419
+{
420
+ return flow_flag_test(flow, ESWITCH);
421
+}
422
+
423
+static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
424
+{
425
+ return flow_flag_test(flow, FT);
426
+}
427
+
428
+static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
429
+{
430
+ return flow_flag_test(flow, OFFLOADED);
431
+}
432
+
433
+static int get_flow_name_space(struct mlx5e_tc_flow *flow)
434
+{
435
+ return mlx5e_is_eswitch_flow(flow) ?
436
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
437
+}
438
+
439
+static struct mod_hdr_tbl *
440
+get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
441
+{
442
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
443
+
444
+ return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
445
+ &esw->offloads.mod_hdr :
446
+ &priv->fs.tc.mod_hdr;
170447 }
171448
172449 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
173450 struct mlx5e_tc_flow *flow,
174451 struct mlx5e_tc_flow_parse_attr *parse_attr)
175452 {
176
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
177
- int num_actions, actions_size, namespace, err;
178
- struct mlx5e_mod_hdr_entry *mh;
179
- struct mod_hdr_key key;
180
- bool found = false;
181
- u32 hash_key;
453
+ struct mlx5_modify_hdr *modify_hdr;
454
+ struct mlx5e_mod_hdr_handle *mh;
182455
183
- num_actions = parse_attr->num_mod_hdr_actions;
184
- actions_size = MLX5_MH_ACT_SZ * num_actions;
456
+ mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
457
+ get_flow_name_space(flow),
458
+ &parse_attr->mod_hdr_acts);
459
+ if (IS_ERR(mh))
460
+ return PTR_ERR(mh);
185461
186
- key.actions = parse_attr->mod_hdr_actions;
187
- key.num_actions = num_actions;
188
-
189
- hash_key = hash_mod_hdr_info(&key);
190
-
191
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
192
- namespace = MLX5_FLOW_NAMESPACE_FDB;
193
- hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
194
- mod_hdr_hlist, hash_key) {
195
- if (!cmp_mod_hdr_info(&mh->key, &key)) {
196
- found = true;
197
- break;
198
- }
199
- }
200
- } else {
201
- namespace = MLX5_FLOW_NAMESPACE_KERNEL;
202
- hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
203
- mod_hdr_hlist, hash_key) {
204
- if (!cmp_mod_hdr_info(&mh->key, &key)) {
205
- found = true;
206
- break;
207
- }
208
- }
209
- }
210
-
211
- if (found)
212
- goto attach_flow;
213
-
214
- mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
215
- if (!mh)
216
- return -ENOMEM;
217
-
218
- mh->key.actions = (void *)mh + sizeof(*mh);
219
- memcpy(mh->key.actions, key.actions, actions_size);
220
- mh->key.num_actions = num_actions;
221
- INIT_LIST_HEAD(&mh->flows);
222
-
223
- err = mlx5_modify_header_alloc(priv->mdev, namespace,
224
- mh->key.num_actions,
225
- mh->key.actions,
226
- &mh->mod_hdr_id);
227
- if (err)
228
- goto out_err;
229
-
230
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
231
- hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
232
- else
233
- hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
234
-
235
-attach_flow:
236
- list_add(&flow->mod_hdr, &mh->flows);
237
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
238
- flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
239
- else
240
- flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
462
+ modify_hdr = mlx5e_mod_hdr_get(mh);
463
+ flow->attr->modify_hdr = modify_hdr;
464
+ flow->mh = mh;
241465
242466 return 0;
243
-
244
-out_err:
245
- kfree(mh);
246
- return err;
247467 }
248468
249469 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
250470 struct mlx5e_tc_flow *flow)
251471 {
252
- struct list_head *next = flow->mod_hdr.next;
472
+ /* flow wasn't fully initialized */
473
+ if (!flow->mh)
474
+ return;
253475
254
- list_del(&flow->mod_hdr);
255
-
256
- if (list_empty(next)) {
257
- struct mlx5e_mod_hdr_entry *mh;
258
-
259
- mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
260
-
261
- mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
262
- hash_del(&mh->mod_hdr_hlist);
263
- kfree(mh);
264
- }
476
+ mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
477
+ flow->mh);
478
+ flow->mh = NULL;
265479 }
266480
267481 static
268482 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
269483 {
484
+ struct mlx5_core_dev *mdev;
270485 struct net_device *netdev;
271486 struct mlx5e_priv *priv;
272487
273
- netdev = __dev_get_by_index(net, ifindex);
488
+ netdev = dev_get_by_index(net, ifindex);
489
+ if (!netdev)
490
+ return ERR_PTR(-ENODEV);
491
+
274492 priv = netdev_priv(netdev);
275
- return priv->mdev;
493
+ mdev = priv->mdev;
494
+ dev_put(netdev);
495
+
496
+ /* Mirred tc action holds a refcount on the ifindex net_device (see
497
+ * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
498
+ * after dev_put(netdev), while we're in the context of adding a tc flow.
499
+ *
500
+ * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
501
+ * stored in a hairpin object, which exists until all flows, that refer to it, get
502
+ * removed.
503
+ *
504
+ * On the other hand, after a hairpin object has been created, the peer net_device may
505
+ * be removed/unbound while there are still some hairpin flows that are using it. This
506
+ * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
507
+ * NETDEV_UNREGISTER event of the peer net_device.
508
+ */
509
+ return mdev;
276510 }
277511
278512 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
279513 {
280
- u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
514
+ u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
281515 void *tirc;
282516 int err;
283517
....@@ -291,7 +525,7 @@
291525 MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
292526 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
293527
294
- err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
528
+ err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
295529 if (err)
296530 goto create_tir_err;
297531
....@@ -320,7 +554,7 @@
320554
321555 for (i = 0; i < sz; i++) {
322556 ix = i;
323
- if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
557
+ if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
324558 ix = mlx5e_bits_invert(i, ilog2(sz));
325559 ix = indirection_rqt[ix];
326560 rqn = hp->pair->rqn[ix];
....@@ -364,16 +598,18 @@
364598 void *tirc;
365599
366600 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
601
+ struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
602
+
367603 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
368604 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
369605
370606 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
371607 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
372608 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
373
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
609
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
374610
375611 err = mlx5_core_create_tir(hp->func_mdev, in,
376
- MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
612
+ &hp->indir_tirn[tt]);
377613 if (err) {
378614 mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
379615 goto err_destroy_tirs;
....@@ -408,7 +644,7 @@
408644 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
409645 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
410646
411
- ft_attr->max_fte = MLX5E_NUM_TT;
647
+ ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
412648 ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
413649 ft_attr->prio = MLX5E_TC_PRIO;
414650 }
....@@ -469,6 +705,10 @@
469705
470706 func_mdev = priv->mdev;
471707 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
708
+ if (IS_ERR(peer_mdev)) {
709
+ err = PTR_ERR(peer_mdev);
710
+ goto create_pair_err;
711
+ }
472712
473713 pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
474714 if (IS_ERR(pair)) {
....@@ -523,17 +763,40 @@
523763
524764 hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
525765 hairpin_hlist, hash_key) {
526
- if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
766
+ if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
767
+ refcount_inc(&hpe->refcnt);
527768 return hpe;
769
+ }
528770 }
529771
530772 return NULL;
531773 }
532774
775
+static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
776
+ struct mlx5e_hairpin_entry *hpe)
777
+{
778
+ /* no more hairpin flows for us, release the hairpin pair */
779
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
780
+ return;
781
+ hash_del(&hpe->hairpin_hlist);
782
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
783
+
784
+ if (!IS_ERR_OR_NULL(hpe->hp)) {
785
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
786
+ dev_name(hpe->hp->pair->peer_mdev->device));
787
+
788
+ mlx5e_hairpin_destroy(hpe->hp);
789
+ }
790
+
791
+ WARN_ON(!list_empty(&hpe->flows));
792
+ kfree(hpe);
793
+}
794
+
533795 #define UNKNOWN_MATCH_PRIO 8
534796
535797 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
536
- struct mlx5_flow_spec *spec, u8 *match_prio)
798
+ struct mlx5_flow_spec *spec, u8 *match_prio,
799
+ struct netlink_ext_ack *extack)
537800 {
538801 void *headers_c, *headers_v;
539802 u8 prio_val, prio_mask = 0;
....@@ -541,8 +804,8 @@
541804
542805 #ifdef CONFIG_MLX5_CORE_EN_DCB
543806 if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
544
- netdev_warn(priv->netdev,
545
- "only PCP trust state supported for hairpin\n");
807
+ NL_SET_ERR_MSG_MOD(extack,
808
+ "only PCP trust state supported for hairpin");
546809 return -EOPNOTSUPP;
547810 }
548811 #endif
....@@ -558,8 +821,8 @@
558821 if (!vlan_present || !prio_mask) {
559822 prio_val = UNKNOWN_MATCH_PRIO;
560823 } else if (prio_mask != 0x7) {
561
- netdev_warn(priv->netdev,
562
- "masked priority match not supported for hairpin\n");
824
+ NL_SET_ERR_MSG_MOD(extack,
825
+ "masked priority match not supported for hairpin");
563826 return -EOPNOTSUPP;
564827 }
565828
....@@ -569,9 +832,10 @@
569832
570833 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
571834 struct mlx5e_tc_flow *flow,
572
- struct mlx5e_tc_flow_parse_attr *parse_attr)
835
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
836
+ struct netlink_ext_ack *extack)
573837 {
574
- int peer_ifindex = parse_attr->mirred_ifindex;
838
+ int peer_ifindex = parse_attr->mirred_ifindex[0];
575839 struct mlx5_hairpin_params params;
576840 struct mlx5_core_dev *peer_mdev;
577841 struct mlx5e_hairpin_entry *hpe;
....@@ -583,26 +847,52 @@
583847 int err;
584848
585849 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
850
+ if (IS_ERR(peer_mdev)) {
851
+ NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
852
+ return PTR_ERR(peer_mdev);
853
+ }
854
+
586855 if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
587
- netdev_warn(priv->netdev, "hairpin is not supported\n");
856
+ NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
588857 return -EOPNOTSUPP;
589858 }
590859
591860 peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
592
- err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio);
861
+ err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
862
+ extack);
593863 if (err)
594864 return err;
865
+
866
+ mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
595867 hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
596
- if (hpe)
868
+ if (hpe) {
869
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
870
+ wait_for_completion(&hpe->res_ready);
871
+
872
+ if (IS_ERR(hpe->hp)) {
873
+ err = -EREMOTEIO;
874
+ goto out_err;
875
+ }
597876 goto attach_flow;
877
+ }
598878
599879 hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
600
- if (!hpe)
880
+ if (!hpe) {
881
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
601882 return -ENOMEM;
883
+ }
602884
885
+ spin_lock_init(&hpe->flows_lock);
603886 INIT_LIST_HEAD(&hpe->flows);
887
+ INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
604888 hpe->peer_vhca_id = peer_id;
605889 hpe->prio = match_prio;
890
+ refcount_set(&hpe->refcnt, 1);
891
+ init_completion(&hpe->res_ready);
892
+
893
+ hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
894
+ hash_hairpin_info(peer_id, match_prio));
895
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
606896
607897 params.log_data_size = 15;
608898 params.log_data_size = min_t(u8, params.log_data_size,
....@@ -624,376 +914,795 @@
624914 params.num_channels = link_speed64;
625915
626916 hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
917
+ hpe->hp = hp;
918
+ complete_all(&hpe->res_ready);
627919 if (IS_ERR(hp)) {
628920 err = PTR_ERR(hp);
629
- goto create_hairpin_err;
921
+ goto out_err;
630922 }
631923
632924 netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
633
- hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name,
925
+ hp->tirn, hp->pair->rqn[0],
926
+ dev_name(hp->pair->peer_mdev->device),
634927 hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
635
-
636
- hpe->hp = hp;
637
- hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
638
- hash_hairpin_info(peer_id, match_prio));
639928
640929 attach_flow:
641930 if (hpe->hp->num_channels > 1) {
642
- flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
643
- flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
931
+ flow_flag_set(flow, HAIRPIN_RSS);
932
+ flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
644933 } else {
645
- flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
934
+ flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
646935 }
936
+
937
+ flow->hpe = hpe;
938
+ spin_lock(&hpe->flows_lock);
647939 list_add(&flow->hairpin, &hpe->flows);
940
+ spin_unlock(&hpe->flows_lock);
648941
649942 return 0;
650943
651
-create_hairpin_err:
652
- kfree(hpe);
944
+out_err:
945
+ mlx5e_hairpin_put(priv, hpe);
653946 return err;
654947 }
655948
656949 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
657950 struct mlx5e_tc_flow *flow)
658951 {
659
- struct list_head *next = flow->hairpin.next;
952
+ /* flow wasn't fully initialized */
953
+ if (!flow->hpe)
954
+ return;
660955
956
+ spin_lock(&flow->hpe->flows_lock);
661957 list_del(&flow->hairpin);
958
+ spin_unlock(&flow->hpe->flows_lock);
662959
663
- /* no more hairpin flows for us, release the hairpin pair */
664
- if (list_empty(next)) {
665
- struct mlx5e_hairpin_entry *hpe;
666
-
667
- hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
668
-
669
- netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
670
- hpe->hp->pair->peer_mdev->priv.name);
671
-
672
- mlx5e_hairpin_destroy(hpe->hp);
673
- hash_del(&hpe->hairpin_hlist);
674
- kfree(hpe);
675
- }
960
+ mlx5e_hairpin_put(priv, flow->hpe);
961
+ flow->hpe = NULL;
676962 }
677963
678
-static struct mlx5_flow_handle *
679
-mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
680
- struct mlx5e_tc_flow_parse_attr *parse_attr,
681
- struct mlx5e_tc_flow *flow)
964
+struct mlx5_flow_handle *
965
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
966
+ struct mlx5_flow_spec *spec,
967
+ struct mlx5_flow_attr *attr)
682968 {
683
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
684
- struct mlx5_core_dev *dev = priv->mdev;
969
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
970
+ struct mlx5_fs_chains *nic_chains = nic_chains(priv);
971
+ struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
972
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
685973 struct mlx5_flow_destination dest[2] = {};
686974 struct mlx5_flow_act flow_act = {
687975 .action = attr->action,
688
- .has_flow_tag = true,
689
- .flow_tag = attr->flow_tag,
690
- .encap_id = 0,
976
+ .flags = FLOW_ACT_NO_APPEND,
691977 };
692
- struct mlx5_fc *counter = NULL;
693978 struct mlx5_flow_handle *rule;
694
- bool table_created = false;
695
- int err, dest_ix = 0;
979
+ struct mlx5_flow_table *ft;
980
+ int dest_ix = 0;
696981
697
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
698
- err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
699
- if (err) {
700
- rule = ERR_PTR(err);
701
- goto err_add_hairpin_flow;
702
- }
703
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
704
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
705
- dest[dest_ix].ft = attr->hairpin_ft;
706
- } else {
707
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
708
- dest[dest_ix].tir_num = attr->hairpin_tirn;
709
- }
982
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
983
+ flow_context->flow_tag = nic_attr->flow_tag;
984
+
985
+ if (attr->dest_ft) {
986
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
987
+ dest[dest_ix].ft = attr->dest_ft;
988
+ dest_ix++;
989
+ } else if (nic_attr->hairpin_ft) {
990
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
991
+ dest[dest_ix].ft = nic_attr->hairpin_ft;
992
+ dest_ix++;
993
+ } else if (nic_attr->hairpin_tirn) {
994
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
995
+ dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
710996 dest_ix++;
711997 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
712998 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
713
- dest[dest_ix].ft = priv->fs.vlan.ft.t;
999
+ if (attr->dest_chain) {
1000
+ dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1001
+ attr->dest_chain, 1,
1002
+ MLX5E_TC_FT_LEVEL);
1003
+ if (IS_ERR(dest[dest_ix].ft))
1004
+ return ERR_CAST(dest[dest_ix].ft);
1005
+ } else {
1006
+ dest[dest_ix].ft = priv->fs.vlan.ft.t;
1007
+ }
7141008 dest_ix++;
1009
+ }
1010
+
1011
+ if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1012
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1013
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1014
+
1015
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1016
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1017
+ dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1018
+ dest_ix++;
1019
+ }
1020
+
1021
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1022
+ flow_act.modify_hdr = attr->modify_hdr;
1023
+
1024
+ mutex_lock(&tc->t_lock);
1025
+ if (IS_ERR_OR_NULL(tc->t)) {
1026
+ /* Create the root table here if doesn't exist yet */
1027
+ tc->t =
1028
+ mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1029
+
1030
+ if (IS_ERR(tc->t)) {
1031
+ mutex_unlock(&tc->t_lock);
1032
+ netdev_err(priv->netdev,
1033
+ "Failed to create tc offload table\n");
1034
+ rule = ERR_CAST(priv->fs.tc.t);
1035
+ goto err_ft_get;
1036
+ }
1037
+ }
1038
+ mutex_unlock(&tc->t_lock);
1039
+
1040
+ if (attr->chain || attr->prio)
1041
+ ft = mlx5_chains_get_table(nic_chains,
1042
+ attr->chain, attr->prio,
1043
+ MLX5E_TC_FT_LEVEL);
1044
+ else
1045
+ ft = attr->ft;
1046
+
1047
+ if (IS_ERR(ft)) {
1048
+ rule = ERR_CAST(ft);
1049
+ goto err_ft_get;
1050
+ }
1051
+
1052
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
1053
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1054
+
1055
+ rule = mlx5_add_flow_rules(ft, spec,
1056
+ &flow_act, dest, dest_ix);
1057
+ if (IS_ERR(rule))
1058
+ goto err_rule;
1059
+
1060
+ return rule;
1061
+
1062
+err_rule:
1063
+ if (attr->chain || attr->prio)
1064
+ mlx5_chains_put_table(nic_chains,
1065
+ attr->chain, attr->prio,
1066
+ MLX5E_TC_FT_LEVEL);
1067
+err_ft_get:
1068
+ if (attr->dest_chain)
1069
+ mlx5_chains_put_table(nic_chains,
1070
+ attr->dest_chain, 1,
1071
+ MLX5E_TC_FT_LEVEL);
1072
+
1073
+ return ERR_CAST(rule);
1074
+}
1075
+
1076
+static int
1077
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1078
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
1079
+ struct mlx5e_tc_flow *flow,
1080
+ struct netlink_ext_ack *extack)
1081
+{
1082
+ struct mlx5_flow_attr *attr = flow->attr;
1083
+ struct mlx5_core_dev *dev = priv->mdev;
1084
+ struct mlx5_fc *counter = NULL;
1085
+ int err;
1086
+
1087
+ if (flow_flag_test(flow, HAIRPIN)) {
1088
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1089
+ if (err)
1090
+ return err;
7151091 }
7161092
7171093 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
7181094 counter = mlx5_fc_create(dev, true);
719
- if (IS_ERR(counter)) {
720
- rule = ERR_CAST(counter);
721
- goto err_fc_create;
722
- }
723
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
724
- dest[dest_ix].counter = counter;
725
- dest_ix++;
1095
+ if (IS_ERR(counter))
1096
+ return PTR_ERR(counter);
1097
+
1098
+ attr->counter = counter;
7261099 }
7271100
7281101 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
7291102 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
730
- flow_act.modify_id = attr->mod_hdr_id;
731
- kfree(parse_attr->mod_hdr_actions);
732
- if (err) {
733
- rule = ERR_PTR(err);
734
- goto err_create_mod_hdr_id;
735
- }
1103
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1104
+ if (err)
1105
+ return err;
7361106 }
7371107
738
- if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
739
- int tc_grp_size, tc_tbl_size;
740
- u32 max_flow_counter;
1108
+ if (flow_flag_test(flow, CT))
1109
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
1110
+ attr, &parse_attr->mod_hdr_acts);
1111
+ else
1112
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1113
+ attr);
7411114
742
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
743
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
1115
+ return PTR_ERR_OR_ZERO(flow->rule[0]);
1116
+}
7441117
745
- tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
1118
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1119
+ struct mlx5_flow_handle *rule,
1120
+ struct mlx5_flow_attr *attr)
1121
+{
1122
+ struct mlx5_fs_chains *nic_chains = nic_chains(priv);
7461123
747
- tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
748
- BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
1124
+ mlx5_del_flow_rules(rule);
7491125
750
- priv->fs.tc.t =
751
- mlx5_create_auto_grouped_flow_table(priv->fs.ns,
752
- MLX5E_TC_PRIO,
753
- tc_tbl_size,
754
- MLX5E_TC_TABLE_NUM_GROUPS,
755
- MLX5E_TC_FT_LEVEL, 0);
756
- if (IS_ERR(priv->fs.tc.t)) {
757
- netdev_err(priv->netdev,
758
- "Failed to create tc offload table\n");
759
- rule = ERR_CAST(priv->fs.tc.t);
760
- goto err_create_ft;
761
- }
1126
+ if (attr->chain || attr->prio)
1127
+ mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1128
+ MLX5E_TC_FT_LEVEL);
7621129
763
- table_created = true;
764
- }
765
-
766
- if (attr->match_level != MLX5_MATCH_NONE)
767
- parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
768
-
769
- rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
770
- &flow_act, dest, dest_ix);
771
-
772
- if (IS_ERR(rule))
773
- goto err_add_rule;
774
-
775
- return rule;
776
-
777
-err_add_rule:
778
- if (table_created) {
779
- mlx5_destroy_flow_table(priv->fs.tc.t);
780
- priv->fs.tc.t = NULL;
781
- }
782
-err_create_ft:
783
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
784
- mlx5e_detach_mod_hdr(priv, flow);
785
-err_create_mod_hdr_id:
786
- mlx5_fc_destroy(dev, counter);
787
-err_fc_create:
788
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
789
- mlx5e_hairpin_flow_del(priv, flow);
790
-err_add_hairpin_flow:
791
- return rule;
1130
+ if (attr->dest_chain)
1131
+ mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1132
+ MLX5E_TC_FT_LEVEL);
7921133 }
7931134
7941135 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
7951136 struct mlx5e_tc_flow *flow)
7961137 {
797
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
798
- struct mlx5_fc *counter = NULL;
1138
+ struct mlx5_flow_attr *attr = flow->attr;
1139
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
7991140
800
- counter = mlx5_flow_rule_counter(flow->rule[0]);
801
- mlx5_del_flow_rules(flow->rule[0]);
802
- mlx5_fc_destroy(priv->mdev, counter);
1141
+ flow_flag_clear(flow, OFFLOADED);
8031142
804
- if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
805
- mlx5_destroy_flow_table(priv->fs.tc.t);
1143
+ if (flow_flag_test(flow, CT))
1144
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1145
+ else if (!IS_ERR_OR_NULL(flow->rule[0]))
1146
+ mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1147
+
1148
+ /* Remove root table if no rules are left to avoid
1149
+ * extra steering hops.
1150
+ */
1151
+ mutex_lock(&priv->fs.tc.t_lock);
1152
+ if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1153
+ !IS_ERR_OR_NULL(tc->t)) {
1154
+ mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
8061155 priv->fs.tc.t = NULL;
8071156 }
1157
+ mutex_unlock(&priv->fs.tc.t_lock);
1158
+
1159
+ kvfree(attr->parse_attr);
8081160
8091161 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
8101162 mlx5e_detach_mod_hdr(priv, flow);
8111163
812
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
1164
+ mlx5_fc_destroy(priv->mdev, attr->counter);
1165
+
1166
+ if (flow_flag_test(flow, HAIRPIN))
8131167 mlx5e_hairpin_flow_del(priv, flow);
1168
+
1169
+ kfree(flow->attr);
8141170 }
8151171
8161172 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
817
- struct mlx5e_tc_flow *flow);
1173
+ struct mlx5e_tc_flow *flow, int out_index);
8181174
8191175 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
820
- struct ip_tunnel_info *tun_info,
1176
+ struct mlx5e_tc_flow *flow,
8211177 struct net_device *mirred_dev,
1178
+ int out_index,
1179
+ struct netlink_ext_ack *extack,
8221180 struct net_device **encap_dev,
823
- struct mlx5e_tc_flow *flow);
1181
+ bool *encap_valid);
1182
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
1183
+ struct mlx5e_tc_flow *flow,
1184
+ struct netlink_ext_ack *extack);
1185
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1186
+ struct mlx5e_tc_flow *flow);
8241187
8251188 static struct mlx5_flow_handle *
1189
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1190
+ struct mlx5e_tc_flow *flow,
1191
+ struct mlx5_flow_spec *spec,
1192
+ struct mlx5_flow_attr *attr)
1193
+{
1194
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1195
+ struct mlx5_flow_handle *rule;
1196
+
1197
+ if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1198
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1199
+
1200
+ if (flow_flag_test(flow, CT)) {
1201
+ mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1202
+
1203
+ return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
1204
+ flow, spec, attr,
1205
+ mod_hdr_acts);
1206
+ }
1207
+
1208
+ rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1209
+ if (IS_ERR(rule))
1210
+ return rule;
1211
+
1212
+ if (attr->esw_attr->split_count) {
1213
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1214
+ if (IS_ERR(flow->rule[1])) {
1215
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1216
+ return flow->rule[1];
1217
+ }
1218
+ }
1219
+
1220
+ return rule;
1221
+}
1222
+
1223
+static void
1224
+mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1225
+ struct mlx5e_tc_flow *flow,
1226
+ struct mlx5_flow_attr *attr)
1227
+{
1228
+ flow_flag_clear(flow, OFFLOADED);
1229
+
1230
+ if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1231
+ goto offload_rule_0;
1232
+
1233
+ if (flow_flag_test(flow, CT)) {
1234
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1235
+ return;
1236
+ }
1237
+
1238
+ if (attr->esw_attr->split_count)
1239
+ mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1240
+
1241
+offload_rule_0:
1242
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1243
+}
1244
+
1245
+static struct mlx5_flow_handle *
1246
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1247
+ struct mlx5e_tc_flow *flow,
1248
+ struct mlx5_flow_spec *spec)
1249
+{
1250
+ struct mlx5_flow_attr *slow_attr;
1251
+ struct mlx5_flow_handle *rule;
1252
+
1253
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1254
+ if (!slow_attr)
1255
+ return ERR_PTR(-ENOMEM);
1256
+
1257
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1258
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1259
+ slow_attr->esw_attr->split_count = 0;
1260
+ slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1261
+
1262
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1263
+ if (!IS_ERR(rule))
1264
+ flow_flag_set(flow, SLOW);
1265
+
1266
+ kfree(slow_attr);
1267
+
1268
+ return rule;
1269
+}
1270
+
1271
+static void
1272
+mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1273
+ struct mlx5e_tc_flow *flow)
1274
+{
1275
+ struct mlx5_flow_attr *slow_attr;
1276
+
1277
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1278
+ if (!slow_attr) {
1279
+ mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1280
+ return;
1281
+ }
1282
+
1283
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1284
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1285
+ slow_attr->esw_attr->split_count = 0;
1286
+ slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1287
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1288
+ flow_flag_clear(flow, SLOW);
1289
+ kfree(slow_attr);
1290
+}
1291
+
1292
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1293
+ * function.
1294
+ */
1295
+static void unready_flow_add(struct mlx5e_tc_flow *flow,
1296
+ struct list_head *unready_flows)
1297
+{
1298
+ flow_flag_set(flow, NOT_READY);
1299
+ list_add_tail(&flow->unready, unready_flows);
1300
+}
1301
+
1302
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1303
+ * function.
1304
+ */
1305
+static void unready_flow_del(struct mlx5e_tc_flow *flow)
1306
+{
1307
+ list_del(&flow->unready);
1308
+ flow_flag_clear(flow, NOT_READY);
1309
+}
1310
+
1311
+static void add_unready_flow(struct mlx5e_tc_flow *flow)
1312
+{
1313
+ struct mlx5_rep_uplink_priv *uplink_priv;
1314
+ struct mlx5e_rep_priv *rpriv;
1315
+ struct mlx5_eswitch *esw;
1316
+
1317
+ esw = flow->priv->mdev->priv.eswitch;
1318
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1319
+ uplink_priv = &rpriv->uplink_priv;
1320
+
1321
+ mutex_lock(&uplink_priv->unready_flows_lock);
1322
+ unready_flow_add(flow, &uplink_priv->unready_flows);
1323
+ mutex_unlock(&uplink_priv->unready_flows_lock);
1324
+}
1325
+
1326
+static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1327
+{
1328
+ struct mlx5_rep_uplink_priv *uplink_priv;
1329
+ struct mlx5e_rep_priv *rpriv;
1330
+ struct mlx5_eswitch *esw;
1331
+
1332
+ esw = flow->priv->mdev->priv.eswitch;
1333
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1334
+ uplink_priv = &rpriv->uplink_priv;
1335
+
1336
+ mutex_lock(&uplink_priv->unready_flows_lock);
1337
+ if (flow_flag_test(flow, NOT_READY))
1338
+ unready_flow_del(flow);
1339
+ mutex_unlock(&uplink_priv->unready_flows_lock);
1340
+}
1341
+
1342
+static int
8261343 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
827
- struct mlx5e_tc_flow_parse_attr *parse_attr,
828
- struct mlx5e_tc_flow *flow)
1344
+ struct mlx5e_tc_flow *flow,
1345
+ struct netlink_ext_ack *extack)
8291346 {
8301347 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
831
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
832
- struct net_device *out_dev, *encap_dev = NULL;
833
- struct mlx5_flow_handle *rule = NULL;
1348
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
1349
+ struct mlx5_flow_attr *attr = flow->attr;
1350
+ struct net_device *encap_dev = NULL;
1351
+ struct mlx5_esw_flow_attr *esw_attr;
1352
+ struct mlx5_fc *counter = NULL;
8341353 struct mlx5e_rep_priv *rpriv;
8351354 struct mlx5e_priv *out_priv;
836
- int err;
1355
+ bool encap_valid = true;
1356
+ u32 max_prio, max_chain;
1357
+ int err = 0;
1358
+ int out_index;
8371359
838
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
839
- out_dev = __dev_get_by_index(dev_net(priv->netdev),
840
- attr->parse_attr->mirred_ifindex);
841
- err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
842
- out_dev, &encap_dev, flow);
843
- if (err) {
844
- rule = ERR_PTR(err);
845
- if (err != -EAGAIN)
846
- goto err_attach_encap;
1360
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
1361
+ NL_SET_ERR_MSG_MOD(extack,
1362
+ "E-switch priorities unsupported, upgrade FW");
1363
+ return -EOPNOTSUPP;
1364
+ }
1365
+
1366
+ /* We check chain range only for tc flows.
1367
+ * For ft flows, we checked attr->chain was originally 0 and set it to
1368
+ * FDB_FT_CHAIN which is outside tc range.
1369
+ * See mlx5e_rep_setup_ft_cb().
1370
+ */
1371
+ max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1372
+ if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1373
+ NL_SET_ERR_MSG_MOD(extack,
1374
+ "Requested chain is out of supported range");
1375
+ return -EOPNOTSUPP;
1376
+ }
1377
+
1378
+ max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1379
+ if (attr->prio > max_prio) {
1380
+ NL_SET_ERR_MSG_MOD(extack,
1381
+ "Requested priority is out of supported range");
1382
+ return -EOPNOTSUPP;
1383
+ }
1384
+
1385
+ if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1386
+ err = mlx5e_attach_decap(priv, flow, extack);
1387
+ if (err)
1388
+ return err;
1389
+ }
1390
+
1391
+ parse_attr = attr->parse_attr;
1392
+ esw_attr = attr->esw_attr;
1393
+
1394
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1395
+ struct net_device *out_dev;
1396
+ int mirred_ifindex;
1397
+
1398
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1399
+ continue;
1400
+
1401
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1402
+ out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1403
+ if (!out_dev) {
1404
+ NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1405
+ err = -ENODEV;
1406
+ return err;
8471407 }
1408
+ err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1409
+ extack, &encap_dev, &encap_valid);
1410
+ dev_put(out_dev);
1411
+ if (err)
1412
+ return err;
1413
+
8481414 out_priv = netdev_priv(encap_dev);
8491415 rpriv = out_priv->ppriv;
850
- attr->out_rep[attr->out_count] = rpriv->rep;
851
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
1416
+ esw_attr->dests[out_index].rep = rpriv->rep;
1417
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
8521418 }
8531419
8541420 err = mlx5_eswitch_add_vlan_action(esw, attr);
855
- if (err) {
856
- rule = ERR_PTR(err);
857
- goto err_add_vlan;
858
- }
1421
+ if (err)
1422
+ return err;
8591423
860
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1424
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1425
+ !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
8611426 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
862
- kfree(parse_attr->mod_hdr_actions);
863
- if (err) {
864
- rule = ERR_PTR(err);
865
- goto err_mod_hdr;
866
- }
1427
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1428
+ if (err)
1429
+ return err;
8671430 }
8681431
869
- /* we get here if (1) there's no error (rule being null) or when
870
- * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
1432
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1433
+ counter = mlx5_fc_create(esw_attr->counter_dev, true);
1434
+ if (IS_ERR(counter))
1435
+ return PTR_ERR(counter);
1436
+
1437
+ attr->counter = counter;
1438
+ }
1439
+
1440
+ /* we get here if one of the following takes place:
1441
+ * (1) there's no error
1442
+ * (2) there's an encap action and we don't have valid neigh
8711443 */
872
- if (rule != ERR_PTR(-EAGAIN)) {
873
- rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
874
- if (IS_ERR(rule))
875
- goto err_add_rule;
1444
+ if (!encap_valid)
1445
+ flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1446
+ else
1447
+ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
8761448
877
- if (attr->mirror_count) {
878
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
879
- if (IS_ERR(flow->rule[1]))
880
- goto err_fwd_rule;
881
- }
882
- }
883
- return rule;
1449
+ if (IS_ERR(flow->rule[0]))
1450
+ return PTR_ERR(flow->rule[0]);
1451
+ else
1452
+ flow_flag_set(flow, OFFLOADED);
8841453
885
-err_fwd_rule:
886
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
887
- rule = flow->rule[1];
888
-err_add_rule:
889
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
890
- mlx5e_detach_mod_hdr(priv, flow);
891
-err_mod_hdr:
892
- mlx5_eswitch_del_vlan_action(esw, attr);
893
-err_add_vlan:
894
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
895
- mlx5e_detach_encap(priv, flow);
896
-err_attach_encap:
897
- return rule;
1454
+ return 0;
1455
+}
1456
+
1457
+static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1458
+{
1459
+ struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1460
+ void *headers_v = MLX5_ADDR_OF(fte_match_param,
1461
+ spec->match_value,
1462
+ misc_parameters_3);
1463
+ u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1464
+ headers_v,
1465
+ geneve_tlv_option_0_data);
1466
+
1467
+ return !!geneve_tlv_opt_0_data;
8981468 }
8991469
9001470 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
9011471 struct mlx5e_tc_flow *flow)
9021472 {
9031473 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
904
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1474
+ struct mlx5_flow_attr *attr = flow->attr;
1475
+ int out_index;
9051476
906
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
907
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
908
- if (attr->mirror_count)
909
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
910
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1477
+ mlx5e_put_flow_tunnel_id(flow);
1478
+
1479
+ remove_unready_flow(flow);
1480
+
1481
+ if (mlx5e_is_offloaded_flow(flow)) {
1482
+ if (flow_flag_test(flow, SLOW))
1483
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
1484
+ else
1485
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
9111486 }
1487
+
1488
+ if (mlx5_flow_has_geneve_opt(flow))
1489
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
9121490
9131491 mlx5_eswitch_del_vlan_action(esw, attr);
9141492
915
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
916
- mlx5e_detach_encap(priv, flow);
917
- kvfree(attr->parse_attr);
918
- }
1493
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1494
+ if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1495
+ mlx5e_detach_encap(priv, flow, out_index);
1496
+ kfree(attr->parse_attr->tun_info[out_index]);
1497
+ }
1498
+ kvfree(attr->parse_attr);
1499
+
1500
+ mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
9191501
9201502 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
9211503 mlx5e_detach_mod_hdr(priv, flow);
1504
+
1505
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1506
+ mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
1507
+
1508
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
1509
+ mlx5e_detach_decap(priv, flow);
1510
+
1511
+ kfree(flow->attr);
9221512 }
9231513
9241514 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
925
- struct mlx5e_encap_entry *e)
1515
+ struct mlx5e_encap_entry *e,
1516
+ struct list_head *flow_list)
9261517 {
9271518 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
9281519 struct mlx5_esw_flow_attr *esw_attr;
1520
+ struct mlx5_flow_handle *rule;
1521
+ struct mlx5_flow_attr *attr;
1522
+ struct mlx5_flow_spec *spec;
9291523 struct mlx5e_tc_flow *flow;
9301524 int err;
9311525
932
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
933
- e->encap_size, e->encap_header,
934
- &e->encap_id);
935
- if (err) {
936
- mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
937
- err);
1526
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1527
+ e->reformat_type,
1528
+ e->encap_size, e->encap_header,
1529
+ MLX5_FLOW_NAMESPACE_FDB);
1530
+ if (IS_ERR(e->pkt_reformat)) {
1531
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
1532
+ PTR_ERR(e->pkt_reformat));
9381533 return;
9391534 }
9401535 e->flags |= MLX5_ENCAP_ENTRY_VALID;
9411536 mlx5e_rep_queue_neigh_stats_work(priv);
9421537
943
- list_for_each_entry(flow, &e->flows, encap) {
944
- esw_attr = flow->esw_attr;
945
- esw_attr->encap_id = e->encap_id;
946
- flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
947
- if (IS_ERR(flow->rule[0])) {
948
- err = PTR_ERR(flow->rule[0]);
1538
+ list_for_each_entry(flow, flow_list, tmp_list) {
1539
+ bool all_flow_encaps_valid = true;
1540
+ int i;
1541
+
1542
+ if (!mlx5e_is_offloaded_flow(flow))
1543
+ continue;
1544
+ attr = flow->attr;
1545
+ esw_attr = attr->esw_attr;
1546
+ spec = &attr->parse_attr->spec;
1547
+
1548
+ esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1549
+ esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1550
+ /* Flow can be associated with multiple encap entries.
1551
+ * Before offloading the flow verify that all of them have
1552
+ * a valid neighbour.
1553
+ */
1554
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1555
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1556
+ continue;
1557
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1558
+ all_flow_encaps_valid = false;
1559
+ break;
1560
+ }
1561
+ }
1562
+ /* Do not offload flows with unresolved neighbors */
1563
+ if (!all_flow_encaps_valid)
1564
+ continue;
1565
+ /* update from slow path rule to encap rule */
1566
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1567
+ if (IS_ERR(rule)) {
1568
+ err = PTR_ERR(rule);
9491569 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
9501570 err);
9511571 continue;
9521572 }
9531573
954
- if (esw_attr->mirror_count) {
955
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
956
- if (IS_ERR(flow->rule[1])) {
957
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
958
- err = PTR_ERR(flow->rule[1]);
959
- mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
960
- err);
961
- continue;
962
- }
963
- }
964
-
965
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
1574
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
1575
+ flow->rule[0] = rule;
1576
+ /* was unset when slow path rule removed */
1577
+ flow_flag_set(flow, OFFLOADED);
9661578 }
9671579 }
9681580
9691581 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
970
- struct mlx5e_encap_entry *e)
1582
+ struct mlx5e_encap_entry *e,
1583
+ struct list_head *flow_list)
9711584 {
9721585 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1586
+ struct mlx5_esw_flow_attr *esw_attr;
1587
+ struct mlx5_flow_handle *rule;
1588
+ struct mlx5_flow_attr *attr;
1589
+ struct mlx5_flow_spec *spec;
1590
+ struct mlx5e_tc_flow *flow;
1591
+ int err;
1592
+
1593
+ list_for_each_entry(flow, flow_list, tmp_list) {
1594
+ if (!mlx5e_is_offloaded_flow(flow))
1595
+ continue;
1596
+ attr = flow->attr;
1597
+ esw_attr = attr->esw_attr;
1598
+ spec = &attr->parse_attr->spec;
1599
+
1600
+ /* update from encap rule to slow path rule */
1601
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1602
+ /* mark the flow's encap dest as non-valid */
1603
+ esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1604
+
1605
+ if (IS_ERR(rule)) {
1606
+ err = PTR_ERR(rule);
1607
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1608
+ err);
1609
+ continue;
1610
+ }
1611
+
1612
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1613
+ flow->rule[0] = rule;
1614
+ /* was unset when fast path rule removed */
1615
+ flow_flag_set(flow, OFFLOADED);
1616
+ }
1617
+
1618
+ /* we know that the encap is valid */
1619
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1620
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1621
+}
1622
+
1623
+static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1624
+{
1625
+ return flow->attr->counter;
1626
+}
1627
+
1628
+/* Takes reference to all flows attached to encap and adds the flows to
1629
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1630
+ */
1631
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1632
+{
1633
+ struct encap_flow_item *efi;
9731634 struct mlx5e_tc_flow *flow;
9741635
975
- list_for_each_entry(flow, &e->flows, encap) {
976
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
977
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1636
+ list_for_each_entry(efi, &e->flows, list) {
1637
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1638
+ if (IS_ERR(mlx5e_flow_get(flow)))
1639
+ continue;
1640
+ wait_for_completion(&flow->init_done);
9781641
979
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
980
- if (attr->mirror_count)
981
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
982
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
983
- }
1642
+ flow->tmp_efi_index = efi->index;
1643
+ list_add(&flow->tmp_list, flow_list);
1644
+ }
1645
+}
1646
+
1647
+/* Iterate over tmp_list of flows attached to flow_list head. */
1648
+void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1649
+{
1650
+ struct mlx5e_tc_flow *flow, *tmp;
1651
+
1652
+ list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1653
+ mlx5e_flow_put(priv, flow);
1654
+}
1655
+
1656
+static struct mlx5e_encap_entry *
1657
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1658
+ struct mlx5e_encap_entry *e)
1659
+{
1660
+ struct mlx5e_encap_entry *next = NULL;
1661
+
1662
+retry:
1663
+ rcu_read_lock();
1664
+
1665
+ /* find encap with non-zero reference counter value */
1666
+ for (next = e ?
1667
+ list_next_or_null_rcu(&nhe->encap_list,
1668
+ &e->encap_list,
1669
+ struct mlx5e_encap_entry,
1670
+ encap_list) :
1671
+ list_first_or_null_rcu(&nhe->encap_list,
1672
+ struct mlx5e_encap_entry,
1673
+ encap_list);
1674
+ next;
1675
+ next = list_next_or_null_rcu(&nhe->encap_list,
1676
+ &next->encap_list,
1677
+ struct mlx5e_encap_entry,
1678
+ encap_list))
1679
+ if (mlx5e_encap_take(next))
1680
+ break;
1681
+
1682
+ rcu_read_unlock();
1683
+
1684
+ /* release starting encap */
1685
+ if (e)
1686
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
1687
+ if (!next)
1688
+ return next;
1689
+
1690
+ /* wait for encap to be fully initialized */
1691
+ wait_for_completion(&next->res_ready);
1692
+ /* continue searching if encap entry is not in valid state after completion */
1693
+ if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1694
+ e = next;
1695
+ goto retry;
9841696 }
9851697
986
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
987
- e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
988
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
989
- }
1698
+ return next;
9901699 }
9911700
9921701 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
9931702 {
9941703 struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1704
+ struct mlx5e_encap_entry *e = NULL;
9951705 struct mlx5e_tc_flow *flow;
996
- struct mlx5e_encap_entry *e;
9971706 struct mlx5_fc *counter;
9981707 struct neigh_table *tbl;
9991708 bool neigh_used = false;
....@@ -1004,17 +1713,31 @@
10041713 tbl = &arp_tbl;
10051714 #if IS_ENABLED(CONFIG_IPV6)
10061715 else if (m_neigh->family == AF_INET6)
1007
- tbl = &nd_tbl;
1716
+ tbl = ipv6_stub->nd_tbl;
10081717 #endif
10091718 else
10101719 return;
10111720
1012
- list_for_each_entry(e, &nhe->encap_list, encap_list) {
1013
- if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
1014
- continue;
1015
- list_for_each_entry(flow, &e->flows, encap) {
1016
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
1017
- counter = mlx5_flow_rule_counter(flow->rule[0]);
1721
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
1722
+ * next one.
1723
+ */
1724
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1725
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1726
+ struct encap_flow_item *efi, *tmp;
1727
+ struct mlx5_eswitch *esw;
1728
+ LIST_HEAD(flow_list);
1729
+
1730
+ esw = priv->mdev->priv.eswitch;
1731
+ mutex_lock(&esw->offloads.encap_tbl_lock);
1732
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1733
+ flow = container_of(efi, struct mlx5e_tc_flow,
1734
+ encaps[efi->index]);
1735
+ if (IS_ERR(mlx5e_flow_get(flow)))
1736
+ continue;
1737
+ list_add(&flow->tmp_list, &flow_list);
1738
+
1739
+ if (mlx5e_is_offloaded_flow(flow)) {
1740
+ counter = mlx5e_tc_get_counter(flow);
10181741 lastuse = mlx5_fc_query_lastuse(counter);
10191742 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
10201743 neigh_used = true;
....@@ -1022,9 +1745,17 @@
10221745 }
10231746 }
10241747 }
1025
- if (neigh_used)
1748
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1749
+
1750
+ mlx5e_put_encap_flow_list(priv, &flow_list);
1751
+ if (neigh_used) {
1752
+ /* release current encap before breaking the loop */
1753
+ mlx5e_encap_put(priv, e);
10261754 break;
1755
+ }
10271756 }
1757
+
1758
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
10281759
10291760 if (neigh_used) {
10301761 nhe->reported_lastuse = jiffies;
....@@ -1041,213 +1772,512 @@
10411772 }
10421773 }
10431774
1044
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1045
- struct mlx5e_tc_flow *flow)
1775
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
10461776 {
1047
- struct list_head *next = flow->encap.next;
1777
+ WARN_ON(!list_empty(&e->flows));
10481778
1049
- list_del(&flow->encap);
1050
- if (list_empty(next)) {
1051
- struct mlx5e_encap_entry *e;
1052
-
1053
- e = list_entry(next, struct mlx5e_encap_entry, flows);
1779
+ if (e->compl_result > 0) {
10541780 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
10551781
10561782 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1057
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
1058
-
1059
- hash_del_rcu(&e->encap_hlist);
1060
- kfree(e->encap_header);
1061
- kfree(e);
1783
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
10621784 }
1785
+
1786
+ kfree(e->tun_info);
1787
+ kfree(e->encap_header);
1788
+ kfree_rcu(e, rcu);
1789
+}
1790
+
1791
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
1792
+ struct mlx5e_decap_entry *d)
1793
+{
1794
+ WARN_ON(!list_empty(&d->flows));
1795
+
1796
+ if (!d->compl_result)
1797
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
1798
+
1799
+ kfree_rcu(d, rcu);
1800
+}
1801
+
1802
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1803
+{
1804
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1805
+
1806
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1807
+ return;
1808
+ hash_del_rcu(&e->encap_hlist);
1809
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1810
+
1811
+ mlx5e_encap_dealloc(priv, e);
1812
+}
1813
+
1814
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
1815
+{
1816
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1817
+
1818
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
1819
+ return;
1820
+ hash_del_rcu(&d->hlist);
1821
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
1822
+
1823
+ mlx5e_decap_dealloc(priv, d);
1824
+}
1825
+
1826
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1827
+ struct mlx5e_tc_flow *flow, int out_index)
1828
+{
1829
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1830
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1831
+
1832
+ /* flow wasn't fully initialized */
1833
+ if (!e)
1834
+ return;
1835
+
1836
+ mutex_lock(&esw->offloads.encap_tbl_lock);
1837
+ list_del(&flow->encaps[out_index].list);
1838
+ flow->encaps[out_index].e = NULL;
1839
+ if (!refcount_dec_and_test(&e->refcnt)) {
1840
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1841
+ return;
1842
+ }
1843
+ hash_del_rcu(&e->encap_hlist);
1844
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
1845
+
1846
+ mlx5e_encap_dealloc(priv, e);
1847
+}
1848
+
1849
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1850
+ struct mlx5e_tc_flow *flow)
1851
+{
1852
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1853
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
1854
+
1855
+ if (!d)
1856
+ return;
1857
+
1858
+ mutex_lock(&esw->offloads.decap_tbl_lock);
1859
+ list_del(&flow->l3_to_l2_reformat);
1860
+ flow->decap_reformat = NULL;
1861
+
1862
+ if (!refcount_dec_and_test(&d->refcnt)) {
1863
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
1864
+ return;
1865
+ }
1866
+ hash_del_rcu(&d->hlist);
1867
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
1868
+
1869
+ mlx5e_decap_dealloc(priv, d);
1870
+}
1871
+
1872
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1873
+{
1874
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1875
+
1876
+ if (!flow_flag_test(flow, ESWITCH) ||
1877
+ !flow_flag_test(flow, DUP))
1878
+ return;
1879
+
1880
+ mutex_lock(&esw->offloads.peer_mutex);
1881
+ list_del(&flow->peer);
1882
+ mutex_unlock(&esw->offloads.peer_mutex);
1883
+
1884
+ flow_flag_clear(flow, DUP);
1885
+
1886
+ if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1887
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1888
+ kfree(flow->peer_flow);
1889
+ }
1890
+
1891
+ flow->peer_flow = NULL;
1892
+}
1893
+
1894
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1895
+{
1896
+ struct mlx5_core_dev *dev = flow->priv->mdev;
1897
+ struct mlx5_devcom *devcom = dev->priv.devcom;
1898
+ struct mlx5_eswitch *peer_esw;
1899
+
1900
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1901
+ if (!peer_esw)
1902
+ return;
1903
+
1904
+ __mlx5e_tc_del_fdb_peer_flow(flow);
1905
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
10631906 }
10641907
10651908 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
10661909 struct mlx5e_tc_flow *flow)
10671910 {
1068
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
1911
+ if (mlx5e_is_eswitch_flow(flow)) {
1912
+ mlx5e_tc_del_fdb_peer_flow(flow);
10691913 mlx5e_tc_del_fdb_flow(priv, flow);
1070
- else
1914
+ } else {
10711915 mlx5e_tc_del_nic_flow(priv, flow);
1072
-}
1073
-
1074
-static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
1075
- struct tc_cls_flower_offload *f)
1076
-{
1077
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1078
- outer_headers);
1079
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1080
- outer_headers);
1081
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1082
- misc_parameters);
1083
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1084
- misc_parameters);
1085
-
1086
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
1087
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
1088
-
1089
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
1090
- struct flow_dissector_key_keyid *key =
1091
- skb_flow_dissector_target(f->dissector,
1092
- FLOW_DISSECTOR_KEY_ENC_KEYID,
1093
- f->key);
1094
- struct flow_dissector_key_keyid *mask =
1095
- skb_flow_dissector_target(f->dissector,
1096
- FLOW_DISSECTOR_KEY_ENC_KEYID,
1097
- f->mask);
1098
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
1099
- be32_to_cpu(mask->keyid));
1100
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
1101
- be32_to_cpu(key->keyid));
11021916 }
11031917 }
11041918
1105
-static int parse_tunnel_attr(struct mlx5e_priv *priv,
1106
- struct mlx5_flow_spec *spec,
1107
- struct tc_cls_flower_offload *f)
1919
+static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
11081920 {
1109
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1110
- outer_headers);
1111
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1112
- outer_headers);
1921
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1922
+ struct flow_action *flow_action = &rule->action;
1923
+ const struct flow_action_entry *act;
1924
+ int i;
11131925
1114
- struct flow_dissector_key_control *enc_control =
1115
- skb_flow_dissector_target(f->dissector,
1116
- FLOW_DISSECTOR_KEY_ENC_CONTROL,
1117
- f->key);
1926
+ flow_action_for_each(i, act, flow_action) {
1927
+ switch (act->id) {
1928
+ case FLOW_ACTION_GOTO:
1929
+ return true;
1930
+ default:
1931
+ continue;
1932
+ }
1933
+ }
11181934
1119
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
1120
- struct flow_dissector_key_ports *key =
1121
- skb_flow_dissector_target(f->dissector,
1122
- FLOW_DISSECTOR_KEY_ENC_PORTS,
1123
- f->key);
1124
- struct flow_dissector_key_ports *mask =
1125
- skb_flow_dissector_target(f->dissector,
1126
- FLOW_DISSECTOR_KEY_ENC_PORTS,
1127
- f->mask);
1935
+ return false;
1936
+}
11281937
1129
- /* Full udp dst port must be given */
1130
- if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
1131
- goto vxlan_match_offload_err;
1938
+static int
1939
+enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1940
+ struct flow_dissector_key_enc_opts *opts,
1941
+ struct netlink_ext_ack *extack,
1942
+ bool *dont_care)
1943
+{
1944
+ struct geneve_opt *opt;
1945
+ int off = 0;
11321946
1133
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
1134
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
1135
- parse_vxlan_attr(spec, f);
1136
- else {
1137
- netdev_warn(priv->netdev,
1138
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
1139
- return -EOPNOTSUPP;
1947
+ *dont_care = true;
1948
+
1949
+ while (opts->len > off) {
1950
+ opt = (struct geneve_opt *)&opts->data[off];
1951
+
1952
+ if (!(*dont_care) || opt->opt_class || opt->type ||
1953
+ memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1954
+ *dont_care = false;
1955
+
1956
+ if (opt->opt_class != htons(U16_MAX) ||
1957
+ opt->type != U8_MAX) {
1958
+ NL_SET_ERR_MSG(extack,
1959
+ "Partial match of tunnel options in chain > 0 isn't supported");
1960
+ netdev_warn(priv->netdev,
1961
+ "Partial match of tunnel options in chain > 0 isn't supported");
1962
+ return -EOPNOTSUPP;
1963
+ }
11401964 }
11411965
1142
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1143
- udp_dport, ntohs(mask->dst));
1144
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1145
- udp_dport, ntohs(key->dst));
1146
-
1147
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1148
- udp_sport, ntohs(mask->src));
1149
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1150
- udp_sport, ntohs(key->src));
1151
- } else { /* udp dst port must be given */
1152
-vxlan_match_offload_err:
1153
- netdev_warn(priv->netdev,
1154
- "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
1155
- return -EOPNOTSUPP;
1966
+ off += sizeof(struct geneve_opt) + opt->length * 4;
11561967 }
1157
-
1158
- if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1159
- struct flow_dissector_key_ipv4_addrs *key =
1160
- skb_flow_dissector_target(f->dissector,
1161
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1162
- f->key);
1163
- struct flow_dissector_key_ipv4_addrs *mask =
1164
- skb_flow_dissector_target(f->dissector,
1165
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1166
- f->mask);
1167
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1168
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
1169
- ntohl(mask->src));
1170
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1171
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
1172
- ntohl(key->src));
1173
-
1174
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1175
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1176
- ntohl(mask->dst));
1177
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1178
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1179
- ntohl(key->dst));
1180
-
1181
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
1182
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
1183
- } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1184
- struct flow_dissector_key_ipv6_addrs *key =
1185
- skb_flow_dissector_target(f->dissector,
1186
- FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1187
- f->key);
1188
- struct flow_dissector_key_ipv6_addrs *mask =
1189
- skb_flow_dissector_target(f->dissector,
1190
- FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1191
- f->mask);
1192
-
1193
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1194
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
1195
- &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1196
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1197
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
1198
- &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1199
-
1200
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1201
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1202
- &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1203
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1204
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1205
- &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1206
-
1207
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
1208
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
1209
- }
1210
-
1211
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
1212
- struct flow_dissector_key_ip *key =
1213
- skb_flow_dissector_target(f->dissector,
1214
- FLOW_DISSECTOR_KEY_ENC_IP,
1215
- f->key);
1216
- struct flow_dissector_key_ip *mask =
1217
- skb_flow_dissector_target(f->dissector,
1218
- FLOW_DISSECTOR_KEY_ENC_IP,
1219
- f->mask);
1220
-
1221
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
1222
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
1223
-
1224
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
1225
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
1226
-
1227
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
1228
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
1229
- }
1230
-
1231
- /* Enforce DMAC when offloading incoming tunneled flows.
1232
- * Flow counters require a match on the DMAC.
1233
- */
1234
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
1235
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
1236
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1237
- dmac_47_16), priv->netdev->dev_addr);
1238
-
1239
- /* let software handle IP fragments */
1240
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
1241
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
12421968
12431969 return 0;
12441970 }
12451971
1246
-static int __parse_cls_flower(struct mlx5e_priv *priv,
1247
- struct mlx5_flow_spec *spec,
1248
- struct tc_cls_flower_offload *f,
1249
- u8 *match_level)
1972
+#define COPY_DISSECTOR(rule, diss_key, dst)\
1973
+({ \
1974
+ struct flow_rule *__rule = (rule);\
1975
+ typeof(dst) __dst = dst;\
1976
+\
1977
+ memcpy(__dst,\
1978
+ skb_flow_dissector_target(__rule->match.dissector,\
1979
+ diss_key,\
1980
+ __rule->match.key),\
1981
+ sizeof(*__dst));\
1982
+})
1983
+
1984
+static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1985
+ struct mlx5e_tc_flow *flow,
1986
+ struct flow_cls_offload *f,
1987
+ struct net_device *filter_dev)
12501988 {
1989
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1990
+ struct netlink_ext_ack *extack = f->common.extack;
1991
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1992
+ struct flow_match_enc_opts enc_opts_match;
1993
+ struct tunnel_match_enc_opts tun_enc_opts;
1994
+ struct mlx5_rep_uplink_priv *uplink_priv;
1995
+ struct mlx5_flow_attr *attr = flow->attr;
1996
+ struct mlx5e_rep_priv *uplink_rpriv;
1997
+ struct tunnel_match_key tunnel_key;
1998
+ bool enc_opts_is_dont_care = true;
1999
+ u32 tun_id, enc_opts_id = 0;
2000
+ struct mlx5_eswitch *esw;
2001
+ u32 value, mask;
2002
+ int err;
2003
+
2004
+ esw = priv->mdev->priv.eswitch;
2005
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2006
+ uplink_priv = &uplink_rpriv->uplink_priv;
2007
+
2008
+ memset(&tunnel_key, 0, sizeof(tunnel_key));
2009
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2010
+ &tunnel_key.enc_control);
2011
+ if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2012
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2013
+ &tunnel_key.enc_ipv4);
2014
+ else
2015
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2016
+ &tunnel_key.enc_ipv6);
2017
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2018
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2019
+ &tunnel_key.enc_tp);
2020
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2021
+ &tunnel_key.enc_key_id);
2022
+ tunnel_key.filter_ifindex = filter_dev->ifindex;
2023
+
2024
+ err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2025
+ if (err)
2026
+ return err;
2027
+
2028
+ flow_rule_match_enc_opts(rule, &enc_opts_match);
2029
+ err = enc_opts_is_dont_care_or_full_match(priv,
2030
+ enc_opts_match.mask,
2031
+ extack,
2032
+ &enc_opts_is_dont_care);
2033
+ if (err)
2034
+ goto err_enc_opts;
2035
+
2036
+ if (!enc_opts_is_dont_care) {
2037
+ memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2038
+ memcpy(&tun_enc_opts.key, enc_opts_match.key,
2039
+ sizeof(*enc_opts_match.key));
2040
+ memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2041
+ sizeof(*enc_opts_match.mask));
2042
+
2043
+ err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2044
+ &tun_enc_opts, &enc_opts_id);
2045
+ if (err)
2046
+ goto err_enc_opts;
2047
+ }
2048
+
2049
+ value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2050
+ mask = enc_opts_id ? TUNNEL_ID_MASK :
2051
+ (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2052
+
2053
+ if (attr->chain) {
2054
+ mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2055
+ TUNNEL_TO_REG, value, mask);
2056
+ } else {
2057
+ mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2058
+ err = mlx5e_tc_match_to_reg_set(priv->mdev,
2059
+ mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2060
+ TUNNEL_TO_REG, value);
2061
+ if (err)
2062
+ goto err_set;
2063
+
2064
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2065
+ }
2066
+
2067
+ flow->tunnel_id = value;
2068
+ return 0;
2069
+
2070
+err_set:
2071
+ if (enc_opts_id)
2072
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2073
+ enc_opts_id);
2074
+err_enc_opts:
2075
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2076
+ return err;
2077
+}
2078
+
2079
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2080
+{
2081
+ u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
2082
+ u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
2083
+ struct mlx5_rep_uplink_priv *uplink_priv;
2084
+ struct mlx5e_rep_priv *uplink_rpriv;
2085
+ struct mlx5_eswitch *esw;
2086
+
2087
+ esw = flow->priv->mdev->priv.eswitch;
2088
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2089
+ uplink_priv = &uplink_rpriv->uplink_priv;
2090
+
2091
+ if (tun_id)
2092
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2093
+ if (enc_opts_id)
2094
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2095
+ enc_opts_id);
2096
+}
2097
+
2098
+u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
2099
+{
2100
+ return flow->tunnel_id;
2101
+}
2102
+
2103
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2104
+ struct flow_match_basic *match, bool outer,
2105
+ void *headers_c, void *headers_v)
2106
+{
2107
+ bool ip_version_cap;
2108
+
2109
+ ip_version_cap = outer ?
2110
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2111
+ ft_field_support.outer_ip_version) :
2112
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2113
+ ft_field_support.inner_ip_version);
2114
+
2115
+ if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2116
+ (match->key->n_proto == htons(ETH_P_IP) ||
2117
+ match->key->n_proto == htons(ETH_P_IPV6))) {
2118
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2119
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2120
+ match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2121
+ } else {
2122
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2123
+ ntohs(match->mask->n_proto));
2124
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2125
+ ntohs(match->key->n_proto));
2126
+ }
2127
+}
2128
+
2129
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
2130
+ struct mlx5e_tc_flow *flow,
2131
+ struct mlx5_flow_spec *spec,
2132
+ struct flow_cls_offload *f,
2133
+ struct net_device *filter_dev,
2134
+ u8 *match_level,
2135
+ bool *match_inner)
2136
+{
2137
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2138
+ struct netlink_ext_ack *extack = f->common.extack;
2139
+ bool needs_mapping, sets_mapping;
2140
+ int err;
2141
+
2142
+ if (!mlx5e_is_eswitch_flow(flow))
2143
+ return -EOPNOTSUPP;
2144
+
2145
+ needs_mapping = !!flow->attr->chain;
2146
+ sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
2147
+ *match_inner = !needs_mapping;
2148
+
2149
+ if ((needs_mapping || sets_mapping) &&
2150
+ !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2151
+ NL_SET_ERR_MSG(extack,
2152
+ "Chains on tunnel devices isn't supported without register loopback support");
2153
+ netdev_warn(priv->netdev,
2154
+ "Chains on tunnel devices isn't supported without register loopback support");
2155
+ return -EOPNOTSUPP;
2156
+ }
2157
+
2158
+ if (!flow->attr->chain) {
2159
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2160
+ match_level);
2161
+ if (err) {
2162
+ NL_SET_ERR_MSG_MOD(extack,
2163
+ "Failed to parse tunnel attributes");
2164
+ netdev_warn(priv->netdev,
2165
+ "Failed to parse tunnel attributes");
2166
+ return err;
2167
+ }
2168
+
2169
+ /* With mpls over udp we decapsulate using packet reformat
2170
+ * object
2171
+ */
2172
+ if (!netif_is_bareudp(filter_dev))
2173
+ flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2174
+ }
2175
+
2176
+ if (!needs_mapping && !sets_mapping)
2177
+ return 0;
2178
+
2179
+ return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2180
+}
2181
+
2182
+static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2183
+{
2184
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2185
+ inner_headers);
2186
+}
2187
+
2188
+static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2189
+{
2190
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2191
+ inner_headers);
2192
+}
2193
+
2194
+static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2195
+{
2196
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2197
+ outer_headers);
2198
+}
2199
+
2200
+static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2201
+{
2202
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2203
+ outer_headers);
2204
+}
2205
+
2206
+static void *get_match_headers_value(u32 flags,
2207
+ struct mlx5_flow_spec *spec)
2208
+{
2209
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2210
+ get_match_inner_headers_value(spec) :
2211
+ get_match_outer_headers_value(spec);
2212
+}
2213
+
2214
+static void *get_match_headers_criteria(u32 flags,
2215
+ struct mlx5_flow_spec *spec)
2216
+{
2217
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2218
+ get_match_inner_headers_criteria(spec) :
2219
+ get_match_outer_headers_criteria(spec);
2220
+}
2221
+
2222
+static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2223
+ struct flow_cls_offload *f)
2224
+{
2225
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2226
+ struct netlink_ext_ack *extack = f->common.extack;
2227
+ struct net_device *ingress_dev;
2228
+ struct flow_match_meta match;
2229
+
2230
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2231
+ return 0;
2232
+
2233
+ flow_rule_match_meta(rule, &match);
2234
+ if (!match.mask->ingress_ifindex)
2235
+ return 0;
2236
+
2237
+ if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2238
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2239
+ return -EOPNOTSUPP;
2240
+ }
2241
+
2242
+ ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2243
+ match.key->ingress_ifindex);
2244
+ if (!ingress_dev) {
2245
+ NL_SET_ERR_MSG_MOD(extack,
2246
+ "Can't find the ingress port to match on");
2247
+ return -ENOENT;
2248
+ }
2249
+
2250
+ if (ingress_dev != filter_dev) {
2251
+ NL_SET_ERR_MSG_MOD(extack,
2252
+ "Can't match on the ingress filter port");
2253
+ return -EOPNOTSUPP;
2254
+ }
2255
+
2256
+ return 0;
2257
+}
2258
+
2259
+static bool skip_key_basic(struct net_device *filter_dev,
2260
+ struct flow_cls_offload *f)
2261
+{
2262
+ /* When doing mpls over udp decap, the user needs to provide
2263
+ * MPLS_UC as the protocol in order to be able to match on mpls
2264
+ * label fields. However, the actual ethertype is IP so we want to
2265
+ * avoid matching on this, otherwise we'll fail the match.
2266
+ */
2267
+ if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2268
+ return true;
2269
+
2270
+ return false;
2271
+}
2272
+
2273
+static int __parse_cls_flower(struct mlx5e_priv *priv,
2274
+ struct mlx5e_tc_flow *flow,
2275
+ struct mlx5_flow_spec *spec,
2276
+ struct flow_cls_offload *f,
2277
+ struct net_device *filter_dev,
2278
+ u8 *inner_match_level, u8 *outer_match_level)
2279
+{
2280
+ struct netlink_ext_ack *extack = f->common.extack;
12512281 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
12522282 outer_headers);
12532283 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
....@@ -1256,13 +2286,20 @@
12562286 misc_parameters);
12572287 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
12582288 misc_parameters);
2289
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2290
+ struct flow_dissector *dissector = rule->match.dissector;
2291
+ enum fs_flow_table_type fs_type;
12592292 u16 addr_type = 0;
12602293 u8 ip_proto = 0;
2294
+ u8 *match_level;
2295
+ int err;
12612296
1262
- *match_level = MLX5_MATCH_NONE;
2297
+ fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2298
+ match_level = outer_match_level;
12632299
1264
- if (f->dissector->used_keys &
1265
- ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2300
+ if (dissector->used_keys &
2301
+ ~(BIT(FLOW_DISSECTOR_KEY_META) |
2302
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
12662303 BIT(FLOW_DISSECTOR_KEY_BASIC) |
12672304 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
12682305 BIT(FLOW_DISSECTOR_KEY_VLAN) |
....@@ -1277,69 +2314,72 @@
12772314 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
12782315 BIT(FLOW_DISSECTOR_KEY_TCP) |
12792316 BIT(FLOW_DISSECTOR_KEY_IP) |
1280
- BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
1281
- netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
1282
- f->dissector->used_keys);
2317
+ BIT(FLOW_DISSECTOR_KEY_CT) |
2318
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2319
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2320
+ BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2321
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2322
+ netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2323
+ dissector->used_keys);
12832324 return -EOPNOTSUPP;
12842325 }
12852326
1286
- if ((dissector_uses_key(f->dissector,
1287
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
1288
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
1289
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
1290
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
1291
- struct flow_dissector_key_control *key =
1292
- skb_flow_dissector_target(f->dissector,
1293
- FLOW_DISSECTOR_KEY_ENC_CONTROL,
1294
- f->key);
1295
- switch (key->addr_type) {
1296
- case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1297
- case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1298
- if (parse_tunnel_attr(priv, spec, f))
1299
- return -EOPNOTSUPP;
1300
- break;
1301
- default:
1302
- return -EOPNOTSUPP;
1303
- }
2327
+ if (mlx5e_get_tc_tun(filter_dev)) {
2328
+ bool match_inner = false;
13042329
1305
- /* In decap flow, header pointers should point to the inner
1306
- * headers, outer header were already set by parse_tunnel_attr
1307
- */
1308
- headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1309
- inner_headers);
1310
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1311
- inner_headers);
2330
+ err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2331
+ outer_match_level, &match_inner);
2332
+ if (err)
2333
+ return err;
2334
+
2335
+ if (match_inner) {
2336
+ /* header pointers should point to the inner headers
2337
+ * if the packet was decapsulated already.
2338
+ * outer headers are set by parse_tunnel_attr.
2339
+ */
2340
+ match_level = inner_match_level;
2341
+ headers_c = get_match_inner_headers_criteria(spec);
2342
+ headers_v = get_match_inner_headers_value(spec);
2343
+ }
13122344 }
13132345
1314
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
1315
- struct flow_dissector_key_basic *key =
1316
- skb_flow_dissector_target(f->dissector,
1317
- FLOW_DISSECTOR_KEY_BASIC,
1318
- f->key);
1319
- struct flow_dissector_key_basic *mask =
1320
- skb_flow_dissector_target(f->dissector,
1321
- FLOW_DISSECTOR_KEY_BASIC,
1322
- f->mask);
1323
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1324
- ntohs(mask->n_proto));
1325
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1326
- ntohs(key->n_proto));
2346
+ err = mlx5e_flower_parse_meta(filter_dev, f);
2347
+ if (err)
2348
+ return err;
13272349
1328
- if (mask->n_proto)
2350
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2351
+ !skip_key_basic(filter_dev, f)) {
2352
+ struct flow_match_basic match;
2353
+
2354
+ flow_rule_match_basic(rule, &match);
2355
+ mlx5e_tc_set_ethertype(priv->mdev, &match,
2356
+ match_level == outer_match_level,
2357
+ headers_c, headers_v);
2358
+
2359
+ if (match.mask->n_proto)
13292360 *match_level = MLX5_MATCH_L2;
13302361 }
2362
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2363
+ is_vlan_dev(filter_dev)) {
2364
+ struct flow_dissector_key_vlan filter_dev_mask;
2365
+ struct flow_dissector_key_vlan filter_dev_key;
2366
+ struct flow_match_vlan match;
13312367
1332
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
1333
- struct flow_dissector_key_vlan *key =
1334
- skb_flow_dissector_target(f->dissector,
1335
- FLOW_DISSECTOR_KEY_VLAN,
1336
- f->key);
1337
- struct flow_dissector_key_vlan *mask =
1338
- skb_flow_dissector_target(f->dissector,
1339
- FLOW_DISSECTOR_KEY_VLAN,
1340
- f->mask);
1341
- if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
1342
- if (key->vlan_tpid == htons(ETH_P_8021AD)) {
2368
+ if (is_vlan_dev(filter_dev)) {
2369
+ match.key = &filter_dev_key;
2370
+ match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2371
+ match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2372
+ match.key->vlan_priority = 0;
2373
+ match.mask = &filter_dev_mask;
2374
+ memset(match.mask, 0xff, sizeof(*match.mask));
2375
+ match.mask->vlan_priority = 0;
2376
+ } else {
2377
+ flow_rule_match_vlan(rule, &match);
2378
+ }
2379
+ if (match.mask->vlan_id ||
2380
+ match.mask->vlan_priority ||
2381
+ match.mask->vlan_tpid) {
2382
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
13432383 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
13442384 svlan_tag, 1);
13452385 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
....@@ -1351,31 +2391,53 @@
13512391 cvlan_tag, 1);
13522392 }
13532393
1354
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
1355
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
2394
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2395
+ match.mask->vlan_id);
2396
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2397
+ match.key->vlan_id);
13562398
1357
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
1358
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
2399
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2400
+ match.mask->vlan_priority);
2401
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2402
+ match.key->vlan_priority);
13592403
13602404 *match_level = MLX5_MATCH_L2;
2405
+
2406
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2407
+ match.mask->vlan_eth_type &&
2408
+ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2409
+ ft_field_support.outer_second_vid,
2410
+ fs_type)) {
2411
+ MLX5_SET(fte_match_set_misc, misc_c,
2412
+ outer_second_cvlan_tag, 1);
2413
+ spec->match_criteria_enable |=
2414
+ MLX5_MATCH_MISC_PARAMETERS;
2415
+ }
13612416 }
13622417 } else if (*match_level != MLX5_MATCH_NONE) {
1363
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
2418
+ /* cvlan_tag enabled in match criteria and
2419
+ * disabled in match value means both S & C tags
2420
+ * don't exist (untagged of both)
2421
+ */
13642422 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
13652423 *match_level = MLX5_MATCH_L2;
13662424 }
13672425
1368
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
1369
- struct flow_dissector_key_vlan *key =
1370
- skb_flow_dissector_target(f->dissector,
1371
- FLOW_DISSECTOR_KEY_CVLAN,
1372
- f->key);
1373
- struct flow_dissector_key_vlan *mask =
1374
- skb_flow_dissector_target(f->dissector,
1375
- FLOW_DISSECTOR_KEY_CVLAN,
1376
- f->mask);
1377
- if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
1378
- if (key->vlan_tpid == htons(ETH_P_8021AD)) {
2426
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2427
+ struct flow_match_vlan match;
2428
+
2429
+ flow_rule_match_cvlan(rule, &match);
2430
+ if (match.mask->vlan_id ||
2431
+ match.mask->vlan_priority ||
2432
+ match.mask->vlan_tpid) {
2433
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2434
+ fs_type)) {
2435
+ NL_SET_ERR_MSG_MOD(extack,
2436
+ "Matching on CVLAN is not supported");
2437
+ return -EOPNOTSUPP;
2438
+ }
2439
+
2440
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
13792441 MLX5_SET(fte_match_set_misc, misc_c,
13802442 outer_second_svlan_tag, 1);
13812443 MLX5_SET(fte_match_set_misc, misc_v,
....@@ -1388,69 +2450,59 @@
13882450 }
13892451
13902452 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
1391
- mask->vlan_id);
2453
+ match.mask->vlan_id);
13922454 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
1393
- key->vlan_id);
2455
+ match.key->vlan_id);
13942456 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
1395
- mask->vlan_priority);
2457
+ match.mask->vlan_priority);
13962458 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
1397
- key->vlan_priority);
2459
+ match.key->vlan_priority);
13982460
13992461 *match_level = MLX5_MATCH_L2;
2462
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
14002463 }
14012464 }
14022465
1403
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
1404
- struct flow_dissector_key_eth_addrs *key =
1405
- skb_flow_dissector_target(f->dissector,
1406
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
1407
- f->key);
1408
- struct flow_dissector_key_eth_addrs *mask =
1409
- skb_flow_dissector_target(f->dissector,
1410
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
1411
- f->mask);
2466
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2467
+ struct flow_match_eth_addrs match;
14122468
2469
+ flow_rule_match_eth_addrs(rule, &match);
14132470 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14142471 dmac_47_16),
1415
- mask->dst);
2472
+ match.mask->dst);
14162473 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
14172474 dmac_47_16),
1418
- key->dst);
2475
+ match.key->dst);
14192476
14202477 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14212478 smac_47_16),
1422
- mask->src);
2479
+ match.mask->src);
14232480 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
14242481 smac_47_16),
1425
- key->src);
2482
+ match.key->src);
14262483
1427
- if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
2484
+ if (!is_zero_ether_addr(match.mask->src) ||
2485
+ !is_zero_ether_addr(match.mask->dst))
14282486 *match_level = MLX5_MATCH_L2;
14292487 }
14302488
1431
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
1432
- struct flow_dissector_key_control *key =
1433
- skb_flow_dissector_target(f->dissector,
1434
- FLOW_DISSECTOR_KEY_CONTROL,
1435
- f->key);
2489
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2490
+ struct flow_match_control match;
14362491
1437
- struct flow_dissector_key_control *mask =
1438
- skb_flow_dissector_target(f->dissector,
1439
- FLOW_DISSECTOR_KEY_CONTROL,
1440
- f->mask);
1441
- addr_type = key->addr_type;
2492
+ flow_rule_match_control(rule, &match);
2493
+ addr_type = match.key->addr_type;
14422494
14432495 /* the HW doesn't support frag first/later */
1444
- if (mask->flags & FLOW_DIS_FIRST_FRAG)
2496
+ if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
14452497 return -EOPNOTSUPP;
14462498
1447
- if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
2499
+ if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
14482500 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
14492501 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
1450
- key->flags & FLOW_DIS_IS_FRAGMENT);
2502
+ match.key->flags & FLOW_DIS_IS_FRAGMENT);
14512503
14522504 /* the HW doesn't need L3 inline to match on frag=no */
1453
- if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
2505
+ if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
14542506 *match_level = MLX5_MATCH_L2;
14552507 /* *** L2 attributes parsing up to here *** */
14562508 else
....@@ -1458,172 +2510,159 @@
14582510 }
14592511 }
14602512
1461
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
1462
- struct flow_dissector_key_basic *key =
1463
- skb_flow_dissector_target(f->dissector,
1464
- FLOW_DISSECTOR_KEY_BASIC,
1465
- f->key);
1466
- struct flow_dissector_key_basic *mask =
1467
- skb_flow_dissector_target(f->dissector,
1468
- FLOW_DISSECTOR_KEY_BASIC,
1469
- f->mask);
1470
- ip_proto = key->ip_proto;
2513
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2514
+ struct flow_match_basic match;
2515
+
2516
+ flow_rule_match_basic(rule, &match);
2517
+ ip_proto = match.key->ip_proto;
14712518
14722519 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
1473
- mask->ip_proto);
2520
+ match.mask->ip_proto);
14742521 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
1475
- key->ip_proto);
2522
+ match.key->ip_proto);
14762523
1477
- if (mask->ip_proto)
2524
+ if (match.mask->ip_proto)
14782525 *match_level = MLX5_MATCH_L3;
14792526 }
14802527
14812528 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1482
- struct flow_dissector_key_ipv4_addrs *key =
1483
- skb_flow_dissector_target(f->dissector,
1484
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1485
- f->key);
1486
- struct flow_dissector_key_ipv4_addrs *mask =
1487
- skb_flow_dissector_target(f->dissector,
1488
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1489
- f->mask);
2529
+ struct flow_match_ipv4_addrs match;
14902530
2531
+ flow_rule_match_ipv4_addrs(rule, &match);
14912532 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14922533 src_ipv4_src_ipv6.ipv4_layout.ipv4),
1493
- &mask->src, sizeof(mask->src));
2534
+ &match.mask->src, sizeof(match.mask->src));
14942535 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
14952536 src_ipv4_src_ipv6.ipv4_layout.ipv4),
1496
- &key->src, sizeof(key->src));
2537
+ &match.key->src, sizeof(match.key->src));
14972538 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
14982539 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1499
- &mask->dst, sizeof(mask->dst));
2540
+ &match.mask->dst, sizeof(match.mask->dst));
15002541 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
15012542 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1502
- &key->dst, sizeof(key->dst));
2543
+ &match.key->dst, sizeof(match.key->dst));
15032544
1504
- if (mask->src || mask->dst)
2545
+ if (match.mask->src || match.mask->dst)
15052546 *match_level = MLX5_MATCH_L3;
15062547 }
15072548
15082549 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1509
- struct flow_dissector_key_ipv6_addrs *key =
1510
- skb_flow_dissector_target(f->dissector,
1511
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1512
- f->key);
1513
- struct flow_dissector_key_ipv6_addrs *mask =
1514
- skb_flow_dissector_target(f->dissector,
1515
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1516
- f->mask);
2550
+ struct flow_match_ipv6_addrs match;
15172551
2552
+ flow_rule_match_ipv6_addrs(rule, &match);
15182553 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
15192554 src_ipv4_src_ipv6.ipv6_layout.ipv6),
1520
- &mask->src, sizeof(mask->src));
2555
+ &match.mask->src, sizeof(match.mask->src));
15212556 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
15222557 src_ipv4_src_ipv6.ipv6_layout.ipv6),
1523
- &key->src, sizeof(key->src));
2558
+ &match.key->src, sizeof(match.key->src));
15242559
15252560 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
15262561 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1527
- &mask->dst, sizeof(mask->dst));
2562
+ &match.mask->dst, sizeof(match.mask->dst));
15282563 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
15292564 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1530
- &key->dst, sizeof(key->dst));
2565
+ &match.key->dst, sizeof(match.key->dst));
15312566
1532
- if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
1533
- ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
2567
+ if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2568
+ ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
15342569 *match_level = MLX5_MATCH_L3;
15352570 }
15362571
1537
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
1538
- struct flow_dissector_key_ip *key =
1539
- skb_flow_dissector_target(f->dissector,
1540
- FLOW_DISSECTOR_KEY_IP,
1541
- f->key);
1542
- struct flow_dissector_key_ip *mask =
1543
- skb_flow_dissector_target(f->dissector,
1544
- FLOW_DISSECTOR_KEY_IP,
1545
- f->mask);
2572
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2573
+ struct flow_match_ip match;
15462574
1547
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
1548
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
2575
+ flow_rule_match_ip(rule, &match);
2576
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2577
+ match.mask->tos & 0x3);
2578
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2579
+ match.key->tos & 0x3);
15492580
1550
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
1551
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
2581
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2582
+ match.mask->tos >> 2);
2583
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2584
+ match.key->tos >> 2);
15522585
1553
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
1554
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
2586
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2587
+ match.mask->ttl);
2588
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2589
+ match.key->ttl);
15552590
1556
- if (mask->ttl &&
2591
+ if (match.mask->ttl &&
15572592 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
1558
- ft_field_support.outer_ipv4_ttl))
2593
+ ft_field_support.outer_ipv4_ttl)) {
2594
+ NL_SET_ERR_MSG_MOD(extack,
2595
+ "Matching on TTL is not supported");
15592596 return -EOPNOTSUPP;
2597
+ }
15602598
1561
- if (mask->tos || mask->ttl)
2599
+ if (match.mask->tos || match.mask->ttl)
15622600 *match_level = MLX5_MATCH_L3;
15632601 }
15642602
15652603 /* *** L3 attributes parsing up to here *** */
15662604
1567
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
1568
- struct flow_dissector_key_ports *key =
1569
- skb_flow_dissector_target(f->dissector,
1570
- FLOW_DISSECTOR_KEY_PORTS,
1571
- f->key);
1572
- struct flow_dissector_key_ports *mask =
1573
- skb_flow_dissector_target(f->dissector,
1574
- FLOW_DISSECTOR_KEY_PORTS,
1575
- f->mask);
2605
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2606
+ struct flow_match_ports match;
2607
+
2608
+ flow_rule_match_ports(rule, &match);
15762609 switch (ip_proto) {
15772610 case IPPROTO_TCP:
15782611 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1579
- tcp_sport, ntohs(mask->src));
2612
+ tcp_sport, ntohs(match.mask->src));
15802613 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1581
- tcp_sport, ntohs(key->src));
2614
+ tcp_sport, ntohs(match.key->src));
15822615
15832616 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1584
- tcp_dport, ntohs(mask->dst));
2617
+ tcp_dport, ntohs(match.mask->dst));
15852618 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1586
- tcp_dport, ntohs(key->dst));
2619
+ tcp_dport, ntohs(match.key->dst));
15872620 break;
15882621
15892622 case IPPROTO_UDP:
15902623 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1591
- udp_sport, ntohs(mask->src));
2624
+ udp_sport, ntohs(match.mask->src));
15922625 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1593
- udp_sport, ntohs(key->src));
2626
+ udp_sport, ntohs(match.key->src));
15942627
15952628 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1596
- udp_dport, ntohs(mask->dst));
2629
+ udp_dport, ntohs(match.mask->dst));
15972630 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1598
- udp_dport, ntohs(key->dst));
2631
+ udp_dport, ntohs(match.key->dst));
15992632 break;
16002633 default:
2634
+ NL_SET_ERR_MSG_MOD(extack,
2635
+ "Only UDP and TCP transports are supported for L4 matching");
16012636 netdev_err(priv->netdev,
16022637 "Only UDP and TCP transport are supported\n");
16032638 return -EINVAL;
16042639 }
16052640
1606
- if (mask->src || mask->dst)
2641
+ if (match.mask->src || match.mask->dst)
16072642 *match_level = MLX5_MATCH_L4;
16082643 }
16092644
1610
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
1611
- struct flow_dissector_key_tcp *key =
1612
- skb_flow_dissector_target(f->dissector,
1613
- FLOW_DISSECTOR_KEY_TCP,
1614
- f->key);
1615
- struct flow_dissector_key_tcp *mask =
1616
- skb_flow_dissector_target(f->dissector,
1617
- FLOW_DISSECTOR_KEY_TCP,
1618
- f->mask);
2645
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2646
+ struct flow_match_tcp match;
16192647
2648
+ flow_rule_match_tcp(rule, &match);
16202649 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
1621
- ntohs(mask->flags));
2650
+ ntohs(match.mask->flags));
16222651 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
1623
- ntohs(key->flags));
2652
+ ntohs(match.key->flags));
16242653
1625
- if (mask->flags)
2654
+ if (match.mask->flags)
16262655 *match_level = MLX5_MATCH_L4;
2656
+ }
2657
+
2658
+ /* Currently supported only for MPLS over UDP */
2659
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2660
+ !netif_is_bareudp(filter_dev)) {
2661
+ NL_SET_ERR_MSG_MOD(extack,
2662
+ "Matching on MPLS is supported only for MPLS over UDP");
2663
+ netdev_err(priv->netdev,
2664
+ "Matching on MPLS is supported only for MPLS over UDP\n");
2665
+ return -EOPNOTSUPP;
16272666 }
16282667
16292668 return 0;
....@@ -1632,66 +2671,80 @@
16322671 static int parse_cls_flower(struct mlx5e_priv *priv,
16332672 struct mlx5e_tc_flow *flow,
16342673 struct mlx5_flow_spec *spec,
1635
- struct tc_cls_flower_offload *f)
2674
+ struct flow_cls_offload *f,
2675
+ struct net_device *filter_dev)
16362676 {
2677
+ u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2678
+ struct netlink_ext_ack *extack = f->common.extack;
16372679 struct mlx5_core_dev *dev = priv->mdev;
16382680 struct mlx5_eswitch *esw = dev->priv.eswitch;
16392681 struct mlx5e_rep_priv *rpriv = priv->ppriv;
16402682 struct mlx5_eswitch_rep *rep;
1641
- u8 match_level;
2683
+ bool is_eswitch_flow;
16422684 int err;
16432685
1644
- err = __parse_cls_flower(priv, spec, f, &match_level);
2686
+ inner_match_level = MLX5_MATCH_NONE;
2687
+ outer_match_level = MLX5_MATCH_NONE;
16452688
1646
- if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
2689
+ err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2690
+ &inner_match_level, &outer_match_level);
2691
+ non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2692
+ outer_match_level : inner_match_level;
2693
+
2694
+ is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2695
+ if (!err && is_eswitch_flow) {
16472696 rep = rpriv->rep;
1648
- if (rep->vport != FDB_UPLINK_VPORT &&
2697
+ if (rep->vport != MLX5_VPORT_UPLINK &&
16492698 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
1650
- esw->offloads.inline_mode < match_level)) {
2699
+ esw->offloads.inline_mode < non_tunnel_match_level)) {
2700
+ NL_SET_ERR_MSG_MOD(extack,
2701
+ "Flow is not offloaded due to min inline setting");
16512702 netdev_warn(priv->netdev,
16522703 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
1653
- match_level, esw->offloads.inline_mode);
2704
+ non_tunnel_match_level, esw->offloads.inline_mode);
16542705 return -EOPNOTSUPP;
16552706 }
16562707 }
16572708
1658
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
1659
- flow->esw_attr->match_level = match_level;
1660
- else
1661
- flow->nic_attr->match_level = match_level;
2709
+ flow->attr->inner_match_level = inner_match_level;
2710
+ flow->attr->outer_match_level = outer_match_level;
2711
+
16622712
16632713 return err;
16642714 }
16652715
16662716 struct pedit_headers {
16672717 struct ethhdr eth;
2718
+ struct vlan_hdr vlan;
16682719 struct iphdr ip4;
16692720 struct ipv6hdr ip6;
16702721 struct tcphdr tcp;
16712722 struct udphdr udp;
16722723 };
16732724
2725
+struct pedit_headers_action {
2726
+ struct pedit_headers vals;
2727
+ struct pedit_headers masks;
2728
+ u32 pedits;
2729
+};
2730
+
16742731 static int pedit_header_offsets[] = {
1675
- [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
1676
- [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
1677
- [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
1678
- [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
1679
- [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2732
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2733
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2734
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2735
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2736
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
16802737 };
16812738
16822739 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
16832740
16842741 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
1685
- struct pedit_headers *masks,
1686
- struct pedit_headers *vals)
2742
+ struct pedit_headers_action *hdrs)
16872743 {
16882744 u32 *curr_pmask, *curr_pval;
16892745
1690
- if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
1691
- goto out_err;
1692
-
1693
- curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
1694
- curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset);
2746
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2747
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
16952748
16962749 if (*curr_pmask & mask) /* disallow acting twice on the same location */
16972750 goto out_err;
....@@ -1707,74 +2760,138 @@
17072760
17082761 struct mlx5_fields {
17092762 u8 field;
1710
- u8 size;
2763
+ u8 field_bsize;
2764
+ u32 field_mask;
17112765 u32 offset;
2766
+ u32 match_offset;
17122767 };
17132768
1714
-#define OFFLOAD(fw_field, size, field, off) \
1715
- {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
2769
+#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2770
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2771
+ offsetof(struct pedit_headers, field) + (off), \
2772
+ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2773
+
2774
+/* masked values are the same and there are no rewrites that do not have a
2775
+ * match.
2776
+ */
2777
+#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2778
+ type matchmaskx = *(type *)(matchmaskp); \
2779
+ type matchvalx = *(type *)(matchvalp); \
2780
+ type maskx = *(type *)(maskp); \
2781
+ type valx = *(type *)(valp); \
2782
+ \
2783
+ (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2784
+ matchmaskx)); \
2785
+})
2786
+
2787
+static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2788
+ void *matchmaskp, u8 bsize)
2789
+{
2790
+ bool same = false;
2791
+
2792
+ switch (bsize) {
2793
+ case 8:
2794
+ same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2795
+ break;
2796
+ case 16:
2797
+ same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2798
+ break;
2799
+ case 32:
2800
+ same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2801
+ break;
2802
+ }
2803
+
2804
+ return same;
2805
+}
17162806
17172807 static struct mlx5_fields fields[] = {
1718
- OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
1719
- OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0),
1720
- OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
1721
- OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0),
1722
- OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0),
2808
+ OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2809
+ OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2810
+ OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2811
+ OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2812
+ OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype),
2813
+ OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
17232814
1724
- OFFLOAD(IP_TTL, 1, ip4.ttl, 0),
1725
- OFFLOAD(SIPV4, 4, ip4.saddr, 0),
1726
- OFFLOAD(DIPV4, 4, ip4.daddr, 0),
2815
+ OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp),
2816
+ OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit),
2817
+ OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2818
+ OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
17272819
1728
- OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
1729
- OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0),
1730
- OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0),
1731
- OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0),
1732
- OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
1733
- OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0),
1734
- OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0),
1735
- OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0),
1736
- OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
2820
+ OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2821
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2822
+ OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2823
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2824
+ OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2825
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2826
+ OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2827
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2828
+ OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2829
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2830
+ OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2831
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2832
+ OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2833
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2834
+ OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2835
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2836
+ OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2837
+ OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
17372838
1738
- OFFLOAD(TCP_SPORT, 2, tcp.source, 0),
1739
- OFFLOAD(TCP_DPORT, 2, tcp.dest, 0),
1740
- OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
2839
+ OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
2840
+ OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
2841
+ /* in linux tcphdr tcp_flags is 8 bits long */
2842
+ OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
17412843
1742
- OFFLOAD(UDP_SPORT, 2, udp.source, 0),
1743
- OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
2844
+ OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2845
+ OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
17442846 };
17452847
1746
-/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
1747
- * max from the SW pedit action. On success, attr->num_mod_hdr_actions
1748
- * says how many HW actions were actually parsed.
1749
- */
1750
-static int offload_pedit_fields(struct pedit_headers *masks,
1751
- struct pedit_headers *vals,
1752
- struct mlx5e_tc_flow_parse_attr *parse_attr)
2848
+static unsigned long mask_to_le(unsigned long mask, int size)
17532849 {
1754
- struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
1755
- int i, action_size, nactions, max_actions, first, last, next_z;
1756
- void *s_masks_p, *a_masks_p, *vals_p;
1757
- struct mlx5_fields *f;
1758
- u8 cmd, field_bsize;
1759
- u32 s_mask, a_mask;
1760
- unsigned long mask;
17612850 __be32 mask_be32;
17622851 __be16 mask_be16;
1763
- void *action;
17642852
1765
- set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
1766
- add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
1767
- set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
1768
- add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
2853
+ if (size == 32) {
2854
+ mask_be32 = (__force __be32)(mask);
2855
+ mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2856
+ } else if (size == 16) {
2857
+ mask_be32 = (__force __be32)(mask);
2858
+ mask_be16 = *(__be16 *)&mask_be32;
2859
+ mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2860
+ }
17692861
1770
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
1771
- action = parse_attr->mod_hdr_actions +
1772
- parse_attr->num_mod_hdr_actions * action_size;
2862
+ return mask;
2863
+}
2864
+static int offload_pedit_fields(struct mlx5e_priv *priv,
2865
+ int namespace,
2866
+ struct pedit_headers_action *hdrs,
2867
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
2868
+ u32 *action_flags,
2869
+ struct netlink_ext_ack *extack)
2870
+{
2871
+ struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2872
+ int i, action_size, first, last, next_z;
2873
+ void *headers_c, *headers_v, *action, *vals_p;
2874
+ u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2875
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
2876
+ struct mlx5_fields *f;
2877
+ unsigned long mask, field_mask;
2878
+ int err;
2879
+ u8 cmd;
17732880
1774
- max_actions = parse_attr->max_mod_hdr_actions;
1775
- nactions = parse_attr->num_mod_hdr_actions;
2881
+ mod_acts = &parse_attr->mod_hdr_acts;
2882
+ headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2883
+ headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2884
+
2885
+ set_masks = &hdrs[0].masks;
2886
+ add_masks = &hdrs[1].masks;
2887
+ set_vals = &hdrs[0].vals;
2888
+ add_vals = &hdrs[1].vals;
2889
+
2890
+ action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
17762891
17772892 for (i = 0; i < ARRAY_SIZE(fields); i++) {
2893
+ bool skip;
2894
+
17782895 f = &fields[i];
17792896 /* avoid seeing bits set from previous iterations */
17802897 s_mask = 0;
....@@ -1783,157 +2900,254 @@
17832900 s_masks_p = (void *)set_masks + f->offset;
17842901 a_masks_p = (void *)add_masks + f->offset;
17852902
1786
- memcpy(&s_mask, s_masks_p, f->size);
1787
- memcpy(&a_mask, a_masks_p, f->size);
2903
+ s_mask = *s_masks_p & f->field_mask;
2904
+ a_mask = *a_masks_p & f->field_mask;
17882905
17892906 if (!s_mask && !a_mask) /* nothing to offload here */
17902907 continue;
17912908
17922909 if (s_mask && a_mask) {
2910
+ NL_SET_ERR_MSG_MOD(extack,
2911
+ "can't set and add to the same HW field");
17932912 printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
17942913 return -EOPNOTSUPP;
17952914 }
17962915
1797
- if (nactions == max_actions) {
1798
- printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
1799
- return -EOPNOTSUPP;
1800
- }
1801
-
2916
+ skip = false;
18022917 if (s_mask) {
2918
+ void *match_mask = headers_c + f->match_offset;
2919
+ void *match_val = headers_v + f->match_offset;
2920
+
18032921 cmd = MLX5_ACTION_TYPE_SET;
18042922 mask = s_mask;
18052923 vals_p = (void *)set_vals + f->offset;
2924
+ /* don't rewrite if we have a match on the same value */
2925
+ if (cmp_val_mask(vals_p, s_masks_p, match_val,
2926
+ match_mask, f->field_bsize))
2927
+ skip = true;
18062928 /* clear to denote we consumed this field */
1807
- memset(s_masks_p, 0, f->size);
2929
+ *s_masks_p &= ~f->field_mask;
18082930 } else {
18092931 cmd = MLX5_ACTION_TYPE_ADD;
18102932 mask = a_mask;
18112933 vals_p = (void *)add_vals + f->offset;
2934
+ /* add 0 is no change */
2935
+ if ((*(u32 *)vals_p & f->field_mask) == 0)
2936
+ skip = true;
18122937 /* clear to denote we consumed this field */
1813
- memset(a_masks_p, 0, f->size);
2938
+ *a_masks_p &= ~f->field_mask;
18142939 }
2940
+ if (skip)
2941
+ continue;
18152942
1816
- field_bsize = f->size * BITS_PER_BYTE;
2943
+ mask = mask_to_le(mask, f->field_bsize);
18172944
1818
- if (field_bsize == 32) {
1819
- mask_be32 = *(__be32 *)&mask;
1820
- mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
1821
- } else if (field_bsize == 16) {
1822
- mask_be16 = *(__be16 *)&mask;
1823
- mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
1824
- }
1825
-
1826
- first = find_first_bit(&mask, field_bsize);
1827
- next_z = find_next_zero_bit(&mask, field_bsize, first);
1828
- last = find_last_bit(&mask, field_bsize);
2945
+ first = find_first_bit(&mask, f->field_bsize);
2946
+ next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2947
+ last = find_last_bit(&mask, f->field_bsize);
18292948 if (first < next_z && next_z < last) {
2949
+ NL_SET_ERR_MSG_MOD(extack,
2950
+ "rewrite of few sub-fields isn't supported");
18302951 printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
18312952 mask);
18322953 return -EOPNOTSUPP;
18332954 }
18342955
2956
+ err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2957
+ if (err) {
2958
+ NL_SET_ERR_MSG_MOD(extack,
2959
+ "too many pedit actions, can't offload");
2960
+ mlx5_core_warn(priv->mdev,
2961
+ "mlx5: parsed %d pedit actions, can't do more\n",
2962
+ mod_acts->num_actions);
2963
+ return err;
2964
+ }
2965
+
2966
+ action = mod_acts->actions +
2967
+ (mod_acts->num_actions * action_size);
18352968 MLX5_SET(set_action_in, action, action_type, cmd);
18362969 MLX5_SET(set_action_in, action, field, f->field);
18372970
18382971 if (cmd == MLX5_ACTION_TYPE_SET) {
1839
- MLX5_SET(set_action_in, action, offset, first);
2972
+ int start;
2973
+
2974
+ field_mask = mask_to_le(f->field_mask, f->field_bsize);
2975
+
2976
+ /* a bit-sized field may start at a bit other than the first one */
2977
+ start = find_first_bit(&field_mask, f->field_bsize);
2978
+
2979
+ MLX5_SET(set_action_in, action, offset, first - start);
18402980 /* length is num of bits to be written, zero means length of 32 */
18412981 MLX5_SET(set_action_in, action, length, (last - first + 1));
18422982 }
18432983
1844
- if (field_bsize == 32)
2984
+ if (f->field_bsize == 32)
18452985 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
1846
- else if (field_bsize == 16)
2986
+ else if (f->field_bsize == 16)
18472987 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
1848
- else if (field_bsize == 8)
2988
+ else if (f->field_bsize == 8)
18492989 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
18502990
1851
- action += action_size;
1852
- nactions++;
2991
+ ++mod_acts->num_actions;
18532992 }
18542993
1855
- parse_attr->num_mod_hdr_actions = nactions;
18562994 return 0;
18572995 }
18582996
1859
-static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
1860
- const struct tc_action *a, int namespace,
1861
- struct mlx5e_tc_flow_parse_attr *parse_attr)
2997
+static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2998
+ int namespace)
18622999 {
1863
- int nkeys, action_size, max_actions;
1864
-
1865
- nkeys = tcf_pedit_nkeys(a);
1866
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
1867
-
18683000 if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
1869
- max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
3001
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
18703002 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
1871
- max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
3003
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
3004
+}
18723005
1873
- /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
1874
- max_actions = min(max_actions, nkeys * 16);
3006
+int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
3007
+ int namespace,
3008
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
3009
+{
3010
+ int action_size, new_num_actions, max_hw_actions;
3011
+ size_t new_sz, old_sz;
3012
+ void *ret;
18753013
1876
- parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
1877
- if (!parse_attr->mod_hdr_actions)
3014
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
3015
+ return 0;
3016
+
3017
+ action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
3018
+
3019
+ max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
3020
+ namespace);
3021
+ new_num_actions = min(max_hw_actions,
3022
+ mod_hdr_acts->actions ?
3023
+ mod_hdr_acts->max_actions * 2 : 1);
3024
+ if (mod_hdr_acts->max_actions == new_num_actions)
3025
+ return -ENOSPC;
3026
+
3027
+ new_sz = action_size * new_num_actions;
3028
+ old_sz = mod_hdr_acts->max_actions * action_size;
3029
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
3030
+ if (!ret)
18783031 return -ENOMEM;
18793032
1880
- parse_attr->max_mod_hdr_actions = max_actions;
3033
+ memset(ret + old_sz, 0, new_sz - old_sz);
3034
+ mod_hdr_acts->actions = ret;
3035
+ mod_hdr_acts->max_actions = new_num_actions;
3036
+
18813037 return 0;
3038
+}
3039
+
3040
+void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
3041
+{
3042
+ kfree(mod_hdr_acts->actions);
3043
+ mod_hdr_acts->actions = NULL;
3044
+ mod_hdr_acts->num_actions = 0;
3045
+ mod_hdr_acts->max_actions = 0;
18823046 }
18833047
18843048 static const struct pedit_headers zero_masks = {};
18853049
1886
-static int parse_tc_pedit_action(struct mlx5e_priv *priv,
1887
- const struct tc_action *a, int namespace,
1888
- struct mlx5e_tc_flow_parse_attr *parse_attr)
3050
+static int
3051
+parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
3052
+ const struct flow_action_entry *act, int namespace,
3053
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3054
+ struct pedit_headers_action *hdrs,
3055
+ struct netlink_ext_ack *extack)
18893056 {
1890
- struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
1891
- int nkeys, i, err = -EOPNOTSUPP;
3057
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
3058
+ int err = -EOPNOTSUPP;
18923059 u32 mask, val, offset;
1893
- u8 cmd, htype;
3060
+ u8 htype;
18943061
1895
- nkeys = tcf_pedit_nkeys(a);
3062
+ htype = act->mangle.htype;
3063
+ err = -EOPNOTSUPP; /* can't be all optimistic */
18963064
1897
- memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
1898
- memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
1899
-
1900
- for (i = 0; i < nkeys; i++) {
1901
- htype = tcf_pedit_htype(a, i);
1902
- cmd = tcf_pedit_cmd(a, i);
1903
- err = -EOPNOTSUPP; /* can't be all optimistic */
1904
-
1905
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
1906
- netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
1907
- goto out_err;
1908
- }
1909
-
1910
- if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
1911
- netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
1912
- goto out_err;
1913
- }
1914
-
1915
- mask = tcf_pedit_mask(a, i);
1916
- val = tcf_pedit_val(a, i);
1917
- offset = tcf_pedit_offset(a, i);
1918
-
1919
- err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
1920
- if (err)
1921
- goto out_err;
3065
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
3066
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
3067
+ goto out_err;
19223068 }
19233069
1924
- if (!parse_attr->mod_hdr_actions) {
1925
- err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
1926
- if (err)
1927
- goto out_err;
3070
+ if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
3071
+ NL_SET_ERR_MSG_MOD(extack,
3072
+ "The pedit offload action is not supported");
3073
+ goto out_err;
19283074 }
19293075
1930
- err = offload_pedit_fields(masks, vals, parse_attr);
3076
+ mask = act->mangle.mask;
3077
+ val = act->mangle.val;
3078
+ offset = act->mangle.offset;
3079
+
3080
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
3081
+ if (err)
3082
+ goto out_err;
3083
+
3084
+ hdrs[cmd].pedits++;
3085
+
3086
+ return 0;
3087
+out_err:
3088
+ return err;
3089
+}
3090
+
3091
+static int
3092
+parse_pedit_to_reformat(struct mlx5e_priv *priv,
3093
+ const struct flow_action_entry *act,
3094
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3095
+ struct netlink_ext_ack *extack)
3096
+{
3097
+ u32 mask, val, offset;
3098
+ u32 *p;
3099
+
3100
+ if (act->id != FLOW_ACTION_MANGLE)
3101
+ return -EOPNOTSUPP;
3102
+
3103
+ if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
3104
+ NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
3105
+ return -EOPNOTSUPP;
3106
+ }
3107
+
3108
+ mask = ~act->mangle.mask;
3109
+ val = act->mangle.val;
3110
+ offset = act->mangle.offset;
3111
+ p = (u32 *)&parse_attr->eth;
3112
+ *(p + (offset >> 2)) |= (val & mask);
3113
+
3114
+ return 0;
3115
+}
3116
+
3117
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
3118
+ const struct flow_action_entry *act, int namespace,
3119
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3120
+ struct pedit_headers_action *hdrs,
3121
+ struct mlx5e_tc_flow *flow,
3122
+ struct netlink_ext_ack *extack)
3123
+{
3124
+ if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3125
+ return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3126
+
3127
+ return parse_pedit_to_modify_hdr(priv, act, namespace,
3128
+ parse_attr, hdrs, extack);
3129
+}
3130
+
3131
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3132
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3133
+ struct pedit_headers_action *hdrs,
3134
+ u32 *action_flags,
3135
+ struct netlink_ext_ack *extack)
3136
+{
3137
+ struct pedit_headers *cmd_masks;
3138
+ int err;
3139
+ u8 cmd;
3140
+
3141
+ err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3142
+ action_flags, extack);
19313143 if (err < 0)
19323144 goto out_dealloc_parsed_actions;
19333145
19343146 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
1935
- cmd_masks = &masks[cmd];
3147
+ cmd_masks = &hdrs[cmd].masks;
19363148 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3149
+ NL_SET_ERR_MSG_MOD(extack,
3150
+ "attempt to offload an unsupported field");
19373151 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
19383152 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
19393153 16, 1, cmd_masks, sizeof(zero_masks), true);
....@@ -1945,24 +3159,30 @@
19453159 return 0;
19463160
19473161 out_dealloc_parsed_actions:
1948
- kfree(parse_attr->mod_hdr_actions);
1949
-out_err:
3162
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
19503163 return err;
19513164 }
19523165
1953
-static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
3166
+static bool csum_offload_supported(struct mlx5e_priv *priv,
3167
+ u32 action,
3168
+ u32 update_flags,
3169
+ struct netlink_ext_ack *extack)
19543170 {
19553171 u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
19563172 TCA_CSUM_UPDATE_FLAG_UDP;
19573173
19583174 /* The HW recalcs checksums only if re-writing headers */
19593175 if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3176
+ NL_SET_ERR_MSG_MOD(extack,
3177
+ "TC csum action is only offloaded with pedit");
19603178 netdev_warn(priv->netdev,
19613179 "TC csum action is only offloaded with pedit\n");
19623180 return false;
19633181 }
19643182
19653183 if (update_flags & ~prot_flags) {
3184
+ NL_SET_ERR_MSG_MOD(extack,
3185
+ "can't offload TC csum action for some header/s");
19663186 netdev_warn(priv->netdev,
19673187 "can't offload TC csum action for some header/s - flags %#x\n",
19683188 update_flags);
....@@ -1972,46 +3192,162 @@
19723192 return true;
19733193 }
19743194
1975
-static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
1976
- struct tcf_exts *exts)
3195
+struct ip_ttl_word {
3196
+ __u8 ttl;
3197
+ __u8 protocol;
3198
+ __sum16 check;
3199
+};
3200
+
3201
+struct ipv6_hoplimit_word {
3202
+ __be16 payload_len;
3203
+ __u8 nexthdr;
3204
+ __u8 hop_limit;
3205
+};
3206
+
3207
+static int is_action_keys_supported(const struct flow_action_entry *act,
3208
+ bool ct_flow, bool *modify_ip_header,
3209
+ bool *modify_tuple,
3210
+ struct netlink_ext_ack *extack)
19773211 {
1978
- const struct tc_action *a;
1979
- bool modify_ip_header;
1980
- LIST_HEAD(actions);
1981
- u8 htype, ip_proto;
3212
+ u32 mask, offset;
3213
+ u8 htype;
3214
+
3215
+ htype = act->mangle.htype;
3216
+ offset = act->mangle.offset;
3217
+ mask = ~act->mangle.mask;
3218
+ /* For IPv4 & IPv6 header check 4 byte word,
3219
+ * to determine that modified fields
3220
+ * are NOT ttl & hop_limit only.
3221
+ */
3222
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3223
+ struct ip_ttl_word *ttl_word =
3224
+ (struct ip_ttl_word *)&mask;
3225
+
3226
+ if (offset != offsetof(struct iphdr, ttl) ||
3227
+ ttl_word->protocol ||
3228
+ ttl_word->check) {
3229
+ *modify_ip_header = true;
3230
+ }
3231
+
3232
+ if (offset >= offsetof(struct iphdr, saddr))
3233
+ *modify_tuple = true;
3234
+
3235
+ if (ct_flow && *modify_tuple) {
3236
+ NL_SET_ERR_MSG_MOD(extack,
3237
+ "can't offload re-write of ipv4 address with action ct");
3238
+ return -EOPNOTSUPP;
3239
+ }
3240
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3241
+ struct ipv6_hoplimit_word *hoplimit_word =
3242
+ (struct ipv6_hoplimit_word *)&mask;
3243
+
3244
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
3245
+ hoplimit_word->payload_len ||
3246
+ hoplimit_word->nexthdr) {
3247
+ *modify_ip_header = true;
3248
+ }
3249
+
3250
+ if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3251
+ *modify_tuple = true;
3252
+
3253
+ if (ct_flow && *modify_tuple) {
3254
+ NL_SET_ERR_MSG_MOD(extack,
3255
+ "can't offload re-write of ipv6 address with action ct");
3256
+ return -EOPNOTSUPP;
3257
+ }
3258
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3259
+ htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3260
+ *modify_tuple = true;
3261
+ if (ct_flow) {
3262
+ NL_SET_ERR_MSG_MOD(extack,
3263
+ "can't offload re-write of transport header ports with action ct");
3264
+ return -EOPNOTSUPP;
3265
+ }
3266
+ }
3267
+
3268
+ return 0;
3269
+}
3270
+
3271
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3272
+ bool ct_flow, struct netlink_ext_ack *extack,
3273
+ struct mlx5e_priv *priv,
3274
+ struct mlx5_flow_spec *spec)
3275
+{
3276
+ if (!modify_tuple || ct_clear)
3277
+ return true;
3278
+
3279
+ if (ct_flow) {
3280
+ NL_SET_ERR_MSG_MOD(extack,
3281
+ "can't offload tuple modification with non-clear ct()");
3282
+ netdev_info(priv->netdev,
3283
+ "can't offload tuple modification with non-clear ct()");
3284
+ return false;
3285
+ }
3286
+
3287
+ /* Add ct_state=-trk match so it will be offloaded for non ct flows
3288
+ * (or after clear action), as otherwise, since the tuple is changed,
3289
+ * we can't restore ct state
3290
+ */
3291
+ if (mlx5_tc_ct_add_no_trk_match(spec)) {
3292
+ NL_SET_ERR_MSG_MOD(extack,
3293
+ "can't offload tuple modification with ct matches and no ct(clear) action");
3294
+ netdev_info(priv->netdev,
3295
+ "can't offload tuple modification with ct matches and no ct(clear) action");
3296
+ return false;
3297
+ }
3298
+
3299
+ return true;
3300
+}
3301
+
3302
+static bool modify_header_match_supported(struct mlx5e_priv *priv,
3303
+ struct mlx5_flow_spec *spec,
3304
+ struct flow_action *flow_action,
3305
+ u32 actions, bool ct_flow,
3306
+ bool ct_clear,
3307
+ struct netlink_ext_ack *extack)
3308
+{
3309
+ const struct flow_action_entry *act;
3310
+ bool modify_ip_header, modify_tuple;
3311
+ void *headers_c;
19823312 void *headers_v;
19833313 u16 ethertype;
1984
- int nkeys, i;
3314
+ u8 ip_proto;
3315
+ int i, err;
19853316
1986
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
3317
+ headers_c = get_match_headers_criteria(actions, spec);
3318
+ headers_v = get_match_headers_value(actions, spec);
19873319 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
19883320
19893321 /* for non-IP we only re-write MACs, so we're okay */
1990
- if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3322
+ if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3323
+ ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
19913324 goto out_ok;
19923325
19933326 modify_ip_header = false;
1994
- tcf_exts_for_each_action(i, a, exts) {
1995
- int k;
1996
-
1997
- if (!is_tcf_pedit(a))
3327
+ modify_tuple = false;
3328
+ flow_action_for_each(i, act, flow_action) {
3329
+ if (act->id != FLOW_ACTION_MANGLE &&
3330
+ act->id != FLOW_ACTION_ADD)
19983331 continue;
19993332
2000
- nkeys = tcf_pedit_nkeys(a);
2001
- for (k = 0; k < nkeys; k++) {
2002
- htype = tcf_pedit_htype(a, k);
2003
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
2004
- htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
2005
- modify_ip_header = true;
2006
- break;
2007
- }
2008
- }
3333
+ err = is_action_keys_supported(act, ct_flow,
3334
+ &modify_ip_header,
3335
+ &modify_tuple, extack);
3336
+ if (err)
3337
+ return err;
20093338 }
3339
+
3340
+ if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3341
+ priv, spec))
3342
+ return false;
20103343
20113344 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
20123345 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
20133346 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
2014
- pr_info("can't offload re-write of ip proto %d\n", ip_proto);
3347
+ NL_SET_ERR_MSG_MOD(extack,
3348
+ "can't offload re-write of non TCP/UDP");
3349
+ netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3350
+ ip_proto);
20153351 return false;
20163352 }
20173353
....@@ -2020,25 +3356,42 @@
20203356 }
20213357
20223358 static bool actions_match_supported(struct mlx5e_priv *priv,
2023
- struct tcf_exts *exts,
3359
+ struct flow_action *flow_action,
20243360 struct mlx5e_tc_flow_parse_attr *parse_attr,
2025
- struct mlx5e_tc_flow *flow)
3361
+ struct mlx5e_tc_flow *flow,
3362
+ struct netlink_ext_ack *extack)
20263363 {
3364
+ bool ct_flow = false, ct_clear = false;
20273365 u32 actions;
20283366
2029
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
2030
- actions = flow->esw_attr->action;
2031
- else
2032
- actions = flow->nic_attr->action;
3367
+ ct_clear = flow->attr->ct_attr.ct_action &
3368
+ TCA_CT_ACT_CLEAR;
3369
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3370
+ actions = flow->attr->action;
20333371
2034
- if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
2035
- !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
2036
- return false;
3372
+ if (mlx5e_is_eswitch_flow(flow)) {
3373
+ if (flow->attr->esw_attr->split_count && ct_flow) {
3374
+ /* All registers used by ct are cleared when using
3375
+ * split rules.
3376
+ */
3377
+ NL_SET_ERR_MSG_MOD(extack,
3378
+ "Can't offload mirroring with action ct");
3379
+ return false;
3380
+ }
3381
+ }
20373382
20383383 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2039
- return modify_header_match_supported(&parse_attr->spec, exts);
3384
+ return modify_header_match_supported(priv, &parse_attr->spec,
3385
+ flow_action, actions,
3386
+ ct_flow, ct_clear,
3387
+ extack);
20403388
20413389 return true;
3390
+}
3391
+
3392
+static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3393
+{
3394
+ return priv->mdev == peer_priv->mdev;
20423395 }
20433396
20443397 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
....@@ -2049,148 +3402,308 @@
20493402 fmdev = priv->mdev;
20503403 pmdev = peer_priv->mdev;
20513404
2052
- mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
2053
- mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
3405
+ fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3406
+ psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
20543407
20553408 return (fsystem_guid == psystem_guid);
20563409 }
20573410
2058
-static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
2059
- struct mlx5e_tc_flow_parse_attr *parse_attr,
2060
- struct mlx5e_tc_flow *flow)
3411
+static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3412
+ const struct flow_action_entry *act,
3413
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3414
+ struct pedit_headers_action *hdrs,
3415
+ u32 *action, struct netlink_ext_ack *extack)
20613416 {
2062
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
2063
- const struct tc_action *a;
2064
- LIST_HEAD(actions);
3417
+ u16 mask16 = VLAN_VID_MASK;
3418
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3419
+ const struct flow_action_entry pedit_act = {
3420
+ .id = FLOW_ACTION_MANGLE,
3421
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3422
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3423
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3424
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3425
+ };
3426
+ u8 match_prio_mask, match_prio_val;
3427
+ void *headers_c, *headers_v;
3428
+ int err;
3429
+
3430
+ headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3431
+ headers_v = get_match_headers_value(*action, &parse_attr->spec);
3432
+
3433
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3434
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3435
+ NL_SET_ERR_MSG_MOD(extack,
3436
+ "VLAN rewrite action must have VLAN protocol match");
3437
+ return -EOPNOTSUPP;
3438
+ }
3439
+
3440
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3441
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3442
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3443
+ NL_SET_ERR_MSG_MOD(extack,
3444
+ "Changing VLAN prio is not supported");
3445
+ return -EOPNOTSUPP;
3446
+ }
3447
+
3448
+ err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3449
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3450
+
3451
+ return err;
3452
+}
3453
+
3454
+static int
3455
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3456
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3457
+ struct pedit_headers_action *hdrs,
3458
+ u32 *action, struct netlink_ext_ack *extack)
3459
+{
3460
+ const struct flow_action_entry prio_tag_act = {
3461
+ .vlan.vid = 0,
3462
+ .vlan.prio =
3463
+ MLX5_GET(fte_match_set_lyr_2_4,
3464
+ get_match_headers_value(*action,
3465
+ &parse_attr->spec),
3466
+ first_prio) &
3467
+ MLX5_GET(fte_match_set_lyr_2_4,
3468
+ get_match_headers_criteria(*action,
3469
+ &parse_attr->spec),
3470
+ first_prio),
3471
+ };
3472
+
3473
+ return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3474
+ &prio_tag_act, parse_attr, hdrs, action,
3475
+ extack);
3476
+}
3477
+
3478
+static int validate_goto_chain(struct mlx5e_priv *priv,
3479
+ struct mlx5e_tc_flow *flow,
3480
+ const struct flow_action_entry *act,
3481
+ u32 actions,
3482
+ struct netlink_ext_ack *extack)
3483
+{
3484
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
3485
+ struct mlx5_flow_attr *attr = flow->attr;
3486
+ bool ft_flow = mlx5e_is_ft_flow(flow);
3487
+ u32 dest_chain = act->chain_index;
3488
+ struct mlx5_fs_chains *chains;
3489
+ struct mlx5_eswitch *esw;
3490
+ u32 reformat_and_fwd;
3491
+ u32 max_chain;
3492
+
3493
+ esw = priv->mdev->priv.eswitch;
3494
+ chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3495
+ max_chain = mlx5_chains_get_chain_range(chains);
3496
+ reformat_and_fwd = is_esw ?
3497
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3498
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3499
+
3500
+ if (ft_flow) {
3501
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3502
+ return -EOPNOTSUPP;
3503
+ }
3504
+
3505
+ if (!mlx5_chains_backwards_supported(chains) &&
3506
+ dest_chain <= attr->chain) {
3507
+ NL_SET_ERR_MSG_MOD(extack,
3508
+ "Goto lower numbered chain isn't supported");
3509
+ return -EOPNOTSUPP;
3510
+ }
3511
+
3512
+ if (dest_chain > max_chain) {
3513
+ NL_SET_ERR_MSG_MOD(extack,
3514
+ "Requested destination chain is out of supported range");
3515
+ return -EOPNOTSUPP;
3516
+ }
3517
+
3518
+ if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3519
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3520
+ !reformat_and_fwd) {
3521
+ NL_SET_ERR_MSG_MOD(extack,
3522
+ "Goto chain is not allowed if action has reformat or decap");
3523
+ return -EOPNOTSUPP;
3524
+ }
3525
+
3526
+ return 0;
3527
+}
3528
+
3529
+static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3530
+ struct flow_action *flow_action,
3531
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
3532
+ struct mlx5e_tc_flow *flow,
3533
+ struct netlink_ext_ack *extack)
3534
+{
3535
+ struct mlx5_flow_attr *attr = flow->attr;
3536
+ struct pedit_headers_action hdrs[2] = {};
3537
+ const struct flow_action_entry *act;
3538
+ struct mlx5_nic_flow_attr *nic_attr;
20653539 u32 action = 0;
20663540 int err, i;
20673541
2068
- if (!tcf_exts_has_actions(exts))
3542
+ if (!flow_action_has_entries(flow_action))
20693543 return -EINVAL;
20703544
2071
- attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3545
+ if (!flow_action_hw_stats_check(flow_action, extack,
3546
+ FLOW_ACTION_HW_STATS_DELAYED_BIT))
3547
+ return -EOPNOTSUPP;
20723548
2073
- tcf_exts_for_each_action(i, a, exts) {
2074
- if (is_tcf_gact_shot(a)) {
3549
+ nic_attr = attr->nic_attr;
3550
+
3551
+ nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3552
+
3553
+ flow_action_for_each(i, act, flow_action) {
3554
+ switch (act->id) {
3555
+ case FLOW_ACTION_ACCEPT:
3556
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3557
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
3558
+ break;
3559
+ case FLOW_ACTION_DROP:
20753560 action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
20763561 if (MLX5_CAP_FLOWTABLE(priv->mdev,
20773562 flow_table_properties_nic_receive.flow_counter))
20783563 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2079
- continue;
2080
- }
2081
-
2082
- if (is_tcf_pedit(a)) {
2083
- err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
2084
- parse_attr);
3564
+ break;
3565
+ case FLOW_ACTION_MANGLE:
3566
+ case FLOW_ACTION_ADD:
3567
+ err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3568
+ parse_attr, hdrs, NULL, extack);
20853569 if (err)
20863570 return err;
20873571
2088
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
2089
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2090
- continue;
2091
- }
3572
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3573
+ break;
3574
+ case FLOW_ACTION_VLAN_MANGLE:
3575
+ err = add_vlan_rewrite_action(priv,
3576
+ MLX5_FLOW_NAMESPACE_KERNEL,
3577
+ act, parse_attr, hdrs,
3578
+ &action, extack);
3579
+ if (err)
3580
+ return err;
20923581
2093
- if (is_tcf_csum(a)) {
3582
+ break;
3583
+ case FLOW_ACTION_CSUM:
20943584 if (csum_offload_supported(priv, action,
2095
- tcf_csum_update_flags(a)))
2096
- continue;
3585
+ act->csum_flags,
3586
+ extack))
3587
+ break;
20973588
20983589 return -EOPNOTSUPP;
2099
- }
2100
-
2101
- if (is_tcf_mirred_egress_redirect(a)) {
2102
- struct net_device *peer_dev = tcf_mirred_dev(a);
3590
+ case FLOW_ACTION_REDIRECT: {
3591
+ struct net_device *peer_dev = act->dev;
21033592
21043593 if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
21053594 same_hw_devs(priv, netdev_priv(peer_dev))) {
2106
- parse_attr->mirred_ifindex = peer_dev->ifindex;
2107
- flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
3595
+ parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3596
+ flow_flag_set(flow, HAIRPIN);
21083597 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
21093598 MLX5_FLOW_CONTEXT_ACTION_COUNT;
21103599 } else {
3600
+ NL_SET_ERR_MSG_MOD(extack,
3601
+ "device is not on same HW, can't offload");
21113602 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
21123603 peer_dev->name);
21133604 return -EINVAL;
21143605 }
2115
- continue;
2116
- }
2117
-
2118
- if (is_tcf_skbedit_mark(a)) {
2119
- u32 mark = tcf_skbedit_mark(a);
3606
+ }
3607
+ break;
3608
+ case FLOW_ACTION_MARK: {
3609
+ u32 mark = act->mark;
21203610
21213611 if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
2122
- netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
2123
- mark);
3612
+ NL_SET_ERR_MSG_MOD(extack,
3613
+ "Bad flow mark - only 16 bit is supported");
21243614 return -EINVAL;
21253615 }
21263616
2127
- attr->flow_tag = mark;
3617
+ nic_attr->flow_tag = mark;
21283618 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2129
- continue;
2130
- }
3619
+ }
3620
+ break;
3621
+ case FLOW_ACTION_GOTO:
3622
+ err = validate_goto_chain(priv, flow, act, action,
3623
+ extack);
3624
+ if (err)
3625
+ return err;
21313626
2132
- return -EINVAL;
3627
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3628
+ attr->dest_chain = act->chain_index;
3629
+ break;
3630
+ case FLOW_ACTION_CT:
3631
+ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3632
+ if (err)
3633
+ return err;
3634
+
3635
+ flow_flag_set(flow, CT);
3636
+ break;
3637
+ default:
3638
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3639
+ return -EOPNOTSUPP;
3640
+ }
3641
+ }
3642
+
3643
+ if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3644
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3645
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3646
+ parse_attr, hdrs, &action, extack);
3647
+ if (err)
3648
+ return err;
3649
+ /* in case all pedit actions are skipped, remove the MOD_HDR
3650
+ * flag.
3651
+ */
3652
+ if (parse_attr->mod_hdr_acts.num_actions == 0) {
3653
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3654
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3655
+ }
21333656 }
21343657
21353658 attr->action = action;
2136
- if (!actions_match_supported(priv, exts, parse_attr, flow))
3659
+
3660
+ if (attr->dest_chain) {
3661
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3662
+ NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3663
+ return -EOPNOTSUPP;
3664
+ }
3665
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3666
+ }
3667
+
3668
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3669
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3670
+
3671
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
21373672 return -EOPNOTSUPP;
21383673
21393674 return 0;
21403675 }
21413676
2142
-static inline int cmp_encap_info(struct ip_tunnel_key *a,
2143
- struct ip_tunnel_key *b)
3677
+struct encap_key {
3678
+ const struct ip_tunnel_key *ip_tun_key;
3679
+ struct mlx5e_tc_tunnel *tc_tunnel;
3680
+};
3681
+
3682
+static inline int cmp_encap_info(struct encap_key *a,
3683
+ struct encap_key *b)
21443684 {
2145
- return memcmp(a, b, sizeof(*a));
3685
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3686
+ a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
21463687 }
21473688
2148
-static inline int hash_encap_info(struct ip_tunnel_key *key)
3689
+static inline int cmp_decap_info(struct mlx5e_decap_key *a,
3690
+ struct mlx5e_decap_key *b)
21493691 {
2150
- return jhash(key, sizeof(*key), 0);
3692
+ return memcmp(&a->key, &b->key, sizeof(b->key));
21513693 }
21523694
2153
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
2154
- struct net_device *mirred_dev,
2155
- struct net_device **out_dev,
2156
- struct flowi4 *fl4,
2157
- struct neighbour **out_n,
2158
- u8 *out_ttl)
3695
+static inline int hash_encap_info(struct encap_key *key)
21593696 {
2160
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2161
- struct mlx5e_rep_priv *uplink_rpriv;
2162
- struct rtable *rt;
2163
- struct neighbour *n = NULL;
2164
-
2165
-#if IS_ENABLED(CONFIG_INET)
2166
- int ret;
2167
-
2168
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
2169
- ret = PTR_ERR_OR_ZERO(rt);
2170
- if (ret)
2171
- return ret;
2172
-#else
2173
- return -EOPNOTSUPP;
2174
-#endif
2175
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2176
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
2177
- if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
2178
- *out_dev = uplink_rpriv->netdev;
2179
- else
2180
- *out_dev = rt->dst.dev;
2181
-
2182
- if (!(*out_ttl))
2183
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
2184
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
2185
- ip_rt_put(rt);
2186
- if (!n)
2187
- return -ENOMEM;
2188
-
2189
- *out_n = n;
2190
- return 0;
3697
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3698
+ key->tc_tunnel->tunnel_type);
21913699 }
21923700
2193
-static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
3701
+static inline int hash_decap_info(struct mlx5e_decap_key *key)
3702
+{
3703
+ return jhash(&key->key, sizeof(key->key), 0);
3704
+}
3705
+
3706
+static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
21943707 struct net_device *peer_netdev)
21953708 {
21963709 struct mlx5e_priv *peer_priv;
....@@ -2198,423 +3711,284 @@
21983711 peer_priv = netdev_priv(peer_netdev);
21993712
22003713 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
2201
- (priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
2202
- same_hw_devs(priv, peer_priv) &&
2203
- MLX5_VPORT_MANAGER(peer_priv->mdev) &&
2204
- (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
3714
+ mlx5e_eswitch_vf_rep(priv->netdev) &&
3715
+ mlx5e_eswitch_vf_rep(peer_netdev) &&
3716
+ same_hw_devs(priv, peer_priv));
22053717 }
22063718
2207
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
2208
- struct net_device *mirred_dev,
2209
- struct net_device **out_dev,
2210
- struct flowi6 *fl6,
2211
- struct neighbour **out_n,
2212
- u8 *out_ttl)
3719
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
22133720 {
2214
- struct neighbour *n = NULL;
2215
- struct dst_entry *dst;
2216
-
2217
-#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
2218
- struct mlx5e_rep_priv *uplink_rpriv;
2219
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2220
-
2221
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
2222
- NULL);
2223
- if (IS_ERR(dst))
2224
- return PTR_ERR(dst);
2225
-
2226
- if (!(*out_ttl))
2227
- *out_ttl = ip6_dst_hoplimit(dst);
2228
-
2229
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2230
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
2231
- if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
2232
- *out_dev = uplink_rpriv->netdev;
2233
- else
2234
- *out_dev = dst->dev;
2235
-#else
2236
- return -EOPNOTSUPP;
2237
-#endif
2238
-
2239
- n = dst_neigh_lookup(dst, &fl6->daddr);
2240
- dst_release(dst);
2241
- if (!n)
2242
- return -ENOMEM;
2243
-
2244
- *out_n = n;
2245
- return 0;
3721
+ return refcount_inc_not_zero(&e->refcnt);
22463722 }
22473723
2248
-static void gen_vxlan_header_ipv4(struct net_device *out_dev,
2249
- char buf[], int encap_size,
2250
- unsigned char h_dest[ETH_ALEN],
2251
- u8 tos, u8 ttl,
2252
- __be32 daddr,
2253
- __be32 saddr,
2254
- __be16 udp_dst_port,
2255
- __be32 vx_vni)
3724
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
22563725 {
2257
- struct ethhdr *eth = (struct ethhdr *)buf;
2258
- struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
2259
- struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
2260
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
2261
-
2262
- memset(buf, 0, encap_size);
2263
-
2264
- ether_addr_copy(eth->h_dest, h_dest);
2265
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
2266
- eth->h_proto = htons(ETH_P_IP);
2267
-
2268
- ip->daddr = daddr;
2269
- ip->saddr = saddr;
2270
-
2271
- ip->tos = tos;
2272
- ip->ttl = ttl;
2273
- ip->protocol = IPPROTO_UDP;
2274
- ip->version = 0x4;
2275
- ip->ihl = 0x5;
2276
-
2277
- udp->dest = udp_dst_port;
2278
- vxh->vx_flags = VXLAN_HF_VNI;
2279
- vxh->vx_vni = vxlan_vni_field(vx_vni);
3726
+ return refcount_inc_not_zero(&e->refcnt);
22803727 }
22813728
2282
-static void gen_vxlan_header_ipv6(struct net_device *out_dev,
2283
- char buf[], int encap_size,
2284
- unsigned char h_dest[ETH_ALEN],
2285
- u8 tos, u8 ttl,
2286
- struct in6_addr *daddr,
2287
- struct in6_addr *saddr,
2288
- __be16 udp_dst_port,
2289
- __be32 vx_vni)
2290
-{
2291
- struct ethhdr *eth = (struct ethhdr *)buf;
2292
- struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
2293
- struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
2294
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
2295
-
2296
- memset(buf, 0, encap_size);
2297
-
2298
- ether_addr_copy(eth->h_dest, h_dest);
2299
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
2300
- eth->h_proto = htons(ETH_P_IPV6);
2301
-
2302
- ip6_flow_hdr(ip6h, tos, 0);
2303
- /* the HW fills up ipv6 payload len */
2304
- ip6h->nexthdr = IPPROTO_UDP;
2305
- ip6h->hop_limit = ttl;
2306
- ip6h->daddr = *daddr;
2307
- ip6h->saddr = *saddr;
2308
-
2309
- udp->dest = udp_dst_port;
2310
- vxh->vx_flags = VXLAN_HF_VNI;
2311
- vxh->vx_vni = vxlan_vni_field(vx_vni);
2312
-}
2313
-
2314
-static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
2315
- struct net_device *mirred_dev,
2316
- struct mlx5e_encap_entry *e)
2317
-{
2318
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
2319
- int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
2320
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
2321
- struct net_device *out_dev;
2322
- struct neighbour *n = NULL;
2323
- struct flowi4 fl4 = {};
2324
- u8 nud_state, tos, ttl;
2325
- char *encap_header;
2326
- int err;
2327
-
2328
- if (max_encap_size < ipv4_encap_size) {
2329
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
2330
- ipv4_encap_size, max_encap_size);
2331
- return -EOPNOTSUPP;
2332
- }
2333
-
2334
- encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
2335
- if (!encap_header)
2336
- return -ENOMEM;
2337
-
2338
- switch (e->tunnel_type) {
2339
- case MLX5_HEADER_TYPE_VXLAN:
2340
- fl4.flowi4_proto = IPPROTO_UDP;
2341
- fl4.fl4_dport = tun_key->tp_dst;
2342
- break;
2343
- default:
2344
- err = -EOPNOTSUPP;
2345
- goto free_encap;
2346
- }
2347
-
2348
- tos = tun_key->tos;
2349
- ttl = tun_key->ttl;
2350
-
2351
- fl4.flowi4_tos = tun_key->tos;
2352
- fl4.daddr = tun_key->u.ipv4.dst;
2353
- fl4.saddr = tun_key->u.ipv4.src;
2354
-
2355
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
2356
- &fl4, &n, &ttl);
2357
- if (err)
2358
- goto free_encap;
2359
-
2360
- /* used by mlx5e_detach_encap to lookup a neigh hash table
2361
- * entry in the neigh hash table when a user deletes a rule
2362
- */
2363
- e->m_neigh.dev = n->dev;
2364
- e->m_neigh.family = n->ops->family;
2365
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
2366
- e->out_dev = out_dev;
2367
-
2368
- /* It's importent to add the neigh to the hash table before checking
2369
- * the neigh validity state. So if we'll get a notification, in case the
2370
- * neigh changes it's validity state, we would find the relevant neigh
2371
- * in the hash.
2372
- */
2373
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
2374
- if (err)
2375
- goto free_encap;
2376
-
2377
- read_lock_bh(&n->lock);
2378
- nud_state = n->nud_state;
2379
- ether_addr_copy(e->h_dest, n->ha);
2380
- read_unlock_bh(&n->lock);
2381
-
2382
- switch (e->tunnel_type) {
2383
- case MLX5_HEADER_TYPE_VXLAN:
2384
- gen_vxlan_header_ipv4(out_dev, encap_header,
2385
- ipv4_encap_size, e->h_dest, tos, ttl,
2386
- fl4.daddr,
2387
- fl4.saddr, tun_key->tp_dst,
2388
- tunnel_id_to_key32(tun_key->tun_id));
2389
- break;
2390
- default:
2391
- err = -EOPNOTSUPP;
2392
- goto destroy_neigh_entry;
2393
- }
2394
- e->encap_size = ipv4_encap_size;
2395
- e->encap_header = encap_header;
2396
-
2397
- if (!(nud_state & NUD_VALID)) {
2398
- neigh_event_send(n, NULL);
2399
- err = -EAGAIN;
2400
- goto out;
2401
- }
2402
-
2403
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
2404
- ipv4_encap_size, encap_header, &e->encap_id);
2405
- if (err)
2406
- goto destroy_neigh_entry;
2407
-
2408
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
2409
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
2410
- neigh_release(n);
2411
- return err;
2412
-
2413
-destroy_neigh_entry:
2414
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
2415
-free_encap:
2416
- kfree(encap_header);
2417
-out:
2418
- if (n)
2419
- neigh_release(n);
2420
- return err;
2421
-}
2422
-
2423
-static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
2424
- struct net_device *mirred_dev,
2425
- struct mlx5e_encap_entry *e)
2426
-{
2427
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
2428
- int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
2429
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
2430
- struct net_device *out_dev = NULL;
2431
- struct neighbour *n = NULL;
2432
- struct flowi6 fl6 = {};
2433
- u8 nud_state, tos, ttl;
2434
- char *encap_header;
2435
- int err;
2436
-
2437
- if (max_encap_size < ipv6_encap_size) {
2438
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
2439
- ipv6_encap_size, max_encap_size);
2440
- return -EOPNOTSUPP;
2441
- }
2442
-
2443
- encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
2444
- if (!encap_header)
2445
- return -ENOMEM;
2446
-
2447
- switch (e->tunnel_type) {
2448
- case MLX5_HEADER_TYPE_VXLAN:
2449
- fl6.flowi6_proto = IPPROTO_UDP;
2450
- fl6.fl6_dport = tun_key->tp_dst;
2451
- break;
2452
- default:
2453
- err = -EOPNOTSUPP;
2454
- goto free_encap;
2455
- }
2456
-
2457
- tos = tun_key->tos;
2458
- ttl = tun_key->ttl;
2459
-
2460
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
2461
- fl6.daddr = tun_key->u.ipv6.dst;
2462
- fl6.saddr = tun_key->u.ipv6.src;
2463
-
2464
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
2465
- &fl6, &n, &ttl);
2466
- if (err)
2467
- goto free_encap;
2468
-
2469
- /* used by mlx5e_detach_encap to lookup a neigh hash table
2470
- * entry in the neigh hash table when a user deletes a rule
2471
- */
2472
- e->m_neigh.dev = n->dev;
2473
- e->m_neigh.family = n->ops->family;
2474
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
2475
- e->out_dev = out_dev;
2476
-
2477
- /* It's importent to add the neigh to the hash table before checking
2478
- * the neigh validity state. So if we'll get a notification, in case the
2479
- * neigh changes it's validity state, we would find the relevant neigh
2480
- * in the hash.
2481
- */
2482
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
2483
- if (err)
2484
- goto free_encap;
2485
-
2486
- read_lock_bh(&n->lock);
2487
- nud_state = n->nud_state;
2488
- ether_addr_copy(e->h_dest, n->ha);
2489
- read_unlock_bh(&n->lock);
2490
-
2491
- switch (e->tunnel_type) {
2492
- case MLX5_HEADER_TYPE_VXLAN:
2493
- gen_vxlan_header_ipv6(out_dev, encap_header,
2494
- ipv6_encap_size, e->h_dest, tos, ttl,
2495
- &fl6.daddr,
2496
- &fl6.saddr, tun_key->tp_dst,
2497
- tunnel_id_to_key32(tun_key->tun_id));
2498
- break;
2499
- default:
2500
- err = -EOPNOTSUPP;
2501
- goto destroy_neigh_entry;
2502
- }
2503
-
2504
- e->encap_size = ipv6_encap_size;
2505
- e->encap_header = encap_header;
2506
-
2507
- if (!(nud_state & NUD_VALID)) {
2508
- neigh_event_send(n, NULL);
2509
- err = -EAGAIN;
2510
- goto out;
2511
- }
2512
-
2513
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
2514
- ipv6_encap_size, encap_header, &e->encap_id);
2515
- if (err)
2516
- goto destroy_neigh_entry;
2517
-
2518
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
2519
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
2520
- neigh_release(n);
2521
- return err;
2522
-
2523
-destroy_neigh_entry:
2524
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
2525
-free_encap:
2526
- kfree(encap_header);
2527
-out:
2528
- if (n)
2529
- neigh_release(n);
2530
- return err;
2531
-}
2532
-
2533
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
2534
- struct ip_tunnel_info *tun_info,
2535
- struct net_device *mirred_dev,
2536
- struct net_device **encap_dev,
2537
- struct mlx5e_tc_flow *flow)
3729
+static struct mlx5e_encap_entry *
3730
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3731
+ uintptr_t hash_key)
25383732 {
25393733 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2540
- unsigned short family = ip_tunnel_info_af(tun_info);
2541
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
2542
- struct ip_tunnel_key *key = &tun_info->key;
25433734 struct mlx5e_encap_entry *e;
2544
- int tunnel_type, err = 0;
2545
- uintptr_t hash_key;
2546
- bool found = false;
2547
-
2548
- /* udp dst port must be set */
2549
- if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
2550
- goto vxlan_encap_offload_err;
2551
-
2552
- /* setting udp src port isn't supported */
2553
- if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
2554
-vxlan_encap_offload_err:
2555
- netdev_warn(priv->netdev,
2556
- "must set udp dst port and not set udp src port\n");
2557
- return -EOPNOTSUPP;
2558
- }
2559
-
2560
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
2561
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
2562
- tunnel_type = MLX5_HEADER_TYPE_VXLAN;
2563
- } else {
2564
- netdev_warn(priv->netdev,
2565
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
2566
- return -EOPNOTSUPP;
2567
- }
2568
-
2569
- hash_key = hash_encap_info(key);
3735
+ struct encap_key e_key;
25703736
25713737 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
25723738 encap_hlist, hash_key) {
2573
- if (!cmp_encap_info(&e->tun_info.key, key)) {
2574
- found = true;
2575
- break;
2576
- }
3739
+ e_key.ip_tun_key = &e->tun_info->key;
3740
+ e_key.tc_tunnel = e->tunnel;
3741
+ if (!cmp_encap_info(&e_key, key) &&
3742
+ mlx5e_encap_take(e))
3743
+ return e;
25773744 }
25783745
3746
+ return NULL;
3747
+}
3748
+
3749
+static struct mlx5e_decap_entry *
3750
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
3751
+ uintptr_t hash_key)
3752
+{
3753
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3754
+ struct mlx5e_decap_key r_key;
3755
+ struct mlx5e_decap_entry *e;
3756
+
3757
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
3758
+ hlist, hash_key) {
3759
+ r_key = e->key;
3760
+ if (!cmp_decap_info(&r_key, key) &&
3761
+ mlx5e_decap_take(e))
3762
+ return e;
3763
+ }
3764
+ return NULL;
3765
+}
3766
+
3767
+static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3768
+{
3769
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3770
+
3771
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
3772
+}
3773
+
3774
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3775
+ struct mlx5e_tc_flow *flow,
3776
+ int out_index,
3777
+ struct mlx5e_encap_entry *e,
3778
+ struct netlink_ext_ack *extack)
3779
+{
3780
+ int i;
3781
+
3782
+ for (i = 0; i < out_index; i++) {
3783
+ if (flow->encaps[i].e != e)
3784
+ continue;
3785
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3786
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
3787
+ return true;
3788
+ }
3789
+
3790
+ return false;
3791
+}
3792
+
3793
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3794
+ struct mlx5e_tc_flow *flow,
3795
+ struct net_device *mirred_dev,
3796
+ int out_index,
3797
+ struct netlink_ext_ack *extack,
3798
+ struct net_device **encap_dev,
3799
+ bool *encap_valid)
3800
+{
3801
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3802
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
3803
+ struct mlx5_flow_attr *attr = flow->attr;
3804
+ const struct ip_tunnel_info *tun_info;
3805
+ struct encap_key key;
3806
+ struct mlx5e_encap_entry *e;
3807
+ unsigned short family;
3808
+ uintptr_t hash_key;
3809
+ int err = 0;
3810
+
3811
+ parse_attr = attr->parse_attr;
3812
+ tun_info = parse_attr->tun_info[out_index];
3813
+ family = ip_tunnel_info_af(tun_info);
3814
+ key.ip_tun_key = &tun_info->key;
3815
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3816
+ if (!key.tc_tunnel) {
3817
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3818
+ return -EOPNOTSUPP;
3819
+ }
3820
+
3821
+ hash_key = hash_encap_info(&key);
3822
+
3823
+ mutex_lock(&esw->offloads.encap_tbl_lock);
3824
+ e = mlx5e_encap_get(priv, &key, hash_key);
3825
+
25793826 /* must verify if encap is valid or not */
2580
- if (found)
3827
+ if (e) {
3828
+ /* Check that entry was not already attached to this flow */
3829
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3830
+ err = -EOPNOTSUPP;
3831
+ goto out_err;
3832
+ }
3833
+
3834
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
3835
+ wait_for_completion(&e->res_ready);
3836
+
3837
+ /* Protect against concurrent neigh update. */
3838
+ mutex_lock(&esw->offloads.encap_tbl_lock);
3839
+ if (e->compl_result < 0) {
3840
+ err = -EREMOTEIO;
3841
+ goto out_err;
3842
+ }
25813843 goto attach_flow;
3844
+ }
25823845
25833846 e = kzalloc(sizeof(*e), GFP_KERNEL);
2584
- if (!e)
2585
- return -ENOMEM;
3847
+ if (!e) {
3848
+ err = -ENOMEM;
3849
+ goto out_err;
3850
+ }
25863851
2587
- e->tun_info = *tun_info;
2588
- e->tunnel_type = tunnel_type;
3852
+ refcount_set(&e->refcnt, 1);
3853
+ init_completion(&e->res_ready);
3854
+
3855
+ tun_info = dup_tun_info(tun_info);
3856
+ if (!tun_info) {
3857
+ err = -ENOMEM;
3858
+ goto out_err_init;
3859
+ }
3860
+ e->tun_info = tun_info;
3861
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3862
+ if (err)
3863
+ goto out_err_init;
3864
+
25893865 INIT_LIST_HEAD(&e->flows);
3866
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3867
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
25903868
25913869 if (family == AF_INET)
2592
- err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
3870
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
25933871 else if (family == AF_INET6)
2594
- err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
3872
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
25953873
2596
- if (err && err != -EAGAIN)
3874
+ /* Protect against concurrent neigh update. */
3875
+ mutex_lock(&esw->offloads.encap_tbl_lock);
3876
+ complete_all(&e->res_ready);
3877
+ if (err) {
3878
+ e->compl_result = err;
25973879 goto out_err;
2598
-
2599
- hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3880
+ }
3881
+ e->compl_result = 1;
26003882
26013883 attach_flow:
2602
- list_add(&flow->encap, &e->flows);
3884
+ flow->encaps[out_index].e = e;
3885
+ list_add(&flow->encaps[out_index].list, &e->flows);
3886
+ flow->encaps[out_index].index = out_index;
26033887 *encap_dev = e->out_dev;
2604
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
2605
- attr->encap_id = e->encap_id;
2606
- else
2607
- err = -EAGAIN;
3888
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3889
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3890
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3891
+ *encap_valid = true;
3892
+ } else {
3893
+ *encap_valid = false;
3894
+ }
3895
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
26083896
26093897 return err;
26103898
26113899 out_err:
3900
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
3901
+ if (e)
3902
+ mlx5e_encap_put(priv, e);
3903
+ return err;
3904
+
3905
+out_err_init:
3906
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
3907
+ kfree(tun_info);
26123908 kfree(e);
26133909 return err;
26143910 }
26153911
3912
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
3913
+ struct mlx5e_tc_flow *flow,
3914
+ struct netlink_ext_ack *extack)
3915
+{
3916
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3917
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3918
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
3919
+ struct mlx5e_decap_entry *d;
3920
+ struct mlx5e_decap_key key;
3921
+ uintptr_t hash_key;
3922
+ int err = 0;
3923
+
3924
+ parse_attr = flow->attr->parse_attr;
3925
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
3926
+ NL_SET_ERR_MSG_MOD(extack,
3927
+ "encap header larger than max supported");
3928
+ return -EOPNOTSUPP;
3929
+ }
3930
+
3931
+ key.key = parse_attr->eth;
3932
+ hash_key = hash_decap_info(&key);
3933
+ mutex_lock(&esw->offloads.decap_tbl_lock);
3934
+ d = mlx5e_decap_get(priv, &key, hash_key);
3935
+ if (d) {
3936
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3937
+ wait_for_completion(&d->res_ready);
3938
+ mutex_lock(&esw->offloads.decap_tbl_lock);
3939
+ if (d->compl_result) {
3940
+ err = -EREMOTEIO;
3941
+ goto out_free;
3942
+ }
3943
+ goto found;
3944
+ }
3945
+
3946
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
3947
+ if (!d) {
3948
+ err = -ENOMEM;
3949
+ goto out_err;
3950
+ }
3951
+
3952
+ d->key = key;
3953
+ refcount_set(&d->refcnt, 1);
3954
+ init_completion(&d->res_ready);
3955
+ INIT_LIST_HEAD(&d->flows);
3956
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
3957
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3958
+
3959
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
3960
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
3961
+ sizeof(parse_attr->eth),
3962
+ &parse_attr->eth,
3963
+ MLX5_FLOW_NAMESPACE_FDB);
3964
+ if (IS_ERR(d->pkt_reformat)) {
3965
+ err = PTR_ERR(d->pkt_reformat);
3966
+ d->compl_result = err;
3967
+ }
3968
+ mutex_lock(&esw->offloads.decap_tbl_lock);
3969
+ complete_all(&d->res_ready);
3970
+ if (err)
3971
+ goto out_free;
3972
+
3973
+found:
3974
+ flow->decap_reformat = d;
3975
+ attr->decap_pkt_reformat = d->pkt_reformat;
3976
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
3977
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3978
+ return 0;
3979
+
3980
+out_free:
3981
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3982
+ mlx5e_decap_put(priv, d);
3983
+ return err;
3984
+
3985
+out_err:
3986
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
3987
+ return err;
3988
+}
3989
+
26163990 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
2617
- const struct tc_action *a,
3991
+ const struct flow_action_entry *act,
26183992 struct mlx5_esw_flow_attr *attr,
26193993 u32 *action)
26203994 {
....@@ -2623,7 +3997,8 @@
26233997 if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
26243998 return -EOPNOTSUPP;
26253999
2626
- if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
4000
+ switch (act->id) {
4001
+ case FLOW_ACTION_VLAN_POP:
26274002 if (vlan_idx) {
26284003 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
26294004 MLX5_FS_VLAN_DEPTH))
....@@ -2633,10 +4008,11 @@
26334008 } else {
26344009 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
26354010 }
2636
- } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
2637
- attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
2638
- attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
2639
- attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
4011
+ break;
4012
+ case FLOW_ACTION_VLAN_PUSH:
4013
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
4014
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
4015
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
26404016 if (!attr->vlan_proto[vlan_idx])
26414017 attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
26424018
....@@ -2648,14 +4024,15 @@
26484024 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
26494025 } else {
26504026 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
2651
- (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
2652
- tcf_vlan_push_prio(a)))
4027
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
4028
+ act->vlan.prio))
26534029 return -EOPNOTSUPP;
26544030
26554031 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
26564032 }
2657
- } else { /* action is TCA_VLAN_ACT_MODIFY */
2658
- return -EOPNOTSUPP;
4033
+ break;
4034
+ default:
4035
+ return -EINVAL;
26594036 }
26604037
26614038 attr->total_vlan = vlan_idx + 1;
....@@ -2663,121 +4040,516 @@
26634040 return 0;
26644041 }
26654042
2666
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
2667
- struct mlx5e_tc_flow_parse_attr *parse_attr,
2668
- struct mlx5e_tc_flow *flow)
4043
+static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
4044
+ struct net_device *out_dev)
26694045 {
2670
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
2671
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
2672
- struct ip_tunnel_info *info = NULL;
2673
- const struct tc_action *a;
2674
- LIST_HEAD(actions);
2675
- bool encap = false;
2676
- u32 action = 0;
2677
- int err, i;
4046
+ struct net_device *fdb_out_dev = out_dev;
4047
+ struct net_device *uplink_upper;
26784048
2679
- if (!tcf_exts_has_actions(exts))
4049
+ rcu_read_lock();
4050
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
4051
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
4052
+ uplink_upper == out_dev) {
4053
+ fdb_out_dev = uplink_dev;
4054
+ } else if (netif_is_lag_master(out_dev)) {
4055
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
4056
+ if (fdb_out_dev &&
4057
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
4058
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
4059
+ fdb_out_dev = NULL;
4060
+ }
4061
+ rcu_read_unlock();
4062
+ return fdb_out_dev;
4063
+}
4064
+
4065
+static int add_vlan_push_action(struct mlx5e_priv *priv,
4066
+ struct mlx5_flow_attr *attr,
4067
+ struct net_device **out_dev,
4068
+ u32 *action)
4069
+{
4070
+ struct net_device *vlan_dev = *out_dev;
4071
+ struct flow_action_entry vlan_act = {
4072
+ .id = FLOW_ACTION_VLAN_PUSH,
4073
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
4074
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
4075
+ .vlan.prio = 0,
4076
+ };
4077
+ int err;
4078
+
4079
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4080
+ if (err)
4081
+ return err;
4082
+
4083
+ rcu_read_lock();
4084
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
4085
+ rcu_read_unlock();
4086
+ if (!*out_dev)
4087
+ return -ENODEV;
4088
+
4089
+ if (is_vlan_dev(*out_dev))
4090
+ err = add_vlan_push_action(priv, attr, out_dev, action);
4091
+
4092
+ return err;
4093
+}
4094
+
4095
+static int add_vlan_pop_action(struct mlx5e_priv *priv,
4096
+ struct mlx5_flow_attr *attr,
4097
+ u32 *action)
4098
+{
4099
+ struct flow_action_entry vlan_act = {
4100
+ .id = FLOW_ACTION_VLAN_POP,
4101
+ };
4102
+ int nest_level, err = 0;
4103
+
4104
+ nest_level = attr->parse_attr->filter_dev->lower_level -
4105
+ priv->netdev->lower_level;
4106
+ while (nest_level--) {
4107
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4108
+ if (err)
4109
+ return err;
4110
+ }
4111
+
4112
+ return err;
4113
+}
4114
+
4115
+static bool same_hw_reps(struct mlx5e_priv *priv,
4116
+ struct net_device *peer_netdev)
4117
+{
4118
+ struct mlx5e_priv *peer_priv;
4119
+
4120
+ peer_priv = netdev_priv(peer_netdev);
4121
+
4122
+ return mlx5e_eswitch_rep(priv->netdev) &&
4123
+ mlx5e_eswitch_rep(peer_netdev) &&
4124
+ same_hw_devs(priv, peer_priv);
4125
+}
4126
+
4127
+static bool is_lag_dev(struct mlx5e_priv *priv,
4128
+ struct net_device *peer_netdev)
4129
+{
4130
+ return ((mlx5_lag_is_sriov(priv->mdev) ||
4131
+ mlx5_lag_is_multipath(priv->mdev)) &&
4132
+ same_hw_reps(priv, peer_netdev));
4133
+}
4134
+
4135
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4136
+ struct net_device *out_dev)
4137
+{
4138
+ if (is_merged_eswitch_vfs(priv, out_dev))
4139
+ return true;
4140
+
4141
+ if (is_lag_dev(priv, out_dev))
4142
+ return true;
4143
+
4144
+ return mlx5e_eswitch_rep(out_dev) &&
4145
+ same_port_devs(priv, netdev_priv(out_dev));
4146
+}
4147
+
4148
+static bool is_duplicated_output_device(struct net_device *dev,
4149
+ struct net_device *out_dev,
4150
+ int *ifindexes, int if_count,
4151
+ struct netlink_ext_ack *extack)
4152
+{
4153
+ int i;
4154
+
4155
+ for (i = 0; i < if_count; i++) {
4156
+ if (ifindexes[i] == out_dev->ifindex) {
4157
+ NL_SET_ERR_MSG_MOD(extack,
4158
+ "can't duplicate output to same device");
4159
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
4160
+ out_dev->name);
4161
+ return true;
4162
+ }
4163
+ }
4164
+
4165
+ return false;
4166
+}
4167
+
4168
+static int verify_uplink_forwarding(struct mlx5e_priv *priv,
4169
+ struct mlx5e_tc_flow *flow,
4170
+ struct net_device *out_dev,
4171
+ struct netlink_ext_ack *extack)
4172
+{
4173
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4174
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4175
+ struct mlx5e_rep_priv *rep_priv;
4176
+
4177
+ /* Forwarding non encapsulated traffic between
4178
+ * uplink ports is allowed only if
4179
+ * termination_table_raw_traffic cap is set.
4180
+ *
4181
+ * Input vport was stored attr->in_rep.
4182
+ * In LAG case, *priv* is the private data of
4183
+ * uplink which may be not the input vport.
4184
+ */
4185
+ rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
4186
+
4187
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
4188
+ mlx5e_eswitch_uplink_rep(out_dev)))
4189
+ return 0;
4190
+
4191
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
4192
+ termination_table_raw_traffic)) {
4193
+ NL_SET_ERR_MSG_MOD(extack,
4194
+ "devices are both uplink, can't offload forwarding");
4195
+ pr_err("devices %s %s are both uplink, can't offload forwarding\n",
4196
+ priv->netdev->name, out_dev->name);
4197
+ return -EOPNOTSUPP;
4198
+ } else if (out_dev != rep_priv->netdev) {
4199
+ NL_SET_ERR_MSG_MOD(extack,
4200
+ "devices are not the same uplink, can't offload forwarding");
4201
+ pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
4202
+ priv->netdev->name, out_dev->name);
4203
+ return -EOPNOTSUPP;
4204
+ }
4205
+ return 0;
4206
+}
4207
+
4208
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
4209
+ struct flow_action *flow_action,
4210
+ struct mlx5e_tc_flow *flow,
4211
+ struct netlink_ext_ack *extack,
4212
+ struct net_device *filter_dev)
4213
+{
4214
+ struct pedit_headers_action hdrs[2] = {};
4215
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4216
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4217
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
4218
+ const struct ip_tunnel_info *info = NULL;
4219
+ struct mlx5_flow_attr *attr = flow->attr;
4220
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
4221
+ bool ft_flow = mlx5e_is_ft_flow(flow);
4222
+ const struct flow_action_entry *act;
4223
+ struct mlx5_esw_flow_attr *esw_attr;
4224
+ bool encap = false, decap = false;
4225
+ u32 action = attr->action;
4226
+ int err, i, if_count = 0;
4227
+ bool mpls_push = false;
4228
+
4229
+ if (!flow_action_has_entries(flow_action))
26804230 return -EINVAL;
26814231
2682
- attr->in_rep = rpriv->rep;
2683
- attr->in_mdev = priv->mdev;
4232
+ if (!flow_action_hw_stats_check(flow_action, extack,
4233
+ FLOW_ACTION_HW_STATS_DELAYED_BIT))
4234
+ return -EOPNOTSUPP;
26844235
2685
- tcf_exts_for_each_action(i, a, exts) {
2686
- if (is_tcf_gact_shot(a)) {
4236
+ esw_attr = attr->esw_attr;
4237
+ parse_attr = attr->parse_attr;
4238
+
4239
+ flow_action_for_each(i, act, flow_action) {
4240
+ switch (act->id) {
4241
+ case FLOW_ACTION_DROP:
26874242 action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
26884243 MLX5_FLOW_CONTEXT_ACTION_COUNT;
2689
- continue;
2690
- }
2691
-
2692
- if (is_tcf_pedit(a)) {
2693
- err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
2694
- parse_attr);
2695
- if (err)
2696
- return err;
2697
-
2698
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2699
- attr->mirror_count = attr->out_count;
2700
- continue;
2701
- }
2702
-
2703
- if (is_tcf_csum(a)) {
2704
- if (csum_offload_supported(priv, action,
2705
- tcf_csum_update_flags(a)))
2706
- continue;
2707
-
2708
- return -EOPNOTSUPP;
2709
- }
2710
-
2711
- if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) {
2712
- struct mlx5e_priv *out_priv;
2713
- struct net_device *out_dev;
2714
-
2715
- out_dev = tcf_mirred_dev(a);
2716
-
2717
- if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
2718
- pr_err("can't support more than %d output ports, can't offload forwarding\n",
2719
- attr->out_count);
4244
+ break;
4245
+ case FLOW_ACTION_TRAP:
4246
+ if (!flow_offload_has_one_action(flow_action)) {
4247
+ NL_SET_ERR_MSG_MOD(extack,
4248
+ "action trap is supported as a sole action only");
4249
+ return -EOPNOTSUPP;
4250
+ }
4251
+ action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4252
+ MLX5_FLOW_CONTEXT_ACTION_COUNT);
4253
+ attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
4254
+ break;
4255
+ case FLOW_ACTION_MPLS_PUSH:
4256
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
4257
+ reformat_l2_to_l3_tunnel) ||
4258
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
4259
+ NL_SET_ERR_MSG_MOD(extack,
4260
+ "mpls push is supported only for mpls_uc protocol");
4261
+ return -EOPNOTSUPP;
4262
+ }
4263
+ mpls_push = true;
4264
+ break;
4265
+ case FLOW_ACTION_MPLS_POP:
4266
+ /* we only support mpls pop if it is the first action
4267
+ * and the filter net device is bareudp. Subsequent
4268
+ * actions can be pedit and the last can be mirred
4269
+ * egress redirect.
4270
+ */
4271
+ if (i) {
4272
+ NL_SET_ERR_MSG_MOD(extack,
4273
+ "mpls pop supported only as first action");
4274
+ return -EOPNOTSUPP;
4275
+ }
4276
+ if (!netif_is_bareudp(filter_dev)) {
4277
+ NL_SET_ERR_MSG_MOD(extack,
4278
+ "mpls pop supported only on bareudp devices");
27204279 return -EOPNOTSUPP;
27214280 }
27224281
2723
- if (switchdev_port_same_parent_id(priv->netdev,
2724
- out_dev) ||
2725
- is_merged_eswitch_dev(priv, out_dev)) {
2726
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2727
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
2728
- out_priv = netdev_priv(out_dev);
2729
- rpriv = out_priv->ppriv;
2730
- attr->out_rep[attr->out_count] = rpriv->rep;
2731
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
2732
- } else if (encap) {
2733
- parse_attr->mirred_ifindex = out_dev->ifindex;
2734
- parse_attr->tun_info = *info;
2735
- attr->parse_attr = parse_attr;
2736
- action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
2737
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2738
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
2739
- /* attr->out_rep is resolved when we handle encap */
2740
- } else {
2741
- pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
2742
- priv->netdev->name, out_dev->name);
4282
+ parse_attr->eth.h_proto = act->mpls_pop.proto;
4283
+ action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
4284
+ flow_flag_set(flow, L3_TO_L2_DECAP);
4285
+ break;
4286
+ case FLOW_ACTION_MANGLE:
4287
+ case FLOW_ACTION_ADD:
4288
+ err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
4289
+ parse_attr, hdrs, flow, extack);
4290
+ if (err)
4291
+ return err;
4292
+
4293
+ if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
4294
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4295
+ esw_attr->split_count = esw_attr->out_count;
4296
+ }
4297
+ break;
4298
+ case FLOW_ACTION_CSUM:
4299
+ if (csum_offload_supported(priv, action,
4300
+ act->csum_flags, extack))
4301
+ break;
4302
+
4303
+ return -EOPNOTSUPP;
4304
+ case FLOW_ACTION_REDIRECT:
4305
+ case FLOW_ACTION_MIRRED: {
4306
+ struct mlx5e_priv *out_priv;
4307
+ struct net_device *out_dev;
4308
+
4309
+ out_dev = act->dev;
4310
+ if (!out_dev) {
4311
+ /* out_dev is NULL when filters with
4312
+ * non-existing mirred device are replayed to
4313
+ * the driver.
4314
+ */
27434315 return -EINVAL;
27444316 }
2745
- continue;
2746
- }
27474317
2748
- if (is_tcf_tunnel_set(a)) {
2749
- info = tcf_tunnel_info(a);
4318
+ if (mpls_push && !netif_is_bareudp(out_dev)) {
4319
+ NL_SET_ERR_MSG_MOD(extack,
4320
+ "mpls is supported only through a bareudp device");
4321
+ return -EOPNOTSUPP;
4322
+ }
4323
+
4324
+ if (ft_flow && out_dev == priv->netdev) {
4325
+ /* Ignore forward to self rules generated
4326
+ * by adding both mlx5 devs to the flow table
4327
+ * block on a normal nft offload setup.
4328
+ */
4329
+ return -EOPNOTSUPP;
4330
+ }
4331
+
4332
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
4333
+ NL_SET_ERR_MSG_MOD(extack,
4334
+ "can't support more output ports, can't offload forwarding");
4335
+ netdev_warn(priv->netdev,
4336
+ "can't support more than %d output ports, can't offload forwarding\n",
4337
+ esw_attr->out_count);
4338
+ return -EOPNOTSUPP;
4339
+ }
4340
+
4341
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4342
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
4343
+ if (encap) {
4344
+ parse_attr->mirred_ifindex[esw_attr->out_count] =
4345
+ out_dev->ifindex;
4346
+ parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
4347
+ if (!parse_attr->tun_info[esw_attr->out_count])
4348
+ return -ENOMEM;
4349
+ encap = false;
4350
+ esw_attr->dests[esw_attr->out_count].flags |=
4351
+ MLX5_ESW_DEST_ENCAP;
4352
+ esw_attr->out_count++;
4353
+ /* attr->dests[].rep is resolved when we
4354
+ * handle encap
4355
+ */
4356
+ } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4357
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4358
+ struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4359
+
4360
+ if (is_duplicated_output_device(priv->netdev,
4361
+ out_dev,
4362
+ ifindexes,
4363
+ if_count,
4364
+ extack))
4365
+ return -EOPNOTSUPP;
4366
+
4367
+ ifindexes[if_count] = out_dev->ifindex;
4368
+ if_count++;
4369
+
4370
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4371
+ if (!out_dev)
4372
+ return -ENODEV;
4373
+
4374
+ if (is_vlan_dev(out_dev)) {
4375
+ err = add_vlan_push_action(priv, attr,
4376
+ &out_dev,
4377
+ &action);
4378
+ if (err)
4379
+ return err;
4380
+ }
4381
+
4382
+ if (is_vlan_dev(parse_attr->filter_dev)) {
4383
+ err = add_vlan_pop_action(priv, attr,
4384
+ &action);
4385
+ if (err)
4386
+ return err;
4387
+ }
4388
+
4389
+ err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4390
+ if (err)
4391
+ return err;
4392
+
4393
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4394
+ NL_SET_ERR_MSG_MOD(extack,
4395
+ "devices are not on same switch HW, can't offload forwarding");
4396
+ return -EOPNOTSUPP;
4397
+ }
4398
+
4399
+ out_priv = netdev_priv(out_dev);
4400
+ rpriv = out_priv->ppriv;
4401
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
4402
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
4403
+ esw_attr->out_count++;
4404
+ } else if (parse_attr->filter_dev != priv->netdev) {
4405
+ /* All mlx5 devices are called to configure
4406
+ * high level device filters. Therefore, the
4407
+ * *attempt* to install a filter on invalid
4408
+ * eswitch should not trigger an explicit error
4409
+ */
4410
+ return -EINVAL;
4411
+ } else {
4412
+ NL_SET_ERR_MSG_MOD(extack,
4413
+ "devices are not on same switch HW, can't offload forwarding");
4414
+ netdev_warn(priv->netdev,
4415
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
4416
+ priv->netdev->name,
4417
+ out_dev->name);
4418
+ return -EINVAL;
4419
+ }
4420
+ }
4421
+ break;
4422
+ case FLOW_ACTION_TUNNEL_ENCAP:
4423
+ info = act->tunnel;
27504424 if (info)
27514425 encap = true;
27524426 else
27534427 return -EOPNOTSUPP;
2754
- attr->mirror_count = attr->out_count;
2755
- continue;
2756
- }
27574428
2758
- if (is_tcf_vlan(a)) {
2759
- err = parse_tc_vlan_action(priv, a, attr, &action);
2760
-
4429
+ break;
4430
+ case FLOW_ACTION_VLAN_PUSH:
4431
+ case FLOW_ACTION_VLAN_POP:
4432
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
4433
+ (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4434
+ /* Replace vlan pop+push with vlan modify */
4435
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4436
+ err = add_vlan_rewrite_action(priv,
4437
+ MLX5_FLOW_NAMESPACE_FDB,
4438
+ act, parse_attr, hdrs,
4439
+ &action, extack);
4440
+ } else {
4441
+ err = parse_tc_vlan_action(priv, act, esw_attr, &action);
4442
+ }
27614443 if (err)
27624444 return err;
27634445
2764
- attr->mirror_count = attr->out_count;
2765
- continue;
2766
- }
4446
+ esw_attr->split_count = esw_attr->out_count;
4447
+ break;
4448
+ case FLOW_ACTION_VLAN_MANGLE:
4449
+ err = add_vlan_rewrite_action(priv,
4450
+ MLX5_FLOW_NAMESPACE_FDB,
4451
+ act, parse_attr, hdrs,
4452
+ &action, extack);
4453
+ if (err)
4454
+ return err;
27674455
2768
- if (is_tcf_tunnel_release(a)) {
2769
- action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2770
- continue;
2771
- }
4456
+ esw_attr->split_count = esw_attr->out_count;
4457
+ break;
4458
+ case FLOW_ACTION_TUNNEL_DECAP:
4459
+ decap = true;
4460
+ break;
4461
+ case FLOW_ACTION_GOTO:
4462
+ err = validate_goto_chain(priv, flow, act, action,
4463
+ extack);
4464
+ if (err)
4465
+ return err;
27724466
2773
- return -EINVAL;
4467
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4468
+ attr->dest_chain = act->chain_index;
4469
+ break;
4470
+ case FLOW_ACTION_CT:
4471
+ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
4472
+ if (err)
4473
+ return err;
4474
+
4475
+ flow_flag_set(flow, CT);
4476
+ break;
4477
+ default:
4478
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4479
+ return -EOPNOTSUPP;
4480
+ }
4481
+ }
4482
+
4483
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4484
+ action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4485
+ /* For prio tag mode, replace vlan pop with rewrite vlan prio
4486
+ * tag rewrite.
4487
+ */
4488
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4489
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4490
+ &action, extack);
4491
+ if (err)
4492
+ return err;
4493
+ }
4494
+
4495
+ if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4496
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4497
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4498
+ parse_attr, hdrs, &action, extack);
4499
+ if (err)
4500
+ return err;
4501
+ /* in case all pedit actions are skipped, remove the MOD_HDR
4502
+ * flag. we might have set split_count either by pedit or
4503
+ * pop/push. if there is no pop/push either, reset it too.
4504
+ */
4505
+ if (parse_attr->mod_hdr_acts.num_actions == 0) {
4506
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4507
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4508
+ if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4509
+ (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4510
+ esw_attr->split_count = 0;
4511
+ }
27744512 }
27754513
27764514 attr->action = action;
2777
- if (!actions_match_supported(priv, exts, parse_attr, flow))
4515
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
27784516 return -EOPNOTSUPP;
27794517
2780
- if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4518
+ if (attr->dest_chain) {
4519
+ if (decap) {
4520
+ /* It can be supported if we'll create a mapping for
4521
+ * the tunnel device only (without tunnel), and set
4522
+ * this tunnel id with this decap flow.
4523
+ *
4524
+ * On restore (miss), we'll just set this saved tunnel
4525
+ * device.
4526
+ */
4527
+
4528
+ NL_SET_ERR_MSG(extack,
4529
+ "Decap with goto isn't supported");
4530
+ netdev_warn(priv->netdev,
4531
+ "Decap with goto isn't supported");
4532
+ return -EOPNOTSUPP;
4533
+ }
4534
+
4535
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
4536
+ NL_SET_ERR_MSG_MOD(extack,
4537
+ "Mirroring goto chain rules isn't supported");
4538
+ return -EOPNOTSUPP;
4539
+ }
4540
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4541
+ }
4542
+
4543
+ if (!(attr->action &
4544
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4545
+ NL_SET_ERR_MSG_MOD(extack,
4546
+ "Rule must have at least one forward/drop action");
4547
+ return -EOPNOTSUPP;
4548
+ }
4549
+
4550
+ if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4551
+ NL_SET_ERR_MSG_MOD(extack,
4552
+ "current firmware doesn't support split rule for port mirroring");
27814553 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
27824554 return -EOPNOTSUPP;
27834555 }
....@@ -2785,14 +4557,21 @@
27854557 return 0;
27864558 }
27874559
2788
-static void get_flags(int flags, u8 *flow_flags)
4560
+static void get_flags(int flags, unsigned long *flow_flags)
27894561 {
2790
- u8 __flow_flags = 0;
4562
+ unsigned long __flow_flags = 0;
27914563
2792
- if (flags & MLX5E_TC_INGRESS)
2793
- __flow_flags |= MLX5E_TC_FLOW_INGRESS;
2794
- if (flags & MLX5E_TC_EGRESS)
2795
- __flow_flags |= MLX5E_TC_FLOW_EGRESS;
4564
+ if (flags & MLX5_TC_FLAG(INGRESS))
4565
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4566
+ if (flags & MLX5_TC_FLAG(EGRESS))
4567
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4568
+
4569
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4570
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4571
+ if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4572
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4573
+ if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4574
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
27964575
27974576 *flow_flags = __flow_flags;
27984577 }
....@@ -2804,161 +4583,658 @@
28044583 .automatic_shrinking = true,
28054584 };
28064585
2807
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
4586
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4587
+ unsigned long flags)
28084588 {
28094589 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
28104590 struct mlx5e_rep_priv *uplink_rpriv;
28114591
2812
- if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
4592
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
28134593 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2814
- return &uplink_rpriv->tc_ht;
2815
- } else
4594
+ return &uplink_rpriv->uplink_priv.tc_ht;
4595
+ } else /* NIC offload */
28164596 return &priv->fs.tc.ht;
28174597 }
28184598
2819
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
2820
- struct tc_cls_flower_offload *f, int flags)
4599
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
28214600 {
2822
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2823
- struct mlx5e_tc_flow_parse_attr *parse_attr;
2824
- struct rhashtable *tc_ht = get_tc_ht(priv);
2825
- struct mlx5e_tc_flow *flow;
2826
- int attr_size, err = 0;
2827
- u8 flow_flags = 0;
4601
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4602
+ struct mlx5_flow_attr *attr = flow->attr;
4603
+ bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4604
+ flow_flag_test(flow, INGRESS);
4605
+ bool act_is_encap = !!(attr->action &
4606
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4607
+ bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4608
+ MLX5_DEVCOM_ESW_OFFLOADS);
28284609
2829
- get_flags(flags, &flow_flags);
4610
+ if (!esw_paired)
4611
+ return false;
28304612
2831
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
2832
- if (flow) {
2833
- netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
2834
- return 0;
2835
- }
2836
-
2837
- if (esw && esw->mode == SRIOV_OFFLOADS) {
2838
- flow_flags |= MLX5E_TC_FLOW_ESWITCH;
2839
- attr_size = sizeof(struct mlx5_esw_flow_attr);
2840
- } else {
2841
- flow_flags |= MLX5E_TC_FLOW_NIC;
2842
- attr_size = sizeof(struct mlx5_nic_flow_attr);
2843
- }
2844
-
2845
- flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
2846
- parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
2847
- if (!parse_attr || !flow) {
2848
- err = -ENOMEM;
2849
- goto err_free;
2850
- }
2851
-
2852
- flow->cookie = f->cookie;
2853
- flow->flags = flow_flags;
2854
- flow->priv = priv;
2855
-
2856
- err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
2857
- if (err < 0)
2858
- goto err_free;
2859
-
2860
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
2861
- err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
2862
- if (err < 0)
2863
- goto err_free;
2864
- flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
2865
- } else {
2866
- err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
2867
- if (err < 0)
2868
- goto err_free;
2869
- flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
2870
- }
2871
-
2872
- if (IS_ERR(flow->rule[0])) {
2873
- err = PTR_ERR(flow->rule[0]);
2874
- if (err != -EAGAIN)
2875
- goto err_free;
2876
- }
2877
-
2878
- if (err != -EAGAIN)
2879
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
2880
-
2881
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
2882
- !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
2883
- kvfree(parse_attr);
2884
-
2885
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
2886
- if (err) {
2887
- mlx5e_tc_del_flow(priv, flow);
2888
- kfree(flow);
2889
- }
2890
-
2891
- return err;
2892
-
2893
-err_free:
2894
- kvfree(parse_attr);
2895
- kfree(flow);
2896
- return err;
2897
-}
2898
-
2899
-#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
2900
-#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
2901
-
2902
-static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
2903
-{
2904
- if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
4613
+ if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4614
+ mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4615
+ (is_rep_ingress || act_is_encap))
29054616 return true;
29064617
29074618 return false;
29084619 }
29094620
2910
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
2911
- struct tc_cls_flower_offload *f, int flags)
4621
+struct mlx5_flow_attr *
4622
+mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
29124623 {
2913
- struct rhashtable *tc_ht = get_tc_ht(priv);
4624
+ u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
4625
+ sizeof(struct mlx5_esw_flow_attr) :
4626
+ sizeof(struct mlx5_nic_flow_attr);
4627
+ struct mlx5_flow_attr *attr;
4628
+
4629
+ return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4630
+}
4631
+
4632
+static int
4633
+mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4634
+ struct flow_cls_offload *f, unsigned long flow_flags,
4635
+ struct mlx5e_tc_flow_parse_attr **__parse_attr,
4636
+ struct mlx5e_tc_flow **__flow)
4637
+{
4638
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4639
+ struct mlx5_flow_attr *attr;
29144640 struct mlx5e_tc_flow *flow;
4641
+ int err = -ENOMEM;
4642
+ int out_index;
29154643
2916
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
2917
- if (!flow || !same_flow_direction(flow, flags))
2918
- return -EINVAL;
4644
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4645
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4646
+ if (!parse_attr || !flow)
4647
+ goto err_free;
29194648
2920
- rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4649
+ flow->flags = flow_flags;
4650
+ flow->cookie = f->cookie;
4651
+ flow->priv = priv;
29214652
2922
- mlx5e_tc_del_flow(priv, flow);
4653
+ attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4654
+ if (!attr)
4655
+ goto err_free;
29234656
4657
+ flow->attr = attr;
4658
+
4659
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4660
+ INIT_LIST_HEAD(&flow->encaps[out_index].list);
4661
+ INIT_LIST_HEAD(&flow->hairpin);
4662
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4663
+ refcount_set(&flow->refcnt, 1);
4664
+ init_completion(&flow->init_done);
4665
+
4666
+ *__flow = flow;
4667
+ *__parse_attr = parse_attr;
4668
+
4669
+ return 0;
4670
+
4671
+err_free:
29244672 kfree(flow);
4673
+ kvfree(parse_attr);
4674
+ return err;
4675
+}
4676
+
4677
+static void
4678
+mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4679
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
4680
+ struct flow_cls_offload *f)
4681
+{
4682
+ attr->parse_attr = parse_attr;
4683
+ attr->chain = f->common.chain_index;
4684
+ attr->prio = f->common.prio;
4685
+}
4686
+
4687
+static void
4688
+mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4689
+ struct mlx5e_priv *priv,
4690
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
4691
+ struct flow_cls_offload *f,
4692
+ struct mlx5_eswitch_rep *in_rep,
4693
+ struct mlx5_core_dev *in_mdev)
4694
+{
4695
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4696
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4697
+
4698
+ mlx5e_flow_attr_init(attr, parse_attr, f);
4699
+
4700
+ esw_attr->in_rep = in_rep;
4701
+ esw_attr->in_mdev = in_mdev;
4702
+
4703
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4704
+ MLX5_COUNTER_SOURCE_ESWITCH)
4705
+ esw_attr->counter_dev = in_mdev;
4706
+ else
4707
+ esw_attr->counter_dev = priv->mdev;
4708
+}
4709
+
4710
+static struct mlx5e_tc_flow *
4711
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4712
+ struct flow_cls_offload *f,
4713
+ unsigned long flow_flags,
4714
+ struct net_device *filter_dev,
4715
+ struct mlx5_eswitch_rep *in_rep,
4716
+ struct mlx5_core_dev *in_mdev)
4717
+{
4718
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4719
+ struct netlink_ext_ack *extack = f->common.extack;
4720
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4721
+ struct mlx5e_tc_flow *flow;
4722
+ int attr_size, err;
4723
+
4724
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4725
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
4726
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4727
+ &parse_attr, &flow);
4728
+ if (err)
4729
+ goto out;
4730
+
4731
+ parse_attr->filter_dev = filter_dev;
4732
+ mlx5e_flow_esw_attr_init(flow->attr,
4733
+ priv, parse_attr,
4734
+ f, in_rep, in_mdev);
4735
+
4736
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4737
+ f, filter_dev);
4738
+ if (err)
4739
+ goto err_free;
4740
+
4741
+ /* actions validation depends on parsing the ct matches first */
4742
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4743
+ &flow->attr->ct_attr, extack);
4744
+ if (err)
4745
+ goto err_free;
4746
+
4747
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4748
+ if (err)
4749
+ goto err_free;
4750
+
4751
+ err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4752
+ complete_all(&flow->init_done);
4753
+ if (err) {
4754
+ if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4755
+ goto err_free;
4756
+
4757
+ add_unready_flow(flow);
4758
+ }
4759
+
4760
+ return flow;
4761
+
4762
+err_free:
4763
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4764
+ mlx5e_flow_put(priv, flow);
4765
+out:
4766
+ return ERR_PTR(err);
4767
+}
4768
+
4769
+static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4770
+ struct mlx5e_tc_flow *flow,
4771
+ unsigned long flow_flags)
4772
+{
4773
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
4774
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4775
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4776
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4777
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4778
+ struct mlx5e_rep_priv *peer_urpriv;
4779
+ struct mlx5e_tc_flow *peer_flow;
4780
+ struct mlx5_core_dev *in_mdev;
4781
+ int err = 0;
4782
+
4783
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4784
+ if (!peer_esw)
4785
+ return -ENODEV;
4786
+
4787
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4788
+ peer_priv = netdev_priv(peer_urpriv->netdev);
4789
+
4790
+ /* in_mdev is assigned of which the packet originated from.
4791
+ * So packets redirected to uplink use the same mdev of the
4792
+ * original flow and packets redirected from uplink use the
4793
+ * peer mdev.
4794
+ */
4795
+ if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4796
+ in_mdev = peer_priv->mdev;
4797
+ else
4798
+ in_mdev = priv->mdev;
4799
+
4800
+ parse_attr = flow->attr->parse_attr;
4801
+ peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4802
+ parse_attr->filter_dev,
4803
+ attr->in_rep, in_mdev);
4804
+ if (IS_ERR(peer_flow)) {
4805
+ err = PTR_ERR(peer_flow);
4806
+ goto out;
4807
+ }
4808
+
4809
+ flow->peer_flow = peer_flow;
4810
+ flow_flag_set(flow, DUP);
4811
+ mutex_lock(&esw->offloads.peer_mutex);
4812
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4813
+ mutex_unlock(&esw->offloads.peer_mutex);
4814
+
4815
+out:
4816
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4817
+ return err;
4818
+}
4819
+
4820
+static int
4821
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4822
+ struct flow_cls_offload *f,
4823
+ unsigned long flow_flags,
4824
+ struct net_device *filter_dev,
4825
+ struct mlx5e_tc_flow **__flow)
4826
+{
4827
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
4828
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4829
+ struct mlx5_core_dev *in_mdev = priv->mdev;
4830
+ struct mlx5e_tc_flow *flow;
4831
+ int err;
4832
+
4833
+ flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4834
+ in_mdev);
4835
+ if (IS_ERR(flow))
4836
+ return PTR_ERR(flow);
4837
+
4838
+ if (is_peer_flow_needed(flow)) {
4839
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4840
+ if (err) {
4841
+ mlx5e_tc_del_fdb_flow(priv, flow);
4842
+ goto out;
4843
+ }
4844
+ }
4845
+
4846
+ *__flow = flow;
4847
+
4848
+ return 0;
4849
+
4850
+out:
4851
+ return err;
4852
+}
4853
+
4854
+static int
4855
+mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4856
+ struct flow_cls_offload *f,
4857
+ unsigned long flow_flags,
4858
+ struct net_device *filter_dev,
4859
+ struct mlx5e_tc_flow **__flow)
4860
+{
4861
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4862
+ struct netlink_ext_ack *extack = f->common.extack;
4863
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
4864
+ struct mlx5e_tc_flow *flow;
4865
+ int attr_size, err;
4866
+
4867
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4868
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4869
+ return -EOPNOTSUPP;
4870
+ } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4871
+ return -EOPNOTSUPP;
4872
+ }
4873
+
4874
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4875
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
4876
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4877
+ &parse_attr, &flow);
4878
+ if (err)
4879
+ goto out;
4880
+
4881
+ parse_attr->filter_dev = filter_dev;
4882
+ mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4883
+
4884
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4885
+ f, filter_dev);
4886
+ if (err)
4887
+ goto err_free;
4888
+
4889
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4890
+ &flow->attr->ct_attr, extack);
4891
+ if (err)
4892
+ goto err_free;
4893
+
4894
+ err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4895
+ if (err)
4896
+ goto err_free;
4897
+
4898
+ err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4899
+ if (err)
4900
+ goto err_free;
4901
+
4902
+ flow_flag_set(flow, OFFLOADED);
4903
+ *__flow = flow;
4904
+
4905
+ return 0;
4906
+
4907
+err_free:
4908
+ dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4909
+ mlx5e_flow_put(priv, flow);
4910
+out:
4911
+ return err;
4912
+}
4913
+
4914
+static int
4915
+mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4916
+ struct flow_cls_offload *f,
4917
+ unsigned long flags,
4918
+ struct net_device *filter_dev,
4919
+ struct mlx5e_tc_flow **flow)
4920
+{
4921
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4922
+ unsigned long flow_flags;
4923
+ int err;
4924
+
4925
+ get_flags(flags, &flow_flags);
4926
+
4927
+ if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4928
+ return -EOPNOTSUPP;
4929
+
4930
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4931
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4932
+ filter_dev, flow);
4933
+ else
4934
+ err = mlx5e_add_nic_flow(priv, f, flow_flags,
4935
+ filter_dev, flow);
4936
+
4937
+ return err;
4938
+}
4939
+
4940
+static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4941
+ struct mlx5e_rep_priv *rpriv)
4942
+{
4943
+ /* Offloaded flow rule is allowed to duplicate on non-uplink representor
4944
+ * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
4945
+ * function is called from NIC mode.
4946
+ */
4947
+ return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4948
+}
4949
+
4950
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4951
+ struct flow_cls_offload *f, unsigned long flags)
4952
+{
4953
+ struct netlink_ext_ack *extack = f->common.extack;
4954
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4955
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
4956
+ struct mlx5e_tc_flow *flow;
4957
+ int err = 0;
4958
+
4959
+ rcu_read_lock();
4960
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4961
+ if (flow) {
4962
+ /* Same flow rule offloaded to non-uplink representor sharing tc block,
4963
+ * just return 0.
4964
+ */
4965
+ if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4966
+ goto rcu_unlock;
4967
+
4968
+ NL_SET_ERR_MSG_MOD(extack,
4969
+ "flow cookie already exists, ignoring");
4970
+ netdev_warn_once(priv->netdev,
4971
+ "flow cookie %lx already exists, ignoring\n",
4972
+ f->cookie);
4973
+ err = -EEXIST;
4974
+ goto rcu_unlock;
4975
+ }
4976
+rcu_unlock:
4977
+ rcu_read_unlock();
4978
+ if (flow)
4979
+ goto out;
4980
+
4981
+ trace_mlx5e_configure_flower(f);
4982
+ err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4983
+ if (err)
4984
+ goto out;
4985
+
4986
+ /* Flow rule offloaded to non-uplink representor sharing tc block,
4987
+ * set the flow's owner dev.
4988
+ */
4989
+ if (is_flow_rule_duplicate_allowed(dev, rpriv))
4990
+ flow->orig_dev = dev;
4991
+
4992
+ err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4993
+ if (err)
4994
+ goto err_free;
4995
+
4996
+ return 0;
4997
+
4998
+err_free:
4999
+ mlx5e_flow_put(priv, flow);
5000
+out:
5001
+ return err;
5002
+}
5003
+
5004
+static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
5005
+{
5006
+ bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
5007
+ bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
5008
+
5009
+ return flow_flag_test(flow, INGRESS) == dir_ingress &&
5010
+ flow_flag_test(flow, EGRESS) == dir_egress;
5011
+}
5012
+
5013
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
5014
+ struct flow_cls_offload *f, unsigned long flags)
5015
+{
5016
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5017
+ struct mlx5e_tc_flow *flow;
5018
+ int err;
5019
+
5020
+ rcu_read_lock();
5021
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
5022
+ if (!flow || !same_flow_direction(flow, flags)) {
5023
+ err = -EINVAL;
5024
+ goto errout;
5025
+ }
5026
+
5027
+ /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
5028
+ * set.
5029
+ */
5030
+ if (flow_flag_test_and_set(flow, DELETED)) {
5031
+ err = -EINVAL;
5032
+ goto errout;
5033
+ }
5034
+ rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
5035
+ rcu_read_unlock();
5036
+
5037
+ trace_mlx5e_delete_flower(f);
5038
+ mlx5e_flow_put(priv, flow);
5039
+
5040
+ return 0;
5041
+
5042
+errout:
5043
+ rcu_read_unlock();
5044
+ return err;
5045
+}
5046
+
5047
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
5048
+ struct flow_cls_offload *f, unsigned long flags)
5049
+{
5050
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
5051
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5052
+ struct mlx5_eswitch *peer_esw;
5053
+ struct mlx5e_tc_flow *flow;
5054
+ struct mlx5_fc *counter;
5055
+ u64 lastuse = 0;
5056
+ u64 packets = 0;
5057
+ u64 bytes = 0;
5058
+ int err = 0;
5059
+
5060
+ rcu_read_lock();
5061
+ flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
5062
+ tc_ht_params));
5063
+ rcu_read_unlock();
5064
+ if (IS_ERR(flow))
5065
+ return PTR_ERR(flow);
5066
+
5067
+ if (!same_flow_direction(flow, flags)) {
5068
+ err = -EINVAL;
5069
+ goto errout;
5070
+ }
5071
+
5072
+ if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
5073
+ counter = mlx5e_tc_get_counter(flow);
5074
+ if (!counter)
5075
+ goto errout;
5076
+
5077
+ mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
5078
+ }
5079
+
5080
+ /* Under multipath it's possible for one rule to be currently
5081
+ * un-offloaded while the other rule is offloaded.
5082
+ */
5083
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5084
+ if (!peer_esw)
5085
+ goto out;
5086
+
5087
+ if (flow_flag_test(flow, DUP) &&
5088
+ flow_flag_test(flow->peer_flow, OFFLOADED)) {
5089
+ u64 bytes2;
5090
+ u64 packets2;
5091
+ u64 lastuse2;
5092
+
5093
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
5094
+ if (!counter)
5095
+ goto no_peer_counter;
5096
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
5097
+
5098
+ bytes += bytes2;
5099
+ packets += packets2;
5100
+ lastuse = max_t(u64, lastuse, lastuse2);
5101
+ }
5102
+
5103
+no_peer_counter:
5104
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5105
+out:
5106
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
5107
+ FLOW_ACTION_HW_STATS_DELAYED);
5108
+ trace_mlx5e_stats_flower(f);
5109
+errout:
5110
+ mlx5e_flow_put(priv, flow);
5111
+ return err;
5112
+}
5113
+
5114
+static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
5115
+ struct netlink_ext_ack *extack)
5116
+{
5117
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
5118
+ struct mlx5_eswitch *esw;
5119
+ u32 rate_mbps = 0;
5120
+ u16 vport_num;
5121
+ int err;
5122
+
5123
+ vport_num = rpriv->rep->vport;
5124
+ if (vport_num >= MLX5_VPORT_ECPF) {
5125
+ NL_SET_ERR_MSG_MOD(extack,
5126
+ "Ingress rate limit is supported only for Eswitch ports connected to VFs");
5127
+ return -EOPNOTSUPP;
5128
+ }
5129
+
5130
+ esw = priv->mdev->priv.eswitch;
5131
+ /* rate is given in bytes/sec.
5132
+ * First convert to bits/sec and then round to the nearest mbit/secs.
5133
+ * mbit means million bits.
5134
+ * Moreover, if rate is non zero we choose to configure to a minimum of
5135
+ * 1 mbit/sec.
5136
+ */
5137
+ if (rate) {
5138
+ rate = (rate * BITS_PER_BYTE) + 500000;
5139
+ rate_mbps = max_t(u32, do_div(rate, 1000000), 1);
5140
+ }
5141
+
5142
+ err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
5143
+ if (err)
5144
+ NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5145
+
5146
+ return err;
5147
+}
5148
+
5149
+static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5150
+ struct flow_action *flow_action,
5151
+ struct netlink_ext_ack *extack)
5152
+{
5153
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
5154
+ const struct flow_action_entry *act;
5155
+ int err;
5156
+ int i;
5157
+
5158
+ if (!flow_action_has_entries(flow_action)) {
5159
+ NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5160
+ return -EINVAL;
5161
+ }
5162
+
5163
+ if (!flow_offload_has_one_action(flow_action)) {
5164
+ NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
5165
+ return -EOPNOTSUPP;
5166
+ }
5167
+
5168
+ if (!flow_action_basic_hw_stats_check(flow_action, extack))
5169
+ return -EOPNOTSUPP;
5170
+
5171
+ flow_action_for_each(i, act, flow_action) {
5172
+ switch (act->id) {
5173
+ case FLOW_ACTION_POLICE:
5174
+ err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5175
+ if (err)
5176
+ return err;
5177
+
5178
+ rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
5179
+ break;
5180
+ default:
5181
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5182
+ return -EOPNOTSUPP;
5183
+ }
5184
+ }
29255185
29265186 return 0;
29275187 }
29285188
2929
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
2930
- struct tc_cls_flower_offload *f, int flags)
5189
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5190
+ struct tc_cls_matchall_offload *ma)
29315191 {
2932
- struct rhashtable *tc_ht = get_tc_ht(priv);
2933
- struct mlx5e_tc_flow *flow;
2934
- struct mlx5_fc *counter;
2935
- u64 bytes;
2936
- u64 packets;
2937
- u64 lastuse;
5192
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5193
+ struct netlink_ext_ack *extack = ma->common.extack;
29385194
2939
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
2940
- if (!flow || !same_flow_direction(flow, flags))
5195
+ if (!mlx5_esw_qos_enabled(esw)) {
5196
+ NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
5197
+ return -EOPNOTSUPP;
5198
+ }
5199
+
5200
+ if (ma->common.prio != 1) {
5201
+ NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
29415202 return -EINVAL;
5203
+ }
29425204
2943
- if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
2944
- return 0;
5205
+ return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5206
+}
29455207
2946
- counter = mlx5_flow_rule_counter(flow->rule[0]);
2947
- if (!counter)
2948
- return 0;
5208
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5209
+ struct tc_cls_matchall_offload *ma)
5210
+{
5211
+ struct netlink_ext_ack *extack = ma->common.extack;
29495212
2950
- mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
5213
+ return apply_police_params(priv, 0, extack);
5214
+}
29515215
2952
- tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
5216
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5217
+ struct tc_cls_matchall_offload *ma)
5218
+{
5219
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
5220
+ struct rtnl_link_stats64 cur_stats;
5221
+ u64 dbytes;
5222
+ u64 dpkts;
29535223
2954
- return 0;
5224
+ cur_stats = priv->stats.vf_vport;
5225
+ dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5226
+ dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5227
+ rpriv->prev_vf_vport_stats = cur_stats;
5228
+ flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5229
+ FLOW_ACTION_HW_STATS_DELAYED);
29555230 }
29565231
29575232 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
29585233 struct mlx5e_priv *peer_priv)
29595234 {
29605235 struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
2961
- struct mlx5e_hairpin_entry *hpe;
5236
+ struct mlx5e_hairpin_entry *hpe, *tmp;
5237
+ LIST_HEAD(init_wait_list);
29625238 u16 peer_vhca_id;
29635239 int bkt;
29645240
....@@ -2967,9 +5243,18 @@
29675243
29685244 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
29695245
2970
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
2971
- if (hpe->peer_vhca_id == peer_vhca_id)
2972
- hpe->hp->pair->peer_gone = true;
5246
+ mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
5247
+ hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
5248
+ if (refcount_inc_not_zero(&hpe->refcnt))
5249
+ list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5250
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
5251
+
5252
+ list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5253
+ wait_for_completion(&hpe->res_ready);
5254
+ if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5255
+ mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
5256
+
5257
+ mlx5e_hairpin_put(priv, hpe);
29735258 }
29745259 }
29755260
....@@ -3000,24 +5285,79 @@
30005285 return NOTIFY_DONE;
30015286 }
30025287
5288
+static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
5289
+{
5290
+ int tc_grp_size, tc_tbl_size;
5291
+ u32 max_flow_counter;
5292
+
5293
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
5294
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
5295
+
5296
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
5297
+
5298
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
5299
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
5300
+
5301
+ return tc_tbl_size;
5302
+}
5303
+
30035304 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
30045305 {
30055306 struct mlx5e_tc_table *tc = &priv->fs.tc;
5307
+ struct mlx5_core_dev *dev = priv->mdev;
5308
+ struct mlx5_chains_attr attr = {};
30065309 int err;
30075310
3008
- hash_init(tc->mod_hdr_tbl);
5311
+ mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5312
+ mutex_init(&tc->t_lock);
5313
+ mutex_init(&tc->hairpin_tbl_lock);
30095314 hash_init(tc->hairpin_tbl);
30105315
30115316 err = rhashtable_init(&tc->ht, &tc_ht_params);
30125317 if (err)
30135318 return err;
30145319
3015
- tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
3016
- if (register_netdevice_notifier(&tc->netdevice_nb)) {
3017
- tc->netdevice_nb.notifier_call = NULL;
3018
- mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5320
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
5321
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5322
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5323
+ attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5324
+ }
5325
+ attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5326
+ attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5327
+ attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5328
+ attr.default_ft = priv->fs.vlan.ft.t;
5329
+
5330
+ tc->chains = mlx5_chains_create(dev, &attr);
5331
+ if (IS_ERR(tc->chains)) {
5332
+ err = PTR_ERR(tc->chains);
5333
+ goto err_chains;
30195334 }
30205335
5336
+ tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
5337
+ MLX5_FLOW_NAMESPACE_KERNEL);
5338
+ if (IS_ERR(tc->ct)) {
5339
+ err = PTR_ERR(tc->ct);
5340
+ goto err_ct;
5341
+ }
5342
+
5343
+ tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5344
+ err = register_netdevice_notifier_dev_net(priv->netdev,
5345
+ &tc->netdevice_nb,
5346
+ &tc->netdevice_nn);
5347
+ if (err) {
5348
+ tc->netdevice_nb.notifier_call = NULL;
5349
+ mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5350
+ goto err_reg;
5351
+ }
5352
+
5353
+ return 0;
5354
+
5355
+err_reg:
5356
+ mlx5_tc_ct_clean(tc->ct);
5357
+err_ct:
5358
+ mlx5_chains_destroy(tc->chains);
5359
+err_chains:
5360
+ rhashtable_destroy(&tc->ht);
30215361 return err;
30225362 }
30235363
....@@ -3035,29 +5375,194 @@
30355375 struct mlx5e_tc_table *tc = &priv->fs.tc;
30365376
30375377 if (tc->netdevice_nb.notifier_call)
3038
- unregister_netdevice_notifier(&tc->netdevice_nb);
5378
+ unregister_netdevice_notifier_dev_net(priv->netdev,
5379
+ &tc->netdevice_nb,
5380
+ &tc->netdevice_nn);
5381
+
5382
+ mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5383
+ mutex_destroy(&tc->hairpin_tbl_lock);
30395384
30405385 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
30415386
30425387 if (!IS_ERR_OR_NULL(tc->t)) {
3043
- mlx5_destroy_flow_table(tc->t);
5388
+ mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
30445389 tc->t = NULL;
30455390 }
5391
+ mutex_destroy(&tc->t_lock);
5392
+
5393
+ mlx5_tc_ct_clean(tc->ct);
5394
+ mlx5_chains_destroy(tc->chains);
30465395 }
30475396
30485397 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
30495398 {
3050
- return rhashtable_init(tc_ht, &tc_ht_params);
5399
+ const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5400
+ struct mlx5_rep_uplink_priv *uplink_priv;
5401
+ struct mlx5e_rep_priv *rpriv;
5402
+ struct mapping_ctx *mapping;
5403
+ struct mlx5_eswitch *esw;
5404
+ struct mlx5e_priv *priv;
5405
+ int err = 0;
5406
+
5407
+ uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5408
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5409
+ priv = netdev_priv(rpriv->netdev);
5410
+ esw = priv->mdev->priv.eswitch;
5411
+
5412
+ uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5413
+ esw_chains(esw),
5414
+ &esw->offloads.mod_hdr,
5415
+ MLX5_FLOW_NAMESPACE_FDB);
5416
+ if (IS_ERR(uplink_priv->ct_priv))
5417
+ goto err_ct;
5418
+
5419
+ mapping = mapping_create(sizeof(struct tunnel_match_key),
5420
+ TUNNEL_INFO_BITS_MASK, true);
5421
+ if (IS_ERR(mapping)) {
5422
+ err = PTR_ERR(mapping);
5423
+ goto err_tun_mapping;
5424
+ }
5425
+ uplink_priv->tunnel_mapping = mapping;
5426
+
5427
+ mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
5428
+ if (IS_ERR(mapping)) {
5429
+ err = PTR_ERR(mapping);
5430
+ goto err_enc_opts_mapping;
5431
+ }
5432
+ uplink_priv->tunnel_enc_opts_mapping = mapping;
5433
+
5434
+ err = rhashtable_init(tc_ht, &tc_ht_params);
5435
+ if (err)
5436
+ goto err_ht_init;
5437
+
5438
+ return err;
5439
+
5440
+err_ht_init:
5441
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5442
+err_enc_opts_mapping:
5443
+ mapping_destroy(uplink_priv->tunnel_mapping);
5444
+err_tun_mapping:
5445
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
5446
+err_ct:
5447
+ netdev_warn(priv->netdev,
5448
+ "Failed to initialize tc (eswitch), err: %d", err);
5449
+ return err;
30515450 }
30525451
30535452 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
30545453 {
5454
+ struct mlx5_rep_uplink_priv *uplink_priv;
5455
+
30555456 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5457
+
5458
+ uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5459
+
5460
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5461
+ mapping_destroy(uplink_priv->tunnel_mapping);
5462
+
5463
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
30565464 }
30575465
3058
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
5466
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
30595467 {
3060
- struct rhashtable *tc_ht = get_tc_ht(priv);
5468
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
30615469
30625470 return atomic_read(&tc_ht->nelems);
30635471 }
5472
+
5473
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5474
+{
5475
+ struct mlx5e_tc_flow *flow, *tmp;
5476
+
5477
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5478
+ __mlx5e_tc_del_fdb_peer_flow(flow);
5479
+}
5480
+
5481
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5482
+{
5483
+ struct mlx5_rep_uplink_priv *rpriv =
5484
+ container_of(work, struct mlx5_rep_uplink_priv,
5485
+ reoffload_flows_work);
5486
+ struct mlx5e_tc_flow *flow, *tmp;
5487
+
5488
+ mutex_lock(&rpriv->unready_flows_lock);
5489
+ list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5490
+ if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5491
+ unready_flow_del(flow);
5492
+ }
5493
+ mutex_unlock(&rpriv->unready_flows_lock);
5494
+}
5495
+
5496
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5497
+ struct flow_cls_offload *cls_flower,
5498
+ unsigned long flags)
5499
+{
5500
+ switch (cls_flower->command) {
5501
+ case FLOW_CLS_REPLACE:
5502
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5503
+ flags);
5504
+ case FLOW_CLS_DESTROY:
5505
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5506
+ flags);
5507
+ case FLOW_CLS_STATS:
5508
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5509
+ flags);
5510
+ default:
5511
+ return -EOPNOTSUPP;
5512
+ }
5513
+}
5514
+
5515
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5516
+ void *cb_priv)
5517
+{
5518
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
5519
+ struct mlx5e_priv *priv = cb_priv;
5520
+
5521
+ switch (type) {
5522
+ case TC_SETUP_CLSFLOWER:
5523
+ return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5524
+ default:
5525
+ return -EOPNOTSUPP;
5526
+ }
5527
+}
5528
+
5529
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5530
+ struct sk_buff *skb)
5531
+{
5532
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5533
+ u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5534
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
5535
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
5536
+ struct tc_skb_ext *tc_skb_ext;
5537
+ int err;
5538
+
5539
+ reg_b = be32_to_cpu(cqe->ft_metadata);
5540
+
5541
+ chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5542
+
5543
+ err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
5544
+ if (err) {
5545
+ netdev_dbg(priv->netdev,
5546
+ "Couldn't find chain for chain tag: %d, err: %d\n",
5547
+ chain_tag, err);
5548
+ return false;
5549
+ }
5550
+
5551
+ if (chain) {
5552
+ tc_skb_ext = tc_skb_ext_alloc(skb);
5553
+ if (WARN_ON(!tc_skb_ext))
5554
+ return false;
5555
+
5556
+ tc_skb_ext->chain = chain;
5557
+
5558
+ zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
5559
+ ZONE_RESTORE_MAX;
5560
+
5561
+ if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5562
+ zone_restore_id))
5563
+ return false;
5564
+ }
5565
+#endif /* CONFIG_NET_TC_SKB_EXT */
5566
+
5567
+ return true;
5568
+}