// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
/* Copyright (c) 2019 Mellanox Technologies. */
|
|
#include <net/netfilter/nf_conntrack.h>
|
#include <net/netfilter/nf_conntrack_core.h>
|
#include <net/netfilter/nf_conntrack_zones.h>
|
#include <net/netfilter/nf_conntrack_labels.h>
|
#include <net/netfilter/nf_conntrack_helper.h>
|
#include <net/netfilter/nf_conntrack_acct.h>
|
#include <uapi/linux/tc_act/tc_pedit.h>
|
#include <net/tc_act/tc_ct.h>
|
#include <net/flow_offload.h>
|
#include <net/netfilter/nf_flow_table.h>
|
#include <linux/workqueue.h>
|
#include <linux/refcount.h>
|
#include <linux/xarray.h>
|
|
#include "lib/fs_chains.h"
|
#include "en/tc_ct.h"
|
#include "en/mod_hdr.h"
|
#include "en/mapping.h"
|
#include "en.h"
|
#include "en_tc.h"
|
#include "en_rep.h"
|
|
#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
|
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
|
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
|
#define MLX5_CT_STATE_TRK_BIT BIT(2)
|
#define MLX5_CT_STATE_NAT_BIT BIT(3)
|
|
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
|
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
|
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
|
|
#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
|
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
|
|
#define ct_dbg(fmt, args...)\
|
netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
|
|
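/* Per flow-namespace (FDB or NIC rx) CT offload context: holds the global
 * ct, ct_nat and post_ct tables, the zone and tuple hashtables, and the
 * mapping contexts used for the zone restore id and for compressing the
 * 128-bit ct labels into a register-sized id.
 */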
struct mlx5_tc_ct_priv {
|
struct mlx5_core_dev *dev;
|
const struct net_device *netdev;
|
struct mod_hdr_tbl *mod_hdr_tbl;
|
struct idr fte_ids;
|
struct xarray tuple_ids;
|
struct rhashtable zone_ht;
|
struct rhashtable ct_tuples_ht;
|
struct rhashtable ct_tuples_nat_ht;
|
struct mlx5_flow_table *ct;
|
struct mlx5_flow_table *ct_nat;
|
struct mlx5_flow_table *post_ct;
|
struct mutex control_lock; /* guards parallel adds/dels */
|
struct mapping_ctx *zone_mapping;
|
struct mapping_ctx *labels_mapping;
|
enum mlx5_flow_namespace_type ns_type;
|
struct mlx5_fs_chains *chains;
|
spinlock_t ht_lock; /* protects ft entries */
|
};
|
|
struct mlx5_ct_flow {
|
struct mlx5_flow_attr *pre_ct_attr;
|
struct mlx5_flow_attr *post_ct_attr;
|
struct mlx5_flow_handle *pre_ct_rule;
|
struct mlx5_flow_handle *post_ct_rule;
|
struct mlx5_ct_ft *ft;
|
u32 fte_id;
|
u32 chain_mapping;
|
};
|
|
struct mlx5_ct_zone_rule {
|
struct mlx5_flow_handle *rule;
|
struct mlx5e_mod_hdr_handle *mh;
|
struct mlx5_flow_attr *attr;
|
bool nat;
|
};
|
|
struct mlx5_tc_ct_pre {
|
struct mlx5_flow_table *ft;
|
struct mlx5_flow_group *flow_grp;
|
struct mlx5_flow_group *miss_grp;
|
struct mlx5_flow_handle *flow_rule;
|
struct mlx5_flow_handle *miss_rule;
|
struct mlx5_modify_hdr *modify_hdr;
|
};
|
|
struct mlx5_ct_ft {
|
struct rhash_head node;
|
u16 zone;
|
u32 zone_restore_id;
|
refcount_t refcount;
|
struct nf_flowtable *nf_ft;
|
struct mlx5_tc_ct_priv *ct_priv;
|
struct rhashtable ct_entries_ht;
|
struct mlx5_tc_ct_pre pre_ct;
|
struct mlx5_tc_ct_pre pre_ct_nat;
|
};
|
|
struct mlx5_ct_tuple {
|
u16 addr_type;
|
__be16 n_proto;
|
u8 ip_proto;
|
struct {
|
union {
|
__be32 src_v4;
|
struct in6_addr src_v6;
|
};
|
union {
|
__be32 dst_v4;
|
struct in6_addr dst_v6;
|
};
|
} ip;
|
struct {
|
__be16 src;
|
__be16 dst;
|
} port;
|
|
u16 zone;
|
};
|
|
struct mlx5_ct_counter {
|
struct mlx5_fc *counter;
|
refcount_t refcount;
|
bool is_shared;
|
};
|
|
enum {
|
MLX5_CT_ENTRY_FLAG_VALID,
|
};
|
|
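/* A single offloaded conntrack entry. It is hashed three ways: by nf flow
 * cookie (node, in the per-zone ct_entries_ht), by pre-NAT tuple
 * (tuple_node) and, when the entry does NAT, by post-NAT tuple
 * (tuple_nat_node). The plain and NAT hardware rules live in
 * zone_rules[false] and zone_rules[true] respectively.
 */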
struct mlx5_ct_entry {
|
struct rhash_head node;
|
struct rhash_head tuple_node;
|
struct rhash_head tuple_nat_node;
|
struct mlx5_ct_counter *counter;
|
unsigned long cookie;
|
unsigned long restore_cookie;
|
struct mlx5_ct_tuple tuple;
|
struct mlx5_ct_tuple tuple_nat;
|
struct mlx5_ct_zone_rule zone_rules[2];
|
|
struct mlx5_tc_ct_priv *ct_priv;
|
struct work_struct work;
|
|
refcount_t refcnt;
|
unsigned long flags;
|
};
|
|
static const struct rhashtable_params cts_ht_params = {
|
.head_offset = offsetof(struct mlx5_ct_entry, node),
|
.key_offset = offsetof(struct mlx5_ct_entry, cookie),
|
.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
|
.automatic_shrinking = true,
|
.min_size = 16 * 1024,
|
};
|
|
static const struct rhashtable_params zone_params = {
|
.head_offset = offsetof(struct mlx5_ct_ft, node),
|
.key_offset = offsetof(struct mlx5_ct_ft, zone),
|
.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
|
.automatic_shrinking = true,
|
};
|
|
static const struct rhashtable_params tuples_ht_params = {
|
.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
|
.key_offset = offsetof(struct mlx5_ct_entry, tuple),
|
.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
|
.automatic_shrinking = true,
|
.min_size = 16 * 1024,
|
};
|
|
static const struct rhashtable_params tuples_nat_ht_params = {
|
.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
|
.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
|
.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
|
.automatic_shrinking = true,
|
.min_size = 16 * 1024,
|
};
|
|
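/* The NAT tuple node is only hashed when the post-NAT tuple differs from the
 * original one, so a linked tuple_nat_node means this entry has a NAT
 * rewrite and must also be removed from ct_tuples_nat_ht.
 */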
static bool
|
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
|
{
|
return !!(entry->tuple_nat_node.next);
|
}
|
|
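/* ct labels are 128 bits wide but only MLX5_CT_LABELS_BITS fit in a
 * register, so non-zero labels are run through a mapping context and only
 * the returned id is programmed into hardware. All-zero labels use the
 * reserved id 0 and never touch the mapping.
 */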
static int
|
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
|
u32 *labels, u32 *id)
|
{
|
if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
|
*id = 0;
|
return 0;
|
}
|
|
if (mapping_add(ct_priv->labels_mapping, labels, id))
|
return -EOPNOTSUPP;
|
|
return 0;
|
}
|
|
static void
|
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
|
{
|
if (id)
|
mapping_remove(ct_priv->labels_mapping, id);
|
}
|
|
static int
|
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
|
{
|
struct flow_match_control control;
|
struct flow_match_basic basic;
|
|
flow_rule_match_basic(rule, &basic);
|
flow_rule_match_control(rule, &control);
|
|
tuple->n_proto = basic.key->n_proto;
|
tuple->ip_proto = basic.key->ip_proto;
|
tuple->addr_type = control.key->addr_type;
|
|
if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
struct flow_match_ipv4_addrs match;
|
|
flow_rule_match_ipv4_addrs(rule, &match);
|
tuple->ip.src_v4 = match.key->src;
|
tuple->ip.dst_v4 = match.key->dst;
|
} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
|
struct flow_match_ipv6_addrs match;
|
|
flow_rule_match_ipv6_addrs(rule, &match);
|
tuple->ip.src_v6 = match.key->src;
|
tuple->ip.dst_v6 = match.key->dst;
|
} else {
|
return -EOPNOTSUPP;
|
}
|
|
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
|
struct flow_match_ports match;
|
|
flow_rule_match_ports(rule, &match);
|
switch (tuple->ip_proto) {
|
case IPPROTO_TCP:
|
case IPPROTO_UDP:
|
tuple->port.src = match.key->src;
|
tuple->port.dst = match.key->dst;
|
break;
|
default:
|
return -EOPNOTSUPP;
|
}
|
} else {
|
return -EOPNOTSUPP;
|
}
|
|
return 0;
|
}
|
|
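/* Build the post-NAT tuple by replaying the rule's mangle actions on top of
 * the already parsed pre-NAT tuple. Only IPv4/IPv6 address and TCP/UDP port
 * rewrites are supported; anything else fails the offload.
 */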
static int
|
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
|
struct flow_rule *rule)
|
{
|
struct flow_action *flow_action = &rule->action;
|
struct flow_action_entry *act;
|
u32 offset, val, ip6_offset;
|
int i;
|
|
flow_action_for_each(i, act, flow_action) {
|
if (act->id != FLOW_ACTION_MANGLE)
|
continue;
|
|
offset = act->mangle.offset;
|
val = act->mangle.val;
|
switch (act->mangle.htype) {
|
case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
|
if (offset == offsetof(struct iphdr, saddr))
|
tuple->ip.src_v4 = cpu_to_be32(val);
|
else if (offset == offsetof(struct iphdr, daddr))
|
tuple->ip.dst_v4 = cpu_to_be32(val);
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
|
ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
|
ip6_offset /= 4;
|
if (ip6_offset < 4)
|
tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
|
else if (ip6_offset < 8)
|
tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
|
if (offset == offsetof(struct tcphdr, source))
|
tuple->port.src = cpu_to_be16(val);
|
else if (offset == offsetof(struct tcphdr, dest))
|
tuple->port.dst = cpu_to_be16(val);
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
|
if (offset == offsetof(struct udphdr, source))
|
tuple->port.src = cpu_to_be16(val);
|
else if (offset == offsetof(struct udphdr, dest))
|
tuple->port.dst = cpu_to_be16(val);
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
default:
|
return -EOPNOTSUPP;
|
}
|
}
|
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
|
struct flow_rule *rule)
|
{
|
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
|
outer_headers);
|
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
|
outer_headers);
|
u16 addr_type = 0;
|
u8 ip_proto = 0;
|
|
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
|
struct flow_match_basic match;
|
|
flow_rule_match_basic(rule, &match);
|
|
mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
|
headers_v);
|
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
|
match.mask->ip_proto);
|
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
|
match.key->ip_proto);
|
|
ip_proto = match.key->ip_proto;
|
}
|
|
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
|
struct flow_match_control match;
|
|
flow_rule_match_control(rule, &match);
|
addr_type = match.key->addr_type;
|
}
|
|
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
struct flow_match_ipv4_addrs match;
|
|
flow_rule_match_ipv4_addrs(rule, &match);
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
|
src_ipv4_src_ipv6.ipv4_layout.ipv4),
|
&match.mask->src, sizeof(match.mask->src));
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
|
src_ipv4_src_ipv6.ipv4_layout.ipv4),
|
&match.key->src, sizeof(match.key->src));
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
|
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
|
&match.mask->dst, sizeof(match.mask->dst));
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
|
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
|
&match.key->dst, sizeof(match.key->dst));
|
}
|
|
if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
|
struct flow_match_ipv6_addrs match;
|
|
flow_rule_match_ipv6_addrs(rule, &match);
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
|
src_ipv4_src_ipv6.ipv6_layout.ipv6),
|
&match.mask->src, sizeof(match.mask->src));
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
|
src_ipv4_src_ipv6.ipv6_layout.ipv6),
|
&match.key->src, sizeof(match.key->src));
|
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
|
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
|
&match.mask->dst, sizeof(match.mask->dst));
|
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
|
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
|
&match.key->dst, sizeof(match.key->dst));
|
}
|
|
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
|
struct flow_match_ports match;
|
|
flow_rule_match_ports(rule, &match);
|
switch (ip_proto) {
|
case IPPROTO_TCP:
|
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
|
tcp_sport, ntohs(match.mask->src));
|
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
|
tcp_sport, ntohs(match.key->src));
|
|
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
|
tcp_dport, ntohs(match.mask->dst));
|
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
|
tcp_dport, ntohs(match.key->dst));
|
break;
|
|
case IPPROTO_UDP:
|
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
|
udp_sport, ntohs(match.mask->src));
|
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
|
udp_sport, ntohs(match.key->src));
|
|
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
|
udp_dport, ntohs(match.mask->dst));
|
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
|
udp_dport, ntohs(match.key->dst));
|
break;
|
default:
|
break;
|
}
|
}
|
|
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
|
struct flow_match_tcp match;
|
|
flow_rule_match_tcp(rule, &match);
|
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
|
ntohs(match.mask->flags));
|
MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
|
ntohs(match.key->flags));
|
}
|
|
return 0;
|
}
|
|
static void
|
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
|
{
|
if (entry->counter->is_shared &&
|
!refcount_dec_and_test(&entry->counter->refcount))
|
return;
|
|
mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
|
kfree(entry->counter);
|
}
|
|
static void
|
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5_ct_entry *entry,
|
bool nat)
|
{
|
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
|
struct mlx5_flow_attr *attr = zone_rule->attr;
|
|
ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
|
|
mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
|
mlx5e_mod_hdr_detach(ct_priv->dev,
|
ct_priv->mod_hdr_tbl, zone_rule->mh);
|
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
|
kfree(attr);
|
}
|
|
static void
|
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5_ct_entry *entry)
|
{
|
mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
|
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
|
}
|
|
static struct flow_action_entry *
|
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
|
{
|
struct flow_action *flow_action = &flow_rule->action;
|
struct flow_action_entry *act;
|
int i;
|
|
flow_action_for_each(i, act, flow_action) {
|
if (act->id == FLOW_ACTION_CT_METADATA)
|
return act;
|
}
|
|
return NULL;
|
}
|
|
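/* Emit the mod-header actions that store a connection's ct metadata
 * (ct_state bits, mark, compressed labels id and zone restore id) in
 * registers, so later chains can match on it and the miss path can restore
 * it to software.
 */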
static int
|
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5e_tc_mod_hdr_acts *mod_acts,
|
u8 ct_state,
|
u32 mark,
|
u32 labels_id,
|
u8 zone_restore_id)
|
{
|
enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
|
struct mlx5_core_dev *dev = ct_priv->dev;
|
int err;
|
|
err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
|
CTSTATE_TO_REG, ct_state);
|
if (err)
|
return err;
|
|
err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
|
MARK_TO_REG, mark);
|
if (err)
|
return err;
|
|
err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
|
LABELS_TO_REG, labels_id);
|
if (err)
|
return err;
|
|
err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
|
ZONE_RESTORE_TO_REG, zone_restore_id);
|
if (err)
|
return err;
|
|
/* Make another copy of zone id in reg_b for
|
* NIC rx flows since we don't copy reg_c1 to
|
* reg_b upon miss.
|
*/
|
if (ns != MLX5_FLOW_NAMESPACE_FDB) {
|
err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
|
NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
|
if (err)
|
return err;
|
}
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
|
char *modact)
|
{
|
u32 offset = act->mangle.offset, field;
|
|
switch (act->mangle.htype) {
|
case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
|
MLX5_SET(set_action_in, modact, length, 0);
|
if (offset == offsetof(struct iphdr, saddr))
|
field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
|
else if (offset == offsetof(struct iphdr, daddr))
|
field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
|
MLX5_SET(set_action_in, modact, length, 0);
|
if (offset == offsetof(struct ipv6hdr, saddr) + 12)
|
field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
|
else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
|
field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
|
else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
|
field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
|
else if (offset == offsetof(struct ipv6hdr, saddr))
|
field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
|
else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
|
field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
|
else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
|
field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
|
else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
|
field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
|
else if (offset == offsetof(struct ipv6hdr, daddr))
|
field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
|
MLX5_SET(set_action_in, modact, length, 16);
|
if (offset == offsetof(struct tcphdr, source))
|
field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
|
else if (offset == offsetof(struct tcphdr, dest))
|
field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
|
MLX5_SET(set_action_in, modact, length, 16);
|
if (offset == offsetof(struct udphdr, source))
|
field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
|
else if (offset == offsetof(struct udphdr, dest))
|
field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
|
else
|
return -EOPNOTSUPP;
|
break;
|
|
default:
|
return -EOPNOTSUPP;
|
}
|
|
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
|
MLX5_SET(set_action_in, modact, offset, 0);
|
MLX5_SET(set_action_in, modact, field, field);
|
MLX5_SET(set_action_in, modact, data, act->mangle.val);
|
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
|
struct flow_rule *flow_rule,
|
struct mlx5e_tc_mod_hdr_acts *mod_acts)
|
{
|
struct flow_action *flow_action = &flow_rule->action;
|
struct mlx5_core_dev *mdev = ct_priv->dev;
|
struct flow_action_entry *act;
|
size_t action_size;
|
char *modact;
|
int err, i;
|
|
action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
|
|
flow_action_for_each(i, act, flow_action) {
|
switch (act->id) {
|
case FLOW_ACTION_MANGLE: {
|
err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
|
mod_acts);
|
if (err)
|
return err;
|
|
modact = mod_acts->actions +
|
mod_acts->num_actions * action_size;
|
|
err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
|
if (err)
|
return err;
|
|
mod_acts->num_actions++;
|
}
|
break;
|
|
case FLOW_ACTION_CT_METADATA:
|
/* Handled earlier */
|
continue;
|
default:
|
return -EOPNOTSUPP;
|
}
|
}
|
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5_flow_attr *attr,
|
struct flow_rule *flow_rule,
|
struct mlx5e_mod_hdr_handle **mh,
|
u8 zone_restore_id, bool nat)
|
{
|
struct mlx5e_tc_mod_hdr_acts mod_acts = {};
|
struct flow_action_entry *meta;
|
u16 ct_state = 0;
|
int err;
|
|
meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
|
if (!meta)
|
return -EOPNOTSUPP;
|
|
err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
|
&attr->ct_attr.ct_labels_id);
|
if (err)
|
return -EOPNOTSUPP;
|
if (nat) {
|
err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
|
&mod_acts);
|
if (err)
|
goto err_mapping;
|
|
ct_state |= MLX5_CT_STATE_NAT_BIT;
|
}
|
|
ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
|
err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
|
ct_state,
|
meta->ct_metadata.mark,
|
attr->ct_attr.ct_labels_id,
|
zone_restore_id);
|
if (err)
|
goto err_mapping;
|
|
*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
|
ct_priv->mod_hdr_tbl,
|
ct_priv->ns_type,
|
&mod_acts);
|
if (IS_ERR(*mh)) {
|
err = PTR_ERR(*mh);
|
goto err_mapping;
|
}
|
attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
|
|
dealloc_mod_hdr_actions(&mod_acts);
|
return 0;
|
|
err_mapping:
|
dealloc_mod_hdr_actions(&mod_acts);
|
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
|
return err;
|
}
|
|
static int
|
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
|
struct flow_rule *flow_rule,
|
struct mlx5_ct_entry *entry,
|
bool nat, u8 zone_restore_id)
|
{
|
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
|
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
|
struct mlx5_flow_spec *spec = NULL;
|
struct mlx5_flow_attr *attr;
|
int err;
|
|
zone_rule->nat = nat;
|
|
spec = kzalloc(sizeof(*spec), GFP_KERNEL);
|
if (!spec)
|
return -ENOMEM;
|
|
attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
|
if (!attr) {
|
err = -ENOMEM;
|
goto err_attr;
|
}
|
|
err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
|
&zone_rule->mh,
|
zone_restore_id, nat);
|
if (err) {
|
ct_dbg("Failed to create ct entry mod hdr");
|
goto err_mod_hdr;
|
}
|
|
attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
|
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
|
MLX5_FLOW_CONTEXT_ACTION_COUNT;
|
attr->dest_chain = 0;
|
attr->dest_ft = ct_priv->post_ct;
|
attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
|
attr->outer_match_level = MLX5_MATCH_L4;
|
attr->counter = entry->counter->counter;
|
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
|
|
mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
|
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
|
entry->tuple.zone & MLX5_CT_ZONE_MASK,
|
MLX5_CT_ZONE_MASK);
|
|
zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
|
if (IS_ERR(zone_rule->rule)) {
|
err = PTR_ERR(zone_rule->rule);
|
ct_dbg("Failed to add ct entry rule, nat: %d", nat);
|
goto err_rule;
|
}
|
|
zone_rule->attr = attr;
|
|
kfree(spec);
|
ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
|
|
return 0;
|
|
err_rule:
|
mlx5e_mod_hdr_detach(ct_priv->dev,
|
ct_priv->mod_hdr_tbl, zone_rule->mh);
|
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
|
err_mod_hdr:
|
kfree(attr);
|
err_attr:
|
kfree(spec);
|
return err;
|
}
|
|
static bool
|
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
|
{
|
return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
|
}
|
|
static struct mlx5_ct_entry *
|
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
|
{
|
struct mlx5_ct_entry *entry;
|
|
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
|
tuples_ht_params);
|
if (entry && mlx5_tc_ct_entry_valid(entry) &&
|
refcount_inc_not_zero(&entry->refcnt)) {
|
return entry;
|
} else if (!entry) {
|
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
|
tuple, tuples_nat_ht_params);
|
if (entry && mlx5_tc_ct_entry_valid(entry) &&
|
refcount_inc_not_zero(&entry->refcnt))
|
return entry;
|
}
|
|
return entry ? ERR_PTR(-EINVAL) : NULL;
|
}
|
|
static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
|
{
|
struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
|
|
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
|
&entry->tuple_nat_node,
|
tuples_nat_ht_params);
|
rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
|
tuples_ht_params);
|
}
|
|
static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
|
{
|
struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
|
|
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
|
|
spin_lock_bh(&ct_priv->ht_lock);
|
mlx5_tc_ct_entry_remove_from_tuples(entry);
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
mlx5_tc_ct_counter_put(ct_priv, entry);
|
kfree(entry);
|
}
|
|
static void
|
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
|
{
|
if (!refcount_dec_and_test(&entry->refcnt))
|
return;
|
|
mlx5_tc_ct_entry_del(entry);
|
}
|
|
static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
|
{
|
struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
|
|
mlx5_tc_ct_entry_del(entry);
|
}
|
|
static void
|
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
|
{
|
struct mlx5e_priv *priv;
|
|
if (!refcount_dec_and_test(&entry->refcnt))
|
return;
|
|
priv = netdev_priv(entry->ct_priv->netdev);
|
INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
|
queue_work(priv->wq, &entry->work);
|
}
|
|
static struct mlx5_ct_counter *
|
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
|
{
|
struct mlx5_ct_counter *counter;
|
int ret;
|
|
counter = kzalloc(sizeof(*counter), GFP_KERNEL);
|
if (!counter)
|
return ERR_PTR(-ENOMEM);
|
|
counter->is_shared = false;
|
counter->counter = mlx5_fc_create(ct_priv->dev, true);
|
if (IS_ERR(counter->counter)) {
|
ct_dbg("Failed to create counter for ct entry");
|
ret = PTR_ERR(counter->counter);
|
kfree(counter);
|
return ERR_PTR(ret);
|
}
|
|
return counter;
|
}
|
|
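/* Without per-connection accounting both directions of a connection can
 * share one hardware counter: look up the entry for the reversed tuple and
 * reuse its counter if it is still valid, otherwise allocate a fresh shared
 * one.
 */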
static struct mlx5_ct_counter *
|
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5_ct_entry *entry)
|
{
|
struct mlx5_ct_tuple rev_tuple = entry->tuple;
|
struct mlx5_ct_counter *shared_counter;
|
struct mlx5_ct_entry *rev_entry;
|
__be16 tmp_port;
|
int ret;
|
|
/* get the reversed tuple */
|
tmp_port = rev_tuple.port.src;
|
rev_tuple.port.src = rev_tuple.port.dst;
|
rev_tuple.port.dst = tmp_port;
|
|
if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
|
__be32 tmp_addr = rev_tuple.ip.src_v4;
|
|
rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
|
rev_tuple.ip.dst_v4 = tmp_addr;
|
} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
|
struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
|
|
rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
|
rev_tuple.ip.dst_v6 = tmp_addr;
|
} else {
|
return ERR_PTR(-EOPNOTSUPP);
|
}
|
|
/* Use the same counter as the reverse direction */
|
spin_lock_bh(&ct_priv->ht_lock);
|
rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
|
|
if (IS_ERR(rev_entry)) {
|
spin_unlock_bh(&ct_priv->ht_lock);
|
goto create_counter;
|
}
|
|
if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
|
ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
|
shared_counter = rev_entry->counter;
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
mlx5_tc_ct_entry_put(rev_entry);
|
return shared_counter;
|
}
|
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
create_counter:
|
|
shared_counter = mlx5_tc_ct_counter_create(ct_priv);
|
if (IS_ERR(shared_counter)) {
|
ret = PTR_ERR(shared_counter);
|
return ERR_PTR(ret);
|
}
|
|
shared_counter->is_shared = true;
|
refcount_set(&shared_counter->refcount, 1);
|
return shared_counter;
|
}
|
|
static int
|
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
|
struct flow_rule *flow_rule,
|
struct mlx5_ct_entry *entry,
|
u8 zone_restore_id)
|
{
|
int err;
|
|
if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
|
entry->counter = mlx5_tc_ct_counter_create(ct_priv);
|
else
|
entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
|
|
if (IS_ERR(entry->counter)) {
|
err = PTR_ERR(entry->counter);
|
return err;
|
}
|
|
err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
|
zone_restore_id);
|
if (err)
|
goto err_orig;
|
|
err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
|
zone_restore_id);
|
if (err)
|
goto err_nat;
|
|
return 0;
|
|
err_nat:
|
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
|
err_orig:
|
mlx5_tc_ct_counter_put(ct_priv, entry);
|
return err;
|
}
|
|
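/* FLOW_CLS_REPLACE from the nf flowtable: parse the pre/post-NAT tuples,
 * insert the entry into the cookie and tuple hashtables under ht_lock, then
 * offload the plain and NAT rules and mark the entry valid. The refcount
 * starts at 2: one reference for the hashtables and one for this function.
 */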
static int
|
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
|
struct flow_cls_offload *flow)
|
{
|
struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
|
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
|
struct flow_action_entry *meta_action;
|
unsigned long cookie = flow->cookie;
|
struct mlx5_ct_entry *entry;
|
int err;
|
|
meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
|
if (!meta_action)
|
return -EOPNOTSUPP;
|
|
spin_lock_bh(&ct_priv->ht_lock);
|
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
|
if (entry && refcount_inc_not_zero(&entry->refcnt)) {
|
spin_unlock_bh(&ct_priv->ht_lock);
|
mlx5_tc_ct_entry_put(entry);
|
return -EEXIST;
|
}
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
|
if (!entry)
|
return -ENOMEM;
|
|
entry->tuple.zone = ft->zone;
|
entry->cookie = flow->cookie;
|
entry->restore_cookie = meta_action->ct_metadata.cookie;
|
refcount_set(&entry->refcnt, 2);
|
entry->ct_priv = ct_priv;
|
|
err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
|
if (err)
|
goto err_set;
|
|
memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
|
err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
|
if (err)
|
goto err_set;
|
|
spin_lock_bh(&ct_priv->ht_lock);
|
|
err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
|
cts_ht_params);
|
if (err)
|
goto err_entries;
|
|
err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
|
&entry->tuple_node,
|
tuples_ht_params);
|
if (err)
|
goto err_tuple;
|
|
if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
|
err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
|
&entry->tuple_nat_node,
|
tuples_nat_ht_params);
|
if (err)
|
goto err_tuple_nat;
|
}
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
|
ft->zone_restore_id);
|
if (err)
|
goto err_rules;
|
|
set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
|
mlx5_tc_ct_entry_put(entry); /* this function reference */
|
|
return 0;
|
|
err_rules:
|
spin_lock_bh(&ct_priv->ht_lock);
|
if (mlx5_tc_ct_entry_has_nat(entry))
|
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
|
&entry->tuple_nat_node, tuples_nat_ht_params);
|
err_tuple_nat:
|
rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
|
&entry->tuple_node,
|
tuples_ht_params);
|
err_tuple:
|
rhashtable_remove_fast(&ft->ct_entries_ht,
|
&entry->node,
|
cts_ht_params);
|
err_entries:
|
spin_unlock_bh(&ct_priv->ht_lock);
|
err_set:
|
kfree(entry);
|
if (err != -EEXIST)
|
netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
|
return err;
|
}
|
|
static int
|
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
|
struct flow_cls_offload *flow)
|
{
|
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
|
unsigned long cookie = flow->cookie;
|
struct mlx5_ct_entry *entry;
|
|
spin_lock_bh(&ct_priv->ht_lock);
|
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
|
if (!entry) {
|
spin_unlock_bh(&ct_priv->ht_lock);
|
return -ENOENT;
|
}
|
|
if (!mlx5_tc_ct_entry_valid(entry)) {
|
spin_unlock_bh(&ct_priv->ht_lock);
|
return -EINVAL;
|
}
|
|
rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
|
mlx5_tc_ct_entry_remove_from_tuples(entry);
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
mlx5_tc_ct_entry_put(entry);
|
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
|
struct flow_cls_offload *f)
|
{
|
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
|
unsigned long cookie = f->cookie;
|
struct mlx5_ct_entry *entry;
|
u64 lastuse, packets, bytes;
|
|
spin_lock_bh(&ct_priv->ht_lock);
|
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
|
if (!entry) {
|
spin_unlock_bh(&ct_priv->ht_lock);
|
return -ENOENT;
|
}
|
|
if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
|
spin_unlock_bh(&ct_priv->ht_lock);
|
return -EINVAL;
|
}
|
|
spin_unlock_bh(&ct_priv->ht_lock);
|
|
mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
|
flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
|
FLOW_ACTION_HW_STATS_DELAYED);
|
|
mlx5_tc_ct_entry_put(entry);
|
return 0;
|
}
|
|
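/* Callback registered on the zone's nf flowtable; dispatches add, delete and
 * stats requests for established connections to the handlers above.
 */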
static int
|
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
|
void *cb_priv)
|
{
|
struct flow_cls_offload *f = type_data;
|
struct mlx5_ct_ft *ft = cb_priv;
|
|
if (type != TC_SETUP_CLSFLOWER)
|
return -EOPNOTSUPP;
|
|
switch (f->command) {
|
case FLOW_CLS_REPLACE:
|
return mlx5_tc_ct_block_flow_offload_add(ft, f);
|
case FLOW_CLS_DESTROY:
|
return mlx5_tc_ct_block_flow_offload_del(ft, f);
|
case FLOW_CLS_STATS:
|
return mlx5_tc_ct_block_flow_offload_stats(ft, f);
|
default:
|
break;
|
}
|
|
return -EOPNOTSUPP;
|
}
|
|
static bool
|
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
|
u16 zone)
|
{
|
struct flow_keys flow_keys;
|
|
skb_reset_network_header(skb);
|
skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
|
|
tuple->zone = zone;
|
|
if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
|
flow_keys.basic.ip_proto != IPPROTO_UDP)
|
return false;
|
|
tuple->port.src = flow_keys.ports.src;
|
tuple->port.dst = flow_keys.ports.dst;
|
tuple->n_proto = flow_keys.basic.n_proto;
|
tuple->ip_proto = flow_keys.basic.ip_proto;
|
|
switch (flow_keys.basic.n_proto) {
|
case htons(ETH_P_IP):
|
tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
|
tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
|
tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
|
break;
|
|
case htons(ETH_P_IPV6):
|
tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
|
tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
|
tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
|
break;
|
default:
|
goto out;
|
}
|
|
return true;
|
|
out:
|
return false;
|
}
|
|
int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
|
{
|
u32 ctstate = 0, ctstate_mask = 0;
|
|
mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
|
&ctstate, &ctstate_mask);
|
|
if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
|
return -EOPNOTSUPP;
|
|
ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
|
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
|
ctstate, ctstate_mask);
|
|
return 0;
|
}
|
|
void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
|
{
|
if (!priv || !ct_attr->ct_labels_id)
|
return;
|
|
mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
|
}
|
|
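/* Translate a flower ct match (ct_state, ct_zone, ct_mark, ct_labels) into
 * matches on the ct registers. Only the trk and est ct_state flags can be
 * offloaded, matching on +new is rejected, and labels are matched through
 * their mapped id.
 */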
int
|
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
|
struct mlx5_flow_spec *spec,
|
struct flow_cls_offload *f,
|
struct mlx5_ct_attr *ct_attr,
|
struct netlink_ext_ack *extack)
|
{
|
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
|
struct flow_dissector_key_ct *mask, *key;
|
bool trk, est, untrk, unest, new;
|
u32 ctstate = 0, ctstate_mask = 0;
|
u16 ct_state_on, ct_state_off;
|
u16 ct_state, ct_state_mask;
|
struct flow_match_ct match;
|
u32 ct_labels[4];
|
|
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
|
return 0;
|
|
if (!priv) {
|
NL_SET_ERR_MSG_MOD(extack,
|
"offload of ct matching isn't available");
|
return -EOPNOTSUPP;
|
}
|
|
flow_rule_match_ct(rule, &match);
|
|
key = match.key;
|
mask = match.mask;
|
|
ct_state = key->ct_state;
|
ct_state_mask = mask->ct_state;
|
|
if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
|
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
|
TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
|
NL_SET_ERR_MSG_MOD(extack,
|
"only ct_state trk, est and new are supported for offload");
|
return -EOPNOTSUPP;
|
}
|
|
ct_state_on = ct_state & ct_state_mask;
|
ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
|
trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
|
new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
|
est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
|
untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
|
unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
|
|
ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
|
ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
|
ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
|
ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
|
|
if (new) {
|
NL_SET_ERR_MSG_MOD(extack,
|
"matching on ct_state +new isn't supported");
|
return -EOPNOTSUPP;
|
}
|
|
if (mask->ct_zone)
|
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
|
key->ct_zone, MLX5_CT_ZONE_MASK);
|
if (ctstate_mask)
|
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
|
ctstate, ctstate_mask);
|
if (mask->ct_mark)
|
mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
|
key->ct_mark, mask->ct_mark);
|
if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
|
mask->ct_labels[3]) {
|
ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
|
ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
|
ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
|
ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
|
if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
|
return -EOPNOTSUPP;
|
mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
|
MLX5_CT_LABELS_MASK);
|
}
|
|
return 0;
|
}
|
|
int
|
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
|
struct mlx5_flow_attr *attr,
|
const struct flow_action_entry *act,
|
struct netlink_ext_ack *extack)
|
{
|
if (!priv) {
|
NL_SET_ERR_MSG_MOD(extack,
|
"offload of ct action isn't available");
|
return -EOPNOTSUPP;
|
}
|
|
attr->ct_attr.zone = act->ct.zone;
|
attr->ct_attr.ct_action = act->ct.action;
|
attr->ct_attr.nf_ft = act->ct.flow_table;
|
|
return 0;
|
}
|
|
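/* Each per-zone pre_ct table gets two rules: one matching packets already
 * tracked in this zone (plus the NAT bit for the pre_ct_nat instance) that
 * skips straight to post_ct, and a catch-all miss rule sending everything
 * else to the ct (or ct_nat) table.
 */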
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
|
struct mlx5_tc_ct_pre *pre_ct,
|
bool nat)
|
{
|
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
|
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
|
struct mlx5_core_dev *dev = ct_priv->dev;
|
struct mlx5_flow_table *ft = pre_ct->ft;
|
struct mlx5_flow_destination dest = {};
|
struct mlx5_flow_act flow_act = {};
|
struct mlx5_modify_hdr *mod_hdr;
|
struct mlx5_flow_handle *rule;
|
struct mlx5_flow_spec *spec;
|
u32 ctstate;
|
u16 zone;
|
int err;
|
|
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
|
if (!spec)
|
return -ENOMEM;
|
|
zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
|
err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
|
ZONE_TO_REG, zone);
|
if (err) {
|
ct_dbg("Failed to set zone register mapping");
|
goto err_mapping;
|
}
|
|
mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
|
pre_mod_acts.num_actions,
|
pre_mod_acts.actions);
|
|
if (IS_ERR(mod_hdr)) {
|
err = PTR_ERR(mod_hdr);
|
ct_dbg("Failed to create pre ct mod hdr");
|
goto err_mapping;
|
}
|
pre_ct->modify_hdr = mod_hdr;
|
|
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
|
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
|
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
|
flow_act.modify_hdr = mod_hdr;
|
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
|
|
/* add flow rule */
|
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
|
zone, MLX5_CT_ZONE_MASK);
|
ctstate = MLX5_CT_STATE_TRK_BIT;
|
if (nat)
|
ctstate |= MLX5_CT_STATE_NAT_BIT;
|
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
|
|
dest.ft = ct_priv->post_ct;
|
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
|
if (IS_ERR(rule)) {
|
err = PTR_ERR(rule);
|
ct_dbg("Failed to add pre ct flow rule zone %d", zone);
|
goto err_flow_rule;
|
}
|
pre_ct->flow_rule = rule;
|
|
/* add miss rule */
|
memset(spec, 0, sizeof(*spec));
|
dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
|
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
|
if (IS_ERR(rule)) {
|
err = PTR_ERR(rule);
|
ct_dbg("Failed to add pre ct miss rule zone %d", zone);
|
goto err_miss_rule;
|
}
|
pre_ct->miss_rule = rule;
|
|
dealloc_mod_hdr_actions(&pre_mod_acts);
|
kvfree(spec);
|
return 0;
|
|
err_miss_rule:
|
mlx5_del_flow_rules(pre_ct->flow_rule);
|
err_flow_rule:
|
mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
|
err_mapping:
|
dealloc_mod_hdr_actions(&pre_mod_acts);
|
kvfree(spec);
|
return err;
|
}
|
|
static void
|
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
|
struct mlx5_tc_ct_pre *pre_ct)
|
{
|
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
|
struct mlx5_core_dev *dev = ct_priv->dev;
|
|
mlx5_del_flow_rules(pre_ct->flow_rule);
|
mlx5_del_flow_rules(pre_ct->miss_rule);
|
mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
|
}
|
|
static int
|
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
|
struct mlx5_tc_ct_pre *pre_ct,
|
bool nat)
|
{
|
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
|
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
|
struct mlx5_core_dev *dev = ct_priv->dev;
|
struct mlx5_flow_table_attr ft_attr = {};
|
struct mlx5_flow_namespace *ns;
|
struct mlx5_flow_table *ft;
|
struct mlx5_flow_group *g;
|
u32 metadata_reg_c_2_mask;
|
u32 *flow_group_in;
|
void *misc;
|
int err;
|
|
ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
|
if (!ns) {
|
err = -EOPNOTSUPP;
|
ct_dbg("Failed to get flow namespace");
|
return err;
|
}
|
|
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
|
if (!flow_group_in)
|
return -ENOMEM;
|
|
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
|
ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
|
FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
|
ft_attr.max_fte = 2;
|
ft_attr.level = 1;
|
ft = mlx5_create_flow_table(ns, &ft_attr);
|
if (IS_ERR(ft)) {
|
err = PTR_ERR(ft);
|
ct_dbg("Failed to create pre ct table");
|
goto out_free;
|
}
|
pre_ct->ft = ft;
|
|
/* create flow group */
|
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
|
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
|
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
|
MLX5_MATCH_MISC_PARAMETERS_2);
|
|
misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
|
match_criteria.misc_parameters_2);
|
|
metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
|
metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
|
if (nat)
|
metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
|
|
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
|
metadata_reg_c_2_mask);
|
|
g = mlx5_create_flow_group(ft, flow_group_in);
|
if (IS_ERR(g)) {
|
err = PTR_ERR(g);
|
ct_dbg("Failed to create pre ct group");
|
goto err_flow_grp;
|
}
|
pre_ct->flow_grp = g;
|
|
/* create miss group */
|
memset(flow_group_in, 0, inlen);
|
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
|
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
|
g = mlx5_create_flow_group(ft, flow_group_in);
|
if (IS_ERR(g)) {
|
err = PTR_ERR(g);
|
ct_dbg("Failed to create pre ct miss group");
|
goto err_miss_grp;
|
}
|
pre_ct->miss_grp = g;
|
|
err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
|
if (err)
|
goto err_add_rules;
|
|
kvfree(flow_group_in);
|
return 0;
|
|
err_add_rules:
|
mlx5_destroy_flow_group(pre_ct->miss_grp);
|
err_miss_grp:
|
mlx5_destroy_flow_group(pre_ct->flow_grp);
|
err_flow_grp:
|
mlx5_destroy_flow_table(ft);
|
out_free:
|
kvfree(flow_group_in);
|
return err;
|
}
|
|
static void
|
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
|
struct mlx5_tc_ct_pre *pre_ct)
|
{
|
tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
|
mlx5_destroy_flow_group(pre_ct->miss_grp);
|
mlx5_destroy_flow_group(pre_ct->flow_grp);
|
mlx5_destroy_flow_table(pre_ct->ft);
|
}
|
|
static int
|
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
|
{
|
int err;
|
|
err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
|
if (err)
|
return err;
|
|
err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
|
if (err)
|
goto err_pre_ct_nat;
|
|
return 0;
|
|
err_pre_ct_nat:
|
mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
|
return err;
|
}
|
|
static void
|
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
|
{
|
mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
|
mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
|
}
|
|
static struct mlx5_ct_ft *
|
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
|
struct nf_flowtable *nf_ft)
|
{
|
struct mlx5_ct_ft *ft;
|
int err;
|
|
ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
|
if (ft) {
|
refcount_inc(&ft->refcount);
|
return ft;
|
}
|
|
ft = kzalloc(sizeof(*ft), GFP_KERNEL);
|
if (!ft)
|
return ERR_PTR(-ENOMEM);
|
|
err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
|
if (err)
|
goto err_mapping;
|
|
ft->zone = zone;
|
ft->nf_ft = nf_ft;
|
ft->ct_priv = ct_priv;
|
refcount_set(&ft->refcount, 1);
|
|
err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
|
if (err)
|
goto err_alloc_pre_ct;
|
|
err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
|
if (err)
|
goto err_init;
|
|
err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
|
zone_params);
|
if (err)
|
goto err_insert;
|
|
err = nf_flow_table_offload_add_cb(ft->nf_ft,
|
mlx5_tc_ct_block_flow_offload, ft);
|
if (err)
|
goto err_add_cb;
|
|
return ft;
|
|
err_add_cb:
|
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
|
err_insert:
|
rhashtable_destroy(&ft->ct_entries_ht);
|
err_init:
|
mlx5_tc_ct_free_pre_ct_tables(ft);
|
err_alloc_pre_ct:
|
mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
|
err_mapping:
|
kfree(ft);
|
return ERR_PTR(err);
|
}
|
|
static void
|
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
|
{
|
struct mlx5_ct_entry *entry = ptr;
|
|
mlx5_tc_ct_entry_put(entry);
|
}
|
|
static void
|
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
|
{
|
struct mlx5e_priv *priv;
|
|
if (!refcount_dec_and_test(&ft->refcount))
|
return;
|
|
nf_flow_table_offload_del_cb(ft->nf_ft,
|
mlx5_tc_ct_block_flow_offload, ft);
|
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
|
rhashtable_free_and_destroy(&ft->ct_entries_ht,
|
mlx5_tc_ct_flush_ft_entry,
|
ct_priv);
|
priv = netdev_priv(ct_priv->netdev);
|
flush_workqueue(priv->wq);
|
mlx5_tc_ct_free_pre_ct_tables(ft);
|
mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
|
kfree(ft);
|
}
|
|
/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + ft prio (tc chain)  +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
 * + zone+nat match      +---------------->+ post_ct (see below) +
 * +---------------------+  set zone       +---------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +--------------+
 * + post_ct      + original filter actions
 * + fte_id match +------------------------>
 * +--------------+
 */
|
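/* Illustrative example (not taken from this driver) of the kind of filter
 * pair the model above is built for, assuming $DEV/$DEV2 are offloadable
 * representors:
 *
 *   tc filter add dev $DEV ingress chain 0 proto ip flower \
 *       ip_proto tcp ct_state -trk \
 *       action ct zone 5 pipe action goto chain 1
 *   tc filter add dev $DEV ingress chain 1 proto ip flower \
 *       ct_state +trk+est ct_zone 5 \
 *       action mirred egress redirect dev $DEV2
 *
 * The chain 0 rule becomes the pre_ct/CT/post_ct pipeline, and the chain 1
 * rule matches on the registers the pipeline sets.
 */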
static struct mlx5_flow_handle *
|
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5e_tc_flow *flow,
|
struct mlx5_flow_spec *orig_spec,
|
struct mlx5_flow_attr *attr)
|
{
|
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
|
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
|
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
|
u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
|
struct mlx5_flow_spec *post_ct_spec = NULL;
|
struct mlx5_flow_attr *pre_ct_attr;
|
struct mlx5_modify_hdr *mod_hdr;
|
struct mlx5_flow_handle *rule;
|
struct mlx5_ct_flow *ct_flow;
|
int chain_mapping = 0, err;
|
struct mlx5_ct_ft *ft;
|
u32 fte_id = 1;
|
|
post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
|
ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
|
if (!post_ct_spec || !ct_flow) {
|
kfree(post_ct_spec);
|
kfree(ct_flow);
|
return ERR_PTR(-ENOMEM);
|
}
|
|
/* Register for CT established events */
|
ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
|
attr->ct_attr.nf_ft);
|
if (IS_ERR(ft)) {
|
err = PTR_ERR(ft);
|
ct_dbg("Failed to register to ft callback");
|
goto err_ft;
|
}
|
ct_flow->ft = ft;
|
|
err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
|
MLX5_FTE_ID_MAX, GFP_KERNEL);
|
if (err) {
|
netdev_warn(priv->netdev,
|
"Failed to allocate fte id, err: %d\n", err);
|
goto err_idr;
|
}
|
ct_flow->fte_id = fte_id;
|
|
/* Base flow attributes of both rules on original rule attribute */
|
ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
|
if (!ct_flow->pre_ct_attr) {
|
err = -ENOMEM;
|
goto err_alloc_pre;
|
}
|
|
ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
|
if (!ct_flow->post_ct_attr) {
|
err = -ENOMEM;
|
goto err_alloc_post;
|
}
|
|
pre_ct_attr = ct_flow->pre_ct_attr;
|
memcpy(pre_ct_attr, attr, attr_sz);
|
memcpy(ct_flow->post_ct_attr, attr, attr_sz);
|
|
/* Modify the original rule's action to fwd and modify, leave decap */
|
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
|
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
|
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
|
|
/* Write chain miss tag for miss in ct table as we
 * don't go through all prios of this chain as normal tc rules
 * miss.
 */
|
err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
|
&chain_mapping);
|
if (err) {
|
ct_dbg("Failed to get chain register mapping for chain");
|
goto err_get_chain;
|
}
|
ct_flow->chain_mapping = chain_mapping;
|
|
err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
|
CHAIN_TO_REG, chain_mapping);
|
if (err) {
|
ct_dbg("Failed to set chain register mapping");
|
goto err_mapping;
|
}
|
|
err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
|
FTEID_TO_REG, fte_id);
|
if (err) {
|
ct_dbg("Failed to set fte_id register mapping");
|
goto err_mapping;
|
}
|
|
/* If original flow is decap, we do it before going into ct table
|
* so add a rewrite for the tunnel match_id.
|
*/
|
if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
|
attr->chain == 0) {
|
u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
|
|
err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
|
ct_priv->ns_type,
|
TUNNEL_TO_REG,
|
tun_id);
|
if (err) {
|
ct_dbg("Failed to set tunnel register mapping");
|
goto err_mapping;
|
}
|
}
|
|
mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
|
pre_mod_acts.num_actions,
|
pre_mod_acts.actions);
|
if (IS_ERR(mod_hdr)) {
|
err = PTR_ERR(mod_hdr);
|
ct_dbg("Failed to create pre ct mod hdr");
|
goto err_mapping;
|
}
|
pre_ct_attr->modify_hdr = mod_hdr;
|
|
/* Post ct rule matches on fte_id and executes original rule's
|
* tc rule action
|
*/
|
mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
|
fte_id, MLX5_FTE_ID_MASK);
|
|
/* Put post_ct rule on post_ct flow table */
|
ct_flow->post_ct_attr->chain = 0;
|
ct_flow->post_ct_attr->prio = 0;
|
ct_flow->post_ct_attr->ft = ct_priv->post_ct;
|
|
ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
|
ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
|
ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
|
rule = mlx5_tc_rule_insert(priv, post_ct_spec,
|
ct_flow->post_ct_attr);
|
ct_flow->post_ct_rule = rule;
|
if (IS_ERR(ct_flow->post_ct_rule)) {
|
err = PTR_ERR(ct_flow->post_ct_rule);
|
ct_dbg("Failed to add post ct rule");
|
goto err_insert_post_ct;
|
}
|
|
/* Point the original rule at the ct table */
|
pre_ct_attr->dest_chain = 0;
|
pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
|
ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
|
pre_ct_attr);
|
if (IS_ERR(ct_flow->pre_ct_rule)) {
|
err = PTR_ERR(ct_flow->pre_ct_rule);
|
ct_dbg("Failed to add pre ct rule");
|
goto err_insert_orig;
|
}
|
|
attr->ct_attr.ct_flow = ct_flow;
|
dealloc_mod_hdr_actions(&pre_mod_acts);
|
kfree(post_ct_spec);
|
|
return rule;
|
|
err_insert_orig:
|
mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
|
ct_flow->post_ct_attr);
|
err_insert_post_ct:
|
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
|
err_mapping:
|
dealloc_mod_hdr_actions(&pre_mod_acts);
|
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
|
err_get_chain:
|
kfree(ct_flow->post_ct_attr);
|
err_alloc_post:
|
kfree(ct_flow->pre_ct_attr);
|
err_alloc_pre:
|
idr_remove(&ct_priv->fte_ids, fte_id);
|
err_idr:
|
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
|
err_ft:
|
kfree(post_ct_spec);
|
kfree(ct_flow);
|
netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
|
return ERR_PTR(err);
|
}
|
|
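/* A ct clear action does not go through the ct tables at all: the original
 * rule just gains a mod-header that zeroes the ct registers (state, mark,
 * labels and zone restore id).
 */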
static struct mlx5_flow_handle *
|
__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5_flow_spec *orig_spec,
|
struct mlx5_flow_attr *attr,
|
struct mlx5e_tc_mod_hdr_acts *mod_acts)
|
{
|
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
|
u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
|
struct mlx5_flow_attr *pre_ct_attr;
|
struct mlx5_modify_hdr *mod_hdr;
|
struct mlx5_flow_handle *rule;
|
struct mlx5_ct_flow *ct_flow;
|
int err;
|
|
ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
|
if (!ct_flow)
|
return ERR_PTR(-ENOMEM);
|
|
/* Base esw attributes on original rule attribute */
|
pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
|
if (!pre_ct_attr) {
|
err = -ENOMEM;
|
goto err_attr;
|
}
|
|
memcpy(pre_ct_attr, attr, attr_sz);
|
|
err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
|
if (err) {
|
ct_dbg("Failed to set register for ct clear");
|
goto err_set_registers;
|
}
|
|
mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
|
mod_acts->num_actions,
|
mod_acts->actions);
|
if (IS_ERR(mod_hdr)) {
|
err = PTR_ERR(mod_hdr);
|
ct_dbg("Failed to add create ct clear mod hdr");
|
goto err_set_registers;
|
}
|
|
dealloc_mod_hdr_actions(mod_acts);
|
pre_ct_attr->modify_hdr = mod_hdr;
|
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
|
|
rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
|
if (IS_ERR(rule)) {
|
err = PTR_ERR(rule);
|
ct_dbg("Failed to add ct clear rule");
|
goto err_insert;
|
}
|
|
attr->ct_attr.ct_flow = ct_flow;
|
ct_flow->pre_ct_attr = pre_ct_attr;
|
ct_flow->pre_ct_rule = rule;
|
return rule;
|
|
err_insert:
|
mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
|
err_set_registers:
|
netdev_warn(priv->netdev,
|
"Failed to offload ct clear flow, err %d\n", err);
|
kfree(pre_ct_attr);
|
err_attr:
|
kfree(ct_flow);
|
|
return ERR_PTR(err);
|
}
|
|
struct mlx5_flow_handle *
|
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
|
struct mlx5e_tc_flow *flow,
|
struct mlx5_flow_spec *spec,
|
struct mlx5_flow_attr *attr,
|
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
|
{
|
bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
|
struct mlx5_flow_handle *rule;
|
|
if (!priv)
|
return ERR_PTR(-EOPNOTSUPP);
|
|
mutex_lock(&priv->control_lock);
|
|
if (clear_action)
|
rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
|
else
|
rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
|
mutex_unlock(&priv->control_lock);
|
|
return rule;
|
}
|
|
static void
|
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
|
struct mlx5e_tc_flow *flow,
|
struct mlx5_ct_flow *ct_flow)
|
{
|
struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
|
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
|
|
mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
|
pre_ct_attr);
|
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
|
|
if (ct_flow->post_ct_rule) {
|
mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
|
ct_flow->post_ct_attr);
|
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
|
idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
|
mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
|
}
|
|
kfree(ct_flow->pre_ct_attr);
|
kfree(ct_flow->post_ct_attr);
|
kfree(ct_flow);
|
}
|
|
void
|
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
|
struct mlx5e_tc_flow *flow,
|
struct mlx5_flow_attr *attr)
|
{
|
struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
|
|
/* We are called on error to clean up stuff from parsing
|
* but we don't have anything for now
|
*/
|
if (!ct_flow)
|
return;
|
|
mutex_lock(&priv->control_lock);
|
__mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
|
mutex_unlock(&priv->control_lock);
|
}
|
|
static int
|
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
|
const char **err_msg)
|
{
|
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
|
*err_msg = "firmware level support is missing";
|
return -EOPNOTSUPP;
|
}
|
|
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
|
/* vlan workaround should be avoided for multi chain rules.
|
* This is just a sanity check as pop vlan action should
|
* be supported by any FW that supports ignore_flow_level
|
*/
|
|
*err_msg = "firmware vlan actions support is missing";
|
return -EOPNOTSUPP;
|
}
|
|
if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
|
fdb_modify_header_fwd_to_table)) {
|
/* CT always writes to registers which are mod header actions.
 * Therefore, mod header and goto are required
 */
|
|
*err_msg = "firmware fwd and modify support is missing";
|
return -EOPNOTSUPP;
|
}
|
|
if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
|
*err_msg = "register loopback isn't supported";
|
return -EOPNOTSUPP;
|
}
|
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
|
const char **err_msg)
|
{
|
if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
|
*err_msg = "firmware level support is missing";
|
return -EOPNOTSUPP;
|
}
|
|
return 0;
|
}
|
|
static int
|
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
|
enum mlx5_flow_namespace_type ns_type,
|
const char **err_msg)
|
{
|
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
|
|
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
|
/* cannot restore chain ID on HW miss */
|
|
*err_msg = "tc skb extension missing";
|
return -EOPNOTSUPP;
|
#endif
|
if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
|
return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
|
else
|
return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
|
}
|
|
#define INIT_ERR_PREFIX "tc ct offload init failed"
|
|
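/* Allocate the per-namespace ct offload context: the zone and label mapping
 * contexts, the ct, ct_nat and post_ct global tables and the supporting
 * hashtables. Returns NULL (leaving ct offload disabled) if the device lacks
 * the required capabilities or any allocation fails.
 */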
struct mlx5_tc_ct_priv *
|
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
|
struct mod_hdr_tbl *mod_hdr,
|
enum mlx5_flow_namespace_type ns_type)
|
{
|
struct mlx5_tc_ct_priv *ct_priv;
|
struct mlx5_core_dev *dev;
|
const char *msg;
|
int err;
|
|
dev = priv->mdev;
|
err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
|
if (err) {
|
mlx5_core_warn(dev,
|
"tc ct offload not supported, %s\n",
|
msg);
|
goto err_support;
|
}
|
|
ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
|
if (!ct_priv)
|
goto err_alloc;
|
|
ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
|
if (IS_ERR(ct_priv->zone_mapping)) {
|
err = PTR_ERR(ct_priv->zone_mapping);
|
goto err_mapping_zone;
|
}
|
|
ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
|
if (IS_ERR(ct_priv->labels_mapping)) {
|
err = PTR_ERR(ct_priv->labels_mapping);
|
goto err_mapping_labels;
|
}
|
|
spin_lock_init(&ct_priv->ht_lock);
|
ct_priv->ns_type = ns_type;
|
ct_priv->chains = chains;
|
ct_priv->netdev = priv->netdev;
|
ct_priv->dev = priv->mdev;
|
ct_priv->mod_hdr_tbl = mod_hdr;
|
ct_priv->ct = mlx5_chains_create_global_table(chains);
|
if (IS_ERR(ct_priv->ct)) {
|
err = PTR_ERR(ct_priv->ct);
|
mlx5_core_warn(dev,
|
"%s, failed to create ct table err: %d\n",
|
INIT_ERR_PREFIX, err);
|
goto err_ct_tbl;
|
}
|
|
ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
|
if (IS_ERR(ct_priv->ct_nat)) {
|
err = PTR_ERR(ct_priv->ct_nat);
|
mlx5_core_warn(dev,
|
"%s, failed to create ct nat table err: %d\n",
|
INIT_ERR_PREFIX, err);
|
goto err_ct_nat_tbl;
|
}
|
|
ct_priv->post_ct = mlx5_chains_create_global_table(chains);
|
if (IS_ERR(ct_priv->post_ct)) {
|
err = PTR_ERR(ct_priv->post_ct);
|
mlx5_core_warn(dev,
|
"%s, failed to create post ct table err: %d\n",
|
INIT_ERR_PREFIX, err);
|
goto err_post_ct_tbl;
|
}
|
|
idr_init(&ct_priv->fte_ids);
|
mutex_init(&ct_priv->control_lock);
|
rhashtable_init(&ct_priv->zone_ht, &zone_params);
|
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
|
rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
|
|
return ct_priv;
|
|
err_post_ct_tbl:
|
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
|
err_ct_nat_tbl:
|
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
|
err_ct_tbl:
|
mapping_destroy(ct_priv->labels_mapping);
|
err_mapping_labels:
|
mapping_destroy(ct_priv->zone_mapping);
|
err_mapping_zone:
|
kfree(ct_priv);
|
err_alloc:
|
err_support:
|
|
return NULL;
|
}
|
|
void
|
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
|
{
|
struct mlx5_fs_chains *chains;
|
|
if (!ct_priv)
|
return;
|
|
chains = ct_priv->chains;
|
|
mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
|
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
|
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
|
mapping_destroy(ct_priv->zone_mapping);
|
mapping_destroy(ct_priv->labels_mapping);
|
|
rhashtable_destroy(&ct_priv->ct_tuples_ht);
|
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
|
rhashtable_destroy(&ct_priv->zone_ht);
|
mutex_destroy(&ct_priv->control_lock);
|
idr_destroy(&ct_priv->fte_ids);
|
kfree(ct_priv);
|
}
|
|
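/* Called on a hardware miss: recover the zone from the zone restore id,
 * rebuild the packet's tuple from the skb, look up the offloaded entry and
 * re-attach its conntrack state to the skb via the restore cookie. Returns
 * false if the skb cannot be associated with an offloaded connection.
 */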
bool
|
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
|
struct sk_buff *skb, u8 zone_restore_id)
|
{
|
struct mlx5_ct_tuple tuple = {};
|
struct mlx5_ct_entry *entry;
|
u16 zone;
|
|
if (!ct_priv || !zone_restore_id)
|
return true;
|
|
if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
|
return false;
|
|
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
|
return false;
|
|
spin_lock(&ct_priv->ht_lock);
|
|
entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
|
if (!entry) {
|
spin_unlock(&ct_priv->ht_lock);
|
return false;
|
}
|
|
if (IS_ERR(entry)) {
|
spin_unlock(&ct_priv->ht_lock);
|
return false;
|
}
|
spin_unlock(&ct_priv->ht_lock);
|
|
tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
|
__mlx5_tc_ct_entry_put(entry);
|
|
return true;
|
}
|