From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] change system file
---
kernel/net/openvswitch/datapath.c | 505 +++++++++++++++++++++++++++++++++++++------------------
1 files changed, 339 insertions(+), 166 deletions(-)
diff --git a/kernel/net/openvswitch/datapath.c b/kernel/net/openvswitch/datapath.c
index f350fae..b625ab5 100644
--- a/kernel/net/openvswitch/datapath.c
+++ b/kernel/net/openvswitch/datapath.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -143,6 +130,8 @@
const struct dp_upcall_info *,
uint32_t cutlen);
+static void ovs_dp_masks_rebalance(struct work_struct *work);
+
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -192,7 +181,8 @@
struct hlist_head *head;
head = vport_hash_bucket(dp, port_no);
- hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (vport->port_no == port_no)
return vport;
}
@@ -235,31 +225,43 @@
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
+ u32 n_cache_hit;
+ int error;
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+ &n_mask_hit, &n_cache_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
- int error;
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
- if (unlikely(error))
- kfree_skb(skb);
- else
+ switch (error) {
+ case 0:
+ case -EAGAIN:
+ case -ERESTARTSYS:
+ case -EINTR:
consume_skb(skb);
+ break;
+ default:
+ kfree_skb(skb);
+ break;
+ }
stats_counter = &stats->n_missed;
goto out;
}
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
- ovs_execute_actions(dp, skb, sf_acts, key);
+ error = ovs_execute_actions(dp, skb, sf_acts, key);
+ if (unlikely(error))
+ net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+ ovs_dp_name(dp), error);
stats_counter = &stats->n_hit;
@@ -268,6 +270,7 @@
u64_stats_update_begin(&stats->syncp);
(*stats_counter)++;
stats->n_mask_hit += n_mask_hit;
+ stats->n_cache_hit += n_cache_hit;
u64_stats_update_end(&stats->syncp);
}
@@ -306,14 +309,14 @@
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info,
- uint32_t cutlen)
+ uint32_t cutlen)
{
unsigned int gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
int err;
- BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
+ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
if (IS_ERR(segs))
return PTR_ERR(segs);
@@ -330,8 +333,7 @@
}
/* Queue all of the segments. */
- skb = segs;
- do {
+ skb_list_walk_safe(segs, skb, nskb) {
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
@@ -339,17 +341,15 @@
if (err)
break;
- } while ((skb = skb->next));
+ }
/* Free all of the segments. */
- skb = segs;
- do {
- nskb = skb->next;
+ skb_list_walk_safe(segs, skb, nskb) {
if (err)
kfree_skb(skb);
else
consume_skb(skb);
- } while ((skb = nskb));
+ }
return err;
}
@@ -359,7 +359,8 @@
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@ -402,6 +403,7 @@
size_t len;
unsigned int hlen;
int err, dp_ifindex;
+ u64 hash;
dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
@@ -448,10 +450,15 @@
upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
0, upcall_info->cmd);
+ if (!upcall) {
+ err = -EINVAL;
+ goto out;
+ }
upcall->dp_ifindex = dp_ifindex;
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
- BUG_ON(err);
+ if (err)
+ goto out;
if (upcall_info->userdata)
__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -459,15 +466,26 @@
nla_data(upcall_info->userdata));
if (upcall_info->egress_tun_info) {
- nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+ nla = nla_nest_start_noflag(user_skb,
+ OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+ if (!nla) {
+ err = -EMSGSIZE;
+ goto out;
+ }
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
- BUG_ON(err);
+ if (err)
+ goto out;
+
nla_nest_end(user_skb, nla);
}
if (upcall_info->actions_len) {
- nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+ nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
+ if (!nla) {
+ err = -EMSGSIZE;
+ goto out;
+ }
err = ovs_nla_put_actions(upcall_info->actions,
upcall_info->actions_len,
user_skb);
@@ -478,23 +496,30 @@
}
/* Add OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru) {
- if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
- upcall_info->mru)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (upcall_info->mru &&
+ nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
- if (cutlen > 0) {
- if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
- skb->len)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (cutlen > 0 &&
+ nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ /* Add OVS_PACKET_ATTR_HASH */
+ hash = skb_get_hash_raw(skb);
+ if (skb->sw_hash)
+ hash |= OVS_PACKET_HASH_SW_BIT;
+
+ if (skb->l4_hash)
+ hash |= OVS_PACKET_HASH_L4_BIT;
+
+ if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Only reserve room for attribute header, packet data is added
@@ -519,8 +544,9 @@
out:
if (err)
skb_tx_error(skb);
- kfree_skb(user_skb);
- kfree_skb(nskb);
+ consume_skb(user_skb);
+ consume_skb(nskb);
+
return err;
}
@@ -536,6 +562,7 @@
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
+ u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -560,6 +587,14 @@
packet->ignore_df = 1;
}
OVS_CB(packet)->mru = mru;
+
+ if (a[OVS_PACKET_ATTR_HASH]) {
+ hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+ __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+ !!(hash & OVS_PACKET_HASH_SW_BIT),
+ !!(hash & OVS_PACKET_HASH_L4_BIT));
+ }
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
@@ -622,12 +657,13 @@
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+ [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
-static const struct genl_ops dp_packet_genl_ops[] = {
+static const struct genl_small_ops dp_packet_genl_ops[] = {
{ .cmd = OVS_PACKET_CMD_EXECUTE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = packet_policy,
.doit = ovs_packet_cmd_execute
}
};
@@ -637,10 +673,11 @@
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
.maxattr = OVS_PACKET_ATTR_MAX,
+ .policy = packet_policy,
.netnsok = true,
.parallel_ops = true,
- .ops = dp_packet_genl_ops,
- .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+ .small_ops = dp_packet_genl_ops,
+ .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
.module = THIS_MODULE,
};
@@ -672,6 +709,7 @@
stats->n_missed += local_stats.n_missed;
stats->n_lost += local_stats.n_lost;
mega_stats->n_mask_hit += local_stats.n_mask_hit;
+ mega_stats->n_cache_hit += local_stats.n_cache_hit;
}
}
@@ -768,7 +806,7 @@
* This can only fail for dump operations because the skb is always
* properly sized for single flows.
*/
- start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
+ start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
if (start) {
const struct sw_flow_actions *sf_acts;
@@ -895,6 +933,7 @@
struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
+ struct sw_flow_key *key;
struct sw_flow_actions *acts;
struct sw_flow_match match;
u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -922,30 +961,32 @@
}
/* Extract key. */
- ovs_match_init(&match, &new_flow->key, false, &mask);
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
+ if (!key) {
+ error = -ENOMEM;
+ goto err_kfree_flow;
+ }
+
+ ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
+
+ ovs_flow_mask_key(&new_flow->key, key, true, &mask);
/* Extract flow identifier. */
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
- &new_flow->key, log);
+ key, log);
if (error)
- goto err_kfree_flow;
-
- /* unmasked key is needed to match when ufid is not used. */
- if (ovs_identifier_is_key(&new_flow->id))
- match.key = new_flow->id.unmasked_key;
-
- ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -966,7 +1007,7 @@
if (ovs_identifier_is_ufid(&new_flow->id))
flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
if (!flow)
- flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
+ flow = ovs_flow_tbl_lookup(&dp->table, key);
if (likely(!flow)) {
rcu_assign_pointer(new_flow->sf_acts, acts);
@@ -1036,6 +1077,8 @@
if (reply)
ovs_notify(&dp_flow_genl_family, reply, info);
+
+ kfree(key);
return 0;
err_unlock_ovs:
@@ -1043,6 +1086,8 @@
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
+err_kfree_key:
+ kfree(key);
err_kfree_flow:
ovs_flow_free(new_flow, false);
error:
@@ -1050,11 +1095,12 @@
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(struct net *net,
- const struct nlattr *a,
- const struct sw_flow_key *key,
- const struct sw_flow_mask *mask,
- bool log)
+static noinline_for_stack
+struct sw_flow_actions *get_flow_actions(struct net *net,
+ const struct nlattr *a,
+ const struct sw_flow_key *key,
+ const struct sw_flow_mask *mask,
+ bool log)
{
struct sw_flow_actions *acts;
struct sw_flow_key masked_key;
@@ -1084,12 +1130,13 @@
* we should not to return match object with dangling reference
* to mask.
* */
-static int ovs_nla_init_match_and_action(struct net *net,
- struct sw_flow_match *match,
- struct sw_flow_key *key,
- struct nlattr **a,
- struct sw_flow_actions **acts,
- bool log)
+static noinline_for_stack int
+ovs_nla_init_match_and_action(struct net *net,
+ struct sw_flow_match *match,
+ struct sw_flow_key *key,
+ struct nlattr **a,
+ struct sw_flow_actions **acts,
+ bool log)
{
struct sw_flow_mask mask;
int error = 0;
@@ -1189,14 +1236,14 @@
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW,
+ OVS_FLOW_CMD_SET,
ufid_flags);
BUG_ON(error < 0);
}
} else {
/* Could not alloc without acts before locking. */
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
- info, OVS_FLOW_CMD_NEW, false,
+ info, OVS_FLOW_CMD_SET, false,
ufid_flags);
if (IS_ERR(reply)) {
@@ -1272,7 +1319,7 @@
}
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
- OVS_FLOW_CMD_NEW, true, ufid_flags);
+ OVS_FLOW_CMD_GET, true, ufid_flags);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -1337,7 +1384,7 @@
reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
&flow->id, info, false, ufid_flags);
if (likely(reply)) {
- if (likely(!IS_ERR(reply))) {
+ if (!IS_ERR(reply)) {
rcu_read_lock(); /*To keep RCU checker happy. */
err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
reply, info->snd_portid,
@@ -1352,7 +1399,8 @@
ovs_notify(&dp_flow_genl_family, reply, info);
} else {
- netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
+ PTR_ERR(reply));
}
}
@@ -1373,8 +1421,8 @@
u32 ufid_flags;
int err;
- err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
- OVS_FLOW_ATTR_MAX, flow_policy, NULL);
+ err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
+ OVS_FLOW_ATTR_MAX, flow_policy, NULL);
if (err)
return err;
ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -1400,7 +1448,7 @@
if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_FLOW_CMD_NEW, ufid_flags) < 0)
+ OVS_FLOW_CMD_GET, ufid_flags) < 0)
break;
cb->args[0] = bucket;
@@ -1420,26 +1468,26 @@
[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
-static const struct genl_ops dp_flow_genl_ops[] = {
+static const struct genl_small_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = flow_policy,
.doit = ovs_flow_cmd_new
},
{ .cmd = OVS_FLOW_CMD_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = flow_policy,
.doit = ovs_flow_cmd_del
},
{ .cmd = OVS_FLOW_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = 0, /* OK for unprivileged users. */
- .policy = flow_policy,
.doit = ovs_flow_cmd_get,
.dumpit = ovs_flow_cmd_dump
},
{ .cmd = OVS_FLOW_CMD_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = flow_policy,
.doit = ovs_flow_cmd_set,
},
};
@@ -1449,10 +1497,11 @@
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
.maxattr = OVS_FLOW_ATTR_MAX,
+ .policy = flow_policy,
.netnsok = true,
.parallel_ops = true,
- .ops = dp_flow_genl_ops,
- .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+ .small_ops = dp_flow_genl_ops,
+ .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
.mcgrps = &ovs_dp_flow_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
@@ -1466,6 +1515,7 @@
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+ msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
return msgsize;
}
@@ -1480,7 +1530,7 @@
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
- flags, cmd);
+ flags, cmd);
if (!ovs_header)
goto error;
@@ -1501,6 +1551,10 @@
goto nla_put_failure;
if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
+ ovs_flow_tbl_masks_cache_size(&dp->table)))
goto nla_put_failure;
genlmsg_end(skb, ovs_header);
@@ -1535,22 +1589,84 @@
return dp ? dp : ERR_PTR(-ENODEV);
}
-static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
+static void ovs_dp_reset_user_features(struct sk_buff *skb,
+ struct genl_info *info)
{
struct datapath *dp;
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+ info->attrs);
if (IS_ERR(dp))
return;
- WARN(dp->user_features, "Dropping previously announced user features\n");
+ pr_warn("%s: Dropping previously announced user features\n",
+ ovs_dp_name(dp));
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
- if (a[OVS_DP_ATTR_USER_FEATURES])
- dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+ u32 user_features = 0;
+
+ if (a[OVS_DP_ATTR_USER_FEATURES]) {
+ user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+ if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+ OVS_DP_F_UNALIGNED |
+ OVS_DP_F_TC_RECIRC_SHARING))
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
+ int err;
+ u32 cache_size;
+
+ cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
+ err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
+ if (err)
+ return err;
+ }
+
+ dp->user_features = user_features;
+
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ static_branch_enable(&tc_recirc_sharing_support);
+ else
+ static_branch_disable(&tc_recirc_sharing_support);
+
+ return 0;
+}
+
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1561,7 +1677,7 @@
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1574,35 +1690,26 @@
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1612,10 +1719,12 @@
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- ovs_dp_change(dp, a);
-
/* So far only local changes have been made, now need the lock. */
ovs_lock();
+
+ err = ovs_dp_change(dp, a);
+ if (err)
+ goto err_unlock_and_destroy_meters;
vport = new_vport(&parms);
if (IS_ERR(vport)) {
@@ -1632,7 +1741,7 @@
ovs_dp_reset_user_features(skb, info);
}
- goto err_destroy_meters;
+ goto err_unlock_and_destroy_meters;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1647,18 +1756,18 @@
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_meters:
+err_unlock_and_destroy_meters:
ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
@@ -1667,6 +1776,7 @@
/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
+ struct flow_table *table = &dp->table;
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
@@ -1685,7 +1795,14 @@
*/
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
- /* RCU destroy the flow table */
+ /* Flush sw_flow in the tables. RCU cb only releases resource
+ * such as dp, ports and tables. That may avoid some issues
+ * such as RCU usage warning.
+ */
+ table_instance_flow_flush(table, ovsl_dereference(table->ti),
+ ovsl_dereference(table->ufid_ti));
+
+ /* RCU destroy the ports, meters and flow tables. */
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@@ -1738,10 +1855,12 @@
if (IS_ERR(dp))
goto err_unlock_free;
- ovs_dp_change(dp, info->attrs);
+ err = ovs_dp_change(dp, info->attrs);
+ if (err)
+ goto err_unlock_free;
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_NEW);
+ info->snd_seq, 0, OVS_DP_CMD_SET);
BUG_ON(err < 0);
ovs_unlock();
@@ -1772,7 +1891,7 @@
goto err_unlock_free;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_NEW);
+ info->snd_seq, 0, OVS_DP_CMD_GET);
BUG_ON(err < 0);
ovs_unlock();
@@ -1796,7 +1915,7 @@
if (i >= skip &&
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_DP_CMD_NEW) < 0)
+ OVS_DP_CMD_GET) < 0)
break;
i++;
}
@@ -1811,28 +1930,30 @@
[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+ [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
+ PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
};
-static const struct genl_ops dp_datapath_genl_ops[] = {
+static const struct genl_small_ops dp_datapath_genl_ops[] = {
{ .cmd = OVS_DP_CMD_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = datapath_policy,
.doit = ovs_dp_cmd_new
},
{ .cmd = OVS_DP_CMD_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = datapath_policy,
.doit = ovs_dp_cmd_del
},
{ .cmd = OVS_DP_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = 0, /* OK for unprivileged users. */
- .policy = datapath_policy,
.doit = ovs_dp_cmd_get,
.dumpit = ovs_dp_cmd_dump
},
{ .cmd = OVS_DP_CMD_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = datapath_policy,
.doit = ovs_dp_cmd_set,
},
};
@@ -1842,10 +1963,11 @@
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
.maxattr = OVS_DP_ATTR_MAX,
+ .policy = datapath_policy,
.netnsok = true,
.parallel_ops = true,
- .ops = dp_datapath_genl_ops,
- .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+ .small_ops = dp_datapath_genl_ops,
+ .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
.mcgrps = &ovs_dp_datapath_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
@@ -1964,16 +2086,16 @@
}
-/* Called with ovs_mutex */
-static void update_headroom(struct datapath *dp)
+static unsigned int ovs_get_max_headroom(struct datapath *dp)
{
- unsigned dev_headroom, max_headroom = 0;
+ unsigned int dev_headroom, max_headroom = 0;
struct net_device *dev;
struct vport *vport;
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held()) {
dev = vport->dev;
dev_headroom = netdev_get_fwd_headroom(dev);
if (dev_headroom > max_headroom)
@@ -1981,10 +2103,21 @@
}
}
- dp->max_headroom = max_headroom;
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
- netdev_set_rx_headroom(vport->dev, max_headroom);
+ return max_headroom;
+}
+
+/* Called with ovs_mutex */
+static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
+{
+ struct vport *vport;
+ int i;
+
+ dp->max_headroom = new_headroom;
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held())
+ netdev_set_rx_headroom(vport->dev, new_headroom);
+ }
}
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1995,6 +2128,7 @@
struct sk_buff *reply;
struct vport *vport;
struct datapath *dp;
+ unsigned int new_headroom;
u32 port_no;
int err;
@@ -2056,8 +2190,10 @@
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW, GFP_KERNEL);
- if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
- update_headroom(dp);
+ new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+ if (new_headroom > dp->max_headroom)
+ ovs_update_headroom(dp, new_headroom);
else
netdev_set_rx_headroom(vport->dev, dp->max_headroom);
@@ -2113,7 +2249,7 @@
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW, GFP_ATOMIC);
+ OVS_VPORT_CMD_SET, GFP_KERNEL);
BUG_ON(err < 0);
ovs_unlock();
@@ -2128,11 +2264,12 @@
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- bool must_update_headroom = false;
+ bool update_headroom = false;
struct nlattr **a = info->attrs;
struct sk_buff *reply;
struct datapath *dp;
struct vport *vport;
+ unsigned int new_headroom;
int err;
reply = ovs_vport_cmd_alloc_info();
@@ -2158,12 +2295,17 @@
/* the vport deletion may trigger dp headroom update */
dp = vport->dp;
if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
- must_update_headroom = true;
+ update_headroom = true;
+
netdev_reset_rx_headroom(vport->dev);
ovs_dp_detach_port(vport);
- if (must_update_headroom)
- update_headroom(dp);
+ if (update_headroom) {
+ new_headroom = ovs_get_max_headroom(dp);
+
+ if (new_headroom < dp->max_headroom)
+ ovs_update_headroom(dp, new_headroom);
+ }
ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2194,7 +2336,7 @@
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW, GFP_ATOMIC);
+ OVS_VPORT_CMD_GET, GFP_ATOMIC);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2230,7 +2372,7 @@
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_NEW,
+ OVS_VPORT_CMD_GET,
GFP_ATOMIC) < 0)
goto out;
@@ -2247,6 +2389,23 @@
return skb->len;
}
+static void ovs_dp_masks_rebalance(struct work_struct *work)
+{
+ struct ovs_net *ovs_net = container_of(work, struct ovs_net,
+ masks_rebalance.work);
+ struct datapath *dp;
+
+ ovs_lock();
+
+ list_for_each_entry(dp, &ovs_net->dps, list_node)
+ ovs_flow_masks_rebalance(&dp->table);
+
+ ovs_unlock();
+
+ schedule_delayed_work(&ovs_net->masks_rebalance,
+ msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
+}
+
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
@@ -2258,26 +2417,26 @@
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
};
-static const struct genl_ops dp_vport_genl_ops[] = {
+static const struct genl_small_ops dp_vport_genl_ops[] = {
{ .cmd = OVS_VPORT_CMD_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = vport_policy,
.doit = ovs_vport_cmd_new
},
{ .cmd = OVS_VPORT_CMD_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = vport_policy,
.doit = ovs_vport_cmd_del
},
{ .cmd = OVS_VPORT_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = 0, /* OK for unprivileged users. */
- .policy = vport_policy,
.doit = ovs_vport_cmd_get,
.dumpit = ovs_vport_cmd_dump
},
{ .cmd = OVS_VPORT_CMD_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = vport_policy,
.doit = ovs_vport_cmd_set,
},
};
@@ -2287,10 +2446,11 @@
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
.maxattr = OVS_VPORT_ATTR_MAX,
+ .policy = vport_policy,
.netnsok = true,
.parallel_ops = true,
- .ops = dp_vport_genl_ops,
- .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
+ .small_ops = dp_vport_genl_ops,
+ .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
.mcgrps = &ovs_dp_vport_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
@@ -2337,10 +2497,19 @@
static int __net_init ovs_init_net(struct net *net)
{
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ int err;
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
- return ovs_ct_init(net);
+ INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
+
+ err = ovs_ct_init(net);
+ if (err)
+ return err;
+
+ schedule_delayed_work(&ovs_net->masks_rebalance,
+ msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
+ return 0;
}
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
@@ -2374,8 +2543,10 @@
struct net *net;
LIST_HEAD(head);
- ovs_ct_exit(dnet);
ovs_lock();
+
+ ovs_ct_exit(dnet);
+
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
@@ -2392,6 +2563,7 @@
ovs_unlock();
+ cancel_delayed_work_sync(&ovs_net->masks_rebalance);
cancel_work_sync(&ovs_net->dp_notify_work);
}
@@ -2406,7 +2578,8 @@
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
+ sizeof_field(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
--
Gitblit v1.6.2