From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 22 Oct 2024 10:36:11 +0000 Subject: [PATCH] 修改4g拨号为QMI,需要在系统里后台执行quectel-CM --- kernel/net/openvswitch/datapath.c | 505 +++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 339 insertions(+), 166 deletions(-) diff --git a/kernel/net/openvswitch/datapath.c b/kernel/net/openvswitch/datapath.c index f350fae..b625ab5 100644 --- a/kernel/net/openvswitch/datapath.c +++ b/kernel/net/openvswitch/datapath.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2014 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -143,6 +130,8 @@ const struct dp_upcall_info *, uint32_t cutlen); +static void ovs_dp_masks_rebalance(struct work_struct *work); + /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { @@ -192,7 +181,8 @@ struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node, + lockdep_ovsl_is_held()) { if (vport->port_no == port_no) return vport; } @@ -235,31 +225,43 @@ struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; + u32 n_cache_hit; + int error; stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ - flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), + &n_mask_hit, &n_cache_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; - int error; memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; upcall.portid = ovs_vport_find_upcall_portid(p, skb); upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall, 0); - if (unlikely(error)) - kfree_skb(skb); - else + switch (error) { + case 0: + case -EAGAIN: + case -ERESTARTSYS: + case -EINTR: consume_skb(skb); + break; + default: + kfree_skb(skb); + break; + } stats_counter = &stats->n_missed; goto out; } ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); - ovs_execute_actions(dp, skb, sf_acts, key); + error = ovs_execute_actions(dp, skb, sf_acts, key); + if (unlikely(error)) + net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", + ovs_dp_name(dp), error); stats_counter = &stats->n_hit; @@ -268,6 +270,7 @@ u64_stats_update_begin(&stats->syncp); (*stats_counter)++; stats->n_mask_hit += n_mask_hit; + stats->n_cache_hit += n_cache_hit; u64_stats_update_end(&stats->syncp); } @@ -306,14 +309,14 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info, - uint32_t cutlen) + uint32_t cutlen) { unsigned int gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; - BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET); segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); @@ -330,8 +333,7 @@ } /* Queue all of the segments. */ - skb = segs; - do { + skb_list_walk_safe(segs, skb, nskb) { if (gso_type & SKB_GSO_UDP && skb != segs) key = &later_key; @@ -339,17 +341,15 @@ if (err) break; - } while ((skb = skb->next)); + } /* Free all of the segments. */ - skb = segs; - do { - nskb = skb->next; + skb_list_walk_safe(segs, skb, nskb) { if (err) kfree_skb(skb); else consume_skb(skb); - } while ((skb = nskb)); + } return err; } @@ -359,7 +359,8 @@ size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ - + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */ + + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */ + + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */ /* OVS_PACKET_ATTR_USERDATA */ if (upcall_info->userdata) @@ -402,6 +403,7 @@ size_t len; unsigned int hlen; int err, dp_ifindex; + u64 hash; dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) @@ -448,10 +450,15 @@ upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); + if (!upcall) { + err = -EINVAL; + goto out; + } upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); - BUG_ON(err); + if (err) + goto out; if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -459,15 +466,26 @@ nla_data(upcall_info->userdata)); if (upcall_info->egress_tun_info) { - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + nla = nla_nest_start_noflag(user_skb, + OVS_PACKET_ATTR_EGRESS_TUN_KEY); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); - BUG_ON(err); + if (err) + goto out; + nla_nest_end(user_skb, nla); } if (upcall_info->actions_len) { - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_actions(upcall_info->actions, upcall_info->actions_len, user_skb); @@ -478,23 +496,30 @@ } /* Add OVS_PACKET_ATTR_MRU */ - if (upcall_info->mru) { - if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, - upcall_info->mru)) { - err = -ENOBUFS; - goto out; - } - pad_packet(dp, user_skb); + if (upcall_info->mru && + nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) { + err = -ENOBUFS; + goto out; } /* Add OVS_PACKET_ATTR_LEN when packet is truncated */ - if (cutlen > 0) { - if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, - skb->len)) { - err = -ENOBUFS; - goto out; - } - pad_packet(dp, user_skb); + if (cutlen > 0 && + nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) { + err = -ENOBUFS; + goto out; + } + + /* Add OVS_PACKET_ATTR_HASH */ + hash = skb_get_hash_raw(skb); + if (skb->sw_hash) + hash |= OVS_PACKET_HASH_SW_BIT; + + if (skb->l4_hash) + hash |= OVS_PACKET_HASH_L4_BIT; + + if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { + err = -ENOBUFS; + goto out; } /* Only reserve room for attribute header, packet data is added @@ -519,8 +544,9 @@ out: if (err) skb_tx_error(skb); - kfree_skb(user_skb); - kfree_skb(nskb); + consume_skb(user_skb); + consume_skb(nskb); + return err; } @@ -536,6 +562,7 @@ struct datapath *dp; struct vport *input_vport; u16 mru = 0; + u64 hash; int len; int err; bool log = !a[OVS_PACKET_ATTR_PROBE]; @@ -560,6 +587,14 @@ packet->ignore_df = 1; } OVS_CB(packet)->mru = mru; + + if (a[OVS_PACKET_ATTR_HASH]) { + hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]); + + __skb_set_hash(packet, hash & 0xFFFFFFFFULL, + !!(hash & OVS_PACKET_HASH_SW_BIT), + !!(hash & OVS_PACKET_HASH_L4_BIT)); + } /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); @@ -622,12 +657,13 @@ [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, + [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, }; -static const struct genl_ops dp_packet_genl_ops[] = { +static const struct genl_small_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = packet_policy, .doit = ovs_packet_cmd_execute } }; @@ -637,10 +673,11 @@ .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, + .policy = packet_policy, .netnsok = true, .parallel_ops = true, - .ops = dp_packet_genl_ops, - .n_ops = ARRAY_SIZE(dp_packet_genl_ops), + .small_ops = dp_packet_genl_ops, + .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops), .module = THIS_MODULE, }; @@ -672,6 +709,7 @@ stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; mega_stats->n_mask_hit += local_stats.n_mask_hit; + mega_stats->n_cache_hit += local_stats.n_cache_hit; } } @@ -768,7 +806,7 @@ * This can only fail for dump operations because the skb is always * properly sized for single flows. */ - start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); + start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS); if (start) { const struct sw_flow_actions *sf_acts; @@ -895,6 +933,7 @@ struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key *key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -922,30 +961,32 @@ } /* Extract key. */ - ovs_match_init(&match, &new_flow->key, false, &mask); + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (!key) { + error = -ENOMEM; + goto err_kfree_flow; + } + + ovs_match_init(&match, key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) - goto err_kfree_flow; + goto err_kfree_key; + + ovs_flow_mask_key(&new_flow->key, key, true, &mask); /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], - &new_flow->key, log); + key, log); if (error) - goto err_kfree_flow; - - /* unmasked key is needed to match when ufid is not used. */ - if (ovs_identifier_is_key(&new_flow->id)) - match.key = new_flow->id.unmasked_key; - - ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); + goto err_kfree_key; /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); if (error) { OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); - goto err_kfree_flow; + goto err_kfree_key; } reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, @@ -966,7 +1007,7 @@ if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); + flow = ovs_flow_tbl_lookup(&dp->table, key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -1036,6 +1077,8 @@ if (reply) ovs_notify(&dp_flow_genl_family, reply, info); + + kfree(key); return 0; err_unlock_ovs: @@ -1043,6 +1086,8 @@ kfree_skb(reply); err_kfree_acts: ovs_nla_free_flow_actions(acts); +err_kfree_key: + kfree(key); err_kfree_flow: ovs_flow_free(new_flow, false); error: @@ -1050,11 +1095,12 @@ } /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ -static struct sw_flow_actions *get_flow_actions(struct net *net, - const struct nlattr *a, - const struct sw_flow_key *key, - const struct sw_flow_mask *mask, - bool log) +static noinline_for_stack +struct sw_flow_actions *get_flow_actions(struct net *net, + const struct nlattr *a, + const struct sw_flow_key *key, + const struct sw_flow_mask *mask, + bool log) { struct sw_flow_actions *acts; struct sw_flow_key masked_key; @@ -1084,12 +1130,13 @@ * we should not to return match object with dangling reference * to mask. * */ -static int ovs_nla_init_match_and_action(struct net *net, - struct sw_flow_match *match, - struct sw_flow_key *key, - struct nlattr **a, - struct sw_flow_actions **acts, - bool log) +static noinline_for_stack int +ovs_nla_init_match_and_action(struct net *net, + struct sw_flow_match *match, + struct sw_flow_key *key, + struct nlattr **a, + struct sw_flow_actions **acts, + bool log) { struct sw_flow_mask mask; int error = 0; @@ -1189,14 +1236,14 @@ ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW, + OVS_FLOW_CMD_SET, ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false, + info, OVS_FLOW_CMD_SET, false, ufid_flags); if (IS_ERR(reply)) { @@ -1272,7 +1319,7 @@ } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true, ufid_flags); + OVS_FLOW_CMD_GET, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1337,7 +1384,7 @@ reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, &flow->id, info, false, ufid_flags); if (likely(reply)) { - if (likely(!IS_ERR(reply))) { + if (!IS_ERR(reply)) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, @@ -1352,7 +1399,8 @@ ovs_notify(&dp_flow_genl_family, reply, info); } else { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, + PTR_ERR(reply)); } } @@ -1373,8 +1421,8 @@ u32 ufid_flags; int err; - err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, - OVS_FLOW_ATTR_MAX, flow_policy, NULL); + err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -1400,7 +1448,7 @@ if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW, ufid_flags) < 0) + OVS_FLOW_CMD_GET, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1420,26 +1468,26 @@ [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; -static const struct genl_ops dp_flow_genl_ops[] = { +static const struct genl_small_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = flow_policy, .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_set, }, }; @@ -1449,10 +1497,11 @@ .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, + .policy = flow_policy, .netnsok = true, .parallel_ops = true, - .ops = dp_flow_genl_ops, - .n_ops = ARRAY_SIZE(dp_flow_genl_ops), + .small_ops = dp_flow_genl_ops, + .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops), .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, @@ -1466,6 +1515,7 @@ msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ + msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */ return msgsize; } @@ -1480,7 +1530,7 @@ int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, - flags, cmd); + flags, cmd); if (!ovs_header) goto error; @@ -1501,6 +1551,10 @@ goto nla_put_failure; if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) + goto nla_put_failure; + + if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE, + ovs_flow_tbl_masks_cache_size(&dp->table))) goto nla_put_failure; genlmsg_end(skb, ovs_header); @@ -1535,22 +1589,84 @@ return dp ? dp : ERR_PTR(-ENODEV); } -static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) +static void ovs_dp_reset_user_features(struct sk_buff *skb, + struct genl_info *info) { struct datapath *dp; - dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, + info->attrs); if (IS_ERR(dp)) return; - WARN(dp->user_features, "Dropping previously announced user features\n"); + pr_warn("%s: Dropping previously announced user features\n", + ovs_dp_name(dp)); dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) +DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + +static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - if (a[OVS_DP_ATTR_USER_FEATURES]) - dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + u32 user_features = 0; + + if (a[OVS_DP_ATTR_USER_FEATURES]) { + user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + + if (user_features & ~(OVS_DP_F_VPORT_PIDS | + OVS_DP_F_UNALIGNED | + OVS_DP_F_TC_RECIRC_SHARING)) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (user_features & OVS_DP_F_TC_RECIRC_SHARING) + return -EOPNOTSUPP; +#endif + } + + if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) { + int err; + u32 cache_size; + + cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]); + err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size); + if (err) + return err; + } + + dp->user_features = user_features; + + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + static_branch_enable(&tc_recirc_sharing_support); + else + static_branch_disable(&tc_recirc_sharing_support); + + return 0; +} + +static int ovs_dp_stats_init(struct datapath *dp) +{ + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); + if (!dp->stats_percpu) + return -ENOMEM; + + return 0; +} + +static int ovs_dp_vport_init(struct datapath *dp) +{ + int i; + + dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) + return -ENOMEM; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + + return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1561,7 +1677,7 @@ struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err, i; + int err; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@ -1574,35 +1690,26 @@ err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_free_reply; + goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); if (err) - goto err_free_dp; + goto err_destroy_dp; - dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); - if (!dp->stats_percpu) { - err = -ENOMEM; + err = ovs_dp_stats_init(dp); + if (err) goto err_destroy_table; - } - dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, - sizeof(struct hlist_head), - GFP_KERNEL); - if (!dp->ports) { - err = -ENOMEM; - goto err_destroy_percpu; - } - - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_dp_vport_init(dp); + if (err) + goto err_destroy_stats; err = ovs_meters_init(dp); if (err) - goto err_destroy_ports_array; + goto err_destroy_ports; /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); @@ -1612,10 +1719,12 @@ parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); - /* So far only local changes have been made, now need the lock. */ ovs_lock(); + + err = ovs_dp_change(dp, a); + if (err) + goto err_unlock_and_destroy_meters; vport = new_vport(&parms); if (IS_ERR(vport)) { @@ -1632,7 +1741,7 @@ ovs_dp_reset_user_features(skb, info); } - goto err_destroy_meters; + goto err_unlock_and_destroy_meters; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1647,18 +1756,18 @@ ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_meters: +err_unlock_and_destroy_meters: ovs_unlock(); ovs_meters_exit(dp); -err_destroy_ports_array: +err_destroy_ports: kfree(dp->ports); -err_destroy_percpu: +err_destroy_stats: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(&dp->table); -err_free_dp: +err_destroy_dp: kfree(dp); -err_free_reply: +err_destroy_reply: kfree_skb(reply); err: return err; @@ -1667,6 +1776,7 @@ /* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { + struct flow_table *table = &dp->table; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { @@ -1685,7 +1795,14 @@ */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); - /* RCU destroy the flow table */ + /* Flush sw_flow in the tables. RCU cb only releases resource + * such as dp, ports and tables. That may avoid some issues + * such as RCU usage warning. + */ + table_instance_flow_flush(table, ovsl_dereference(table->ti), + ovsl_dereference(table->ufid_ti)); + + /* RCU destroy the ports, meters and flow tables. */ call_rcu(&dp->rcu, destroy_dp_rcu); } @@ -1738,10 +1855,12 @@ if (IS_ERR(dp)) goto err_unlock_free; - ovs_dp_change(dp, info->attrs); + err = ovs_dp_change(dp, info->attrs); + if (err) + goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_DP_CMD_NEW); + info->snd_seq, 0, OVS_DP_CMD_SET); BUG_ON(err < 0); ovs_unlock(); @@ -1772,7 +1891,7 @@ goto err_unlock_free; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_DP_CMD_NEW); + info->snd_seq, 0, OVS_DP_CMD_GET); BUG_ON(err < 0); ovs_unlock(); @@ -1796,7 +1915,7 @@ if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_DP_CMD_NEW) < 0) + OVS_DP_CMD_GET) < 0) break; i++; } @@ -1811,28 +1930,30 @@ [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, + [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0, + PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)), }; -static const struct genl_ops dp_datapath_genl_ops[] = { +static const struct genl_small_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_set, }, }; @@ -1842,10 +1963,11 @@ .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, + .policy = datapath_policy, .netnsok = true, .parallel_ops = true, - .ops = dp_datapath_genl_ops, - .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .small_ops = dp_datapath_genl_ops, + .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops), .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, @@ -1964,16 +2086,16 @@ } -/* Called with ovs_mutex */ -static void update_headroom(struct datapath *dp) +static unsigned int ovs_get_max_headroom(struct datapath *dp) { - unsigned dev_headroom, max_headroom = 0; + unsigned int dev_headroom, max_headroom = 0; struct net_device *dev; struct vport *vport; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, + lockdep_ovsl_is_held()) { dev = vport->dev; dev_headroom = netdev_get_fwd_headroom(dev); if (dev_headroom > max_headroom) @@ -1981,10 +2103,21 @@ } } - dp->max_headroom = max_headroom; - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) - netdev_set_rx_headroom(vport->dev, max_headroom); + return max_headroom; +} + +/* Called with ovs_mutex */ +static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) +{ + struct vport *vport; + int i; + + dp->max_headroom = new_headroom; + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, + lockdep_ovsl_is_held()) + netdev_set_rx_headroom(vport->dev, new_headroom); + } } static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1995,6 +2128,7 @@ struct sk_buff *reply; struct vport *vport; struct datapath *dp; + unsigned int new_headroom; u32 port_no; int err; @@ -2056,8 +2190,10 @@ info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW, GFP_KERNEL); - if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) - update_headroom(dp); + new_headroom = netdev_get_fwd_headroom(vport->dev); + + if (new_headroom > dp->max_headroom) + ovs_update_headroom(dp, new_headroom); else netdev_set_rx_headroom(vport->dev, dp->max_headroom); @@ -2113,7 +2249,7 @@ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_NEW, GFP_ATOMIC); + OVS_VPORT_CMD_SET, GFP_KERNEL); BUG_ON(err < 0); ovs_unlock(); @@ -2128,11 +2264,12 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) { - bool must_update_headroom = false; + bool update_headroom = false; struct nlattr **a = info->attrs; struct sk_buff *reply; struct datapath *dp; struct vport *vport; + unsigned int new_headroom; int err; reply = ovs_vport_cmd_alloc_info(); @@ -2158,12 +2295,17 @@ /* the vport deletion may trigger dp headroom update */ dp = vport->dp; if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) - must_update_headroom = true; + update_headroom = true; + netdev_reset_rx_headroom(vport->dev); ovs_dp_detach_port(vport); - if (must_update_headroom) - update_headroom(dp); + if (update_headroom) { + new_headroom = ovs_get_max_headroom(dp); + + if (new_headroom < dp->max_headroom) + ovs_update_headroom(dp, new_headroom); + } ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); @@ -2194,7 +2336,7 @@ goto exit_unlock_free; err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_NEW, GFP_ATOMIC); + OVS_VPORT_CMD_GET, GFP_ATOMIC); BUG_ON(err < 0); rcu_read_unlock(); @@ -2230,7 +2372,7 @@ NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_NEW, + OVS_VPORT_CMD_GET, GFP_ATOMIC) < 0) goto out; @@ -2247,6 +2389,23 @@ return skb->len; } +static void ovs_dp_masks_rebalance(struct work_struct *work) +{ + struct ovs_net *ovs_net = container_of(work, struct ovs_net, + masks_rebalance.work); + struct datapath *dp; + + ovs_lock(); + + list_for_each_entry(dp, &ovs_net->dps, list_node) + ovs_flow_masks_rebalance(&dp->table); + + ovs_unlock(); + + schedule_delayed_work(&ovs_net->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); +} + static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, @@ -2258,26 +2417,26 @@ [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, }; -static const struct genl_ops dp_vport_genl_ops[] = { +static const struct genl_small_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = vport_policy, .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_set, }, }; @@ -2287,10 +2446,11 @@ .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, + .policy = vport_policy, .netnsok = true, .parallel_ops = true, - .ops = dp_vport_genl_ops, - .n_ops = ARRAY_SIZE(dp_vport_genl_ops), + .small_ops = dp_vport_genl_ops, + .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops), .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, @@ -2337,10 +2497,19 @@ static int __net_init ovs_init_net(struct net *net) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + int err; INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); - return ovs_ct_init(net); + INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance); + + err = ovs_ct_init(net); + if (err) + return err; + + schedule_delayed_work(&ovs_net->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); + return 0; } static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, @@ -2374,8 +2543,10 @@ struct net *net; LIST_HEAD(head); - ovs_ct_exit(dnet); ovs_lock(); + + ovs_ct_exit(dnet); + list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); @@ -2392,6 +2563,7 @@ ovs_unlock(); + cancel_delayed_work_sync(&ovs_net->masks_rebalance); cancel_work_sync(&ovs_net->dp_notify_work); } @@ -2406,7 +2578,8 @@ { int err; - BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > + sizeof_field(struct sk_buff, cb)); pr_info("Open vSwitch switching datapath\n"); -- Gitblit v1.6.2