From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/net/netfilter/nf_tables_api.c | 1323 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 989 insertions(+), 334 deletions(-) diff --git a/kernel/net/netfilter/nf_tables_api.c b/kernel/net/netfilter/nf_tables_api.c index 2143eda..78b268b 100644 --- a/kernel/net/netfilter/nf_tables_api.c +++ b/kernel/net/netfilter/nf_tables_api.c @@ -21,15 +21,20 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) + +unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); +static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); +static DEFINE_SPINLOCK(nf_tables_gc_list_lock); static u64 table_handle; enum { @@ -103,7 +108,9 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) { - switch (net->nft.validate_state) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; @@ -114,10 +121,13 @@ return; } - net->nft.validate_state = new_validate_state; + nft_net->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); + +static void nft_trans_gc_work(struct work_struct *work); +static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, @@ -150,6 +160,7 @@ return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -162,32 +173,107 @@ return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_set_is_anonymous(set)) return; - list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; break; } } +} + +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void __nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain, bool bind) +{ + struct nftables_pernet *nft_net; + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_chain_binding(chain)) + return; + + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain(trans) == chain) + nft_trans_chain_bound(trans) = bind; + break; + case NFT_MSG_NEWRULE: + if (trans->ctx.chain == chain) + nft_trans_rule_bound(trans) = bind; + break; + } + } +} + +static void nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, true); +} + +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + if (!nft_chain_binding(chain)) + return 0; + + if (nft_chain_binding(ctx->chain)) + return -EOPNOTSUPP; + + if (chain->bound) + return -EBUSY; + + if (!nft_use_inc(&chain->use)) + return -EMFILE; + + chain->bound = true; + nft_chain_trans_bind(ctx, chain); + + return 0; +} + +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, false); } static int nft_netdev_register_hooks(struct net *net, @@ -217,12 +303,18 @@ } static void nft_netdev_unregister_hooks(struct net *net, - struct list_head *hook_list) + struct list_head *hook_list, + bool release_netdev) { - struct nft_hook *hook; + struct nft_hook *hook, *next; - list_for_each_entry(hook, hook_list, list) + list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook, rcu); + } + } } static int nf_tables_register_hook(struct net *net, @@ -248,9 +340,10 @@ return nf_register_net_hook(net, &basechain->ops); } -static void nf_tables_unregister_hook(struct net *net, - const struct nft_table *table, - struct nft_chain *chain) +static void __nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain, + bool release_netdev) { struct nft_base_chain *basechain; const struct nf_hook_ops *ops; @@ -265,9 +358,38 @@ return basechain->type->ops_unregister(net, ops); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list); + nft_netdev_unregister_hooks(net, &basechain->hook_list, + release_netdev); else nf_unregister_net_hook(net, &basechain->ops); +} + +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) +{ + return __nf_tables_unregister_hook(net, table, chain, false); +} + +static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) +{ + struct nftables_pernet *nft_net; + + nft_net = net_generic(net, nf_tables_net_id); + + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + + list_add_tail(&trans->list, &nft_net->commit_list); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -281,7 +403,7 @@ if (msg_type == NFT_MSG_NEWTABLE) nft_activate_next(ctx->net, ctx->table); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -313,8 +435,9 @@ ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } + nft_trans_chain(trans) = ctx->chain; + nft_trans_commit_list_add_tail(ctx->net, trans); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); return trans; } @@ -326,14 +449,13 @@ if (IS_ERR(trans)) return PTR_ERR(trans); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return 0; } -static void nft_rule_expr_activate(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr; @@ -346,9 +468,8 @@ } } -static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule, - enum nft_trans_phase phase) +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; @@ -367,7 +488,7 @@ /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; @@ -387,7 +508,7 @@ ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -453,9 +574,30 @@ nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; +} + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); } static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) @@ -466,8 +608,11 @@ if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -485,7 +630,7 @@ nft_activate_next(ctx->net, obj); nft_trans_obj(trans) = obj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -499,7 +644,7 @@ return err; nft_deactivate_next(ctx->net, obj); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -519,7 +664,7 @@ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); nft_trans_flowtable(trans) = flowtable; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -534,7 +679,7 @@ return err; nft_deactivate_next(ctx->net, flowtable); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -547,13 +692,15 @@ const struct nlattr *nla, u8 family, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(table, &net->nft.tables, list, - lockdep_is_held(&net->nft.commit_mutex)) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list, + lockdep_is_held(&nft_net->commit_mutex)) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) @@ -567,9 +714,11 @@ const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && nft_active_genmask(table, genmask)) return table; @@ -621,6 +770,7 @@ static int nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; + struct nftables_pernet *nft_net; struct nft_module_request *req; va_list args; int ret; @@ -631,7 +781,8 @@ if (ret >= MODULE_NAME_LEN) return 0; - list_for_each_entry(req, &net->nft.module_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(req, &nft_net->module_list, list) { if (!strcmp(req->module, module_name)) { if (req->done) return 0; @@ -647,7 +798,7 @@ req->done = false; strlcpy(req->module, module_name, MODULE_NAME_LEN); - list_add_tail(&req->list, &net->nft.module_list); + list_add_tail(&req->list, &nft_net->module_list); return -EAGAIN; } @@ -685,7 +836,9 @@ static __be16 nft_base_seq(const struct net *net) { - return htons(net->nft.base_seq & 0xffff); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return htons(nft_net->base_seq & 0xffff); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { @@ -743,6 +896,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { + struct nftables_pernet *nft_net; struct sk_buff *skb; int err; @@ -761,7 +915,8 @@ goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(ctx->net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -771,15 +926,17 @@ struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -954,7 +1111,7 @@ goto err; nft_trans_table_update(trans) = true; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: nft_trans_destroy(trans); @@ -1017,6 +1174,7 @@ const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -1026,7 +1184,7 @@ u32 flags = 0; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask); if (IS_ERR(table)) { @@ -1084,7 +1242,7 @@ if (err < 0) goto err_trans; - list_add_tail_rcu(&table->list, &net->nft.tables); + list_add_tail_rcu(&table->list, &nft_net->tables); return 0; err_trans: rhltable_destroy(&table->chains_ht); @@ -1110,7 +1268,7 @@ if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -1124,8 +1282,7 @@ if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -1155,7 +1312,7 @@ if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -1172,11 +1329,12 @@ static int nft_flush(struct nft_ctx *ctx, int family) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (family != AF_UNSPEC && table->family != family) continue; @@ -1291,7 +1449,9 @@ static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING - return lockdep_is_held(&net->nft.commit_mutex); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return lockdep_is_held(&nft_net->commit_mutex); #else return true; #endif @@ -1494,6 +1654,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { + struct nftables_pernet *nft_net; struct sk_buff *skb; int err; @@ -1513,7 +1674,8 @@ goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(ctx->net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1528,11 +1690,13 @@ unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -1847,11 +2011,12 @@ struct nft_chain_hook *hook, u8 family, bool autoload) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, @@ -1981,7 +2146,7 @@ return 0; } -static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +int nft_chain_add(struct nft_table *table, struct nft_chain *chain) { int err; @@ -2009,9 +2174,6 @@ struct nft_chain *chain; struct nft_rule **rules; int err; - - if (table->use == UINT_MAX) - return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { struct nft_stats __percpu *stats = NULL; @@ -2108,6 +2270,11 @@ if (err < 0) goto err_destroy_chain; + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err_use; + } + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2124,10 +2291,11 @@ goto err_unregister_hook; } - table->use++; - return 0; + err_unregister_hook: + nft_use_dec_restore(&table->use); +err_use: nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -2244,6 +2412,7 @@ if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_trans *tmp; char *name; @@ -2253,7 +2422,7 @@ goto err; err = -EEXIST; - list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && tmp->ctx.table == table && nft_trans_chain_update(tmp) && @@ -2267,7 +2436,7 @@ nft_trans_chain_name(trans) = name; } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: @@ -2278,17 +2447,19 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWCHAIN && chain->table == table && - id == nft_trans_chain_id(trans)) + id == nft_trans_chain_id(trans) && + nft_active_genmask(chain, genmask)) return chain; } return ERR_PTR(-ENOENT); @@ -2299,6 +2470,7 @@ const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -2310,7 +2482,7 @@ u64 handle = 0; u32 flags = 0; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); if (IS_ERR(table)) { @@ -2426,6 +2598,9 @@ NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(chain); } + + if (nft_chain_binding(chain)) + return -EOPNOTSUPP; if (nlh->nlmsg_flags & NLM_F_NONREC && chain->use > 0) @@ -2848,6 +3023,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; int err; @@ -2867,7 +3043,7 @@ goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2925,11 +3101,13 @@ unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -3076,8 +3254,7 @@ return err; } -static void nf_tables_rule_destroy(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr, *next; @@ -3094,7 +3271,7 @@ kfree(rule); } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) +static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -3145,6 +3322,8 @@ err = nft_chain_validate(&ctx, chain); if (err < 0) return err; + + cond_resched(); } return 0; @@ -3161,6 +3340,7 @@ const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); struct nft_expr_info *info = NULL; @@ -3178,7 +3358,7 @@ int err, rem; u64 handle, pos_handle; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); if (IS_ERR(table)) { @@ -3193,11 +3373,10 @@ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) - return -EOPNOTSUPP; } else if (nla[NFTA_RULE_CHAIN_ID]) { - chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]); + chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID], + genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]); return PTR_ERR(chain); @@ -3205,6 +3384,9 @@ } else { return -EINVAL; } + + if (nft_chain_is_bound(chain)) + return -EOPNOTSUPP; if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); @@ -3227,9 +3409,6 @@ nlh->nlmsg_flags & NLM_F_REPLACE) return -EINVAL; handle = nf_tables_alloc_handle(table); - - if (chain->use == UINT_MAX) - return -EOVERFLOW; if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); @@ -3316,16 +3495,26 @@ expr = nft_expr_next(expr); } + if (!nft_use_inc(&chain->use)) { + err = -EMFILE; + goto err2; + } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + if (nft_chain_binding(chain)) { + err = -EOPNOTSUPP; + goto err_destroy_flow_rule; + } + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } err = nft_delrule(&ctx, old_rule); if (err < 0) { nft_trans_destroy(trans); - goto err2; + goto err_destroy_flow_rule; } list_add_tail_rcu(&rule->list, &old_rule->list); @@ -3333,7 +3522,7 @@ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (!trans) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } if (nlh->nlmsg_flags & NLM_F_APPEND) { @@ -3349,9 +3538,8 @@ } } kvfree(info); - chain->use++; - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { @@ -3363,8 +3551,12 @@ } return 0; + +err_destroy_flow_rule: + nft_use_dec_restore(&chain->use); err2: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); + nf_tables_rule_destroy(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops) { @@ -3381,10 +3573,11 @@ const struct nft_chain *chain, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && @@ -3421,7 +3614,7 @@ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) return -EOPNOTSUPP; } @@ -3450,6 +3643,8 @@ } else { list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) + continue; + if (nft_chain_binding(chain)) continue; ctx.chain = chain; @@ -3497,13 +3692,14 @@ const struct nft_set_desc *desc, enum nft_set_policies policy) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; int i; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); if (nla[NFTA_SET_FLAGS] != NULL) @@ -3641,10 +3837,11 @@ const struct nft_table *table, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { struct nft_set *set = nft_trans_set(trans); @@ -3867,6 +4064,7 @@ const struct nft_set *set, int event, gfp_t gfp_flags) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; u32 portid = ctx->portid; int err; @@ -3885,7 +4083,7 @@ goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3898,14 +4096,16 @@ struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); struct nft_ctx *ctx = cb->data, ctx_set; + struct nftables_pernet *nft_net; if (cb->args[1]) return skb->len; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && ctx->family != table->family) continue; @@ -4269,9 +4469,15 @@ alloc_size = sizeof(*set) + size + udlen; if (alloc_size < size || alloc_size > INT_MAX) return -ENOMEM; + + if (!nft_use_inc(&table->use)) + return -EMFILE; + set = kvzalloc(alloc_size, GFP_KERNEL); - if (!set) - return -ENOMEM; + if (!set) { + err = -ENOMEM; + goto err_alloc; + } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); if (!name) { @@ -4291,6 +4497,7 @@ } INIT_LIST_HEAD(&set->bindings); + refcount_set(&set->refs, 1); set->table = table; write_pnet(&set->net, net); set->ops = ops; @@ -4326,25 +4533,37 @@ } set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); - table->use++; + return 0; err_set_expr_alloc: if (set->expr) nft_expr_destroy(&ctx, set->expr); - ops->destroy(set); + ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: kvfree(set); +err_alloc: + nft_use_dec_restore(&table->use); + return err; +} + +static void nft_set_put(struct nft_set *set) +{ + if (refcount_dec_and_test(&set->refs)) { + kfree(set->name); + kvfree(set); + } } static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) @@ -4355,9 +4574,8 @@ if (set->expr) nft_expr_destroy(ctx, set->expr); - set->ops->destroy(set); - kfree(set->name); - kvfree(set); + set->ops->destroy(ctx, set); + nft_set_put(set); } static int nf_tables_delset(struct net *net, struct sock *nlsk, @@ -4409,10 +4627,9 @@ enum nft_data_types type, unsigned int len); -static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_set_elem *elem) +static int nft_setelem_data_validate(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; @@ -4424,14 +4641,19 @@ set->dlen); } +static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + return nft_setelem_data_validate(ctx, set, elem); +} + int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { struct nft_set_binding *i; struct nft_set_iter iter; - - if (set->use == UINT_MAX) - return -EOVERFLOW; if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -4457,10 +4679,12 @@ return iter.err; } bind: + if (!nft_use_inc(&set->use)) + return -EMFILE; + binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); - set->use++; return 0; } @@ -4479,17 +4703,74 @@ } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + + nft_clear(ctx->net, set); + } + + nft_use_inc_restore(&set->use); +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + else + list_del_rcu(&binding->list); + + nft_use_dec(&set->use); + break; case NFT_TRANS_PREPARE: - set->use--; + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_deactivate_next(ctx->net, set); + } + nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - set->use--; + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_use_dec(&set->use); fallthrough; default: nf_tables_unbind_set(ctx, set, binding, @@ -4679,7 +4960,11 @@ const struct nft_set_iter *iter, struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); struct nft_set_dump_args *args; + + if (nft_set_elem_expired(ext)) + return 0; args = container_of(iter, struct nft_set_dump_args, iter); return nf_tables_fill_setelem(args->skb, set, elem); @@ -4694,6 +4979,7 @@ { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; @@ -4704,7 +4990,8 @@ int event; rcu_read_lock(); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -4983,6 +5270,7 @@ const struct nft_set_elem *elem, int event, u16 flags) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; @@ -5002,7 +5290,8 @@ goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -5091,6 +5380,7 @@ } } +/* Drop references and destroy. Called from gc, dynset and abort path. */ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -5107,16 +5397,16 @@ nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_set_elem_deactivate() already deals with - * the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -5281,8 +5571,16 @@ set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); + obj = NULL; goto err_parse_key_end; } + + if (!nft_use_inc(&obj->use)) { + err = -EMFILE; + obj = NULL; + goto err_parse_key_end; + } + nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); } @@ -5347,10 +5645,8 @@ udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { + if (obj) *nft_set_ext_obj(ext) = obj; - obj->use++; - } err = nft_set_elem_expr_setup(ctx, ext, expr); if (err < 0) @@ -5363,7 +5659,8 @@ goto err_elem_expr; } - ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; + ext->genmask = nft_genmask_cur(ctx->net); + err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { @@ -5398,7 +5695,7 @@ } nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_set_full: @@ -5406,14 +5703,14 @@ err_element_clash: kfree(trans); err_elem_expr: - if (obj) - obj->use--; - nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&elem.data.val, desc.type); err_parse_key_end: + if (obj) + nft_use_dec_restore(&obj->use); + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); @@ -5429,6 +5726,7 @@ const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; @@ -5448,7 +5746,8 @@ if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -5457,7 +5756,7 @@ return err; } - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, ctx.table); return 0; @@ -5477,50 +5776,40 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { struct nft_chain *chain; - struct nft_rule *rule; if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; - chain->use++; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use++; - list_for_each_entry(rule, &chain->rules, list) - chain->use++; - - nft_chain_add(chain->table, chain); + nft_use_inc_restore(&chain->use); break; } } } -static void nft_set_elem_activate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use++; + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_set_elem_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, @@ -5590,10 +5879,10 @@ kfree(elem.priv); elem.priv = priv; - nft_set_elem_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; fail_ops: @@ -5624,10 +5913,10 @@ } set->ndeact++; - nft_set_elem_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err1: @@ -5654,7 +5943,11 @@ set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { @@ -5675,29 +5968,6 @@ set->ndeact++; } return err; -} - -void nft_set_gc_batch_release(struct rcu_head *rcu) -{ - struct nft_set_gc_batch *gcb; - unsigned int i; - - gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); - for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); - kfree(gcb); -} - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp) -{ - struct nft_set_gc_batch *gcb; - - gcb = kzalloc(sizeof(*gcb), gfp); - if (gcb == NULL) - return gcb; - gcb->head.set = set; - return gcb; } /* @@ -5924,7 +6194,7 @@ nft_trans_obj(trans) = obj; nft_trans_obj_update(trans) = true; nft_trans_obj_newobj(trans) = newobj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -5985,9 +6255,14 @@ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + type = nft_obj_type_get(net, objtype); - if (IS_ERR(type)) - return PTR_ERR(type); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err_type; + } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { @@ -6021,7 +6296,7 @@ goto err_obj_ht; list_add_tail_rcu(&obj->list, &table->objects); - table->use++; + return 0; err_obj_ht: /* queued in transaction log */ @@ -6037,6 +6312,9 @@ kfree(obj); err_init: module_put(type->owner); +err_type: + nft_use_dec_restore(&table->use); + return err; } @@ -6087,6 +6365,7 @@ struct nft_obj_filter *filter = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; struct nft_object *obj; bool reset = false; @@ -6094,9 +6373,10 @@ reset = true; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -6119,7 +6399,7 @@ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, - net->nft.base_seq); + nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6240,8 +6520,11 @@ reset = true; if (reset) { - char *buf = kasprintf(GFP_ATOMIC, "%s:%u", - table->name, net->nft.base_seq); + const struct nftables_pernet *nft_net; + char *buf; + + nft_net = net_generic(net, nf_tables_net_id); + buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6326,10 +6609,11 @@ struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct sk_buff *skb; int err; char *buf = kasprintf(gfp, "%s:%u", - table->name, net->nft.base_seq); + table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6355,7 +6639,7 @@ goto err; } - nft_notify_enqueue(skb, report, &net->nft.notify_list); + nft_notify_enqueue(skb, report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6417,10 +6701,11 @@ enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - flowtable->use--; + nft_use_dec(&flowtable->use); fallthrough; default: return; @@ -6560,13 +6845,25 @@ FLOW_BLOCK_UNBIND); } +static void __nft_unregister_flowtable_net_hooks(struct net *net, + struct list_head *hook_list, + bool release_netdev) +{ + struct nft_hook *hook, *next; + + list_for_each_entry_safe(hook, next, hook_list, list) { + nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook, rcu); + } + } +} + static void nft_unregister_flowtable_net_hooks(struct net *net, struct list_head *hook_list) { - struct nft_hook *hook; - - list_for_each_entry(hook, hook_list, list) - nf_unregister_net_hook(net, &hook->ops); + __nft_unregister_flowtable_net_hooks(net, hook_list, false); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -6691,7 +6988,7 @@ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans)); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -6757,9 +7054,14 @@ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); - if (!flowtable) - return -ENOMEM; + if (!flowtable) { + err = -ENOMEM; + goto flowtable_alloc; + } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); @@ -6814,7 +7116,6 @@ goto err5; list_add_tail_rcu(&flowtable->list, &table->flowtables); - table->use++; return 0; err5: @@ -6831,6 +7132,9 @@ kfree(flowtable->name); err1: kfree(flowtable); +flowtable_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -6881,7 +7185,7 @@ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans)); nft_flowtable_hook_release(&flowtable_hook); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7007,12 +7311,14 @@ struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; const struct nft_table *table; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -7147,6 +7453,7 @@ struct list_head *hook_list, int event) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; int err; @@ -7166,7 +7473,7 @@ goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7191,6 +7498,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlmsghdr *nlh; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); @@ -7200,7 +7508,7 @@ if (!nlh) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -7235,6 +7543,7 @@ { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; struct nft_table *table; struct net *net; @@ -7242,13 +7551,14 @@ return 0; net = dev_net(dev); - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { nft_flowtable_event(event, dev, flowtable); } } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } @@ -7429,19 +7739,22 @@ static int nf_tables_validate(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - switch (net->nft.validate_state) { + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: break; case NFT_VALIDATE_NEED: nft_validate_state_update(net, NFT_VALIDATE_DO); fallthrough; case NFT_VALIDATE_DO: - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_validate(net, table) < 0) return -EAGAIN; } + + nft_validate_state_update(net, NFT_VALIDATE_SKIP); break; } @@ -7569,7 +7882,7 @@ synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -7613,9 +7926,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -7711,12 +8025,197 @@ list_del_rcu(&chain->list); } +static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, + struct nft_trans_gc *trans) +{ + void **priv = trans->priv; + unsigned int i; + + for (i = 0; i < trans->count; i++) { + struct nft_set_elem elem = { + .priv = priv[i], + }; + + nft_setelem_data_deactivate(ctx->net, trans->set, &elem); + trans->set->ops->remove(trans->net, trans->set, &elem); + } +} + +void nft_trans_gc_destroy(struct nft_trans_gc *trans) +{ + nft_set_put(trans->set); + put_net(trans->net); + kfree(trans); +} + +static void nft_trans_gc_trans_free(struct rcu_head *rcu) +{ + struct nft_set_elem elem = {}; + struct nft_trans_gc *trans; + struct nft_ctx ctx = {}; + unsigned int i; + + trans = container_of(rcu, struct nft_trans_gc, rcu); + ctx.net = read_pnet(&trans->set->net); + + for (i = 0; i < trans->count; i++) { + elem.priv = trans->priv[i]; + atomic_dec(&trans->set->nelems); + + nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + } + + nft_trans_gc_destroy(trans); +} + +static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) +{ + struct nftables_pernet *nft_net; + struct nft_ctx ctx = {}; + + nft_net = net_generic(trans->net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); + + /* Check for race with transaction, otherwise this batch refers to + * stale objects that might not be there anymore. Skip transaction if + * set has been destroyed from control plane transaction in case gc + * worker loses race. + */ + if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { + mutex_unlock(&nft_net->commit_mutex); + return false; + } + + ctx.net = trans->net; + ctx.table = trans->set->table; + + nft_trans_gc_setelem_remove(&ctx, trans); + mutex_unlock(&nft_net->commit_mutex); + + return true; +} + +static void nft_trans_gc_work(struct work_struct *work) +{ + struct nft_trans_gc *trans, *next; + LIST_HEAD(trans_gc_list); + + spin_lock(&nf_tables_gc_list_lock); + list_splice_init(&nf_tables_gc_list, &trans_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); + if (!nft_trans_gc_work_done(trans)) { + nft_trans_gc_destroy(trans); + continue; + } + call_rcu(&trans->rcu, nft_trans_gc_trans_free); + } +} + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp) +{ + struct net *net = read_pnet(&set->net); + struct nft_trans_gc *trans; + + trans = kzalloc(sizeof(*trans), gfp); + if (!trans) + return NULL; + + trans->net = maybe_get_net(net); + if (!trans->net) { + kfree(trans); + return NULL; + } + + refcount_inc(&set->refs); + trans->set = set; + trans->seq = gc_seq; + + return trans; +} + +void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) +{ + trans->priv[trans->count++] = priv; +} + +static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) +{ + spin_lock(&nf_tables_gc_list_lock); + list_add_tail(&trans->list, &nf_tables_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + schedule_work(&trans_gc_work); +} + +static int nft_trans_gc_space(struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp) +{ + struct nft_set *set; + + if (nft_trans_gc_space(gc)) + return gc; + + set = gc->set; + nft_trans_gc_queue_work(gc); + + return nft_trans_gc_alloc(set, gc_seq, gfp); +} + +void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) +{ + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + nft_trans_gc_queue_work(trans); +} + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) +{ + struct nft_set *set; + + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) + return NULL; + + if (nft_trans_gc_space(gc)) + return gc; + + set = gc->set; + call_rcu(&gc->rcu, nft_trans_gc_trans_free); + + return nft_trans_gc_alloc(set, 0, gfp); +} + +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) +{ + WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); + + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + call_rcu(&trans->rcu, nft_trans_gc_trans_free); +} + static void nf_tables_module_autoload_cleanup(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); - list_for_each_entry_safe(req, next, &net->nft.module_list, list) { + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + list_for_each_entry_safe(req, next, &nft_net->module_list, list) { WARN_ON_ONCE(!req->done); list_del(&req->list); kfree(req); @@ -7725,6 +8224,7 @@ static void nf_tables_commit_release(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; /* all side effects have to be made visible. @@ -7734,35 +8234,36 @@ * Memory reclaim happens asynchronously from work queue * to prevent expensive synchronize_rcu() in commit phase. */ - if (list_empty(&net->nft.commit_list)) { + if (list_empty(&nft_net->commit_list)) { nf_tables_module_autoload_cleanup(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return; } - trans = list_last_entry(&net->nft.commit_list, + trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); get_net(trans->ctx.net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); schedule_work(&trans_destroy_work); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static void nft_commit_notify(struct net *net, u32 portid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct sk_buff *batch_skb = NULL, *nskb, *skb; unsigned char *data; int len; - list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + list_for_each_entry_safe(skb, nskb, &nft_net->notify_list, list) { if (!batch_skb) { new_batch: batch_skb = skb; @@ -7788,7 +8289,7 @@ NFT_CB(batch_skb).report, GFP_KERNEL); } - WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); } static int nf_tables_commit_audit_alloc(struct list_head *adl, @@ -7852,18 +8353,71 @@ } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + +static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) +{ + unsigned int gc_seq; + + /* Bump gc counter, it becomes odd, this is the busy mark. */ + gc_seq = READ_ONCE(nft_net->gc_seq); + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + + return gc_seq; +} + +static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) +{ + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + unsigned int gc_seq; LIST_HEAD(adl); int err; - if (list_empty(&net->nft.commit_list)) { - mutex_unlock(&net->nft.commit_mutex); + if (list_empty(&nft_net->commit_list)) { + mutex_unlock(&nft_net->commit_mutex); return 0; + } + + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans)) && + !nft_trans_chain_bound(trans)) { + pr_warn_once("nftables ruleset with unbound chain\n"); + return -EINVAL; + } + break; + } } /* 0. Validate ruleset, otherwise roll back for error reporting. */ @@ -7875,7 +8429,7 @@ return err; /* 1. Allocate space for next generation rules_gen_X[] */ - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { int ret; ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); @@ -7898,7 +8452,7 @@ } /* step 2. Make rules_gen_X visible to packet path */ - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_commit_chain(net, chain); } @@ -7907,12 +8461,15 @@ * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - while (++net->nft.base_seq == 0); + while (++nft_net->base_seq == 0) + ; + + gc_seq = nft_gc_seq_begin(nft_net); /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { nf_tables_commit_audit_collect(&adl, trans->ctx.table, trans->msg_type); switch (trans->msg_type) { @@ -7981,13 +8538,14 @@ */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + nft_use_dec(&trans->ctx.table->use); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); @@ -7999,6 +8557,11 @@ nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM, 0); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8010,6 +8573,11 @@ te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -8070,9 +8638,13 @@ } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); - nf_tables_commit_audit_log(&adl, net->nft.base_seq); + nf_tables_commit_audit_log(&adl, nft_net->base_seq); + + nft_gc_seq_end(nft_net, gc_seq); nf_tables_commit_release(net); return 0; @@ -8080,17 +8652,18 @@ static void nf_tables_module_autoload(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; LIST_HEAD(module_list); - list_splice_init(&net->nft.module_list, &module_list); - mutex_unlock(&net->nft.commit_mutex); + list_splice_init(&nft_net->module_list, &module_list); + mutex_unlock(&nft_net->commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { request_module("%s", req->module); req->done = true; } - mutex_lock(&net->nft.commit_mutex); - list_splice(&module_list, &net->nft.module_list); + mutex_lock(&nft_net->commit_mutex); + list_splice(&module_list, &nft_net->module_list); } static void nf_tables_abort_release(struct nft_trans *trans) @@ -8125,16 +8698,32 @@ kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) return -EAGAIN; - list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: @@ -8159,11 +8748,11 @@ kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - if (nft_chain_is_bound(trans->ctx.chain)) { + if (nft_trans_chain_bound(trans)) { nft_trans_destroy(trans); break; } - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -8171,25 +8760,29 @@ } break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: - trans->ctx.chain->use--; + if (nft_trans_rule_bound(trans)) { + nft_trans_destroy(trans); + break; + } + nft_use_dec_restore(&trans->ctx.chain->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: - trans->ctx.chain->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -8197,8 +8790,11 @@ list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -8209,14 +8805,25 @@ te = (struct nft_trans_elem *)trans->data; te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_set_elem_activate(net, te->set, &te->elem); + nft_setelem_data_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -8224,12 +8831,12 @@ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; @@ -8238,7 +8845,7 @@ nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); @@ -8249,7 +8856,7 @@ list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); @@ -8257,11 +8864,13 @@ } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, - &net->nft.commit_list, list) { - list_del(&trans->list); + &nft_net->commit_list, list) { + nft_trans_list_del(trans); nf_tables_abort_release(trans); } @@ -8273,30 +8882,32 @@ return 0; } -static void nf_tables_cleanup(struct net *net) -{ - nft_validate_state_update(net, NFT_VALIDATE_SKIP); -} - static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { - int ret = __nf_tables_abort(net, action); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + unsigned int gc_seq; + int ret; - mutex_unlock(&net->nft.commit_mutex); + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); + + mutex_unlock(&nft_net->commit_mutex); return ret; } static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); bool genid_ok; - mutex_lock(&net->nft.commit_mutex); + mutex_lock(&nft_net->commit_mutex); - genid_ok = genid == 0 || net->nft.base_seq == genid; + genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); /* else, commit mutex has to be released by commit or abort function */ return genid_ok; @@ -8309,7 +8920,6 @@ .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, - .cleanup = nf_tables_cleanup, .valid_genid = nf_tables_valid_genid, .owner = THIS_MODULE, }; @@ -8472,28 +9082,24 @@ } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -/** - * nft_parse_register - parse a register value from a netlink attribute - * - * @attr: netlink attribute - * - * Parse and translate a register value from a netlink attribute. - * Registers used to be 128 bit wide, these register numbers will be - * mapped to the corresponding 32 bit register numbers. - */ -unsigned int nft_parse_register(const struct nlattr *attr) +static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; reg = ntohl(nla_get_be32(attr)); switch (reg) { case NFT_REG_VERDICT...NFT_REG_4: - return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; + break; + case NFT_REG32_00...NFT_REG32_15: + *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + break; default: - return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + return -ERANGE; } + + return 0; } -EXPORT_SYMBOL_GPL(nft_parse_register); /** * nft_dump_register - dump a register value to a netlink attribute @@ -8543,7 +9149,10 @@ u32 reg; int err; - reg = nft_parse_register(attr); + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + err = nft_validate_register_load(reg, len); if (err < 0) return err; @@ -8612,7 +9221,10 @@ int err; u32 reg; - reg = nft_parse_register(attr); + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + err = nft_validate_register_store(ctx, reg, data, type, len); if (err < 0) return err; @@ -8644,6 +9256,9 @@ if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { @@ -8669,7 +9284,8 @@ genmask); } else if (tb[NFTA_VERDICT_CHAIN_ID]) { chain = nft_chain_lookup_byid(ctx->net, ctx->table, - tb[NFTA_VERDICT_CHAIN_ID]); + tb[NFTA_VERDICT_CHAIN_ID], + genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } else { @@ -8685,8 +9301,9 @@ if (desc->flags & NFT_DATA_DESC_SETELEM && chain->flags & NFT_CHAIN_BINDING) return -EINVAL; + if (!nft_use_inc(&chain->use)) + return -EMFILE; - chain->use++; data->verdict.chain = chain; break; } @@ -8699,22 +9316,12 @@ static void nft_verdict_uninit(const struct nft_data *data) { struct nft_chain *chain; - struct nft_rule *rule; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; - chain->use--; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use--; - list_for_each_entry(rule, &chain->rules, list) - chain->use--; - - nft_chain_del(chain); + nft_use_dec(&chain->use); break; } } @@ -8883,32 +9490,41 @@ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } nft_chain_del(ctx->chain); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nf_tables_chain_destroy(ctx); return 0; } EXPORT_SYMBOL_GPL(__nft_release_basechain); -static void __nft_release_hooks(struct net *net) +static void __nft_release_hook(struct net *net, struct nft_table *table) { - struct nft_table *table; + struct nft_flowtable *flowtable; struct nft_chain *chain; - list_for_each_entry(table, &net->nft.tables, list) { - list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); - } + list_for_each_entry(chain, &table->chains, list) + __nf_tables_unregister_hook(net, table, chain, true); + list_for_each_entry(flowtable, &table->flowtables, list) + __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, + true); } -static void __nft_release_tables(struct net *net) +static void __nft_release_hooks(struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + struct nft_table *table; + + list_for_each_entry(table, &nft_net->tables, list) + __nft_release_hook(net, table); +} + +static void __nft_release_table(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable, *nf; - struct nft_table *table, *nt; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_rule *rule, *nr; @@ -8918,77 +9534,115 @@ .family = NFPROTO_NETDEV, }; - list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - ctx.family = table->family; - ctx.table = table; - list_for_each_entry(chain, &table->chains, list) { - ctx.chain = chain; - list_for_each_entry_safe(rule, nr, &chain->rules, list) { - list_del(&rule->list); - chain->use--; - nf_tables_rule_release(&ctx, rule); - } + ctx.family = table->family; + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + if (nft_chain_binding(chain)) + continue; + + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + nft_use_dec(&chain->use); + nf_tables_rule_release(&ctx, rule); } - list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { - list_del(&flowtable->list); - table->use--; - nf_tables_flowtable_destroy(flowtable); - } - list_for_each_entry_safe(set, ns, &table->sets, list) { - list_del(&set->list); - table->use--; - nft_set_destroy(&ctx, set); - } - list_for_each_entry_safe(obj, ne, &table->objects, list) { - nft_obj_del(obj); - table->use--; - nft_obj_destroy(&ctx, obj); - } - list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; - nft_chain_del(chain); - table->use--; - nf_tables_chain_destroy(&ctx); - } - list_del(&table->list); - nf_tables_table_destroy(&ctx); } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); + nft_use_dec(&table->use); + nf_tables_flowtable_destroy(flowtable); + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + nft_use_dec(&table->use); + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + + nft_set_destroy(&ctx, set); + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + nft_obj_del(obj); + nft_use_dec(&table->use); + nft_obj_destroy(&ctx, obj); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + ctx.chain = chain; + nft_chain_del(chain); + nft_use_dec(&table->use); + nf_tables_chain_destroy(&ctx); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); +} + +static void __nft_release_tables(struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + struct nft_table *table, *nt; + + list_for_each_entry_safe(table, nt, &nft_net->tables, list) + __nft_release_table(net, table); } static int __net_init nf_tables_init_net(struct net *net) { - INIT_LIST_HEAD(&net->nft.tables); - INIT_LIST_HEAD(&net->nft.commit_list); - INIT_LIST_HEAD(&net->nft.module_list); - INIT_LIST_HEAD(&net->nft.notify_list); - mutex_init(&net->nft.commit_mutex); - net->nft.base_seq = 1; - net->nft.validate_state = NFT_VALIDATE_SKIP; + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + INIT_LIST_HEAD(&nft_net->tables); + INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); + INIT_LIST_HEAD(&nft_net->module_list); + INIT_LIST_HEAD(&nft_net->notify_list); + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->validate_state = NFT_VALIDATE_SKIP; + nft_net->gc_seq = 0; return 0; } static void __net_exit nf_tables_pre_exit_net(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); + mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) { - mutex_lock(&net->nft.commit_mutex); - if (!list_empty(&net->nft.commit_list)) + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + unsigned int gc_seq; + + mutex_lock(&nft_net->commit_mutex); + + gc_seq = nft_gc_seq_begin(nft_net); + + if (!list_empty(&nft_net->commit_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); - mutex_unlock(&net->nft.commit_mutex); - WARN_ON_ONCE(!list_empty(&net->nft.tables)); - WARN_ON_ONCE(!list_empty(&net->nft.module_list)); - WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); + + nft_gc_seq_end(nft_net, gc_seq); + + mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + WARN_ON_ONCE(!list_empty(&nft_net->module_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); +} + +static void nf_tables_exit_batch(struct list_head *net_exit_list) +{ + flush_work(&trans_gc_work); } static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .exit_batch = nf_tables_exit_batch, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) @@ -9051,6 +9705,7 @@ nft_chain_filter_fini(); nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); -- Gitblit v1.6.2