2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/net/openvswitch/datapath.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -143,6 +130,8 @@
 				  const struct dp_upcall_info *,
 				  uint32_t cutlen);
 
+static void ovs_dp_masks_rebalance(struct work_struct *work);
+
 /* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
@@ -192,7 +181,8 @@
 	struct hlist_head *head;
 
 	head = vport_hash_bucket(dp, port_no);
-	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+				 lockdep_ovsl_is_held()) {
 		if (vport->port_no == port_no)
 			return vport;
 	}
@@ -235,31 +225,43 @@
 	struct dp_stats_percpu *stats;
 	u64 *stats_counter;
 	u32 n_mask_hit;
+	u32 n_cache_hit;
+	int error;
 
 	stats = this_cpu_ptr(dp->stats_percpu);
 
 	/* Look up flow. */
-	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+					 &n_mask_hit, &n_cache_hit);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
-		int error;
 
 		memset(&upcall, 0, sizeof(upcall));
 		upcall.cmd = OVS_PACKET_CMD_MISS;
 		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
 		upcall.mru = OVS_CB(skb)->mru;
 		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
-		if (unlikely(error))
-			kfree_skb(skb);
-		else
+		switch (error) {
+		case 0:
+		case -EAGAIN:
+		case -ERESTARTSYS:
+		case -EINTR:
 			consume_skb(skb);
+			break;
+		default:
+			kfree_skb(skb);
+			break;
+		}
 		stats_counter = &stats->n_missed;
 		goto out;
 	}
 
 	ovs_flow_stats_update(flow, key->tp.flags, skb);
 	sf_acts = rcu_dereference(flow->sf_acts);
-	ovs_execute_actions(dp, skb, sf_acts, key);
+	error = ovs_execute_actions(dp, skb, sf_acts, key);
+	if (unlikely(error))
+		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+				    ovs_dp_name(dp), error);
 
 	stats_counter = &stats->n_hit;
 
@@ -268,6 +270,7 @@
 	u64_stats_update_begin(&stats->syncp);
 	(*stats_counter)++;
 	stats->n_mask_hit += n_mask_hit;
+	stats->n_cache_hit += n_cache_hit;
 	u64_stats_update_end(&stats->syncp);
 }
 
@@ -306,14 +309,14 @@
 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
-				 uint32_t cutlen)
+			     uint32_t cutlen)
 {
 	unsigned int gso_type = skb_shinfo(skb)->gso_type;
 	struct sw_flow_key later_key;
 	struct sk_buff *segs, *nskb;
 	int err;
 
-	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
+	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
 	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
@@ -330,8 +333,7 @@
 	}
 
 	/* Queue all of the segments. */
-	skb = segs;
-	do {
+	skb_list_walk_safe(segs, skb, nskb) {
 		if (gso_type & SKB_GSO_UDP && skb != segs)
 			key = &later_key;
 
@@ -339,17 +341,15 @@
 		if (err)
 			break;
 
-	} while ((skb = skb->next));
+	}
 
 	/* Free all of the segments. */
-	skb = segs;
-	do {
-		nskb = skb->next;
+	skb_list_walk_safe(segs, skb, nskb) {
 		if (err)
 			kfree_skb(skb);
 		else
 			consume_skb(skb);
-	} while ((skb = nskb));
+	}
 	return err;
 }
 
@@ -359,7 +359,8 @@
 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
 		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
 		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
-		+ nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
 
 	/* OVS_PACKET_ATTR_USERDATA */
 	if (upcall_info->userdata)
@@ -402,6 +403,7 @@
 	size_t len;
 	unsigned int hlen;
 	int err, dp_ifindex;
+	u64 hash;
 
 	dp_ifindex = get_dpifindex(dp);
 	if (!dp_ifindex)
@@ -448,10 +450,15 @@
 
 	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
+	if (!upcall) {
+		err = -EINVAL;
+		goto out;
+	}
 	upcall->dp_ifindex = dp_ifindex;
 
 	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
-	BUG_ON(err);
+	if (err)
+		goto out;
 
 	if (upcall_info->userdata)
 		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -459,15 +466,26 @@
			  nla_data(upcall_info->userdata));
 
 	if (upcall_info->egress_tun_info) {
-		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		nla = nla_nest_start_noflag(user_skb,
+					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		if (!nla) {
+			err = -EMSGSIZE;
+			goto out;
+		}
 		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
-		BUG_ON(err);
+		if (err)
+			goto out;
+
 		nla_nest_end(user_skb, nla);
 	}
 
 	if (upcall_info->actions_len) {
-		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
+		if (!nla) {
+			err = -EMSGSIZE;
+			goto out;
+		}
 		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
@@ -478,23 +496,30 @@
 	}
 
 	/* Add OVS_PACKET_ATTR_MRU */
-	if (upcall_info->mru) {
-		if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
-				upcall_info->mru)) {
-			err = -ENOBUFS;
-			goto out;
-		}
-		pad_packet(dp, user_skb);
+	if (upcall_info->mru &&
+	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+		err = -ENOBUFS;
+		goto out;
 	}
 
 	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
-	if (cutlen > 0) {
-		if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
-				skb->len)) {
-			err = -ENOBUFS;
-			goto out;
-		}
-		pad_packet(dp, user_skb);
+	if (cutlen > 0 &&
+	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
+	/* Add OVS_PACKET_ATTR_HASH */
+	hash = skb_get_hash_raw(skb);
+	if (skb->sw_hash)
+		hash |= OVS_PACKET_HASH_SW_BIT;
+
+	if (skb->l4_hash)
+		hash |= OVS_PACKET_HASH_L4_BIT;
+
+	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+		err = -ENOBUFS;
+		goto out;
 	}
 
 	/* Only reserve room for attribute header, packet data is added
@@ -519,8 +544,9 @@
 out:
 	if (err)
 		skb_tx_error(skb);
-	kfree_skb(user_skb);
-	kfree_skb(nskb);
+	consume_skb(user_skb);
+	consume_skb(nskb);
+
 	return err;
 }
 
@@ -536,6 +562,7 @@
 	struct datapath *dp;
 	struct vport *input_vport;
 	u16 mru = 0;
+	u64 hash;
 	int len;
 	int err;
 	bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -560,6 +587,14 @@
 		packet->ignore_df = 1;
 	}
 	OVS_CB(packet)->mru = mru;
+
+	if (a[OVS_PACKET_ATTR_HASH]) {
+		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+			       !!(hash & OVS_PACKET_HASH_SW_BIT),
+			       !!(hash & OVS_PACKET_HASH_L4_BIT));
+	}
 
 	/* Build an sw_flow for sending this packet. */
 	flow = ovs_flow_alloc();
@@ -622,12 +657,13 @@
 	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
 	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
 	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
 };
 
-static const struct genl_ops dp_packet_genl_ops[] = {
+static const struct genl_small_ops dp_packet_genl_ops[] = {
 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
 	}
 };
@@ -637,10 +673,11 @@
 	.name = OVS_PACKET_FAMILY,
 	.version = OVS_PACKET_VERSION,
 	.maxattr = OVS_PACKET_ATTR_MAX,
+	.policy = packet_policy,
 	.netnsok = true,
 	.parallel_ops = true,
-	.ops = dp_packet_genl_ops,
-	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+	.small_ops = dp_packet_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
 	.module = THIS_MODULE,
 };
 
@@ -672,6 +709,7 @@
 		stats->n_missed += local_stats.n_missed;
 		stats->n_lost += local_stats.n_lost;
 		mega_stats->n_mask_hit += local_stats.n_mask_hit;
+		mega_stats->n_cache_hit += local_stats.n_cache_hit;
 	}
 }
 
@@ -768,7 +806,7 @@
 	 * This can only fail for dump operations because the skb is always
 	 * properly sized for single flows.
 	 */
-	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
+	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
 	if (start) {
 		const struct sw_flow_actions *sf_acts;
 
@@ -1050,11 +1088,12 @@
 }
 
 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(struct net *net,
-						const struct nlattr *a,
-						const struct sw_flow_key *key,
-						const struct sw_flow_mask *mask,
-						bool log)
+static noinline_for_stack
+struct sw_flow_actions *get_flow_actions(struct net *net,
+					 const struct nlattr *a,
+					 const struct sw_flow_key *key,
+					 const struct sw_flow_mask *mask,
+					 bool log)
 {
 	struct sw_flow_actions *acts;
 	struct sw_flow_key masked_key;
@@ -1084,12 +1123,13 @@
  * we should not to return match object with dangling reference
  * to mask.
  * */
-static int ovs_nla_init_match_and_action(struct net *net,
-					 struct sw_flow_match *match,
-					 struct sw_flow_key *key,
-					 struct nlattr **a,
-					 struct sw_flow_actions **acts,
-					 bool log)
+static noinline_for_stack int
+ovs_nla_init_match_and_action(struct net *net,
+			      struct sw_flow_match *match,
+			      struct sw_flow_key *key,
+			      struct nlattr **a,
+			      struct sw_flow_actions **acts,
+			      bool log)
 {
 	struct sw_flow_mask mask;
 	int error = 0;
@@ -1189,14 +1229,14 @@
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW,
+						       OVS_FLOW_CMD_SET,
						       ufid_flags);
 			BUG_ON(error < 0);
 		}
 	} else {
 		/* Could not alloc without acts before locking. */
 		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
-						info, OVS_FLOW_CMD_NEW, false,
+						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);
 
 		if (IS_ERR(reply)) {
@@ -1272,7 +1312,7 @@
 	}
 
 	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
-					OVS_FLOW_CMD_NEW, true, ufid_flags);
+					OVS_FLOW_CMD_GET, true, ufid_flags);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1337,7 +1377,7 @@
 	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
					&flow->id, info, false, ufid_flags);
 	if (likely(reply)) {
-		if (likely(!IS_ERR(reply))) {
+		if (!IS_ERR(reply)) {
 			rcu_read_lock(); /*To keep RCU checker happy. */
 			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
@@ -1352,7 +1392,8 @@
 
 			ovs_notify(&dp_flow_genl_family, reply, info);
 		} else {
-			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
+					PTR_ERR(reply));
 		}
 	}
 
@@ -1373,8 +1414,8 @@
 	u32 ufid_flags;
 	int err;
 
-	err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
-			    OVS_FLOW_ATTR_MAX, flow_policy, NULL);
+	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
+				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
 	if (err)
 		return err;
 	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -1400,7 +1441,7 @@
 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   OVS_FLOW_CMD_NEW, ufid_flags) < 0)
+					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
 			break;
 
 		cb->args[0] = bucket;
@@ -1420,26 +1461,26 @@
 	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
 };
 
-static const struct genl_ops dp_flow_genl_ops[] = {
+static const struct genl_small_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_NEW,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new
 	},
 	{ .cmd = OVS_FLOW_CMD_DEL,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
 	},
 	{ .cmd = OVS_FLOW_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
-	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
 	},
 	{ .cmd = OVS_FLOW_CMD_SET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_set,
 	},
 };
@@ -1449,10 +1490,11 @@
 	.name = OVS_FLOW_FAMILY,
 	.version = OVS_FLOW_VERSION,
 	.maxattr = OVS_FLOW_ATTR_MAX,
+	.policy = flow_policy,
 	.netnsok = true,
 	.parallel_ops = true,
-	.ops = dp_flow_genl_ops,
-	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+	.small_ops = dp_flow_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
 	.mcgrps = &ovs_dp_flow_multicast_group,
 	.n_mcgrps = 1,
 	.module = THIS_MODULE,
@@ -1466,6 +1508,7 @@
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
 
 	return msgsize;
 }
@@ -1480,7 +1523,7 @@
 	int err;
 
 	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
-				   flags, cmd);
+				 flags, cmd);
 	if (!ovs_header)
 		goto error;
 
@@ -1501,6 +1544,10 @@
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
+			ovs_flow_tbl_masks_cache_size(&dp->table)))
 		goto nla_put_failure;
 
 	genlmsg_end(skb, ovs_header);
@@ -1535,22 +1582,84 @@
 	return dp ? dp : ERR_PTR(-ENODEV);
 }
 
-static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
+static void ovs_dp_reset_user_features(struct sk_buff *skb,
+				       struct genl_info *info)
 {
 	struct datapath *dp;
 
-	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+			     info->attrs);
 	if (IS_ERR(dp))
 		return;
 
-	WARN(dp->user_features, "Dropping previously announced user features\n");
+	pr_warn("%s: Dropping previously announced user features\n",
+		ovs_dp_name(dp));
 	dp->user_features = 0;
 }
 
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
-	if (a[OVS_DP_ATTR_USER_FEATURES])
-		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+	u32 user_features = 0;
+
+	if (a[OVS_DP_ATTR_USER_FEATURES]) {
+		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+				      OVS_DP_F_UNALIGNED |
+				      OVS_DP_F_TC_RECIRC_SHARING))
+			return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+			return -EOPNOTSUPP;
+#endif
+	}
+
+	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
+		int err;
+		u32 cache_size;
+
+		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
+		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
+		if (err)
+			return err;
+	}
+
+	dp->user_features = user_features;
+
+	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+		static_branch_enable(&tc_recirc_sharing_support);
+	else
+		static_branch_disable(&tc_recirc_sharing_support);
+
+	return 0;
+}
+
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+	if (!dp->stats_percpu)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+	int i;
+
+	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+				  sizeof(struct hlist_head),
+				  GFP_KERNEL);
+	if (!dp->ports)
+		return -ENOMEM;
+
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&dp->ports[i]);
+
+	return 0;
 }
 
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1561,7 +1670,7 @@
 	struct datapath *dp;
 	struct vport *vport;
 	struct ovs_net *ovs_net;
-	int err, i;
+	int err;
 
 	err = -EINVAL;
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1574,35 +1683,26 @@
 	err = -ENOMEM;
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 	if (dp == NULL)
-		goto err_free_reply;
+		goto err_destroy_reply;
 
 	ovs_dp_set_net(dp, sock_net(skb->sk));
 
 	/* Allocate table. */
 	err = ovs_flow_tbl_init(&dp->table);
 	if (err)
-		goto err_free_dp;
+		goto err_destroy_dp;
 
-	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
-	if (!dp->stats_percpu) {
-		err = -ENOMEM;
+	err = ovs_dp_stats_init(dp);
+	if (err)
 		goto err_destroy_table;
-	}
 
-	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
-				  sizeof(struct hlist_head),
-				  GFP_KERNEL);
-	if (!dp->ports) {
-		err = -ENOMEM;
-		goto err_destroy_percpu;
-	}
-
-	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->ports[i]);
+	err = ovs_dp_vport_init(dp);
+	if (err)
+		goto err_destroy_stats;
 
 	err = ovs_meters_init(dp);
 	if (err)
-		goto err_destroy_ports_array;
+		goto err_destroy_ports;
 
 	/* Set up our datapath device. */
 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1612,10 +1712,12 @@
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
 
-	ovs_dp_change(dp, a);
-
 	/* So far only local changes have been made, now need the lock. */
 	ovs_lock();
+
+	err = ovs_dp_change(dp, a);
+	if (err)
+		goto err_unlock_and_destroy_meters;
 
 	vport = new_vport(&parms);
 	if (IS_ERR(vport)) {
@@ -1632,7 +1734,7 @@
 			ovs_dp_reset_user_features(skb, info);
 		}
 
-		goto err_destroy_meters;
+		goto err_unlock_and_destroy_meters;
 	}
 
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1647,18 +1749,18 @@
 	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
-err_destroy_meters:
+err_unlock_and_destroy_meters:
 	ovs_unlock();
 	ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
 	kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
 	ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
 	kfree(dp);
-err_free_reply:
+err_destroy_reply:
 	kfree_skb(reply);
 err:
 	return err;
@@ -1667,6 +1769,7 @@
 /* Called with ovs_mutex. */
 static void __dp_destroy(struct datapath *dp)
 {
+	struct flow_table *table = &dp->table;
 	int i;
 
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
@@ -1685,7 +1788,14 @@
 	 */
 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
-	/* RCU destroy the flow table */
+	/* Flush sw_flow in the tables. RCU cb only releases resource
+	 * such as dp, ports and tables. That may avoid some issues
+	 * such as RCU usage warning.
+	 */
+	table_instance_flow_flush(table, ovsl_dereference(table->ti),
+				  ovsl_dereference(table->ufid_ti));
+
+	/* RCU destroy the ports, meters and flow tables. */
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
 
@@ -1738,10 +1848,12 @@
 	if (IS_ERR(dp))
 		goto err_unlock_free;
 
-	ovs_dp_change(dp, info->attrs);
+	err = ovs_dp_change(dp, info->attrs);
+	if (err)
+		goto err_unlock_free;
 
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
-				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+				   info->snd_seq, 0, OVS_DP_CMD_SET);
 	BUG_ON(err < 0);
 
 	ovs_unlock();
@@ -1772,7 +1884,7 @@
 		goto err_unlock_free;
 	}
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
-				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+				   info->snd_seq, 0, OVS_DP_CMD_GET);
 	BUG_ON(err < 0);
 	ovs_unlock();
 
@@ -1796,7 +1908,7 @@
 		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 OVS_DP_CMD_NEW) < 0)
+					 OVS_DP_CMD_GET) < 0)
 			break;
 		i++;
 	}
@@ -1811,28 +1923,30 @@
 	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
 	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
+		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
 };
 
-static const struct genl_ops dp_datapath_genl_ops[] = {
+static const struct genl_small_ops dp_datapath_genl_ops[] = {
 	{ .cmd = OVS_DP_CMD_NEW,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
 	},
 	{ .cmd = OVS_DP_CMD_DEL,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
 	},
 	{ .cmd = OVS_DP_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
-	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
 	},
 	{ .cmd = OVS_DP_CMD_SET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
 	},
 };
@@ -1842,10 +1956,11 @@
 	.name = OVS_DATAPATH_FAMILY,
 	.version = OVS_DATAPATH_VERSION,
 	.maxattr = OVS_DP_ATTR_MAX,
+	.policy = datapath_policy,
 	.netnsok = true,
 	.parallel_ops = true,
-	.ops = dp_datapath_genl_ops,
-	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+	.small_ops = dp_datapath_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
 	.mcgrps = &ovs_dp_datapath_multicast_group,
 	.n_mcgrps = 1,
 	.module = THIS_MODULE,
@@ -1964,16 +2079,16 @@
 
 }
 
-/* Called with ovs_mutex */
-static void update_headroom(struct datapath *dp)
+static unsigned int ovs_get_max_headroom(struct datapath *dp)
 {
-	unsigned dev_headroom, max_headroom = 0;
+	unsigned int dev_headroom, max_headroom = 0;
 	struct net_device *dev;
 	struct vport *vport;
 	int i;
 
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
-		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+					 lockdep_ovsl_is_held()) {
 			dev = vport->dev;
 			dev_headroom = netdev_get_fwd_headroom(dev);
 			if (dev_headroom > max_headroom)
@@ -1981,10 +2096,21 @@
 		}
 	}
 
-	dp->max_headroom = max_headroom;
-	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
-		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
-			netdev_set_rx_headroom(vport->dev, max_headroom);
+	return max_headroom;
+}
+
+/* Called with ovs_mutex */
+static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
+{
+	struct vport *vport;
+	int i;
+
+	dp->max_headroom = new_headroom;
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+					 lockdep_ovsl_is_held())
+			netdev_set_rx_headroom(vport->dev, new_headroom);
+	}
 }
 
 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
19902116 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
....@@ -1995,6 +2121,7 @@
19952121 struct sk_buff *reply;
19962122 struct vport *vport;
19972123 struct datapath *dp;
2124
+ unsigned int new_headroom;
19982125 u32 port_no;
19992126 int err;
20002127
@@ -2056,8 +2183,10 @@
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
 
-	if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
-		update_headroom(dp);
+	new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+	if (new_headroom > dp->max_headroom)
+		ovs_update_headroom(dp, new_headroom);
 	else
 		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
 
@@ -2113,7 +2242,7 @@
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
-				      OVS_VPORT_CMD_NEW, GFP_ATOMIC);
+				      OVS_VPORT_CMD_SET, GFP_KERNEL);
 	BUG_ON(err < 0);
 
 	ovs_unlock();
@@ -2128,11 +2257,12 @@
 
 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
-	bool must_update_headroom = false;
+	bool update_headroom = false;
 	struct nlattr **a = info->attrs;
 	struct sk_buff *reply;
 	struct datapath *dp;
 	struct vport *vport;
+	unsigned int new_headroom;
 	int err;
 
 	reply = ovs_vport_cmd_alloc_info();
@@ -2158,12 +2288,17 @@
 	/* the vport deletion may trigger dp headroom update */
 	dp = vport->dp;
 	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
-		must_update_headroom = true;
+		update_headroom = true;
+
 	netdev_reset_rx_headroom(vport->dev);
 	ovs_dp_detach_port(vport);
 
-	if (must_update_headroom)
-		update_headroom(dp);
+	if (update_headroom) {
+		new_headroom = ovs_get_max_headroom(dp);
+
+		if (new_headroom < dp->max_headroom)
+			ovs_update_headroom(dp, new_headroom);
+	}
 	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2194,7 +2329,7 @@
 		goto exit_unlock_free;
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
-				      OVS_VPORT_CMD_NEW, GFP_ATOMIC);
+				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
 	BUG_ON(err < 0);
 	rcu_read_unlock();
 
@@ -2230,7 +2365,7 @@
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
-						    OVS_VPORT_CMD_NEW,
+						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;
 
@@ -2247,6 +2382,23 @@
 	return skb->len;
 }
 
+static void ovs_dp_masks_rebalance(struct work_struct *work)
+{
+	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
+					       masks_rebalance.work);
+	struct datapath *dp;
+
+	ovs_lock();
+
+	list_for_each_entry(dp, &ovs_net->dps, list_node)
+		ovs_flow_masks_rebalance(&dp->table);
+
+	ovs_unlock();
+
+	schedule_delayed_work(&ovs_net->masks_rebalance,
+			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
+}
+
 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
 	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
 	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
@@ -2258,26 +2410,26 @@
 	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
 };
 
-static const struct genl_ops dp_vport_genl_ops[] = {
+static const struct genl_small_ops dp_vport_genl_ops[] = {
 	{ .cmd = OVS_VPORT_CMD_NEW,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_new
 	},
 	{ .cmd = OVS_VPORT_CMD_DEL,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_del
 	},
 	{ .cmd = OVS_VPORT_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
-	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
 	},
 	{ .cmd = OVS_VPORT_CMD_SET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_set,
 	},
 };
@@ -2287,10 +2439,11 @@
 	.name = OVS_VPORT_FAMILY,
 	.version = OVS_VPORT_VERSION,
 	.maxattr = OVS_VPORT_ATTR_MAX,
+	.policy = vport_policy,
 	.netnsok = true,
 	.parallel_ops = true,
-	.ops = dp_vport_genl_ops,
-	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
+	.small_ops = dp_vport_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
 	.mcgrps = &ovs_dp_vport_multicast_group,
 	.n_mcgrps = 1,
 	.module = THIS_MODULE,
@@ -2337,10 +2490,19 @@
 static int __net_init ovs_init_net(struct net *net)
 {
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	int err;
 
 	INIT_LIST_HEAD(&ovs_net->dps);
 	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
-	return ovs_ct_init(net);
+	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
+
+	err = ovs_ct_init(net);
+	if (err)
+		return err;
+
+	schedule_delayed_work(&ovs_net->masks_rebalance,
+			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
+	return 0;
 }
 
 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
@@ -2374,8 +2536,10 @@
 	struct net *net;
 	LIST_HEAD(head);
 
-	ovs_ct_exit(dnet);
 	ovs_lock();
+
+	ovs_ct_exit(dnet);
+
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
 
@@ -2392,6 +2556,7 @@
 
 	ovs_unlock();
 
+	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
 	cancel_work_sync(&ovs_net->dp_notify_work);
 }
 
@@ -2406,7 +2571,8 @@
 {
 	int err;
 
-	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
+		     sizeof_field(struct sk_buff, cb));
 
 	pr_info("Open vSwitch switching datapath\n");
 