hc
2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/net/ipv4/fib_frontend.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -6,11 +7,6 @@
67 * IPv4 Forwarding Information Base: FIB frontend.
78 *
89 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9
- *
10
- * This program is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU General Public License
12
- * as published by the Free Software Foundation; either version
13
- * 2 of the License, or (at your option) any later version.
1410 */
1511
1612 #include <linux/module.h>
....@@ -43,6 +39,7 @@
4339 #include <net/sock.h>
4440 #include <net/arp.h>
4541 #include <net/ip_fib.h>
42
+#include <net/nexthop.h>
4643 #include <net/rtnetlink.h>
4744 #include <net/xfrm.h>
4845 #include <net/l3mdev.h>
....@@ -72,11 +69,6 @@
7269 fail:
7370 fib_free_table(main_table);
7471 return -ENOMEM;
75
-}
76
-
77
-static bool fib4_has_custom_rules(struct net *net)
78
-{
79
- return false;
8072 }
8173 #else
8274
....@@ -127,16 +119,12 @@
127119 h = id & (FIB_TABLE_HASHSZ - 1);
128120
129121 head = &net->ipv4.fib_table_hash[h];
130
- hlist_for_each_entry_rcu(tb, head, tb_hlist) {
122
+ hlist_for_each_entry_rcu(tb, head, tb_hlist,
123
+ lockdep_rtnl_is_held()) {
131124 if (tb->tb_id == id)
132125 return tb;
133126 }
134127 return NULL;
135
-}
136
-
137
-static bool fib4_has_custom_rules(struct net *net)
138
-{
139
- return net->ipv4.fib_has_custom_rules;
140128 }
141129 #endif /* CONFIG_IP_MULTIPLE_TABLES */
142130
....@@ -192,7 +180,7 @@
192180 return 0;
193181 }
194182
195
-static void fib_flush(struct net *net)
183
+void fib_flush(struct net *net)
196184 {
197185 int flushed = 0;
198186 unsigned int h;
....@@ -234,7 +222,9 @@
234222 if (table) {
235223 ret = RTN_UNICAST;
236224 if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
237
- if (!dev || dev == res.fi->fib_dev)
225
+ struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
226
+
227
+ if (!dev || dev == nhc->nhc_dev)
238228 ret = res.type;
239229 }
240230 }
....@@ -307,13 +297,40 @@
307297 .flowi4_mark = vmark ? skb->mark : 0,
308298 };
309299 if (!fib_lookup(net, &fl4, &res, 0))
310
- return FIB_RES_PREFSRC(net, res);
300
+ return fib_result_prefsrc(net, &res);
311301 } else {
312302 scope = RT_SCOPE_LINK;
313303 }
314304
315305 return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
316306 }
307
+
308
/*
 * fib_info_nh_uses_dev - report whether a fib_info has a path that uses @dev.
 * @fi:  FIB info entry to inspect.
 * @dev: network device to look for.
 *
 * With CONFIG_IP_ROUTE_MULTIPATH:
 *   - if fi->nh is set, the answer is whatever nexthop_uses_dev(fi->nh, dev)
 *     returns;
 *   - otherwise every path index in [0, fib_info_num_path(fi)) is checked
 *     with nhc_l3mdev_matches_dev() and the scan stops at the first match.
 * Without CONFIG_IP_ROUTE_MULTIPATH only path 0 is examined, and it matches
 * only when its nhc_dev pointer is exactly @dev.
 *
 * Return: true if a matching path was found, false otherwise.
 */
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
309
+{
310
+ bool dev_match = false;
311
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
312
+ if (unlikely(fi->nh)) { /* fi->nh set: defer to nexthop_uses_dev() */
313
+ dev_match = nexthop_uses_dev(fi->nh, dev);
314
+ } else {
315
+ int ret; /* used here as the path index, not as a return code */
316
+
317
+ for (ret = 0; ret < fib_info_num_path(fi); ret++) {
318
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
319
+
320
+ if (nhc_l3mdev_matches_dev(nhc, dev)) {
321
+ dev_match = true;
322
+ break;
323
+ }
324
+ }
325
+ }
326
+#else
327
+ if (fib_info_nhc(fi, 0)->nhc_dev == dev)
328
+ dev_match = true;
329
+#endif
330
+
331
+ return dev_match;
332
+}
333
+EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
317334
318335 /* Given (packet source, input interface) and optional (dst, oif, tos):
319336 * - (main) check, that source is valid i.e. not broadcast or our local
....@@ -345,6 +362,7 @@
345362 fl4.flowi4_tun_key.tun_id = 0;
346363 fl4.flowi4_flags = 0;
347364 fl4.flowi4_uid = sock_net_uid(net, NULL);
365
+ fl4.flowi4_multipath_hash = 0;
348366
349367 no_addr = idev->ifa_list == NULL;
350368
....@@ -363,26 +381,15 @@
363381 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
364382 goto e_inval;
365383 fib_combine_itag(itag, &res);
366
- dev_match = false;
367384
368
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
369
- for (ret = 0; ret < res.fi->fib_nhs; ret++) {
370
- struct fib_nh *nh = &res.fi->fib_nh[ret];
371
-
372
- if (nh->nh_dev == dev) {
373
- dev_match = true;
374
- break;
375
- } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
376
- dev_match = true;
377
- break;
378
- }
379
- }
380
-#else
381
- if (FIB_RES_DEV(res) == dev)
382
- dev_match = true;
383
-#endif
385
+ dev_match = fib_info_nh_uses_dev(res.fi, dev);
386
+ /* This is not common, loopback packets retain skb_dst so normally they
387
+ * would not even hit this slow path.
388
+ */
389
+ dev_match = dev_match || (res.type == RTN_LOCAL &&
390
+ dev == net->loopback_dev);
384391 if (dev_match) {
385
- ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
392
+ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
386393 return ret;
387394 }
388395 if (no_addr)
....@@ -394,7 +401,7 @@
394401 ret = 0;
395402 if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
396403 if (res.type == RTN_UNICAST)
397
- ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
404
+ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
398405 }
399406 return ret;
400407
....@@ -532,14 +539,22 @@
532539 cfg->fc_oif = dev->ifindex;
533540 cfg->fc_table = l3mdev_fib_table(dev);
534541 if (colon) {
535
- struct in_ifaddr *ifa;
536
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
542
+ const struct in_ifaddr *ifa;
543
+ struct in_device *in_dev;
544
+
545
+ in_dev = __in_dev_get_rtnl(dev);
537546 if (!in_dev)
538547 return -ENODEV;
548
+
539549 *colon = ':';
540
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
550
+
551
+ rcu_read_lock();
552
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
541553 if (strcmp(ifa->ifa_label, devname) == 0)
542554 break;
555
+ }
556
+ rcu_read_unlock();
557
+
543558 if (!ifa)
544559 return -ENODEV;
545560 cfg->fc_prefsrc = ifa->ifa_local;
....@@ -550,17 +565,21 @@
550565 if (rt->rt_gateway.sa_family == AF_INET && addr) {
551566 unsigned int addr_type;
552567
553
- cfg->fc_gw = addr;
568
+ cfg->fc_gw4 = addr;
569
+ cfg->fc_gw_family = AF_INET;
554570 addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
555571 if (rt->rt_flags & RTF_GATEWAY &&
556572 addr_type == RTN_UNICAST)
557573 cfg->fc_scope = RT_SCOPE_UNIVERSE;
558574 }
559575
576
+ if (!cfg->fc_table)
577
+ cfg->fc_table = RT_TABLE_MAIN;
578
+
560579 if (cmd == SIOCDELRT)
561580 return 0;
562581
563
- if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
582
+ if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
564583 return -EINVAL;
565584
566585 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
....@@ -636,6 +655,7 @@
636655 }
637656
638657 const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
658
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
639659 [RTA_DST] = { .type = NLA_U32 },
640660 [RTA_SRC] = { .type = NLA_U32 },
641661 [RTA_IIF] = { .type = NLA_U32 },
....@@ -654,18 +674,64 @@
654674 [RTA_IP_PROTO] = { .type = NLA_U8 },
655675 [RTA_SPORT] = { .type = NLA_U16 },
656676 [RTA_DPORT] = { .type = NLA_U16 },
677
+ [RTA_NH_ID] = { .type = NLA_U32 },
657678 };
679
+
680
/*
 * fib_gw_from_via - fill the gateway fields of @cfg from an RTA_VIA attribute.
 * @cfg:    route configuration to update (fc_gw_family plus fc_gw4 or fc_gw6).
 * @nla:    the attribute; its payload is read as a struct rtvia.
 * @extack: extended ack used to report a human-readable error message.
 *
 * The payload must be at least offsetof(struct rtvia, rtvia_addr) bytes long;
 * the bytes after that offset are the address, whose length must match the
 * family exactly:
 *   - AF_INET:  sizeof(__be32), stored in cfg->fc_gw4;
 *   - AF_INET6: sizeof(struct in6_addr), stored in cfg->fc_gw6, and accepted
 *               only when IS_ENABLED(CONFIG_IPV6).
 * Any other family is rejected.
 *
 * Return: 0 on success, -EINVAL (with a message set in @extack) on any
 * validation failure. @cfg is not written on the failure paths.
 */
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
681
+ struct netlink_ext_ack *extack)
682
+{
683
+ struct rtvia *via;
684
+ int alen;
685
+
686
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
687
+ NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
688
+ return -EINVAL;
689
+ }
690
+
691
+ via = nla_data(nla);
692
+ alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr); /* address bytes only */
693
+
694
+ switch (via->rtvia_family) {
695
+ case AF_INET:
696
+ if (alen != sizeof(__be32)) {
697
+ NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
698
+ return -EINVAL;
699
+ }
700
+ cfg->fc_gw_family = AF_INET;
701
+ cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
702
+ break;
703
+ case AF_INET6:
704
+#if IS_ENABLED(CONFIG_IPV6)
705
+ if (alen != sizeof(struct in6_addr)) {
706
+ NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
707
+ return -EINVAL;
708
+ }
709
+ cfg->fc_gw_family = AF_INET6;
710
+ cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
711
+#else
712
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
713
+ return -EINVAL;
714
+#endif
715
+ break;
716
+ default:
717
+ NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
718
+ return -EINVAL;
719
+ }
720
+
721
+ return 0;
722
+}
658723
659724 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
660725 struct nlmsghdr *nlh, struct fib_config *cfg,
661726 struct netlink_ext_ack *extack)
662727 {
728
+ bool has_gw = false, has_via = false;
663729 struct nlattr *attr;
664730 int err, remaining;
665731 struct rtmsg *rtm;
666732
667
- err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
668
- extack);
733
+ err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
734
+ rtm_ipv4_policy, extack);
669735 if (err < 0)
670736 goto errout;
671737
....@@ -700,12 +766,17 @@
700766 cfg->fc_oif = nla_get_u32(attr);
701767 break;
702768 case RTA_GATEWAY:
703
- cfg->fc_gw = nla_get_be32(attr);
769
+ has_gw = true;
770
+ cfg->fc_gw4 = nla_get_be32(attr);
771
+ if (cfg->fc_gw4)
772
+ cfg->fc_gw_family = AF_INET;
704773 break;
705774 case RTA_VIA:
706
- NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
707
- err = -EINVAL;
708
- goto errout;
775
+ has_via = true;
776
+ err = fib_gw_from_via(cfg, attr, extack);
777
+ if (err)
778
+ goto errout;
779
+ break;
709780 case RTA_PRIORITY:
710781 cfg->fc_priority = nla_get_u32(attr);
711782 break;
....@@ -741,8 +812,29 @@
741812 if (err < 0)
742813 goto errout;
743814 break;
815
+ case RTA_NH_ID:
816
+ cfg->fc_nh_id = nla_get_u32(attr);
817
+ break;
744818 }
745819 }
820
+
821
+ if (cfg->fc_nh_id) {
822
+ if (cfg->fc_oif || cfg->fc_gw_family ||
823
+ cfg->fc_encap || cfg->fc_mp) {
824
+ NL_SET_ERR_MSG(extack,
825
+ "Nexthop specification and nexthop id are mutually exclusive");
826
+ return -EINVAL;
827
+ }
828
+ }
829
+
830
+ if (has_gw && has_via) {
831
+ NL_SET_ERR_MSG(extack,
832
+ "Nexthop configuration can not contain both GATEWAY and VIA");
833
+ return -EINVAL;
834
+ }
835
+
836
+ if (!cfg->fc_table)
837
+ cfg->fc_table = RT_TABLE_MAIN;
746838
747839 return 0;
748840 errout:
....@@ -760,6 +852,12 @@
760852 err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
761853 if (err < 0)
762854 goto errout;
855
+
856
+ if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
857
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
858
+ err = -EINVAL;
859
+ goto errout;
860
+ }
763861
764862 tb = fib_get_table(net, cfg.fc_table);
765863 if (!tb) {
....@@ -798,8 +896,85 @@
798896 return err;
799897 }
800898
899
/*
 * ip_valid_fib_dump_req - validate a FIB dump request and build its filter.
 * @net:    network namespace used to resolve an RTA_OIF ifindex.
 * @nlh:    netlink message header of the dump request.
 * @filter: filter to populate from the rtmsg header and attributes.
 * @cb:     netlink callback; supplies the extack and receives answer_flags.
 *
 * Asserts that RTNL is held (ASSERT_RTNL()).
 *
 * Header checks, each failing with -EINVAL:
 *   - the message must be large enough to hold a struct rtmsg;
 *   - rtm_dst_len, rtm_src_len, rtm_tos and rtm_scope must all be zero;
 *   - rtm_flags may contain only RTM_F_CLONED and RTM_F_PREFIX.
 *
 * RTM_F_CLONED clears filter->dump_routes; without it
 * filter->dump_exceptions is cleared instead. flags, protocol, rt_type and
 * table_id are then copied from the header.
 *
 * Attributes are parsed with nlmsg_parse_deprecated_strict() against
 * rtm_ipv4_policy. Only two are accepted:
 *   - RTA_TABLE overrides filter->table_id;
 *   - RTA_OIF is resolved with __dev_get_by_index() into filter->dev.
 * Any other attribute present is rejected.
 *
 * If any of flags, protocol, rt_type, table_id or dev ends up non-zero,
 * filter->filter_set is set and cb->answer_flags becomes
 * NLM_F_DUMP_FILTERED.
 *
 * Return: 0 on success; -EINVAL for a malformed header or unsupported
 * attribute; -ENODEV if the RTA_OIF ifindex does not resolve; or the
 * negative error returned by the attribute parser.
 */
+int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
900
+ struct fib_dump_filter *filter,
901
+ struct netlink_callback *cb)
902
+{
903
+ struct netlink_ext_ack *extack = cb->extack;
904
+ struct nlattr *tb[RTA_MAX + 1];
905
+ struct rtmsg *rtm;
906
+ int err, i;
907
+
908
+ ASSERT_RTNL();
909
+
910
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
911
+ NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
912
+ return -EINVAL;
913
+ }
914
+
915
+ rtm = nlmsg_data(nlh);
916
+ if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
917
+ rtm->rtm_scope) {
918
+ NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
919
+ return -EINVAL;
920
+ }
921
+
922
+ if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
923
+ NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
924
+ return -EINVAL;
925
+ }
926
+ if (rtm->rtm_flags & RTM_F_CLONED)
927
+ filter->dump_routes = false;
928
+ else
929
+ filter->dump_exceptions = false;
930
+
931
+ filter->flags = rtm->rtm_flags;
932
+ filter->protocol = rtm->rtm_protocol;
933
+ filter->rt_type = rtm->rtm_type;
934
+ filter->table_id = rtm->rtm_table; /* may be overridden by RTA_TABLE below */
935
+
936
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
937
+ rtm_ipv4_policy, extack);
938
+ if (err < 0)
939
+ return err;
940
+
941
+ for (i = 0; i <= RTA_MAX; ++i) {
942
+ int ifindex;
943
+
944
+ if (!tb[i])
945
+ continue;
946
+
947
+ switch (i) {
948
+ case RTA_TABLE:
949
+ filter->table_id = nla_get_u32(tb[i]);
950
+ break;
951
+ case RTA_OIF:
952
+ ifindex = nla_get_u32(tb[i]);
953
+ filter->dev = __dev_get_by_index(net, ifindex);
954
+ if (!filter->dev)
955
+ return -ENODEV;
956
+ break;
957
+ default:
958
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
959
+ return -EINVAL;
960
+ }
961
+ }
962
+
963
+ if (filter->flags || filter->protocol || filter->rt_type ||
964
+ filter->table_id || filter->dev) {
965
+ filter->filter_set = 1;
966
+ cb->answer_flags = NLM_F_DUMP_FILTERED;
967
+ }
968
+
969
+ return 0;
970
+}
971
+EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
972
+
801973 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
802974 {
975
+ struct fib_dump_filter filter = { .dump_routes = true,
976
+ .dump_exceptions = true };
977
+ const struct nlmsghdr *nlh = cb->nlh;
803978 struct net *net = sock_net(skb->sk);
804979 unsigned int h, s_h;
805980 unsigned int e = 0, s_e;
....@@ -807,9 +982,35 @@
807982 struct hlist_head *head;
808983 int dumped = 0, err;
809984
810
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
811
- ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
985
+ if (cb->strict_check) {
986
+ err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
987
+ if (err < 0)
988
+ return err;
989
+ } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
990
+ struct rtmsg *rtm = nlmsg_data(nlh);
991
+
992
+ filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
993
+ }
994
+
995
+ /* ipv4 does not use prefix flag */
996
+ if (filter.flags & RTM_F_PREFIX)
812997 return skb->len;
998
+
999
+ if (filter.table_id) {
1000
+ tb = fib_get_table(net, filter.table_id);
1001
+ if (!tb) {
1002
+ if (rtnl_msg_family(cb->nlh) != PF_INET)
1003
+ return skb->len;
1004
+
1005
+ NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
1006
+ return -ENOENT;
1007
+ }
1008
+
1009
+ rcu_read_lock();
1010
+ err = fib_table_dump(tb, skb, cb, &filter);
1011
+ rcu_read_unlock();
1012
+ return skb->len ? : err;
1013
+ }
8131014
8141015 s_h = cb->args[0];
8151016 s_e = cb->args[1];
....@@ -825,7 +1026,7 @@
8251026 if (dumped)
8261027 memset(&cb->args[2], 0, sizeof(cb->args) -
8271028 2 * sizeof(cb->args[0]));
828
- err = fib_table_dump(tb, skb, cb);
1029
+ err = fib_table_dump(tb, skb, cb, &filter);
8291030 if (err < 0) {
8301031 if (likely(skb->len))
8311032 goto out;
....@@ -917,9 +1118,11 @@
9171118 return;
9181119
9191120 /* Add broadcast address, if it is explicitly assigned. */
920
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1121
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
9211122 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
9221123 prim, 0);
1124
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
1125
+ }
9231126
9241127 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
9251128 (prefix != addr || ifa->ifa_prefixlen < 32)) {
....@@ -935,6 +1138,7 @@
9351138 prim, 0);
9361139 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
9371140 32, prim, 0);
1141
+ arp_invalidate(dev, prefix | ~mask, false);
9381142 }
9391143 }
9401144 }
....@@ -1015,8 +1219,8 @@
10151219 *
10161220 * Scan address list to be sure that addresses are really gone.
10171221 */
1018
-
1019
- for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
1222
+ rcu_read_lock();
1223
+ in_dev_for_each_ifa_rcu(ifa1, in_dev) {
10201224 if (ifa1 == ifa) {
10211225 /* promotion, keep the IP */
10221226 gone = 0;
....@@ -1084,6 +1288,7 @@
10841288 }
10851289 }
10861290 }
1291
+ rcu_read_unlock();
10871292
10881293 no_promotions:
10891294 if (!(ok & BRD_OK))
....@@ -1253,6 +1458,7 @@
12531458 struct netdev_notifier_info_ext *info_ext = ptr;
12541459 struct in_device *in_dev;
12551460 struct net *net = dev_net(dev);
1461
+ struct in_ifaddr *ifa;
12561462 unsigned int flags;
12571463
12581464 if (event == NETDEV_UNREGISTER) {
....@@ -1267,9 +1473,9 @@
12671473
12681474 switch (event) {
12691475 case NETDEV_UP:
1270
- for_ifa(in_dev) {
1476
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
12711477 fib_add_ifaddr(ifa);
1272
- } endfor_ifa(in_dev);
1478
+ }
12731479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
12741480 fib_sync_up(dev, RTNH_F_DEAD);
12751481 #endif
....@@ -1381,7 +1587,7 @@
13811587 int error;
13821588
13831589 #ifdef CONFIG_IP_ROUTE_CLASSID
1384
- net->ipv4.fib_num_tclassid_users = 0;
1590
+ atomic_set(&net->ipv4.fib_num_tclassid_users, 0);
13851591 #endif
13861592 error = ip_fib_net_init(net);
13871593 if (error < 0)