forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/net/ipv4/fib_frontend.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -6,11 +7,6 @@
67 * IPv4 Forwarding Information Base: FIB frontend.
78 *
89 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9
- *
10
- * This program is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU General Public License
12
- * as published by the Free Software Foundation; either version
13
- * 2 of the License, or (at your option) any later version.
1410 */
1511
1612 #include <linux/module.h>
....@@ -43,6 +39,7 @@
4339 #include <net/sock.h>
4440 #include <net/arp.h>
4541 #include <net/ip_fib.h>
42
+#include <net/nexthop.h>
4643 #include <net/rtnetlink.h>
4744 #include <net/xfrm.h>
4845 #include <net/l3mdev.h>
....@@ -72,11 +69,6 @@
7269 fail:
7370 fib_free_table(main_table);
7471 return -ENOMEM;
75
-}
76
-
77
-static bool fib4_has_custom_rules(struct net *net)
78
-{
79
- return false;
8072 }
8173 #else
8274
....@@ -127,16 +119,12 @@
127119 h = id & (FIB_TABLE_HASHSZ - 1);
128120
129121 head = &net->ipv4.fib_table_hash[h];
130
- hlist_for_each_entry_rcu(tb, head, tb_hlist) {
122
+ hlist_for_each_entry_rcu(tb, head, tb_hlist,
123
+ lockdep_rtnl_is_held()) {
131124 if (tb->tb_id == id)
132125 return tb;
133126 }
134127 return NULL;
135
-}
136
-
137
-static bool fib4_has_custom_rules(struct net *net)
138
-{
139
- return net->ipv4.fib_has_custom_rules;
140128 }
141129 #endif /* CONFIG_IP_MULTIPLE_TABLES */
142130
....@@ -192,7 +180,7 @@
192180 return 0;
193181 }
194182
195
-static void fib_flush(struct net *net)
183
+void fib_flush(struct net *net)
196184 {
197185 int flushed = 0;
198186 unsigned int h;
....@@ -234,7 +222,9 @@
234222 if (table) {
235223 ret = RTN_UNICAST;
236224 if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
237
- if (!dev || dev == res.fi->fib_dev)
225
+ struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
226
+
227
+ if (!dev || dev == nhc->nhc_dev)
238228 ret = res.type;
239229 }
240230 }
....@@ -307,13 +297,40 @@
307297 .flowi4_mark = vmark ? skb->mark : 0,
308298 };
309299 if (!fib_lookup(net, &fl4, &res, 0))
310
- return FIB_RES_PREFSRC(net, res);
300
+ return fib_result_prefsrc(net, &res);
311301 } else {
312302 scope = RT_SCOPE_LINK;
313303 }
314304
315305 return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
316306 }
307
+
308
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
309
+{
310
+ bool dev_match = false;
311
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
312
+ if (unlikely(fi->nh)) {
313
+ dev_match = nexthop_uses_dev(fi->nh, dev);
314
+ } else {
315
+ int ret;
316
+
317
+ for (ret = 0; ret < fib_info_num_path(fi); ret++) {
318
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
319
+
320
+ if (nhc_l3mdev_matches_dev(nhc, dev)) {
321
+ dev_match = true;
322
+ break;
323
+ }
324
+ }
325
+ }
326
+#else
327
+ if (fib_info_nhc(fi, 0)->nhc_dev == dev)
328
+ dev_match = true;
329
+#endif
330
+
331
+ return dev_match;
332
+}
333
+EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
317334
318335 /* Given (packet source, input interface) and optional (dst, oif, tos):
319336 * - (main) check, that source is valid i.e. not broadcast or our local
....@@ -345,6 +362,7 @@
345362 fl4.flowi4_tun_key.tun_id = 0;
346363 fl4.flowi4_flags = 0;
347364 fl4.flowi4_uid = sock_net_uid(net, NULL);
365
+ fl4.flowi4_multipath_hash = 0;
348366
349367 no_addr = idev->ifa_list == NULL;
350368
....@@ -363,26 +381,15 @@
363381 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
364382 goto e_inval;
365383 fib_combine_itag(itag, &res);
366
- dev_match = false;
367384
368
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
369
- for (ret = 0; ret < res.fi->fib_nhs; ret++) {
370
- struct fib_nh *nh = &res.fi->fib_nh[ret];
371
-
372
- if (nh->nh_dev == dev) {
373
- dev_match = true;
374
- break;
375
- } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
376
- dev_match = true;
377
- break;
378
- }
379
- }
380
-#else
381
- if (FIB_RES_DEV(res) == dev)
382
- dev_match = true;
383
-#endif
385
+ dev_match = fib_info_nh_uses_dev(res.fi, dev);
386
+ /* This is not common, loopback packets retain skb_dst so normally they
387
+ * would not even hit this slow path.
388
+ */
389
+ dev_match = dev_match || (res.type == RTN_LOCAL &&
390
+ dev == net->loopback_dev);
384391 if (dev_match) {
385
- ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
392
+ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
386393 return ret;
387394 }
388395 if (no_addr)
....@@ -394,7 +401,7 @@
394401 ret = 0;
395402 if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
396403 if (res.type == RTN_UNICAST)
397
- ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
404
+ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
398405 }
399406 return ret;
400407
....@@ -532,14 +539,22 @@
532539 cfg->fc_oif = dev->ifindex;
533540 cfg->fc_table = l3mdev_fib_table(dev);
534541 if (colon) {
535
- struct in_ifaddr *ifa;
536
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
542
+ const struct in_ifaddr *ifa;
543
+ struct in_device *in_dev;
544
+
545
+ in_dev = __in_dev_get_rtnl(dev);
537546 if (!in_dev)
538547 return -ENODEV;
548
+
539549 *colon = ':';
540
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
550
+
551
+ rcu_read_lock();
552
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
541553 if (strcmp(ifa->ifa_label, devname) == 0)
542554 break;
555
+ }
556
+ rcu_read_unlock();
557
+
543558 if (!ifa)
544559 return -ENODEV;
545560 cfg->fc_prefsrc = ifa->ifa_local;
....@@ -550,7 +565,8 @@
550565 if (rt->rt_gateway.sa_family == AF_INET && addr) {
551566 unsigned int addr_type;
552567
553
- cfg->fc_gw = addr;
568
+ cfg->fc_gw4 = addr;
569
+ cfg->fc_gw_family = AF_INET;
554570 addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
555571 if (rt->rt_flags & RTF_GATEWAY &&
556572 addr_type == RTN_UNICAST)
....@@ -560,7 +576,7 @@
560576 if (cmd == SIOCDELRT)
561577 return 0;
562578
563
- if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
579
+ if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
564580 return -EINVAL;
565581
566582 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
....@@ -636,6 +652,7 @@
636652 }
637653
638654 const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
655
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
639656 [RTA_DST] = { .type = NLA_U32 },
640657 [RTA_SRC] = { .type = NLA_U32 },
641658 [RTA_IIF] = { .type = NLA_U32 },
....@@ -654,18 +671,64 @@
654671 [RTA_IP_PROTO] = { .type = NLA_U8 },
655672 [RTA_SPORT] = { .type = NLA_U16 },
656673 [RTA_DPORT] = { .type = NLA_U16 },
674
+ [RTA_NH_ID] = { .type = NLA_U32 },
657675 };
676
+
677
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
678
+ struct netlink_ext_ack *extack)
679
+{
680
+ struct rtvia *via;
681
+ int alen;
682
+
683
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
684
+ NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
685
+ return -EINVAL;
686
+ }
687
+
688
+ via = nla_data(nla);
689
+ alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
690
+
691
+ switch (via->rtvia_family) {
692
+ case AF_INET:
693
+ if (alen != sizeof(__be32)) {
694
+ NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
695
+ return -EINVAL;
696
+ }
697
+ cfg->fc_gw_family = AF_INET;
698
+ cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
699
+ break;
700
+ case AF_INET6:
701
+#if IS_ENABLED(CONFIG_IPV6)
702
+ if (alen != sizeof(struct in6_addr)) {
703
+ NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
704
+ return -EINVAL;
705
+ }
706
+ cfg->fc_gw_family = AF_INET6;
707
+ cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
708
+#else
709
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
710
+ return -EINVAL;
711
+#endif
712
+ break;
713
+ default:
714
+ NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
715
+ return -EINVAL;
716
+ }
717
+
718
+ return 0;
719
+}
658720
659721 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
660722 struct nlmsghdr *nlh, struct fib_config *cfg,
661723 struct netlink_ext_ack *extack)
662724 {
725
+ bool has_gw = false, has_via = false;
663726 struct nlattr *attr;
664727 int err, remaining;
665728 struct rtmsg *rtm;
666729
667
- err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
668
- extack);
730
+ err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
731
+ rtm_ipv4_policy, extack);
669732 if (err < 0)
670733 goto errout;
671734
....@@ -700,12 +763,17 @@
700763 cfg->fc_oif = nla_get_u32(attr);
701764 break;
702765 case RTA_GATEWAY:
703
- cfg->fc_gw = nla_get_be32(attr);
766
+ has_gw = true;
767
+ cfg->fc_gw4 = nla_get_be32(attr);
768
+ if (cfg->fc_gw4)
769
+ cfg->fc_gw_family = AF_INET;
704770 break;
705771 case RTA_VIA:
706
- NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
707
- err = -EINVAL;
708
- goto errout;
772
+ has_via = true;
773
+ err = fib_gw_from_via(cfg, attr, extack);
774
+ if (err)
775
+ goto errout;
776
+ break;
709777 case RTA_PRIORITY:
710778 cfg->fc_priority = nla_get_u32(attr);
711779 break;
....@@ -741,8 +809,29 @@
741809 if (err < 0)
742810 goto errout;
743811 break;
812
+ case RTA_NH_ID:
813
+ cfg->fc_nh_id = nla_get_u32(attr);
814
+ break;
744815 }
745816 }
817
+
818
+ if (cfg->fc_nh_id) {
819
+ if (cfg->fc_oif || cfg->fc_gw_family ||
820
+ cfg->fc_encap || cfg->fc_mp) {
821
+ NL_SET_ERR_MSG(extack,
822
+ "Nexthop specification and nexthop id are mutually exclusive");
823
+ return -EINVAL;
824
+ }
825
+ }
826
+
827
+ if (has_gw && has_via) {
828
+ NL_SET_ERR_MSG(extack,
829
+ "Nexthop configuration can not contain both GATEWAY and VIA");
830
+ return -EINVAL;
831
+ }
832
+
833
+ if (!cfg->fc_table)
834
+ cfg->fc_table = RT_TABLE_MAIN;
746835
747836 return 0;
748837 errout:
....@@ -760,6 +849,12 @@
760849 err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
761850 if (err < 0)
762851 goto errout;
852
+
853
+ if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
854
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
855
+ err = -EINVAL;
856
+ goto errout;
857
+ }
763858
764859 tb = fib_get_table(net, cfg.fc_table);
765860 if (!tb) {
....@@ -798,8 +893,85 @@
798893 return err;
799894 }
800895
896
+int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
897
+ struct fib_dump_filter *filter,
898
+ struct netlink_callback *cb)
899
+{
900
+ struct netlink_ext_ack *extack = cb->extack;
901
+ struct nlattr *tb[RTA_MAX + 1];
902
+ struct rtmsg *rtm;
903
+ int err, i;
904
+
905
+ ASSERT_RTNL();
906
+
907
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
908
+ NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
909
+ return -EINVAL;
910
+ }
911
+
912
+ rtm = nlmsg_data(nlh);
913
+ if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
914
+ rtm->rtm_scope) {
915
+ NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
916
+ return -EINVAL;
917
+ }
918
+
919
+ if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
920
+ NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
921
+ return -EINVAL;
922
+ }
923
+ if (rtm->rtm_flags & RTM_F_CLONED)
924
+ filter->dump_routes = false;
925
+ else
926
+ filter->dump_exceptions = false;
927
+
928
+ filter->flags = rtm->rtm_flags;
929
+ filter->protocol = rtm->rtm_protocol;
930
+ filter->rt_type = rtm->rtm_type;
931
+ filter->table_id = rtm->rtm_table;
932
+
933
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
934
+ rtm_ipv4_policy, extack);
935
+ if (err < 0)
936
+ return err;
937
+
938
+ for (i = 0; i <= RTA_MAX; ++i) {
939
+ int ifindex;
940
+
941
+ if (!tb[i])
942
+ continue;
943
+
944
+ switch (i) {
945
+ case RTA_TABLE:
946
+ filter->table_id = nla_get_u32(tb[i]);
947
+ break;
948
+ case RTA_OIF:
949
+ ifindex = nla_get_u32(tb[i]);
950
+ filter->dev = __dev_get_by_index(net, ifindex);
951
+ if (!filter->dev)
952
+ return -ENODEV;
953
+ break;
954
+ default:
955
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
956
+ return -EINVAL;
957
+ }
958
+ }
959
+
960
+ if (filter->flags || filter->protocol || filter->rt_type ||
961
+ filter->table_id || filter->dev) {
962
+ filter->filter_set = 1;
963
+ cb->answer_flags = NLM_F_DUMP_FILTERED;
964
+ }
965
+
966
+ return 0;
967
+}
968
+EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
969
+
801970 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
802971 {
972
+ struct fib_dump_filter filter = { .dump_routes = true,
973
+ .dump_exceptions = true };
974
+ const struct nlmsghdr *nlh = cb->nlh;
803975 struct net *net = sock_net(skb->sk);
804976 unsigned int h, s_h;
805977 unsigned int e = 0, s_e;
....@@ -807,9 +979,35 @@
807979 struct hlist_head *head;
808980 int dumped = 0, err;
809981
810
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
811
- ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
982
+ if (cb->strict_check) {
983
+ err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
984
+ if (err < 0)
985
+ return err;
986
+ } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
987
+ struct rtmsg *rtm = nlmsg_data(nlh);
988
+
989
+ filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
990
+ }
991
+
992
+ /* ipv4 does not use prefix flag */
993
+ if (filter.flags & RTM_F_PREFIX)
812994 return skb->len;
995
+
996
+ if (filter.table_id) {
997
+ tb = fib_get_table(net, filter.table_id);
998
+ if (!tb) {
999
+ if (rtnl_msg_family(cb->nlh) != PF_INET)
1000
+ return skb->len;
1001
+
1002
+ NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
1003
+ return -ENOENT;
1004
+ }
1005
+
1006
+ rcu_read_lock();
1007
+ err = fib_table_dump(tb, skb, cb, &filter);
1008
+ rcu_read_unlock();
1009
+ return skb->len ? : err;
1010
+ }
8131011
8141012 s_h = cb->args[0];
8151013 s_e = cb->args[1];
....@@ -825,7 +1023,7 @@
8251023 if (dumped)
8261024 memset(&cb->args[2], 0, sizeof(cb->args) -
8271025 2 * sizeof(cb->args[0]));
828
- err = fib_table_dump(tb, skb, cb);
1026
+ err = fib_table_dump(tb, skb, cb, &filter);
8291027 if (err < 0) {
8301028 if (likely(skb->len))
8311029 goto out;
....@@ -917,9 +1115,11 @@
9171115 return;
9181116
9191117 /* Add broadcast address, if it is explicitly assigned. */
920
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1118
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
9211119 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
9221120 prim, 0);
1121
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
1122
+ }
9231123
9241124 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
9251125 (prefix != addr || ifa->ifa_prefixlen < 32)) {
....@@ -935,6 +1135,7 @@
9351135 prim, 0);
9361136 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
9371137 32, prim, 0);
1138
+ arp_invalidate(dev, prefix | ~mask, false);
9381139 }
9391140 }
9401141 }
....@@ -1015,8 +1216,8 @@
10151216 *
10161217 * Scan address list to be sure that addresses are really gone.
10171218 */
1018
-
1019
- for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
1219
+ rcu_read_lock();
1220
+ in_dev_for_each_ifa_rcu(ifa1, in_dev) {
10201221 if (ifa1 == ifa) {
10211222 /* promotion, keep the IP */
10221223 gone = 0;
....@@ -1084,6 +1285,7 @@
10841285 }
10851286 }
10861287 }
1288
+ rcu_read_unlock();
10871289
10881290 no_promotions:
10891291 if (!(ok & BRD_OK))
....@@ -1253,6 +1455,7 @@
12531455 struct netdev_notifier_info_ext *info_ext = ptr;
12541456 struct in_device *in_dev;
12551457 struct net *net = dev_net(dev);
1458
+ struct in_ifaddr *ifa;
12561459 unsigned int flags;
12571460
12581461 if (event == NETDEV_UNREGISTER) {
....@@ -1267,9 +1470,9 @@
12671470
12681471 switch (event) {
12691472 case NETDEV_UP:
1270
- for_ifa(in_dev) {
1473
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
12711474 fib_add_ifaddr(ifa);
1272
- } endfor_ifa(in_dev);
1475
+ }
12731476 #ifdef CONFIG_IP_ROUTE_MULTIPATH
12741477 fib_sync_up(dev, RTNH_F_DEAD);
12751478 #endif
....@@ -1381,7 +1584,7 @@
13811584 int error;
13821585
13831586 #ifdef CONFIG_IP_ROUTE_CLASSID
1384
- net->ipv4.fib_num_tclassid_users = 0;
1587
+ atomic_set(&net->ipv4.fib_num_tclassid_users, 0);
13851588 #endif
13861589 error = ip_fib_net_init(net);
13871590 if (error < 0)