forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/net/ipv4/route.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -55,11 +56,6 @@
5556 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
5657 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
5758 * Ilia Sotnikov : Removed TOS from hash calculations
58
- *
59
- * This program is free software; you can redistribute it and/or
60
- * modify it under the terms of the GNU General Public License
61
- * as published by the Free Software Foundation; either version
62
- * 2 of the License, or (at your option) any later version.
6359 */
6460
6561 #define pr_fmt(fmt) "IPv4: " fmt
....@@ -70,7 +66,7 @@
7066 #include <linux/types.h>
7167 #include <linux/kernel.h>
7268 #include <linux/mm.h>
73
-#include <linux/bootmem.h>
69
+#include <linux/memblock.h>
7470 #include <linux/string.h>
7571 #include <linux/socket.h>
7672 #include <linux/sockios.h>
....@@ -100,6 +96,7 @@
10096 #include <net/inetpeer.h>
10197 #include <net/sock.h>
10298 #include <net/ip_fib.h>
99
+#include <net/nexthop.h>
103100 #include <net/arp.h>
104101 #include <net/tcp.h>
105102 #include <net/icmp.h>
....@@ -241,11 +238,11 @@
241238 return seq_open(file, &rt_cache_seq_ops);
242239 }
243240
244
-static const struct file_operations rt_cache_seq_fops = {
245
- .open = rt_cache_seq_open,
246
- .read = seq_read,
247
- .llseek = seq_lseek,
248
- .release = seq_release,
241
+static const struct proc_ops rt_cache_proc_ops = {
242
+ .proc_open = rt_cache_seq_open,
243
+ .proc_read = seq_read,
244
+ .proc_lseek = seq_lseek,
245
+ .proc_release = seq_release,
249246 };
250247
251248
....@@ -332,11 +329,11 @@
332329 return seq_open(file, &rt_cpu_seq_ops);
333330 }
334331
335
-static const struct file_operations rt_cpu_seq_fops = {
336
- .open = rt_cpu_seq_open,
337
- .read = seq_read,
338
- .llseek = seq_lseek,
339
- .release = seq_release,
332
+static const struct proc_ops rt_cpu_proc_ops = {
333
+ .proc_open = rt_cpu_seq_open,
334
+ .proc_read = seq_read,
335
+ .proc_lseek = seq_lseek,
336
+ .proc_release = seq_release,
340337 };
341338
342339 #ifdef CONFIG_IP_ROUTE_CLASSID
....@@ -370,12 +367,12 @@
370367 struct proc_dir_entry *pde;
371368
372369 pde = proc_create("rt_cache", 0444, net->proc_net,
373
- &rt_cache_seq_fops);
370
+ &rt_cache_proc_ops);
374371 if (!pde)
375372 goto err1;
376373
377374 pde = proc_create("rt_cache", 0444,
378
- net->proc_net_stat, &rt_cpu_seq_fops);
375
+ net->proc_net_stat, &rt_cpu_proc_ops);
379376 if (!pde)
380377 goto err2;
381378
....@@ -437,37 +434,46 @@
437434 struct sk_buff *skb,
438435 const void *daddr)
439436 {
437
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
440438 struct net_device *dev = dst->dev;
441
- const __be32 *pkey = daddr;
442
- const struct rtable *rt;
443439 struct neighbour *n;
444440
445
- rt = (const struct rtable *) dst;
446
- if (rt->rt_gateway)
447
- pkey = (const __be32 *) &rt->rt_gateway;
448
- else if (skb)
449
- pkey = &ip_hdr(skb)->daddr;
441
+ rcu_read_lock_bh();
450442
451
- n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
452
- if (n)
453
- return n;
454
- return neigh_create(&arp_tbl, pkey, dev);
443
+ if (likely(rt->rt_gw_family == AF_INET)) {
444
+ n = ip_neigh_gw4(dev, rt->rt_gw4);
445
+ } else if (rt->rt_gw_family == AF_INET6) {
446
+ n = ip_neigh_gw6(dev, &rt->rt_gw6);
447
+ } else {
448
+ __be32 pkey;
449
+
450
+ pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
451
+ n = ip_neigh_gw4(dev, pkey);
452
+ }
453
+
454
+ if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
455
+ n = NULL;
456
+
457
+ rcu_read_unlock_bh();
458
+
459
+ return n;
455460 }
456461
457462 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
458463 {
464
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
459465 struct net_device *dev = dst->dev;
460466 const __be32 *pkey = daddr;
461
- const struct rtable *rt;
462467
463
- rt = (const struct rtable *)dst;
464
- if (rt->rt_gateway)
465
- pkey = (const __be32 *)&rt->rt_gateway;
466
- else if (!daddr ||
468
+ if (rt->rt_gw_family == AF_INET) {
469
+ pkey = (const __be32 *)&rt->rt_gw4;
470
+ } else if (rt->rt_gw_family == AF_INET6) {
471
+ return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
472
+ } else if (!daddr ||
467473 (rt->rt_flags &
468
- (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
474
+ (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
469475 return;
470
-
476
+ }
471477 __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
472478 }
473479
....@@ -522,6 +528,15 @@
522528 iph->id = htons(id);
523529 }
524530 EXPORT_SYMBOL(__ip_select_ident);
531
+
532
+static void ip_rt_fix_tos(struct flowi4 *fl4)
533
+{
534
+ __u8 tos = RT_FL_TOS(fl4);
535
+
536
+ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
537
+ fl4->flowi4_scope = tos & RTO_ONLINK ?
538
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
539
+}
525540
526541 static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
527542 const struct sock *sk,
....@@ -643,13 +658,15 @@
643658
644659 if (fnhe->fnhe_gw) {
645660 rt->rt_flags |= RTCF_REDIRECTED;
646
- rt->rt_gateway = fnhe->fnhe_gw;
647661 rt->rt_uses_gateway = 1;
662
+ rt->rt_gw_family = AF_INET;
663
+ rt->rt_gw4 = fnhe->fnhe_gw;
648664 }
649665 }
650666
651
-static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
652
- u32 pmtu, bool lock, unsigned long expires)
667
+static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
668
+ __be32 gw, u32 pmtu, bool lock,
669
+ unsigned long expires)
653670 {
654671 struct fnhe_hash_bucket *hash;
655672 struct fib_nh_exception *fnhe;
....@@ -658,17 +675,17 @@
658675 unsigned int i;
659676 int depth;
660677
661
- genid = fnhe_genid(dev_net(nh->nh_dev));
678
+ genid = fnhe_genid(dev_net(nhc->nhc_dev));
662679 hval = fnhe_hashfun(daddr);
663680
664681 spin_lock_bh(&fnhe_lock);
665682
666
- hash = rcu_dereference(nh->nh_exceptions);
683
+ hash = rcu_dereference(nhc->nhc_exceptions);
667684 if (!hash) {
668685 hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
669686 if (!hash)
670687 goto out_unlock;
671
- rcu_assign_pointer(nh->nh_exceptions, hash);
688
+ rcu_assign_pointer(nhc->nhc_exceptions, hash);
672689 }
673690
674691 hash += hval;
....@@ -727,13 +744,13 @@
727744 * stale, so anyone caching it rechecks if this exception
728745 * applies to them.
729746 */
730
- rt = rcu_dereference(nh->nh_rth_input);
747
+ rt = rcu_dereference(nhc->nhc_rth_input);
731748 if (rt)
732749 rt->dst.obsolete = DST_OBSOLETE_KILL;
733750
734751 for_each_possible_cpu(i) {
735752 struct rtable __rcu **prt;
736
- prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
753
+ prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
737754 rt = rcu_dereference(*prt);
738755 if (rt)
739756 rt->dst.obsolete = DST_OBSOLETE_KILL;
....@@ -768,7 +785,7 @@
768785 return;
769786 }
770787
771
- if (rt->rt_gateway != old_gw)
788
+ if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
772789 return;
773790
774791 in_dev = __in_dev_get_rcu(dev);
....@@ -799,11 +816,11 @@
799816 neigh_event_send(n, NULL);
800817 } else {
801818 if (fib_lookup(net, fl4, &res, 0) == 0) {
802
- struct fib_nh *nh;
819
+ struct fib_nh_common *nhc;
803820
804821 fib_select_path(net, &res, fl4, skb);
805
- nh = &FIB_RES_NH(res);
806
- update_or_create_fnhe(nh, fl4->daddr, new_gw,
822
+ nhc = FIB_RES_NHC(res);
823
+ update_or_create_fnhe(nhc, fl4->daddr, new_gw,
807824 0, false,
808825 jiffies + ip_rt_gc_timeout);
809826 }
....@@ -845,6 +862,7 @@
845862 rt = (struct rtable *) dst;
846863
847864 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
865
+ ip_rt_fix_tos(&fl4);
848866 __ip_do_redirect(rt, skb, &fl4, true);
849867 }
850868
....@@ -1029,13 +1047,14 @@
10291047 {
10301048 struct dst_entry *dst = &rt->dst;
10311049 struct net *net = dev_net(dst->dev);
1032
- u32 old_mtu = ipv4_mtu(dst);
10331050 struct fib_result res;
10341051 bool lock = false;
1052
+ u32 old_mtu;
10351053
10361054 if (ip_mtu_locked(dst))
10371055 return;
10381056
1057
+ old_mtu = ipv4_mtu(dst);
10391058 if (old_mtu < mtu)
10401059 return;
10411060
....@@ -1050,11 +1069,11 @@
10501069
10511070 rcu_read_lock();
10521071 if (fib_lookup(net, fl4, &res, 0) == 0) {
1053
- struct fib_nh *nh;
1072
+ struct fib_nh_common *nhc;
10541073
10551074 fib_select_path(net, &res, fl4, NULL);
1056
- nh = &FIB_RES_NH(res);
1057
- update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
1075
+ nhc = FIB_RES_NHC(res);
1076
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
10581077 jiffies + ip_rt_mtu_expires);
10591078 }
10601079 rcu_read_unlock();
....@@ -1068,21 +1087,25 @@
10681087 struct flowi4 fl4;
10691088
10701089 ip_rt_build_flow_key(&fl4, sk, skb);
1090
+ ip_rt_fix_tos(&fl4);
1091
+
1092
+ /* Don't make lookup fail for bridged encapsulations */
1093
+ if (skb && netif_is_any_bridge_port(skb->dev))
1094
+ fl4.flowi4_oif = 0;
1095
+
10711096 __ip_rt_update_pmtu(rt, &fl4, mtu);
10721097 }
10731098
10741099 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1075
- int oif, u32 mark, u8 protocol, int flow_flags)
1100
+ int oif, u8 protocol)
10761101 {
1077
- const struct iphdr *iph = (const struct iphdr *) skb->data;
1102
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
10781103 struct flowi4 fl4;
10791104 struct rtable *rt;
1080
-
1081
- if (!mark)
1082
- mark = IP4_REPLY_MARK(net, skb->mark);
1105
+ u32 mark = IP4_REPLY_MARK(net, skb->mark);
10831106
10841107 __build_flow_key(net, &fl4, NULL, iph, oif,
1085
- RT_TOS(iph->tos), protocol, mark, flow_flags);
1108
+ RT_TOS(iph->tos), protocol, mark, 0);
10861109 rt = __ip_route_output_key(net, &fl4);
10871110 if (!IS_ERR(rt)) {
10881111 __ip_rt_update_pmtu(rt, &fl4, mtu);
....@@ -1093,7 +1116,7 @@
10931116
10941117 static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
10951118 {
1096
- const struct iphdr *iph = (const struct iphdr *) skb->data;
1119
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
10971120 struct flowi4 fl4;
10981121 struct rtable *rt;
10991122
....@@ -1111,7 +1134,7 @@
11111134
11121135 void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
11131136 {
1114
- const struct iphdr *iph = (const struct iphdr *) skb->data;
1137
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
11151138 struct flowi4 fl4;
11161139 struct rtable *rt;
11171140 struct dst_entry *odst = NULL;
....@@ -1139,9 +1162,11 @@
11391162 goto out;
11401163
11411164 new = true;
1165
+ } else {
1166
+ ip_rt_fix_tos(&fl4);
11421167 }
11431168
1144
- __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
1169
+ __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
11451170
11461171 if (!dst_check(&rt->dst, 0)) {
11471172 if (new)
....@@ -1164,14 +1189,14 @@
11641189 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
11651190
11661191 void ipv4_redirect(struct sk_buff *skb, struct net *net,
1167
- int oif, u32 mark, u8 protocol, int flow_flags)
1192
+ int oif, u8 protocol)
11681193 {
1169
- const struct iphdr *iph = (const struct iphdr *) skb->data;
1194
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
11701195 struct flowi4 fl4;
11711196 struct rtable *rt;
11721197
11731198 __build_flow_key(net, &fl4, NULL, iph, oif,
1174
- RT_TOS(iph->tos), protocol, mark, flow_flags);
1199
+ RT_TOS(iph->tos), protocol, 0, 0);
11751200 rt = __ip_route_output_key(net, &fl4);
11761201 if (!IS_ERR(rt)) {
11771202 __ip_do_redirect(rt, skb, &fl4, false);
....@@ -1182,7 +1207,7 @@
11821207
11831208 void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
11841209 {
1185
- const struct iphdr *iph = (const struct iphdr *) skb->data;
1210
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
11861211 struct flowi4 fl4;
11871212 struct rtable *rt;
11881213 struct net *net = sock_net(sk);
....@@ -1206,7 +1231,7 @@
12061231 *
12071232 * When a PMTU/redirect information update invalidates a route,
12081233 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1209
- * DST_OBSOLETE_DEAD by dst_free().
1234
+ * DST_OBSOLETE_DEAD.
12101235 */
12111236 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
12121237 return NULL;
....@@ -1279,22 +1304,19 @@
12791304 src = ip_hdr(skb)->saddr;
12801305 else {
12811306 struct fib_result res;
1282
- struct flowi4 fl4;
1283
- struct iphdr *iph;
1284
-
1285
- iph = ip_hdr(skb);
1286
-
1287
- memset(&fl4, 0, sizeof(fl4));
1288
- fl4.daddr = iph->daddr;
1289
- fl4.saddr = iph->saddr;
1290
- fl4.flowi4_tos = RT_TOS(iph->tos);
1291
- fl4.flowi4_oif = rt->dst.dev->ifindex;
1292
- fl4.flowi4_iif = skb->dev->ifindex;
1293
- fl4.flowi4_mark = skb->mark;
1307
+ struct iphdr *iph = ip_hdr(skb);
1308
+ struct flowi4 fl4 = {
1309
+ .daddr = iph->daddr,
1310
+ .saddr = iph->saddr,
1311
+ .flowi4_tos = RT_TOS(iph->tos),
1312
+ .flowi4_oif = rt->dst.dev->ifindex,
1313
+ .flowi4_iif = skb->dev->ifindex,
1314
+ .flowi4_mark = skb->mark,
1315
+ };
12941316
12951317 rcu_read_lock();
12961318 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
1297
- src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1319
+ src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
12981320 else
12991321 src = inet_select_addr(rt->dst.dev,
13001322 rt_nexthop(rt, iph->daddr),
....@@ -1325,7 +1347,7 @@
13251347
13261348 static unsigned int ipv4_mtu(const struct dst_entry *dst)
13271349 {
1328
- const struct rtable *rt = (const struct rtable *) dst;
1350
+ const struct rtable *rt = (const struct rtable *)dst;
13291351 unsigned int mtu = rt->rt_pmtu;
13301352
13311353 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
....@@ -1347,7 +1369,7 @@
13471369 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
13481370 }
13491371
1350
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1372
+static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
13511373 {
13521374 struct fnhe_hash_bucket *hash;
13531375 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
....@@ -1355,7 +1377,7 @@
13551377
13561378 spin_lock_bh(&fnhe_lock);
13571379
1358
- hash = rcu_dereference_protected(nh->nh_exceptions,
1380
+ hash = rcu_dereference_protected(nhc->nhc_exceptions,
13591381 lockdep_is_held(&fnhe_lock));
13601382 hash += hval;
13611383
....@@ -1381,9 +1403,10 @@
13811403 spin_unlock_bh(&fnhe_lock);
13821404 }
13831405
1384
-static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1406
+static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
1407
+ __be32 daddr)
13851408 {
1386
- struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
1409
+ struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
13871410 struct fib_nh_exception *fnhe;
13881411 u32 hval;
13891412
....@@ -1397,7 +1420,7 @@
13971420 if (fnhe->fnhe_daddr == daddr) {
13981421 if (fnhe->fnhe_expires &&
13991422 time_after(jiffies, fnhe->fnhe_expires)) {
1400
- ip_del_fnhe(nh, daddr);
1423
+ ip_del_fnhe(nhc, daddr);
14011424 break;
14021425 }
14031426 return fnhe;
....@@ -1414,19 +1437,19 @@
14141437
14151438 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
14161439 {
1440
+ struct fib_nh_common *nhc = res->nhc;
1441
+ struct net_device *dev = nhc->nhc_dev;
14171442 struct fib_info *fi = res->fi;
1418
- struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
1419
- struct net_device *dev = nh->nh_dev;
14201443 u32 mtu = 0;
14211444
1422
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
1445
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
14231446 fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
14241447 mtu = fi->fib_mtu;
14251448
14261449 if (likely(!mtu)) {
14271450 struct fib_nh_exception *fnhe;
14281451
1429
- fnhe = find_exception(nh, daddr);
1452
+ fnhe = find_exception(nhc, daddr);
14301453 if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
14311454 mtu = fnhe->fnhe_pmtu;
14321455 }
....@@ -1434,7 +1457,7 @@
14341457 if (likely(!mtu))
14351458 mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
14361459
1437
- return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
1460
+ return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
14381461 }
14391462
14401463 static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
....@@ -1465,8 +1488,10 @@
14651488 orig = NULL;
14661489 }
14671490 fill_route_from_fnhe(rt, fnhe);
1468
- if (!rt->rt_gateway)
1469
- rt->rt_gateway = daddr;
1491
+ if (!rt->rt_gw4) {
1492
+ rt->rt_gw4 = daddr;
1493
+ rt->rt_gw_family = AF_INET;
1494
+ }
14701495
14711496 if (do_cache) {
14721497 dst_hold(&rt->dst);
....@@ -1485,15 +1510,15 @@
14851510 return ret;
14861511 }
14871512
1488
-static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1513
+static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
14891514 {
14901515 struct rtable *orig, *prev, **p;
14911516 bool ret = true;
14921517
14931518 if (rt_is_input_route(rt)) {
1494
- p = (struct rtable **)&nh->nh_rth_input;
1519
+ p = (struct rtable **)&nhc->nhc_rth_input;
14951520 } else {
1496
- p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
1521
+ p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
14971522 }
14981523 orig = *p;
14991524
....@@ -1546,18 +1571,14 @@
15461571
15471572 static void ipv4_dst_destroy(struct dst_entry *dst)
15481573 {
1549
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
15501574 struct rtable *rt = (struct rtable *)dst;
15511575
1552
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
1553
- kfree(p);
1554
-
1576
+ ip_dst_metrics_put(dst);
15551577 rt_del_uncached_list(rt);
15561578 }
15571579
15581580 void rt_flush_dev(struct net_device *dev)
15591581 {
1560
- struct net *net = dev_net(dev);
15611582 struct rtable *rt;
15621583 int cpu;
15631584
....@@ -1568,7 +1589,7 @@
15681589 list_for_each_entry(rt, &ul->head, rt_uncached) {
15691590 if (rt->dst.dev != dev)
15701591 continue;
1571
- rt->dst.dev = net->loopback_dev;
1592
+ rt->dst.dev = blackhole_netdev;
15721593 dev_hold(rt->dst.dev);
15731594 dev_put(dev);
15741595 }
....@@ -1592,33 +1613,43 @@
15921613 bool cached = false;
15931614
15941615 if (fi) {
1595
- struct fib_nh *nh = &FIB_RES_NH(*res);
1616
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
15961617
1597
- if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
1598
- rt->rt_gateway = nh->nh_gw;
1618
+ if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
15991619 rt->rt_uses_gateway = 1;
1620
+ rt->rt_gw_family = nhc->nhc_gw_family;
1621
+ /* only INET and INET6 are supported */
1622
+ if (likely(nhc->nhc_gw_family == AF_INET))
1623
+ rt->rt_gw4 = nhc->nhc_gw.ipv4;
1624
+ else
1625
+ rt->rt_gw6 = nhc->nhc_gw.ipv6;
16001626 }
1601
- dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
1602
- if (fi->fib_metrics != &dst_default_metrics) {
1603
- rt->dst._metrics |= DST_METRICS_REFCOUNTED;
1604
- refcount_inc(&fi->fib_metrics->refcnt);
1605
- }
1627
+
1628
+ ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
1629
+
16061630 #ifdef CONFIG_IP_ROUTE_CLASSID
1607
- rt->dst.tclassid = nh->nh_tclassid;
1631
+ if (nhc->nhc_family == AF_INET) {
1632
+ struct fib_nh *nh;
1633
+
1634
+ nh = container_of(nhc, struct fib_nh, nh_common);
1635
+ rt->dst.tclassid = nh->nh_tclassid;
1636
+ }
16081637 #endif
1609
- rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
1638
+ rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
16101639 if (unlikely(fnhe))
16111640 cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
16121641 else if (do_cache)
1613
- cached = rt_cache_route(nh, rt);
1642
+ cached = rt_cache_route(nhc, rt);
16141643 if (unlikely(!cached)) {
16151644 /* Routes we intend to cache in nexthop exception or
16161645 * FIB nexthop have the DST_NOCACHE bit clear.
16171646 * However, if we are unsuccessful at storing this
16181647 * route into the cache we really need to set it.
16191648 */
1620
- if (!rt->rt_gateway)
1621
- rt->rt_gateway = daddr;
1649
+ if (!rt->rt_gw4) {
1650
+ rt->rt_gw_family = AF_INET;
1651
+ rt->rt_gw4 = daddr;
1652
+ }
16221653 rt_add_uncached_list(rt);
16231654 }
16241655 } else
....@@ -1634,12 +1665,11 @@
16341665
16351666 struct rtable *rt_dst_alloc(struct net_device *dev,
16361667 unsigned int flags, u16 type,
1637
- bool nopolicy, bool noxfrm, bool will_cache)
1668
+ bool nopolicy, bool noxfrm)
16381669 {
16391670 struct rtable *rt;
16401671
16411672 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1642
- (will_cache ? 0 : DST_HOST) |
16431673 (nopolicy ? DST_NOPOLICY : 0) |
16441674 (noxfrm ? DST_NOXFRM : 0));
16451675
....@@ -1651,8 +1681,9 @@
16511681 rt->rt_iif = 0;
16521682 rt->rt_pmtu = 0;
16531683 rt->rt_mtu_locked = 0;
1654
- rt->rt_gateway = 0;
16551684 rt->rt_uses_gateway = 0;
1685
+ rt->rt_gw_family = 0;
1686
+ rt->rt_gw4 = 0;
16561687 INIT_LIST_HEAD(&rt->rt_uncached);
16571688
16581689 rt->dst.output = ip_output;
....@@ -1663,6 +1694,38 @@
16631694 return rt;
16641695 }
16651696 EXPORT_SYMBOL(rt_dst_alloc);
1697
+
1698
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
1699
+{
1700
+ struct rtable *new_rt;
1701
+
1702
+ new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1703
+ rt->dst.flags);
1704
+
1705
+ if (new_rt) {
1706
+ new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1707
+ new_rt->rt_flags = rt->rt_flags;
1708
+ new_rt->rt_type = rt->rt_type;
1709
+ new_rt->rt_is_input = rt->rt_is_input;
1710
+ new_rt->rt_iif = rt->rt_iif;
1711
+ new_rt->rt_pmtu = rt->rt_pmtu;
1712
+ new_rt->rt_mtu_locked = rt->rt_mtu_locked;
1713
+ new_rt->rt_gw_family = rt->rt_gw_family;
1714
+ if (rt->rt_gw_family == AF_INET)
1715
+ new_rt->rt_gw4 = rt->rt_gw4;
1716
+ else if (rt->rt_gw_family == AF_INET6)
1717
+ new_rt->rt_gw6 = rt->rt_gw6;
1718
+ INIT_LIST_HEAD(&new_rt->rt_uncached);
1719
+
1720
+ new_rt->dst.input = rt->dst.input;
1721
+ new_rt->dst.output = rt->dst.output;
1722
+ new_rt->dst.error = rt->dst.error;
1723
+ new_rt->dst.lastuse = jiffies;
1724
+ new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
1725
+ }
1726
+ return new_rt;
1727
+}
1728
+EXPORT_SYMBOL(rt_dst_clone);
16661729
16671730 /* called in rcu_read_lock() section */
16681731 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
....@@ -1683,7 +1746,8 @@
16831746 return -EINVAL;
16841747
16851748 if (ipv4_is_zeronet(saddr)) {
1686
- if (!ipv4_is_local_multicast(daddr))
1749
+ if (!ipv4_is_local_multicast(daddr) &&
1750
+ ip_hdr(skb)->protocol != IPPROTO_IGMP)
16871751 return -EINVAL;
16881752 } else {
16891753 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
....@@ -1712,7 +1776,7 @@
17121776 flags |= RTCF_LOCAL;
17131777
17141778 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
1715
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1779
+ IN_DEV_ORCONF(in_dev, NOPOLICY), false);
17161780 if (!rth)
17171781 return -ENOBUFS;
17181782
....@@ -1728,6 +1792,7 @@
17281792 #endif
17291793 RT_CACHE_STAT_INC(in_slow_mc);
17301794
1795
+ skb_dst_drop(skb);
17311796 skb_dst_set(skb, &rth->dst);
17321797 return 0;
17331798 }
....@@ -1752,7 +1817,7 @@
17521817 print_hex_dump(KERN_WARNING, "ll header: ",
17531818 DUMP_PREFIX_OFFSET, 16, 1,
17541819 skb_mac_header(skb),
1755
- dev->hard_header_len, true);
1820
+ dev->hard_header_len, false);
17561821 }
17571822 }
17581823 #endif
....@@ -1764,6 +1829,8 @@
17641829 struct in_device *in_dev,
17651830 __be32 daddr, __be32 saddr, u32 tos)
17661831 {
1832
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
1833
+ struct net_device *dev = nhc->nhc_dev;
17671834 struct fib_nh_exception *fnhe;
17681835 struct rtable *rth;
17691836 int err;
....@@ -1772,7 +1839,7 @@
17721839 u32 itag = 0;
17731840
17741841 /* get a working reference to the output device */
1775
- out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1842
+ out_dev = __in_dev_get_rcu(dev);
17761843 if (!out_dev) {
17771844 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
17781845 return -EINVAL;
....@@ -1789,10 +1856,14 @@
17891856
17901857 do_cache = res->fi && !itag;
17911858 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1792
- skb->protocol == htons(ETH_P_IP) &&
1793
- (IN_DEV_SHARED_MEDIA(out_dev) ||
1794
- inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1795
- IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1859
+ skb->protocol == htons(ETH_P_IP)) {
1860
+ __be32 gw;
1861
+
1862
+ gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
1863
+ if (IN_DEV_SHARED_MEDIA(out_dev) ||
1864
+ inet_addr_onlink(out_dev, saddr, gw))
1865
+ IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1866
+ }
17961867
17971868 if (skb->protocol != htons(ETH_P_IP)) {
17981869 /* Not IP (i.e. ARP). Do not create route, if it is
....@@ -1809,12 +1880,12 @@
18091880 }
18101881 }
18111882
1812
- fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1883
+ fnhe = find_exception(nhc, daddr);
18131884 if (do_cache) {
18141885 if (fnhe)
18151886 rth = rcu_dereference(fnhe->fnhe_rth_input);
18161887 else
1817
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1888
+ rth = rcu_dereference(nhc->nhc_rth_input);
18181889 if (rt_cache_valid(rth)) {
18191890 skb_dst_set_noref(skb, &rth->dst);
18201891 goto out;
....@@ -1822,8 +1893,8 @@
18221893 }
18231894
18241895 rth = rt_dst_alloc(out_dev->dev, 0, res->type,
1825
- IN_DEV_CONF_GET(in_dev, NOPOLICY),
1826
- IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
1896
+ IN_DEV_ORCONF(in_dev, NOPOLICY),
1897
+ IN_DEV_ORCONF(out_dev, NOXFRM));
18271898 if (!rth) {
18281899 err = -ENOBUFS;
18291900 goto cleanup;
....@@ -1869,10 +1940,7 @@
18691940 if (!icmph)
18701941 goto out;
18711942
1872
- if (icmph->type != ICMP_DEST_UNREACH &&
1873
- icmph->type != ICMP_REDIRECT &&
1874
- icmph->type != ICMP_TIME_EXCEEDED &&
1875
- icmph->type != ICMP_PARAMETERPROB)
1943
+ if (!icmp_is_err(icmph->type))
18761944 goto out;
18771945
18781946 inner_iph = skb_header_pointer(skb,
....@@ -1891,6 +1959,7 @@
18911959 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
18921960 const struct sk_buff *skb, struct flow_keys *flkeys)
18931961 {
1962
+ u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
18941963 struct flow_keys hash_keys;
18951964 u32 mhash;
18961965
....@@ -1938,8 +2007,41 @@
19382007 hash_keys.basic.ip_proto = fl4->flowi4_proto;
19392008 }
19402009 break;
2010
+ case 2:
2011
+ memset(&hash_keys, 0, sizeof(hash_keys));
2012
+ /* skb is currently provided only when forwarding */
2013
+ if (skb) {
2014
+ struct flow_keys keys;
2015
+
2016
+ skb_flow_dissect_flow_keys(skb, &keys, 0);
2017
+ /* Inner can be v4 or v6 */
2018
+ if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2019
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2020
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
2021
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
2022
+ } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2023
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2024
+ hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2025
+ hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2026
+ hash_keys.tags.flow_label = keys.tags.flow_label;
2027
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
2028
+ } else {
2029
+ /* Same as case 0 */
2030
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2031
+ ip_multipath_l3_keys(skb, &hash_keys);
2032
+ }
2033
+ } else {
2034
+ /* Same as case 0 */
2035
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2036
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
2037
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
2038
+ }
2039
+ break;
19412040 }
19422041 mhash = flow_hash_from_keys(&hash_keys);
2042
+
2043
+ if (multipath_hash)
2044
+ mhash = jhash_2words(mhash, multipath_hash, 0);
19432045
19442046 return mhash >> 1;
19452047 }
....@@ -1952,7 +2054,7 @@
19522054 struct flow_keys *hkeys)
19532055 {
19542056 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1955
- if (res->fi && res->fi->fib_nhs > 1) {
2057
+ if (res->fi && fib_info_num_path(res->fi) > 1) {
19562058 int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
19572059
19582060 fib_select_multipath(res, h);
....@@ -1963,10 +2065,65 @@
19632065 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
19642066 }
19652067
2068
+/* Implements all the saddr-related checks as ip_route_input_slow(),
2069
+ * assuming daddr is valid and the destination is not a local broadcast one.
2070
+ * Uses the provided hint instead of performing a route lookup.
2071
+ */
2072
+int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2073
+ u8 tos, struct net_device *dev,
2074
+ const struct sk_buff *hint)
2075
+{
2076
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
2077
+ struct rtable *rt = skb_rtable(hint);
2078
+ struct net *net = dev_net(dev);
2079
+ int err = -EINVAL;
2080
+ u32 tag = 0;
2081
+
2082
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
2083
+ goto martian_source;
2084
+
2085
+ if (ipv4_is_zeronet(saddr))
2086
+ goto martian_source;
2087
+
2088
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
2089
+ goto martian_source;
2090
+
2091
+ if (rt->rt_type != RTN_LOCAL)
2092
+ goto skip_validate_source;
2093
+
2094
+ tos &= IPTOS_RT_MASK;
2095
+ err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
2096
+ if (err < 0)
2097
+ goto martian_source;
2098
+
2099
+skip_validate_source:
2100
+ skb_dst_copy(skb, hint);
2101
+ return 0;
2102
+
2103
+martian_source:
2104
+ ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2105
+ return err;
2106
+}
2107
+
2108
+/* get device for dst_alloc with local routes */
2109
+static struct net_device *ip_rt_get_dev(struct net *net,
2110
+ const struct fib_result *res)
2111
+{
2112
+ struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
2113
+ struct net_device *dev = NULL;
2114
+
2115
+ if (nhc)
2116
+ dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
2117
+
2118
+ return dev ? : net->loopback_dev;
2119
+}
2120
+
19662121 /*
19672122 * NOTE. We drop all the packets that has local source
19682123 * addresses, because every properly looped back packet
19692124 * must have correct destination already attached by output routine.
2125
+ * Changes in the enforced policies must be applied also to
2126
+ * ip_route_use_hint().
19702127 *
19712128 * Such approach solves two big problems:
19722129 * 1. Not simplex devices are handled properly.
....@@ -2045,6 +2202,7 @@
20452202 fl4.daddr = daddr;
20462203 fl4.saddr = saddr;
20472204 fl4.flowi4_uid = sock_net_uid(net, NULL);
2205
+ fl4.flowi4_multipath_hash = 0;
20482206
20492207 if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
20502208 flkeys = &_flkeys;
....@@ -2106,7 +2264,9 @@
21062264 local_input:
21072265 do_cache &= res->fi && !itag;
21082266 if (do_cache) {
2109
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
2267
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
2268
+
2269
+ rth = rcu_dereference(nhc->nhc_rth_input);
21102270 if (rt_cache_valid(rth)) {
21112271 skb_dst_set_noref(skb, &rth->dst);
21122272 err = 0;
....@@ -2114,9 +2274,9 @@
21142274 }
21152275 }
21162276
2117
- rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
2277
+ rth = rt_dst_alloc(ip_rt_get_dev(net, res),
21182278 flags | RTCF_LOCAL, res->type,
2119
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
2279
+ IN_DEV_ORCONF(in_dev, NOPOLICY), false);
21202280 if (!rth)
21212281 goto e_nobufs;
21222282
....@@ -2134,16 +2294,16 @@
21342294 }
21352295
21362296 if (do_cache) {
2137
- struct fib_nh *nh = &FIB_RES_NH(*res);
2297
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
21382298
2139
- rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
2299
+ rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
21402300 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
21412301 WARN_ON(rth->dst.input == lwtunnel_input);
21422302 rth->dst.lwtstate->orig_input = rth->dst.input;
21432303 rth->dst.input = lwtunnel_input;
21442304 }
21452305
2146
- if (unlikely(!rt_cache_route(nh, rth)))
2306
+ if (unlikely(!rt_cache_route(nhc, rth)))
21472307 rt_add_uncached_list(rth);
21482308 }
21492309 skb_dst_set(skb, &rth->dst);
....@@ -2314,10 +2474,10 @@
23142474 fnhe = NULL;
23152475 do_cache &= fi != NULL;
23162476 if (fi) {
2477
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
23172478 struct rtable __rcu **prth;
2318
- struct fib_nh *nh = &FIB_RES_NH(*res);
23192479
2320
- fnhe = find_exception(nh, fl4->daddr);
2480
+ fnhe = find_exception(nhc, fl4->daddr);
23212481 if (!do_cache)
23222482 goto add;
23232483 if (fnhe) {
....@@ -2325,12 +2485,12 @@
23252485 } else {
23262486 if (unlikely(fl4->flowi4_flags &
23272487 FLOWI_FLAG_KNOWN_NH &&
2328
- !(nh->nh_gw &&
2329
- nh->nh_scope == RT_SCOPE_LINK))) {
2488
+ !(nhc->nhc_gw_family &&
2489
+ nhc->nhc_scope == RT_SCOPE_LINK))) {
23302490 do_cache = false;
23312491 goto add;
23322492 }
2333
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
2493
+ prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
23342494 }
23352495 rth = rcu_dereference(*prth);
23362496 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
....@@ -2339,9 +2499,8 @@
23392499
23402500 add:
23412501 rth = rt_dst_alloc(dev_out, flags, type,
2342
- IN_DEV_CONF_GET(in_dev, NOPOLICY),
2343
- IN_DEV_CONF_GET(in_dev, NOXFRM),
2344
- do_cache);
2502
+ IN_DEV_ORCONF(in_dev, NOPOLICY),
2503
+ IN_DEV_ORCONF(in_dev, NOXFRM));
23452504 if (!rth)
23462505 return ERR_PTR(-ENOBUFS);
23472506
....@@ -2379,7 +2538,6 @@
23792538 struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
23802539 const struct sk_buff *skb)
23812540 {
2382
- __u8 tos = RT_FL_TOS(fl4);
23832541 struct fib_result res = {
23842542 .type = RTN_UNSPEC,
23852543 .fi = NULL,
....@@ -2389,9 +2547,7 @@
23892547 struct rtable *rth;
23902548
23912549 fl4->flowi4_iif = LOOPBACK_IFINDEX;
2392
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2393
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2394
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2550
+ ip_rt_fix_tos(fl4);
23952551
23962552 rcu_read_lock();
23972553 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
....@@ -2572,44 +2728,15 @@
25722728 return rth;
25732729 }
25742730
2575
-static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2576
-{
2577
- return NULL;
2578
-}
2579
-
2580
-static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
2581
-{
2582
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2583
-
2584
- return mtu ? : dst->dev->mtu;
2585
-}
2586
-
2587
-static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2588
- struct sk_buff *skb, u32 mtu,
2589
- bool confirm_neigh)
2590
-{
2591
-}
2592
-
2593
-static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2594
- struct sk_buff *skb)
2595
-{
2596
-}
2597
-
2598
-static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2599
- unsigned long old)
2600
-{
2601
- return NULL;
2602
-}
2603
-
26042731 static struct dst_ops ipv4_dst_blackhole_ops = {
2605
- .family = AF_INET,
2606
- .check = ipv4_blackhole_dst_check,
2607
- .mtu = ipv4_blackhole_mtu,
2608
- .default_advmss = ipv4_default_advmss,
2609
- .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2610
- .redirect = ipv4_rt_blackhole_redirect,
2611
- .cow_metrics = ipv4_rt_blackhole_cow_metrics,
2612
- .neigh_lookup = ipv4_neigh_lookup,
2732
+ .family = AF_INET,
2733
+ .default_advmss = ipv4_default_advmss,
2734
+ .neigh_lookup = ipv4_neigh_lookup,
2735
+ .check = dst_blackhole_check,
2736
+ .cow_metrics = dst_blackhole_cow_metrics,
2737
+ .update_pmtu = dst_blackhole_update_pmtu,
2738
+ .redirect = dst_blackhole_redirect,
2739
+ .mtu = dst_blackhole_mtu,
26132740 };
26142741
26152742 struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
....@@ -2637,8 +2764,12 @@
26372764 rt->rt_genid = rt_genid_ipv4(net);
26382765 rt->rt_flags = ort->rt_flags;
26392766 rt->rt_type = ort->rt_type;
2640
- rt->rt_gateway = ort->rt_gateway;
26412767 rt->rt_uses_gateway = ort->rt_uses_gateway;
2768
+ rt->rt_gw_family = ort->rt_gw_family;
2769
+ if (rt->rt_gw_family == AF_INET)
2770
+ rt->rt_gw4 = ort->rt_gw4;
2771
+ else if (rt->rt_gw_family == AF_INET6)
2772
+ rt->rt_gw6 = ort->rt_gw6;
26422773
26432774 INIT_LIST_HEAD(&rt->rt_uncached);
26442775 }
....@@ -2667,10 +2798,59 @@
26672798 }
26682799 EXPORT_SYMBOL_GPL(ip_route_output_flow);
26692800
2801
+struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
2802
+ struct net_device *dev,
2803
+ struct net *net, __be32 *saddr,
2804
+ const struct ip_tunnel_info *info,
2805
+ u8 protocol, bool use_cache)
2806
+{
2807
+#ifdef CONFIG_DST_CACHE
2808
+ struct dst_cache *dst_cache;
2809
+#endif
2810
+ struct rtable *rt = NULL;
2811
+ struct flowi4 fl4;
2812
+ __u8 tos;
2813
+
2814
+#ifdef CONFIG_DST_CACHE
2815
+ dst_cache = (struct dst_cache *)&info->dst_cache;
2816
+ if (use_cache) {
2817
+ rt = dst_cache_get_ip4(dst_cache, saddr);
2818
+ if (rt)
2819
+ return rt;
2820
+ }
2821
+#endif
2822
+ memset(&fl4, 0, sizeof(fl4));
2823
+ fl4.flowi4_mark = skb->mark;
2824
+ fl4.flowi4_proto = protocol;
2825
+ fl4.daddr = info->key.u.ipv4.dst;
2826
+ fl4.saddr = info->key.u.ipv4.src;
2827
+ tos = info->key.tos;
2828
+ fl4.flowi4_tos = RT_TOS(tos);
2829
+
2830
+ rt = ip_route_output_key(net, &fl4);
2831
+ if (IS_ERR(rt)) {
2832
+ netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
2833
+ return ERR_PTR(-ENETUNREACH);
2834
+ }
2835
+ if (rt->dst.dev == dev) { /* is this necessary? */
2836
+ netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
2837
+ ip_rt_put(rt);
2838
+ return ERR_PTR(-ELOOP);
2839
+ }
2840
+#ifdef CONFIG_DST_CACHE
2841
+ if (use_cache)
2842
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
2843
+#endif
2844
+ *saddr = fl4.saddr;
2845
+ return rt;
2846
+}
2847
+EXPORT_SYMBOL_GPL(ip_route_output_tunnel);
2848
+
26702849 /* called with rcu_read_lock held */
26712850 static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
26722851 struct rtable *rt, u32 table_id, struct flowi4 *fl4,
2673
- struct sk_buff *skb, u32 portid, u32 seq)
2852
+ struct sk_buff *skb, u32 portid, u32 seq,
2853
+ unsigned int flags)
26742854 {
26752855 struct rtmsg *r;
26762856 struct nlmsghdr *nlh;
....@@ -2678,7 +2858,7 @@
26782858 u32 error;
26792859 u32 metrics[RTAX_MAX];
26802860
2681
- nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
2861
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
26822862 if (!nlh)
26832863 return -EMSGSIZE;
26842864
....@@ -2686,7 +2866,7 @@
26862866 r->rtm_family = AF_INET;
26872867 r->rtm_dst_len = 32;
26882868 r->rtm_src_len = 0;
2689
- r->rtm_tos = fl4->flowi4_tos;
2869
+ r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
26902870 r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
26912871 if (nla_put_u32(skb, RTA_TABLE, table_id))
26922872 goto nla_put_failure;
....@@ -2714,14 +2894,29 @@
27142894 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
27152895 goto nla_put_failure;
27162896 #endif
2717
- if (!rt_is_input_route(rt) &&
2897
+ if (fl4 && !rt_is_input_route(rt) &&
27182898 fl4->saddr != src) {
27192899 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
27202900 goto nla_put_failure;
27212901 }
2722
- if (rt->rt_uses_gateway &&
2723
- nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
2724
- goto nla_put_failure;
2902
+ if (rt->rt_uses_gateway) {
2903
+ if (rt->rt_gw_family == AF_INET &&
2904
+ nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
2905
+ goto nla_put_failure;
2906
+ } else if (rt->rt_gw_family == AF_INET6) {
2907
+ int alen = sizeof(struct in6_addr);
2908
+ struct nlattr *nla;
2909
+ struct rtvia *via;
2910
+
2911
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
2912
+ if (!nla)
2913
+ goto nla_put_failure;
2914
+
2915
+ via = nla_data(nla);
2916
+ via->rtvia_family = AF_INET6;
2917
+ memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
2918
+ }
2919
+ }
27252920
27262921 expires = rt->dst.expires;
27272922 if (expires) {
....@@ -2741,35 +2936,39 @@
27412936 if (rtnetlink_put_metrics(skb, metrics) < 0)
27422937 goto nla_put_failure;
27432938
2744
- if (fl4->flowi4_mark &&
2745
- nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2746
- goto nla_put_failure;
2939
+ if (fl4) {
2940
+ if (fl4->flowi4_mark &&
2941
+ nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2942
+ goto nla_put_failure;
27472943
2748
- if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2749
- nla_put_u32(skb, RTA_UID,
2750
- from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2751
- goto nla_put_failure;
2944
+ if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2945
+ nla_put_u32(skb, RTA_UID,
2946
+ from_kuid_munged(current_user_ns(),
2947
+ fl4->flowi4_uid)))
2948
+ goto nla_put_failure;
2949
+
2950
+ if (rt_is_input_route(rt)) {
2951
+#ifdef CONFIG_IP_MROUTE
2952
+ if (ipv4_is_multicast(dst) &&
2953
+ !ipv4_is_local_multicast(dst) &&
2954
+ IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2955
+ int err = ipmr_get_route(net, skb,
2956
+ fl4->saddr, fl4->daddr,
2957
+ r, portid);
2958
+
2959
+ if (err <= 0) {
2960
+ if (err == 0)
2961
+ return 0;
2962
+ goto nla_put_failure;
2963
+ }
2964
+ } else
2965
+#endif
2966
+ if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
2967
+ goto nla_put_failure;
2968
+ }
2969
+ }
27522970
27532971 error = rt->dst.error;
2754
-
2755
- if (rt_is_input_route(rt)) {
2756
-#ifdef CONFIG_IP_MROUTE
2757
- if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2758
- IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2759
- int err = ipmr_get_route(net, skb,
2760
- fl4->saddr, fl4->daddr,
2761
- r, portid);
2762
-
2763
- if (err <= 0) {
2764
- if (err == 0)
2765
- return 0;
2766
- goto nla_put_failure;
2767
- }
2768
- } else
2769
-#endif
2770
- if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
2771
- goto nla_put_failure;
2772
- }
27732972
27742973 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
27752974 goto nla_put_failure;
....@@ -2780,6 +2979,81 @@
27802979 nla_put_failure:
27812980 nlmsg_cancel(skb, nlh);
27822981 return -EMSGSIZE;
2982
+}
2983
+
2984
+static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
2985
+ struct netlink_callback *cb, u32 table_id,
2986
+ struct fnhe_hash_bucket *bucket, int genid,
2987
+ int *fa_index, int fa_start, unsigned int flags)
2988
+{
2989
+ int i;
2990
+
2991
+ for (i = 0; i < FNHE_HASH_SIZE; i++) {
2992
+ struct fib_nh_exception *fnhe;
2993
+
2994
+ for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
2995
+ fnhe = rcu_dereference(fnhe->fnhe_next)) {
2996
+ struct rtable *rt;
2997
+ int err;
2998
+
2999
+ if (*fa_index < fa_start)
3000
+ goto next;
3001
+
3002
+ if (fnhe->fnhe_genid != genid)
3003
+ goto next;
3004
+
3005
+ if (fnhe->fnhe_expires &&
3006
+ time_after(jiffies, fnhe->fnhe_expires))
3007
+ goto next;
3008
+
3009
+ rt = rcu_dereference(fnhe->fnhe_rth_input);
3010
+ if (!rt)
3011
+ rt = rcu_dereference(fnhe->fnhe_rth_output);
3012
+ if (!rt)
3013
+ goto next;
3014
+
3015
+ err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
3016
+ table_id, NULL, skb,
3017
+ NETLINK_CB(cb->skb).portid,
3018
+ cb->nlh->nlmsg_seq, flags);
3019
+ if (err)
3020
+ return err;
3021
+next:
3022
+ (*fa_index)++;
3023
+ }
3024
+ }
3025
+
3026
+ return 0;
3027
+}
3028
+
3029
+int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
3030
+ u32 table_id, struct fib_info *fi,
3031
+ int *fa_index, int fa_start, unsigned int flags)
3032
+{
3033
+ struct net *net = sock_net(cb->skb->sk);
3034
+ int nhsel, genid = fnhe_genid(net);
3035
+
3036
+ for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
3037
+ struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
3038
+ struct fnhe_hash_bucket *bucket;
3039
+ int err;
3040
+
3041
+ if (nhc->nhc_flags & RTNH_F_DEAD)
3042
+ continue;
3043
+
3044
+ rcu_read_lock();
3045
+ bucket = rcu_dereference(nhc->nhc_exceptions);
3046
+ err = 0;
3047
+ if (bucket)
3048
+ err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
3049
+ genid, fa_index, fa_start,
3050
+ flags);
3051
+ rcu_read_unlock();
3052
+ if (err)
3053
+ return err;
3054
+ }
3055
+
3056
+ return 0;
27833057 }
27843058
27853059 static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
....@@ -2843,6 +3117,75 @@
28433117 return skb;
28443118 }
28453119
3120
+static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
3121
+ const struct nlmsghdr *nlh,
3122
+ struct nlattr **tb,
3123
+ struct netlink_ext_ack *extack)
3124
+{
3125
+ struct rtmsg *rtm;
3126
+ int i, err;
3127
+
3128
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
3129
+ NL_SET_ERR_MSG(extack,
3130
+ "ipv4: Invalid header for route get request");
3131
+ return -EINVAL;
3132
+ }
3133
+
3134
+ if (!netlink_strict_get_check(skb))
3135
+ return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
3136
+ rtm_ipv4_policy, extack);
3137
+
3138
+ rtm = nlmsg_data(nlh);
3139
+ if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
3140
+ (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
3141
+ rtm->rtm_table || rtm->rtm_protocol ||
3142
+ rtm->rtm_scope || rtm->rtm_type) {
3143
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
3144
+ return -EINVAL;
3145
+ }
3146
+
3147
+ if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
3148
+ RTM_F_LOOKUP_TABLE |
3149
+ RTM_F_FIB_MATCH)) {
3150
+ NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
3151
+ return -EINVAL;
3152
+ }
3153
+
3154
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
3155
+ rtm_ipv4_policy, extack);
3156
+ if (err)
3157
+ return err;
3158
+
3159
+ if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
3160
+ (tb[RTA_DST] && !rtm->rtm_dst_len)) {
3161
+ NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
3162
+ return -EINVAL;
3163
+ }
3164
+
3165
+ for (i = 0; i <= RTA_MAX; i++) {
3166
+ if (!tb[i])
3167
+ continue;
3168
+
3169
+ switch (i) {
3170
+ case RTA_IIF:
3171
+ case RTA_OIF:
3172
+ case RTA_SRC:
3173
+ case RTA_DST:
3174
+ case RTA_IP_PROTO:
3175
+ case RTA_SPORT:
3176
+ case RTA_DPORT:
3177
+ case RTA_MARK:
3178
+ case RTA_UID:
3179
+ break;
3180
+ default:
3181
+ NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
3182
+ return -EINVAL;
3183
+ }
3184
+ }
3185
+
3186
+ return 0;
3187
+}
3188
+
28463189 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
28473190 struct netlink_ext_ack *extack)
28483191 {
....@@ -2855,7 +3198,7 @@
28553198 struct rtable *rt = NULL;
28563199 struct sk_buff *skb;
28573200 struct rtmsg *rtm;
2858
- struct flowi4 fl4;
3201
+ struct flowi4 fl4 = {};
28593202 __be32 dst = 0;
28603203 __be32 src = 0;
28613204 kuid_t uid;
....@@ -2863,8 +3206,7 @@
28633206 int err;
28643207 int mark;
28653208
2866
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
2867
- extack);
3209
+ err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
28683210 if (err < 0)
28693211 return err;
28703212
....@@ -2895,7 +3237,6 @@
28953237 if (!skb)
28963238 return -ENOBUFS;
28973239
2898
- memset(&fl4, 0, sizeof(fl4));
28993240 fl4.daddr = dst;
29003241 fl4.saddr = src;
29013242 fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
....@@ -2931,6 +3272,7 @@
29313272 err = -rt->dst.error;
29323273 } else {
29333274 fl4.flowi4_iif = LOOPBACK_IFINDEX;
3275
+ skb->dev = net->loopback_dev;
29343276 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
29353277 err = 0;
29363278 if (IS_ERR(rt))
....@@ -2955,19 +3297,45 @@
29553297 skb_reset_mac_header(skb);
29563298
29573299 if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
3300
+ struct fib_rt_info fri;
3301
+
29583302 if (!res.fi) {
29593303 err = fib_props[res.type].error;
29603304 if (!err)
29613305 err = -EHOSTUNREACH;
29623306 goto errout_rcu;
29633307 }
3308
+ fri.fi = res.fi;
3309
+ fri.tb_id = table_id;
3310
+ fri.dst = res.prefix;
3311
+ fri.dst_len = res.prefixlen;
3312
+ fri.tos = fl4.flowi4_tos;
3313
+ fri.type = rt->rt_type;
3314
+ fri.offload = 0;
3315
+ fri.trap = 0;
3316
+ if (res.fa_head) {
3317
+ struct fib_alias *fa;
3318
+
3319
+ hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) {
3320
+ u8 slen = 32 - fri.dst_len;
3321
+
3322
+ if (fa->fa_slen == slen &&
3323
+ fa->tb_id == fri.tb_id &&
3324
+ fa->fa_tos == fri.tos &&
3325
+ fa->fa_info == res.fi &&
3326
+ fa->fa_type == fri.type) {
3327
+ fri.offload = fa->offload;
3328
+ fri.trap = fa->trap;
3329
+ break;
3330
+ }
3331
+ }
3332
+ }
29643333 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
2965
- nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
2966
- rt->rt_type, res.prefix, res.prefixlen,
2967
- fl4.flowi4_tos, res.fi, 0);
3334
+ nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
29683335 } else {
29693336 err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
2970
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
3337
+ NETLINK_CB(in_skb).portid,
3338
+ nlh->nlmsg_seq, 0);
29713339 }
29723340 if (err < 0)
29733341 goto errout_rcu;
....@@ -2996,8 +3364,7 @@
29963364 static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
29973365
29983366 static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
2999
- void __user *buffer,
3000
- size_t *lenp, loff_t *ppos)
3367
+ void *buffer, size_t *lenp, loff_t *ppos)
30013368 {
30023369 struct net *net = (struct net *)__ctl->extra1;
30033370
....@@ -3122,9 +3489,11 @@
31223489 { }
31233490 };
31243491
3492
+static const char ipv4_route_flush_procname[] = "flush";
3493
+
31253494 static struct ctl_table ipv4_route_flush_table[] = {
31263495 {
3127
- .procname = "flush",
3496
+ .procname = ipv4_route_flush_procname,
31283497 .maxlen = sizeof(int),
31293498 .mode = 0200,
31303499 .proc_handler = ipv4_sysctl_rtcache_flush,
....@@ -3142,9 +3511,11 @@
31423511 if (!tbl)
31433512 goto err_dup;
31443513
3145
- /* Don't export sysctls to unprivileged users */
3146
- if (net->user_ns != &init_user_ns)
3147
- tbl[0].procname = NULL;
3514
+ /* Don't export non-whitelisted sysctls to unprivileged users */
3515
+ if (net->user_ns != &init_user_ns) {
3516
+ if (tbl[0].procname != ipv4_route_flush_procname)
3517
+ tbl[0].procname = NULL;
3518
+ }
31483519 }
31493520 tbl[0].extra1 = net;
31503521