.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
---|
3 | 4 | * operating system. INET is implemented using the BSD Socket |
---|
.. | .. |
---|
55 | 56 | * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. |
---|
56 | 57 | * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect |
---|
57 | 58 | * Ilia Sotnikov : Removed TOS from hash calculations |
---|
58 | | - * |
---|
59 | | - * This program is free software; you can redistribute it and/or |
---|
60 | | - * modify it under the terms of the GNU General Public License |
---|
61 | | - * as published by the Free Software Foundation; either version |
---|
62 | | - * 2 of the License, or (at your option) any later version. |
---|
63 | 59 | */ |
---|
64 | 60 | |
---|
65 | 61 | #define pr_fmt(fmt) "IPv4: " fmt |
---|
.. | .. |
---|
70 | 66 | #include <linux/types.h> |
---|
71 | 67 | #include <linux/kernel.h> |
---|
72 | 68 | #include <linux/mm.h> |
---|
73 | | -#include <linux/bootmem.h> |
---|
| 69 | +#include <linux/memblock.h> |
---|
74 | 70 | #include <linux/string.h> |
---|
75 | 71 | #include <linux/socket.h> |
---|
76 | 72 | #include <linux/sockios.h> |
---|
.. | .. |
---|
100 | 96 | #include <net/inetpeer.h> |
---|
101 | 97 | #include <net/sock.h> |
---|
102 | 98 | #include <net/ip_fib.h> |
---|
| 99 | +#include <net/nexthop.h> |
---|
103 | 100 | #include <net/arp.h> |
---|
104 | 101 | #include <net/tcp.h> |
---|
105 | 102 | #include <net/icmp.h> |
---|
.. | .. |
---|
241 | 238 | return seq_open(file, &rt_cache_seq_ops); |
---|
242 | 239 | } |
---|
243 | 240 | |
---|
244 | | -static const struct file_operations rt_cache_seq_fops = { |
---|
245 | | - .open = rt_cache_seq_open, |
---|
246 | | - .read = seq_read, |
---|
247 | | - .llseek = seq_lseek, |
---|
248 | | - .release = seq_release, |
---|
| 241 | +static const struct proc_ops rt_cache_proc_ops = { |
---|
| 242 | + .proc_open = rt_cache_seq_open, |
---|
| 243 | + .proc_read = seq_read, |
---|
| 244 | + .proc_lseek = seq_lseek, |
---|
| 245 | + .proc_release = seq_release, |
---|
249 | 246 | }; |
---|
250 | 247 | |
---|
251 | 248 | |
---|
.. | .. |
---|
332 | 329 | return seq_open(file, &rt_cpu_seq_ops); |
---|
333 | 330 | } |
---|
334 | 331 | |
---|
335 | | -static const struct file_operations rt_cpu_seq_fops = { |
---|
336 | | - .open = rt_cpu_seq_open, |
---|
337 | | - .read = seq_read, |
---|
338 | | - .llseek = seq_lseek, |
---|
339 | | - .release = seq_release, |
---|
| 332 | +static const struct proc_ops rt_cpu_proc_ops = { |
---|
| 333 | + .proc_open = rt_cpu_seq_open, |
---|
| 334 | + .proc_read = seq_read, |
---|
| 335 | + .proc_lseek = seq_lseek, |
---|
| 336 | + .proc_release = seq_release, |
---|
340 | 337 | }; |
---|
341 | 338 | |
---|
342 | 339 | #ifdef CONFIG_IP_ROUTE_CLASSID |
---|
.. | .. |
---|
370 | 367 | struct proc_dir_entry *pde; |
---|
371 | 368 | |
---|
372 | 369 | pde = proc_create("rt_cache", 0444, net->proc_net, |
---|
373 | | - &rt_cache_seq_fops); |
---|
| 370 | + &rt_cache_proc_ops); |
---|
374 | 371 | if (!pde) |
---|
375 | 372 | goto err1; |
---|
376 | 373 | |
---|
377 | 374 | pde = proc_create("rt_cache", 0444, |
---|
378 | | - net->proc_net_stat, &rt_cpu_seq_fops); |
---|
| 375 | + net->proc_net_stat, &rt_cpu_proc_ops); |
---|
379 | 376 | if (!pde) |
---|
380 | 377 | goto err2; |
---|
381 | 378 | |
---|
.. | .. |
---|
437 | 434 | struct sk_buff *skb, |
---|
438 | 435 | const void *daddr) |
---|
439 | 436 | { |
---|
| 437 | + const struct rtable *rt = container_of(dst, struct rtable, dst); |
---|
440 | 438 | struct net_device *dev = dst->dev; |
---|
441 | | - const __be32 *pkey = daddr; |
---|
442 | | - const struct rtable *rt; |
---|
443 | 439 | struct neighbour *n; |
---|
444 | 440 | |
---|
445 | | - rt = (const struct rtable *) dst; |
---|
446 | | - if (rt->rt_gateway) |
---|
447 | | - pkey = (const __be32 *) &rt->rt_gateway; |
---|
448 | | - else if (skb) |
---|
449 | | - pkey = &ip_hdr(skb)->daddr; |
---|
| 441 | + rcu_read_lock_bh(); |
---|
450 | 442 | |
---|
451 | | - n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); |
---|
452 | | - if (n) |
---|
453 | | - return n; |
---|
454 | | - return neigh_create(&arp_tbl, pkey, dev); |
---|
| 443 | + if (likely(rt->rt_gw_family == AF_INET)) { |
---|
| 444 | + n = ip_neigh_gw4(dev, rt->rt_gw4); |
---|
| 445 | + } else if (rt->rt_gw_family == AF_INET6) { |
---|
| 446 | + n = ip_neigh_gw6(dev, &rt->rt_gw6); |
---|
| 447 | + } else { |
---|
| 448 | + __be32 pkey; |
---|
| 449 | + |
---|
| 450 | + pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); |
---|
| 451 | + n = ip_neigh_gw4(dev, pkey); |
---|
| 452 | + } |
---|
| 453 | + |
---|
| 454 | + if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) |
---|
| 455 | + n = NULL; |
---|
| 456 | + |
---|
| 457 | + rcu_read_unlock_bh(); |
---|
| 458 | + |
---|
| 459 | + return n; |
---|
455 | 460 | } |
---|
456 | 461 | |
---|
457 | 462 | static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) |
---|
458 | 463 | { |
---|
| 464 | + const struct rtable *rt = container_of(dst, struct rtable, dst); |
---|
459 | 465 | struct net_device *dev = dst->dev; |
---|
460 | 466 | const __be32 *pkey = daddr; |
---|
461 | | - const struct rtable *rt; |
---|
462 | 467 | |
---|
463 | | - rt = (const struct rtable *)dst; |
---|
464 | | - if (rt->rt_gateway) |
---|
465 | | - pkey = (const __be32 *)&rt->rt_gateway; |
---|
466 | | - else if (!daddr || |
---|
| 468 | + if (rt->rt_gw_family == AF_INET) { |
---|
| 469 | + pkey = (const __be32 *)&rt->rt_gw4; |
---|
| 470 | + } else if (rt->rt_gw_family == AF_INET6) { |
---|
| 471 | + return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); |
---|
| 472 | + } else if (!daddr || |
---|
467 | 473 | (rt->rt_flags & |
---|
468 | | - (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) |
---|
| 474 | + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { |
---|
469 | 475 | return; |
---|
470 | | - |
---|
| 476 | + } |
---|
471 | 477 | __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); |
---|
472 | 478 | } |
---|
473 | 479 | |
---|
.. | .. |
---|
522 | 528 | iph->id = htons(id); |
---|
523 | 529 | } |
---|
524 | 530 | EXPORT_SYMBOL(__ip_select_ident); |
---|
| 531 | + |
---|
| 532 | +static void ip_rt_fix_tos(struct flowi4 *fl4) |
---|
| 533 | +{ |
---|
| 534 | + __u8 tos = RT_FL_TOS(fl4); |
---|
| 535 | + |
---|
| 536 | + fl4->flowi4_tos = tos & IPTOS_RT_MASK; |
---|
| 537 | + fl4->flowi4_scope = tos & RTO_ONLINK ? |
---|
| 538 | + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; |
---|
| 539 | +} |
---|
525 | 540 | |
---|
526 | 541 | static void __build_flow_key(const struct net *net, struct flowi4 *fl4, |
---|
527 | 542 | const struct sock *sk, |
---|
.. | .. |
---|
643 | 658 | |
---|
644 | 659 | if (fnhe->fnhe_gw) { |
---|
645 | 660 | rt->rt_flags |= RTCF_REDIRECTED; |
---|
646 | | - rt->rt_gateway = fnhe->fnhe_gw; |
---|
647 | 661 | rt->rt_uses_gateway = 1; |
---|
| 662 | + rt->rt_gw_family = AF_INET; |
---|
| 663 | + rt->rt_gw4 = fnhe->fnhe_gw; |
---|
648 | 664 | } |
---|
649 | 665 | } |
---|
650 | 666 | |
---|
651 | | -static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, |
---|
652 | | - u32 pmtu, bool lock, unsigned long expires) |
---|
| 667 | +static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, |
---|
| 668 | + __be32 gw, u32 pmtu, bool lock, |
---|
| 669 | + unsigned long expires) |
---|
653 | 670 | { |
---|
654 | 671 | struct fnhe_hash_bucket *hash; |
---|
655 | 672 | struct fib_nh_exception *fnhe; |
---|
.. | .. |
---|
658 | 675 | unsigned int i; |
---|
659 | 676 | int depth; |
---|
660 | 677 | |
---|
661 | | - genid = fnhe_genid(dev_net(nh->nh_dev)); |
---|
| 678 | + genid = fnhe_genid(dev_net(nhc->nhc_dev)); |
---|
662 | 679 | hval = fnhe_hashfun(daddr); |
---|
663 | 680 | |
---|
664 | 681 | spin_lock_bh(&fnhe_lock); |
---|
665 | 682 | |
---|
666 | | - hash = rcu_dereference(nh->nh_exceptions); |
---|
| 683 | + hash = rcu_dereference(nhc->nhc_exceptions); |
---|
667 | 684 | if (!hash) { |
---|
668 | 685 | hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); |
---|
669 | 686 | if (!hash) |
---|
670 | 687 | goto out_unlock; |
---|
671 | | - rcu_assign_pointer(nh->nh_exceptions, hash); |
---|
| 688 | + rcu_assign_pointer(nhc->nhc_exceptions, hash); |
---|
672 | 689 | } |
---|
673 | 690 | |
---|
674 | 691 | hash += hval; |
---|
.. | .. |
---|
727 | 744 | * stale, so anyone caching it rechecks if this exception |
---|
728 | 745 | * applies to them. |
---|
729 | 746 | */ |
---|
730 | | - rt = rcu_dereference(nh->nh_rth_input); |
---|
| 747 | + rt = rcu_dereference(nhc->nhc_rth_input); |
---|
731 | 748 | if (rt) |
---|
732 | 749 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
---|
733 | 750 | |
---|
734 | 751 | for_each_possible_cpu(i) { |
---|
735 | 752 | struct rtable __rcu **prt; |
---|
736 | | - prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); |
---|
| 753 | + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); |
---|
737 | 754 | rt = rcu_dereference(*prt); |
---|
738 | 755 | if (rt) |
---|
739 | 756 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
---|
.. | .. |
---|
768 | 785 | return; |
---|
769 | 786 | } |
---|
770 | 787 | |
---|
771 | | - if (rt->rt_gateway != old_gw) |
---|
| 788 | + if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) |
---|
772 | 789 | return; |
---|
773 | 790 | |
---|
774 | 791 | in_dev = __in_dev_get_rcu(dev); |
---|
.. | .. |
---|
799 | 816 | neigh_event_send(n, NULL); |
---|
800 | 817 | } else { |
---|
801 | 818 | if (fib_lookup(net, fl4, &res, 0) == 0) { |
---|
802 | | - struct fib_nh *nh; |
---|
| 819 | + struct fib_nh_common *nhc; |
---|
803 | 820 | |
---|
804 | 821 | fib_select_path(net, &res, fl4, skb); |
---|
805 | | - nh = &FIB_RES_NH(res); |
---|
806 | | - update_or_create_fnhe(nh, fl4->daddr, new_gw, |
---|
| 822 | + nhc = FIB_RES_NHC(res); |
---|
| 823 | + update_or_create_fnhe(nhc, fl4->daddr, new_gw, |
---|
807 | 824 | 0, false, |
---|
808 | 825 | jiffies + ip_rt_gc_timeout); |
---|
809 | 826 | } |
---|
.. | .. |
---|
845 | 862 | rt = (struct rtable *) dst; |
---|
846 | 863 | |
---|
847 | 864 | __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); |
---|
| 865 | + ip_rt_fix_tos(&fl4); |
---|
848 | 866 | __ip_do_redirect(rt, skb, &fl4, true); |
---|
849 | 867 | } |
---|
850 | 868 | |
---|
.. | .. |
---|
1029 | 1047 | { |
---|
1030 | 1048 | struct dst_entry *dst = &rt->dst; |
---|
1031 | 1049 | struct net *net = dev_net(dst->dev); |
---|
1032 | | - u32 old_mtu = ipv4_mtu(dst); |
---|
1033 | 1050 | struct fib_result res; |
---|
1034 | 1051 | bool lock = false; |
---|
| 1052 | + u32 old_mtu; |
---|
1035 | 1053 | |
---|
1036 | 1054 | if (ip_mtu_locked(dst)) |
---|
1037 | 1055 | return; |
---|
1038 | 1056 | |
---|
| 1057 | + old_mtu = ipv4_mtu(dst); |
---|
1039 | 1058 | if (old_mtu < mtu) |
---|
1040 | 1059 | return; |
---|
1041 | 1060 | |
---|
.. | .. |
---|
1050 | 1069 | |
---|
1051 | 1070 | rcu_read_lock(); |
---|
1052 | 1071 | if (fib_lookup(net, fl4, &res, 0) == 0) { |
---|
1053 | | - struct fib_nh *nh; |
---|
| 1072 | + struct fib_nh_common *nhc; |
---|
1054 | 1073 | |
---|
1055 | 1074 | fib_select_path(net, &res, fl4, NULL); |
---|
1056 | | - nh = &FIB_RES_NH(res); |
---|
1057 | | - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, |
---|
| 1075 | + nhc = FIB_RES_NHC(res); |
---|
| 1076 | + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, |
---|
1058 | 1077 | jiffies + ip_rt_mtu_expires); |
---|
1059 | 1078 | } |
---|
1060 | 1079 | rcu_read_unlock(); |
---|
.. | .. |
---|
1068 | 1087 | struct flowi4 fl4; |
---|
1069 | 1088 | |
---|
1070 | 1089 | ip_rt_build_flow_key(&fl4, sk, skb); |
---|
| 1090 | + ip_rt_fix_tos(&fl4); |
---|
| 1091 | + |
---|
| 1092 | + /* Don't make lookup fail for bridged encapsulations */ |
---|
| 1093 | + if (skb && netif_is_any_bridge_port(skb->dev)) |
---|
| 1094 | + fl4.flowi4_oif = 0; |
---|
| 1095 | + |
---|
1071 | 1096 | __ip_rt_update_pmtu(rt, &fl4, mtu); |
---|
1072 | 1097 | } |
---|
1073 | 1098 | |
---|
1074 | 1099 | void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, |
---|
1075 | | - int oif, u32 mark, u8 protocol, int flow_flags) |
---|
| 1100 | + int oif, u8 protocol) |
---|
1076 | 1101 | { |
---|
1077 | | - const struct iphdr *iph = (const struct iphdr *) skb->data; |
---|
| 1102 | + const struct iphdr *iph = (const struct iphdr *)skb->data; |
---|
1078 | 1103 | struct flowi4 fl4; |
---|
1079 | 1104 | struct rtable *rt; |
---|
1080 | | - |
---|
1081 | | - if (!mark) |
---|
1082 | | - mark = IP4_REPLY_MARK(net, skb->mark); |
---|
| 1105 | + u32 mark = IP4_REPLY_MARK(net, skb->mark); |
---|
1083 | 1106 | |
---|
1084 | 1107 | __build_flow_key(net, &fl4, NULL, iph, oif, |
---|
1085 | | - RT_TOS(iph->tos), protocol, mark, flow_flags); |
---|
| 1108 | + RT_TOS(iph->tos), protocol, mark, 0); |
---|
1086 | 1109 | rt = __ip_route_output_key(net, &fl4); |
---|
1087 | 1110 | if (!IS_ERR(rt)) { |
---|
1088 | 1111 | __ip_rt_update_pmtu(rt, &fl4, mtu); |
---|
.. | .. |
---|
1093 | 1116 | |
---|
1094 | 1117 | static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) |
---|
1095 | 1118 | { |
---|
1096 | | - const struct iphdr *iph = (const struct iphdr *) skb->data; |
---|
| 1119 | + const struct iphdr *iph = (const struct iphdr *)skb->data; |
---|
1097 | 1120 | struct flowi4 fl4; |
---|
1098 | 1121 | struct rtable *rt; |
---|
1099 | 1122 | |
---|
.. | .. |
---|
1111 | 1134 | |
---|
1112 | 1135 | void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) |
---|
1113 | 1136 | { |
---|
1114 | | - const struct iphdr *iph = (const struct iphdr *) skb->data; |
---|
| 1137 | + const struct iphdr *iph = (const struct iphdr *)skb->data; |
---|
1115 | 1138 | struct flowi4 fl4; |
---|
1116 | 1139 | struct rtable *rt; |
---|
1117 | 1140 | struct dst_entry *odst = NULL; |
---|
.. | .. |
---|
1139 | 1162 | goto out; |
---|
1140 | 1163 | |
---|
1141 | 1164 | new = true; |
---|
| 1165 | + } else { |
---|
| 1166 | + ip_rt_fix_tos(&fl4); |
---|
1142 | 1167 | } |
---|
1143 | 1168 | |
---|
1144 | | - __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu); |
---|
| 1169 | + __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); |
---|
1145 | 1170 | |
---|
1146 | 1171 | if (!dst_check(&rt->dst, 0)) { |
---|
1147 | 1172 | if (new) |
---|
.. | .. |
---|
1164 | 1189 | EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); |
---|
1165 | 1190 | |
---|
1166 | 1191 | void ipv4_redirect(struct sk_buff *skb, struct net *net, |
---|
1167 | | - int oif, u32 mark, u8 protocol, int flow_flags) |
---|
| 1192 | + int oif, u8 protocol) |
---|
1168 | 1193 | { |
---|
1169 | | - const struct iphdr *iph = (const struct iphdr *) skb->data; |
---|
| 1194 | + const struct iphdr *iph = (const struct iphdr *)skb->data; |
---|
1170 | 1195 | struct flowi4 fl4; |
---|
1171 | 1196 | struct rtable *rt; |
---|
1172 | 1197 | |
---|
1173 | 1198 | __build_flow_key(net, &fl4, NULL, iph, oif, |
---|
1174 | | - RT_TOS(iph->tos), protocol, mark, flow_flags); |
---|
| 1199 | + RT_TOS(iph->tos), protocol, 0, 0); |
---|
1175 | 1200 | rt = __ip_route_output_key(net, &fl4); |
---|
1176 | 1201 | if (!IS_ERR(rt)) { |
---|
1177 | 1202 | __ip_do_redirect(rt, skb, &fl4, false); |
---|
.. | .. |
---|
1182 | 1207 | |
---|
1183 | 1208 | void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) |
---|
1184 | 1209 | { |
---|
1185 | | - const struct iphdr *iph = (const struct iphdr *) skb->data; |
---|
| 1210 | + const struct iphdr *iph = (const struct iphdr *)skb->data; |
---|
1186 | 1211 | struct flowi4 fl4; |
---|
1187 | 1212 | struct rtable *rt; |
---|
1188 | 1213 | struct net *net = sock_net(sk); |
---|
.. | .. |
---|
1206 | 1231 | * |
---|
1207 | 1232 | * When a PMTU/redirect information update invalidates a route, |
---|
1208 | 1233 | * this is indicated by setting obsolete to DST_OBSOLETE_KILL or |
---|
1209 | | - * DST_OBSOLETE_DEAD by dst_free(). |
---|
| 1234 | + * DST_OBSOLETE_DEAD. |
---|
1210 | 1235 | */ |
---|
1211 | 1236 | if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) |
---|
1212 | 1237 | return NULL; |
---|
.. | .. |
---|
1215 | 1240 | |
---|
1216 | 1241 | static void ipv4_send_dest_unreach(struct sk_buff *skb) |
---|
1217 | 1242 | { |
---|
| 1243 | + struct net_device *dev; |
---|
1218 | 1244 | struct ip_options opt; |
---|
1219 | 1245 | int res; |
---|
1220 | 1246 | |
---|
.. | .. |
---|
1232 | 1258 | opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); |
---|
1233 | 1259 | |
---|
1234 | 1260 | rcu_read_lock(); |
---|
1235 | | - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); |
---|
| 1261 | + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; |
---|
| 1262 | + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); |
---|
1236 | 1263 | rcu_read_unlock(); |
---|
1237 | 1264 | |
---|
1238 | 1265 | if (res) |
---|
.. | .. |
---|
1279 | 1306 | src = ip_hdr(skb)->saddr; |
---|
1280 | 1307 | else { |
---|
1281 | 1308 | struct fib_result res; |
---|
1282 | | - struct flowi4 fl4; |
---|
1283 | | - struct iphdr *iph; |
---|
1284 | | - |
---|
1285 | | - iph = ip_hdr(skb); |
---|
1286 | | - |
---|
1287 | | - memset(&fl4, 0, sizeof(fl4)); |
---|
1288 | | - fl4.daddr = iph->daddr; |
---|
1289 | | - fl4.saddr = iph->saddr; |
---|
1290 | | - fl4.flowi4_tos = RT_TOS(iph->tos); |
---|
1291 | | - fl4.flowi4_oif = rt->dst.dev->ifindex; |
---|
1292 | | - fl4.flowi4_iif = skb->dev->ifindex; |
---|
1293 | | - fl4.flowi4_mark = skb->mark; |
---|
| 1309 | + struct iphdr *iph = ip_hdr(skb); |
---|
| 1310 | + struct flowi4 fl4 = { |
---|
| 1311 | + .daddr = iph->daddr, |
---|
| 1312 | + .saddr = iph->saddr, |
---|
| 1313 | + .flowi4_tos = RT_TOS(iph->tos), |
---|
| 1314 | + .flowi4_oif = rt->dst.dev->ifindex, |
---|
| 1315 | + .flowi4_iif = skb->dev->ifindex, |
---|
| 1316 | + .flowi4_mark = skb->mark, |
---|
| 1317 | + }; |
---|
1294 | 1318 | |
---|
1295 | 1319 | rcu_read_lock(); |
---|
1296 | 1320 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) |
---|
1297 | | - src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); |
---|
| 1321 | + src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); |
---|
1298 | 1322 | else |
---|
1299 | 1323 | src = inet_select_addr(rt->dst.dev, |
---|
1300 | 1324 | rt_nexthop(rt, iph->daddr), |
---|
.. | .. |
---|
1325 | 1349 | |
---|
1326 | 1350 | static unsigned int ipv4_mtu(const struct dst_entry *dst) |
---|
1327 | 1351 | { |
---|
1328 | | - const struct rtable *rt = (const struct rtable *) dst; |
---|
| 1352 | + const struct rtable *rt = (const struct rtable *)dst; |
---|
1329 | 1353 | unsigned int mtu = rt->rt_pmtu; |
---|
1330 | 1354 | |
---|
1331 | 1355 | if (!mtu || time_after_eq(jiffies, rt->dst.expires)) |
---|
.. | .. |
---|
1347 | 1371 | return mtu - lwtunnel_headroom(dst->lwtstate, mtu); |
---|
1348 | 1372 | } |
---|
1349 | 1373 | |
---|
1350 | | -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) |
---|
| 1374 | +static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) |
---|
1351 | 1375 | { |
---|
1352 | 1376 | struct fnhe_hash_bucket *hash; |
---|
1353 | 1377 | struct fib_nh_exception *fnhe, __rcu **fnhe_p; |
---|
.. | .. |
---|
1355 | 1379 | |
---|
1356 | 1380 | spin_lock_bh(&fnhe_lock); |
---|
1357 | 1381 | |
---|
1358 | | - hash = rcu_dereference_protected(nh->nh_exceptions, |
---|
| 1382 | + hash = rcu_dereference_protected(nhc->nhc_exceptions, |
---|
1359 | 1383 | lockdep_is_held(&fnhe_lock)); |
---|
1360 | 1384 | hash += hval; |
---|
1361 | 1385 | |
---|
.. | .. |
---|
1381 | 1405 | spin_unlock_bh(&fnhe_lock); |
---|
1382 | 1406 | } |
---|
1383 | 1407 | |
---|
1384 | | -static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) |
---|
| 1408 | +static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, |
---|
| 1409 | + __be32 daddr) |
---|
1385 | 1410 | { |
---|
1386 | | - struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); |
---|
| 1411 | + struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); |
---|
1387 | 1412 | struct fib_nh_exception *fnhe; |
---|
1388 | 1413 | u32 hval; |
---|
1389 | 1414 | |
---|
.. | .. |
---|
1397 | 1422 | if (fnhe->fnhe_daddr == daddr) { |
---|
1398 | 1423 | if (fnhe->fnhe_expires && |
---|
1399 | 1424 | time_after(jiffies, fnhe->fnhe_expires)) { |
---|
1400 | | - ip_del_fnhe(nh, daddr); |
---|
| 1425 | + ip_del_fnhe(nhc, daddr); |
---|
1401 | 1426 | break; |
---|
1402 | 1427 | } |
---|
1403 | 1428 | return fnhe; |
---|
.. | .. |
---|
1414 | 1439 | |
---|
1415 | 1440 | u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) |
---|
1416 | 1441 | { |
---|
| 1442 | + struct fib_nh_common *nhc = res->nhc; |
---|
| 1443 | + struct net_device *dev = nhc->nhc_dev; |
---|
1417 | 1444 | struct fib_info *fi = res->fi; |
---|
1418 | | - struct fib_nh *nh = &fi->fib_nh[res->nh_sel]; |
---|
1419 | | - struct net_device *dev = nh->nh_dev; |
---|
1420 | 1445 | u32 mtu = 0; |
---|
1421 | 1446 | |
---|
1422 | | - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || |
---|
| 1447 | + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || |
---|
1423 | 1448 | fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) |
---|
1424 | 1449 | mtu = fi->fib_mtu; |
---|
1425 | 1450 | |
---|
1426 | 1451 | if (likely(!mtu)) { |
---|
1427 | 1452 | struct fib_nh_exception *fnhe; |
---|
1428 | 1453 | |
---|
1429 | | - fnhe = find_exception(nh, daddr); |
---|
| 1454 | + fnhe = find_exception(nhc, daddr); |
---|
1430 | 1455 | if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) |
---|
1431 | 1456 | mtu = fnhe->fnhe_pmtu; |
---|
1432 | 1457 | } |
---|
.. | .. |
---|
1434 | 1459 | if (likely(!mtu)) |
---|
1435 | 1460 | mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); |
---|
1436 | 1461 | |
---|
1437 | | - return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu); |
---|
| 1462 | + return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); |
---|
1438 | 1463 | } |
---|
1439 | 1464 | |
---|
1440 | 1465 | static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
---|
.. | .. |
---|
1465 | 1490 | orig = NULL; |
---|
1466 | 1491 | } |
---|
1467 | 1492 | fill_route_from_fnhe(rt, fnhe); |
---|
1468 | | - if (!rt->rt_gateway) |
---|
1469 | | - rt->rt_gateway = daddr; |
---|
| 1493 | + if (!rt->rt_gw4) { |
---|
| 1494 | + rt->rt_gw4 = daddr; |
---|
| 1495 | + rt->rt_gw_family = AF_INET; |
---|
| 1496 | + } |
---|
1470 | 1497 | |
---|
1471 | 1498 | if (do_cache) { |
---|
1472 | 1499 | dst_hold(&rt->dst); |
---|
.. | .. |
---|
1485 | 1512 | return ret; |
---|
1486 | 1513 | } |
---|
1487 | 1514 | |
---|
1488 | | -static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) |
---|
| 1515 | +static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) |
---|
1489 | 1516 | { |
---|
1490 | 1517 | struct rtable *orig, *prev, **p; |
---|
1491 | 1518 | bool ret = true; |
---|
1492 | 1519 | |
---|
1493 | 1520 | if (rt_is_input_route(rt)) { |
---|
1494 | | - p = (struct rtable **)&nh->nh_rth_input; |
---|
| 1521 | + p = (struct rtable **)&nhc->nhc_rth_input; |
---|
1495 | 1522 | } else { |
---|
1496 | | - p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); |
---|
| 1523 | + p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); |
---|
1497 | 1524 | } |
---|
1498 | 1525 | orig = *p; |
---|
1499 | 1526 | |
---|
.. | .. |
---|
1546 | 1573 | |
---|
1547 | 1574 | static void ipv4_dst_destroy(struct dst_entry *dst) |
---|
1548 | 1575 | { |
---|
1549 | | - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); |
---|
1550 | 1576 | struct rtable *rt = (struct rtable *)dst; |
---|
1551 | 1577 | |
---|
1552 | | - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) |
---|
1553 | | - kfree(p); |
---|
1554 | | - |
---|
| 1578 | + ip_dst_metrics_put(dst); |
---|
1555 | 1579 | rt_del_uncached_list(rt); |
---|
1556 | 1580 | } |
---|
1557 | 1581 | |
---|
1558 | 1582 | void rt_flush_dev(struct net_device *dev) |
---|
1559 | 1583 | { |
---|
1560 | | - struct net *net = dev_net(dev); |
---|
1561 | 1584 | struct rtable *rt; |
---|
1562 | 1585 | int cpu; |
---|
1563 | 1586 | |
---|
.. | .. |
---|
1568 | 1591 | list_for_each_entry(rt, &ul->head, rt_uncached) { |
---|
1569 | 1592 | if (rt->dst.dev != dev) |
---|
1570 | 1593 | continue; |
---|
1571 | | - rt->dst.dev = net->loopback_dev; |
---|
| 1594 | + rt->dst.dev = blackhole_netdev; |
---|
1572 | 1595 | dev_hold(rt->dst.dev); |
---|
1573 | 1596 | dev_put(dev); |
---|
1574 | 1597 | } |
---|
.. | .. |
---|
1592 | 1615 | bool cached = false; |
---|
1593 | 1616 | |
---|
1594 | 1617 | if (fi) { |
---|
1595 | | - struct fib_nh *nh = &FIB_RES_NH(*res); |
---|
| 1618 | + struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
---|
1596 | 1619 | |
---|
1597 | | - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { |
---|
1598 | | - rt->rt_gateway = nh->nh_gw; |
---|
| 1620 | + if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { |
---|
1599 | 1621 | rt->rt_uses_gateway = 1; |
---|
| 1622 | + rt->rt_gw_family = nhc->nhc_gw_family; |
---|
| 1623 | + /* only INET and INET6 are supported */ |
---|
| 1624 | + if (likely(nhc->nhc_gw_family == AF_INET)) |
---|
| 1625 | + rt->rt_gw4 = nhc->nhc_gw.ipv4; |
---|
| 1626 | + else |
---|
| 1627 | + rt->rt_gw6 = nhc->nhc_gw.ipv6; |
---|
1600 | 1628 | } |
---|
1601 | | - dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); |
---|
1602 | | - if (fi->fib_metrics != &dst_default_metrics) { |
---|
1603 | | - rt->dst._metrics |= DST_METRICS_REFCOUNTED; |
---|
1604 | | - refcount_inc(&fi->fib_metrics->refcnt); |
---|
1605 | | - } |
---|
| 1629 | + |
---|
| 1630 | + ip_dst_init_metrics(&rt->dst, fi->fib_metrics); |
---|
| 1631 | + |
---|
1606 | 1632 | #ifdef CONFIG_IP_ROUTE_CLASSID |
---|
1607 | | - rt->dst.tclassid = nh->nh_tclassid; |
---|
| 1633 | + if (nhc->nhc_family == AF_INET) { |
---|
| 1634 | + struct fib_nh *nh; |
---|
| 1635 | + |
---|
| 1636 | + nh = container_of(nhc, struct fib_nh, nh_common); |
---|
| 1637 | + rt->dst.tclassid = nh->nh_tclassid; |
---|
| 1638 | + } |
---|
1608 | 1639 | #endif |
---|
1609 | | - rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); |
---|
| 1640 | + rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); |
---|
1610 | 1641 | if (unlikely(fnhe)) |
---|
1611 | 1642 | cached = rt_bind_exception(rt, fnhe, daddr, do_cache); |
---|
1612 | 1643 | else if (do_cache) |
---|
1613 | | - cached = rt_cache_route(nh, rt); |
---|
| 1644 | + cached = rt_cache_route(nhc, rt); |
---|
1614 | 1645 | if (unlikely(!cached)) { |
---|
1615 | 1646 | /* Routes we intend to cache in nexthop exception or |
---|
1616 | 1647 | * FIB nexthop have the DST_NOCACHE bit clear. |
---|
1617 | 1648 | * However, if we are unsuccessful at storing this |
---|
1618 | 1649 | * route into the cache we really need to set it. |
---|
1619 | 1650 | */ |
---|
1620 | | - if (!rt->rt_gateway) |
---|
1621 | | - rt->rt_gateway = daddr; |
---|
| 1651 | + if (!rt->rt_gw4) { |
---|
| 1652 | + rt->rt_gw_family = AF_INET; |
---|
| 1653 | + rt->rt_gw4 = daddr; |
---|
| 1654 | + } |
---|
1622 | 1655 | rt_add_uncached_list(rt); |
---|
1623 | 1656 | } |
---|
1624 | 1657 | } else |
---|
.. | .. |
---|
1634 | 1667 | |
---|
1635 | 1668 | struct rtable *rt_dst_alloc(struct net_device *dev, |
---|
1636 | 1669 | unsigned int flags, u16 type, |
---|
1637 | | - bool nopolicy, bool noxfrm, bool will_cache) |
---|
| 1670 | + bool nopolicy, bool noxfrm) |
---|
1638 | 1671 | { |
---|
1639 | 1672 | struct rtable *rt; |
---|
1640 | 1673 | |
---|
1641 | 1674 | rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, |
---|
1642 | | - (will_cache ? 0 : DST_HOST) | |
---|
1643 | 1675 | (nopolicy ? DST_NOPOLICY : 0) | |
---|
1644 | 1676 | (noxfrm ? DST_NOXFRM : 0)); |
---|
1645 | 1677 | |
---|
.. | .. |
---|
1651 | 1683 | rt->rt_iif = 0; |
---|
1652 | 1684 | rt->rt_pmtu = 0; |
---|
1653 | 1685 | rt->rt_mtu_locked = 0; |
---|
1654 | | - rt->rt_gateway = 0; |
---|
1655 | 1686 | rt->rt_uses_gateway = 0; |
---|
| 1687 | + rt->rt_gw_family = 0; |
---|
| 1688 | + rt->rt_gw4 = 0; |
---|
1656 | 1689 | INIT_LIST_HEAD(&rt->rt_uncached); |
---|
1657 | 1690 | |
---|
1658 | 1691 | rt->dst.output = ip_output; |
---|
.. | .. |
---|
1663 | 1696 | return rt; |
---|
1664 | 1697 | } |
---|
1665 | 1698 | EXPORT_SYMBOL(rt_dst_alloc); |
---|
| 1699 | + |
---|
| 1700 | +struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) |
---|
| 1701 | +{ |
---|
| 1702 | + struct rtable *new_rt; |
---|
| 1703 | + |
---|
| 1704 | + new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, |
---|
| 1705 | + rt->dst.flags); |
---|
| 1706 | + |
---|
| 1707 | + if (new_rt) { |
---|
| 1708 | + new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); |
---|
| 1709 | + new_rt->rt_flags = rt->rt_flags; |
---|
| 1710 | + new_rt->rt_type = rt->rt_type; |
---|
| 1711 | + new_rt->rt_is_input = rt->rt_is_input; |
---|
| 1712 | + new_rt->rt_iif = rt->rt_iif; |
---|
| 1713 | + new_rt->rt_pmtu = rt->rt_pmtu; |
---|
| 1714 | + new_rt->rt_mtu_locked = rt->rt_mtu_locked; |
---|
| 1715 | + new_rt->rt_gw_family = rt->rt_gw_family; |
---|
| 1716 | + if (rt->rt_gw_family == AF_INET) |
---|
| 1717 | + new_rt->rt_gw4 = rt->rt_gw4; |
---|
| 1718 | + else if (rt->rt_gw_family == AF_INET6) |
---|
| 1719 | + new_rt->rt_gw6 = rt->rt_gw6; |
---|
| 1720 | + INIT_LIST_HEAD(&new_rt->rt_uncached); |
---|
| 1721 | + |
---|
| 1722 | + new_rt->dst.input = rt->dst.input; |
---|
| 1723 | + new_rt->dst.output = rt->dst.output; |
---|
| 1724 | + new_rt->dst.error = rt->dst.error; |
---|
| 1725 | + new_rt->dst.lastuse = jiffies; |
---|
| 1726 | + new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); |
---|
| 1727 | + } |
---|
| 1728 | + return new_rt; |
---|
| 1729 | +} |
---|
| 1730 | +EXPORT_SYMBOL(rt_dst_clone); |
---|
1666 | 1731 | |
---|
1667 | 1732 | /* called in rcu_read_lock() section */ |
---|
1668 | 1733 | int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
---|
.. | .. |
---|
1683 | 1748 | return -EINVAL; |
---|
1684 | 1749 | |
---|
1685 | 1750 | if (ipv4_is_zeronet(saddr)) { |
---|
1686 | | - if (!ipv4_is_local_multicast(daddr)) |
---|
| 1751 | + if (!ipv4_is_local_multicast(daddr) && |
---|
| 1752 | + ip_hdr(skb)->protocol != IPPROTO_IGMP) |
---|
1687 | 1753 | return -EINVAL; |
---|
1688 | 1754 | } else { |
---|
1689 | 1755 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, |
---|
.. | .. |
---|
1712 | 1778 | flags |= RTCF_LOCAL; |
---|
1713 | 1779 | |
---|
1714 | 1780 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, |
---|
1715 | | - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); |
---|
| 1781 | + IN_DEV_ORCONF(in_dev, NOPOLICY), false); |
---|
1716 | 1782 | if (!rth) |
---|
1717 | 1783 | return -ENOBUFS; |
---|
1718 | 1784 | |
---|
.. | .. |
---|
1728 | 1794 | #endif |
---|
1729 | 1795 | RT_CACHE_STAT_INC(in_slow_mc); |
---|
1730 | 1796 | |
---|
| 1797 | + skb_dst_drop(skb); |
---|
1731 | 1798 | skb_dst_set(skb, &rth->dst); |
---|
1732 | 1799 | return 0; |
---|
1733 | 1800 | } |
---|
.. | .. |
---|
1752 | 1819 | print_hex_dump(KERN_WARNING, "ll header: ", |
---|
1753 | 1820 | DUMP_PREFIX_OFFSET, 16, 1, |
---|
1754 | 1821 | skb_mac_header(skb), |
---|
1755 | | - dev->hard_header_len, true); |
---|
| 1822 | + dev->hard_header_len, false); |
---|
1756 | 1823 | } |
---|
1757 | 1824 | } |
---|
1758 | 1825 | #endif |
---|
.. | .. |
---|
1764 | 1831 | struct in_device *in_dev, |
---|
1765 | 1832 | __be32 daddr, __be32 saddr, u32 tos) |
---|
1766 | 1833 | { |
---|
| 1834 | + struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
---|
| 1835 | + struct net_device *dev = nhc->nhc_dev; |
---|
1767 | 1836 | struct fib_nh_exception *fnhe; |
---|
1768 | 1837 | struct rtable *rth; |
---|
1769 | 1838 | int err; |
---|
.. | .. |
---|
1772 | 1841 | u32 itag = 0; |
---|
1773 | 1842 | |
---|
1774 | 1843 | /* get a working reference to the output device */ |
---|
1775 | | - out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
---|
| 1844 | + out_dev = __in_dev_get_rcu(dev); |
---|
1776 | 1845 | if (!out_dev) { |
---|
1777 | 1846 | net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); |
---|
1778 | 1847 | return -EINVAL; |
---|
.. | .. |
---|
1789 | 1858 | |
---|
1790 | 1859 | do_cache = res->fi && !itag; |
---|
1791 | 1860 | if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && |
---|
1792 | | - skb->protocol == htons(ETH_P_IP) && |
---|
1793 | | - (IN_DEV_SHARED_MEDIA(out_dev) || |
---|
1794 | | - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) |
---|
1795 | | - IPCB(skb)->flags |= IPSKB_DOREDIRECT; |
---|
| 1861 | + skb->protocol == htons(ETH_P_IP)) { |
---|
| 1862 | + __be32 gw; |
---|
| 1863 | + |
---|
| 1864 | + gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; |
---|
| 1865 | + if (IN_DEV_SHARED_MEDIA(out_dev) || |
---|
| 1866 | + inet_addr_onlink(out_dev, saddr, gw)) |
---|
| 1867 | + IPCB(skb)->flags |= IPSKB_DOREDIRECT; |
---|
| 1868 | + } |
---|
1796 | 1869 | |
---|
1797 | 1870 | if (skb->protocol != htons(ETH_P_IP)) { |
---|
1798 | 1871 | /* Not IP (i.e. ARP). Do not create route, if it is |
---|
.. | .. |
---|
1809 | 1882 | } |
---|
1810 | 1883 | } |
---|
1811 | 1884 | |
---|
1812 | | - fnhe = find_exception(&FIB_RES_NH(*res), daddr); |
---|
| 1885 | + fnhe = find_exception(nhc, daddr); |
---|
1813 | 1886 | if (do_cache) { |
---|
1814 | 1887 | if (fnhe) |
---|
1815 | 1888 | rth = rcu_dereference(fnhe->fnhe_rth_input); |
---|
1816 | 1889 | else |
---|
1817 | | - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); |
---|
| 1890 | + rth = rcu_dereference(nhc->nhc_rth_input); |
---|
1818 | 1891 | if (rt_cache_valid(rth)) { |
---|
1819 | 1892 | skb_dst_set_noref(skb, &rth->dst); |
---|
1820 | 1893 | goto out; |
---|
.. | .. |
---|
1822 | 1895 | } |
---|
1823 | 1896 | |
---|
1824 | 1897 | rth = rt_dst_alloc(out_dev->dev, 0, res->type, |
---|
1825 | | - IN_DEV_CONF_GET(in_dev, NOPOLICY), |
---|
1826 | | - IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); |
---|
| 1898 | + IN_DEV_ORCONF(in_dev, NOPOLICY), |
---|
| 1899 | + IN_DEV_ORCONF(out_dev, NOXFRM)); |
---|
1827 | 1900 | if (!rth) { |
---|
1828 | 1901 | err = -ENOBUFS; |
---|
1829 | 1902 | goto cleanup; |
---|
.. | .. |
---|
1869 | 1942 | if (!icmph) |
---|
1870 | 1943 | goto out; |
---|
1871 | 1944 | |
---|
1872 | | - if (icmph->type != ICMP_DEST_UNREACH && |
---|
1873 | | - icmph->type != ICMP_REDIRECT && |
---|
1874 | | - icmph->type != ICMP_TIME_EXCEEDED && |
---|
1875 | | - icmph->type != ICMP_PARAMETERPROB) |
---|
| 1945 | + if (!icmp_is_err(icmph->type)) |
---|
1876 | 1946 | goto out; |
---|
1877 | 1947 | |
---|
1878 | 1948 | inner_iph = skb_header_pointer(skb, |
---|
.. | .. |
---|
1891 | 1961 | int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, |
---|
1892 | 1962 | const struct sk_buff *skb, struct flow_keys *flkeys) |
---|
1893 | 1963 | { |
---|
| 1964 | + u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; |
---|
1894 | 1965 | struct flow_keys hash_keys; |
---|
1895 | 1966 | u32 mhash; |
---|
1896 | 1967 | |
---|
.. | .. |
---|
1938 | 2009 | hash_keys.basic.ip_proto = fl4->flowi4_proto; |
---|
1939 | 2010 | } |
---|
1940 | 2011 | break; |
---|
| 2012 | + case 2: |
---|
| 2013 | + memset(&hash_keys, 0, sizeof(hash_keys)); |
---|
| 2014 | + /* skb is currently provided only when forwarding */ |
---|
| 2015 | + if (skb) { |
---|
| 2016 | + struct flow_keys keys; |
---|
| 2017 | + |
---|
| 2018 | + skb_flow_dissect_flow_keys(skb, &keys, 0); |
---|
| 2019 | + /* Inner can be v4 or v6 */ |
---|
| 2020 | + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
---|
| 2021 | + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
---|
| 2022 | + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; |
---|
| 2023 | + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; |
---|
| 2024 | + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
---|
| 2025 | + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
---|
| 2026 | + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; |
---|
| 2027 | + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; |
---|
| 2028 | + hash_keys.tags.flow_label = keys.tags.flow_label; |
---|
| 2029 | + hash_keys.basic.ip_proto = keys.basic.ip_proto; |
---|
| 2030 | + } else { |
---|
| 2031 | + /* Same as case 0 */ |
---|
| 2032 | + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
---|
| 2033 | + ip_multipath_l3_keys(skb, &hash_keys); |
---|
| 2034 | + } |
---|
| 2035 | + } else { |
---|
| 2036 | + /* Same as case 0 */ |
---|
| 2037 | + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
---|
| 2038 | + hash_keys.addrs.v4addrs.src = fl4->saddr; |
---|
| 2039 | + hash_keys.addrs.v4addrs.dst = fl4->daddr; |
---|
| 2040 | + } |
---|
| 2041 | + break; |
---|
1941 | 2042 | } |
---|
1942 | 2043 | mhash = flow_hash_from_keys(&hash_keys); |
---|
| 2044 | + |
---|
| 2045 | + if (multipath_hash) |
---|
| 2046 | + mhash = jhash_2words(mhash, multipath_hash, 0); |
---|
1943 | 2047 | |
---|
1944 | 2048 | return mhash >> 1; |
---|
1945 | 2049 | } |
---|
.. | .. |
---|
1952 | 2056 | struct flow_keys *hkeys) |
---|
1953 | 2057 | { |
---|
1954 | 2058 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
---|
1955 | | - if (res->fi && res->fi->fib_nhs > 1) { |
---|
| 2059 | + if (res->fi && fib_info_num_path(res->fi) > 1) { |
---|
1956 | 2060 | int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); |
---|
1957 | 2061 | |
---|
1958 | 2062 | fib_select_multipath(res, h); |
---|
| 2063 | + IPCB(skb)->flags |= IPSKB_MULTIPATH; |
---|
1959 | 2064 | } |
---|
1960 | 2065 | #endif |
---|
1961 | 2066 | |
---|
.. | .. |
---|
1963 | 2068 | return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); |
---|
1964 | 2069 | } |
---|
1965 | 2070 | |
---|
| 2071 | +/* Implements the same saddr-related checks as ip_route_input_slow(), |
---|
| 2072 | + * assuming daddr is valid and the destination is not a local broadcast one. |
---|
| 2073 | + * Uses the provided hint instead of performing a route lookup. |
---|
| 2074 | + */ |
---|
| 2075 | +int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
---|
| 2076 | + u8 tos, struct net_device *dev, |
---|
| 2077 | + const struct sk_buff *hint) |
---|
| 2078 | +{ |
---|
| 2079 | + struct in_device *in_dev = __in_dev_get_rcu(dev); |
---|
| 2080 | + struct rtable *rt = skb_rtable(hint); |
---|
| 2081 | + struct net *net = dev_net(dev); |
---|
| 2082 | + int err = -EINVAL; |
---|
| 2083 | + u32 tag = 0; |
---|
| 2084 | + |
---|
| 2085 | + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) |
---|
| 2086 | + goto martian_source; |
---|
| 2087 | + |
---|
| 2088 | + if (ipv4_is_zeronet(saddr)) |
---|
| 2089 | + goto martian_source; |
---|
| 2090 | + |
---|
| 2091 | + if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) |
---|
| 2092 | + goto martian_source; |
---|
| 2093 | + |
---|
| 2094 | + if (rt->rt_type != RTN_LOCAL) |
---|
| 2095 | + goto skip_validate_source; |
---|
| 2096 | + |
---|
| 2097 | + tos &= IPTOS_RT_MASK; |
---|
| 2098 | + err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); |
---|
| 2099 | + if (err < 0) |
---|
| 2100 | + goto martian_source; |
---|
| 2101 | + |
---|
| 2102 | +skip_validate_source: |
---|
| 2103 | + skb_dst_copy(skb, hint); |
---|
| 2104 | + return 0; |
---|
| 2105 | + |
---|
| 2106 | +martian_source: |
---|
| 2107 | + ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
---|
| 2108 | + return err; |
---|
| 2109 | +} |
---|
| 2110 | + |
---|
| 2111 | +/* get device for dst_alloc with local routes */ |
---|
| 2112 | +static struct net_device *ip_rt_get_dev(struct net *net, |
---|
| 2113 | + const struct fib_result *res) |
---|
| 2114 | +{ |
---|
| 2115 | + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; |
---|
| 2116 | + struct net_device *dev = NULL; |
---|
| 2117 | + |
---|
| 2118 | + if (nhc) |
---|
| 2119 | + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); |
---|
| 2120 | + |
---|
| 2121 | + return dev ? : net->loopback_dev; |
---|
| 2122 | +} |
---|
| 2123 | + |
---|
1966 | 2124 | /* |
---|
1967 | 2125 | * NOTE. We drop all the packets that have local source |
---|
1968 | 2126 | * addresses, because every properly looped back packet |
---|
1969 | 2127 | * must have correct destination already attached by output routine. |
---|
| 2128 | + * Changes in the enforced policies must be applied also to |
---|
| 2129 | + * ip_route_use_hint(). |
---|
1970 | 2130 | * |
---|
1971 | 2131 | * Such approach solves two big problems: |
---|
1972 | 2132 | * 1. Not simplex devices are handled properly. |
---|
.. | .. |
---|
2045 | 2205 | fl4.daddr = daddr; |
---|
2046 | 2206 | fl4.saddr = saddr; |
---|
2047 | 2207 | fl4.flowi4_uid = sock_net_uid(net, NULL); |
---|
| 2208 | + fl4.flowi4_multipath_hash = 0; |
---|
2048 | 2209 | |
---|
2049 | 2210 | if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { |
---|
2050 | 2211 | flkeys = &_flkeys; |
---|
.. | .. |
---|
2106 | 2267 | local_input: |
---|
2107 | 2268 | do_cache &= res->fi && !itag; |
---|
2108 | 2269 | if (do_cache) { |
---|
2109 | | - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); |
---|
| 2270 | + struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
---|
| 2271 | + |
---|
| 2272 | + rth = rcu_dereference(nhc->nhc_rth_input); |
---|
2110 | 2273 | if (rt_cache_valid(rth)) { |
---|
2111 | 2274 | skb_dst_set_noref(skb, &rth->dst); |
---|
2112 | 2275 | err = 0; |
---|
.. | .. |
---|
2114 | 2277 | } |
---|
2115 | 2278 | } |
---|
2116 | 2279 | |
---|
2117 | | - rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, |
---|
| 2280 | + rth = rt_dst_alloc(ip_rt_get_dev(net, res), |
---|
2118 | 2281 | flags | RTCF_LOCAL, res->type, |
---|
2119 | | - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); |
---|
| 2282 | + IN_DEV_ORCONF(in_dev, NOPOLICY), false); |
---|
2120 | 2283 | if (!rth) |
---|
2121 | 2284 | goto e_nobufs; |
---|
2122 | 2285 | |
---|
.. | .. |
---|
2134 | 2297 | } |
---|
2135 | 2298 | |
---|
2136 | 2299 | if (do_cache) { |
---|
2137 | | - struct fib_nh *nh = &FIB_RES_NH(*res); |
---|
| 2300 | + struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
---|
2138 | 2301 | |
---|
2139 | | - rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); |
---|
| 2302 | + rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); |
---|
2140 | 2303 | if (lwtunnel_input_redirect(rth->dst.lwtstate)) { |
---|
2141 | 2304 | WARN_ON(rth->dst.input == lwtunnel_input); |
---|
2142 | 2305 | rth->dst.lwtstate->orig_input = rth->dst.input; |
---|
2143 | 2306 | rth->dst.input = lwtunnel_input; |
---|
2144 | 2307 | } |
---|
2145 | 2308 | |
---|
2146 | | - if (unlikely(!rt_cache_route(nh, rth))) |
---|
| 2309 | + if (unlikely(!rt_cache_route(nhc, rth))) |
---|
2147 | 2310 | rt_add_uncached_list(rth); |
---|
2148 | 2311 | } |
---|
2149 | 2312 | skb_dst_set(skb, &rth->dst); |
---|
.. | .. |
---|
2314 | 2477 | fnhe = NULL; |
---|
2315 | 2478 | do_cache &= fi != NULL; |
---|
2316 | 2479 | if (fi) { |
---|
| 2480 | + struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
---|
2317 | 2481 | struct rtable __rcu **prth; |
---|
2318 | | - struct fib_nh *nh = &FIB_RES_NH(*res); |
---|
2319 | 2482 | |
---|
2320 | | - fnhe = find_exception(nh, fl4->daddr); |
---|
| 2483 | + fnhe = find_exception(nhc, fl4->daddr); |
---|
2321 | 2484 | if (!do_cache) |
---|
2322 | 2485 | goto add; |
---|
2323 | 2486 | if (fnhe) { |
---|
.. | .. |
---|
2325 | 2488 | } else { |
---|
2326 | 2489 | if (unlikely(fl4->flowi4_flags & |
---|
2327 | 2490 | FLOWI_FLAG_KNOWN_NH && |
---|
2328 | | - !(nh->nh_gw && |
---|
2329 | | - nh->nh_scope == RT_SCOPE_LINK))) { |
---|
| 2491 | + !(nhc->nhc_gw_family && |
---|
| 2492 | + nhc->nhc_scope == RT_SCOPE_LINK))) { |
---|
2330 | 2493 | do_cache = false; |
---|
2331 | 2494 | goto add; |
---|
2332 | 2495 | } |
---|
2333 | | - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); |
---|
| 2496 | + prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); |
---|
2334 | 2497 | } |
---|
2335 | 2498 | rth = rcu_dereference(*prth); |
---|
2336 | 2499 | if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) |
---|
.. | .. |
---|
2339 | 2502 | |
---|
2340 | 2503 | add: |
---|
2341 | 2504 | rth = rt_dst_alloc(dev_out, flags, type, |
---|
2342 | | - IN_DEV_CONF_GET(in_dev, NOPOLICY), |
---|
2343 | | - IN_DEV_CONF_GET(in_dev, NOXFRM), |
---|
2344 | | - do_cache); |
---|
| 2505 | + IN_DEV_ORCONF(in_dev, NOPOLICY), |
---|
| 2506 | + IN_DEV_ORCONF(in_dev, NOXFRM)); |
---|
2345 | 2507 | if (!rth) |
---|
2346 | 2508 | return ERR_PTR(-ENOBUFS); |
---|
2347 | 2509 | |
---|
.. | .. |
---|
2379 | 2541 | struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, |
---|
2380 | 2542 | const struct sk_buff *skb) |
---|
2381 | 2543 | { |
---|
2382 | | - __u8 tos = RT_FL_TOS(fl4); |
---|
2383 | 2544 | struct fib_result res = { |
---|
2384 | 2545 | .type = RTN_UNSPEC, |
---|
2385 | 2546 | .fi = NULL, |
---|
.. | .. |
---|
2389 | 2550 | struct rtable *rth; |
---|
2390 | 2551 | |
---|
2391 | 2552 | fl4->flowi4_iif = LOOPBACK_IFINDEX; |
---|
2392 | | - fl4->flowi4_tos = tos & IPTOS_RT_MASK; |
---|
2393 | | - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? |
---|
2394 | | - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); |
---|
| 2553 | + ip_rt_fix_tos(fl4); |
---|
2395 | 2554 | |
---|
2396 | 2555 | rcu_read_lock(); |
---|
2397 | 2556 | rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); |
---|
.. | .. |
---|
2572 | 2731 | return rth; |
---|
2573 | 2732 | } |
---|
2574 | 2733 | |
---|
2575 | | -static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) |
---|
2576 | | -{ |
---|
2577 | | - return NULL; |
---|
2578 | | -} |
---|
2579 | | - |
---|
2580 | | -static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) |
---|
2581 | | -{ |
---|
2582 | | - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); |
---|
2583 | | - |
---|
2584 | | - return mtu ? : dst->dev->mtu; |
---|
2585 | | -} |
---|
2586 | | - |
---|
2587 | | -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, |
---|
2588 | | - struct sk_buff *skb, u32 mtu, |
---|
2589 | | - bool confirm_neigh) |
---|
2590 | | -{ |
---|
2591 | | -} |
---|
2592 | | - |
---|
2593 | | -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, |
---|
2594 | | - struct sk_buff *skb) |
---|
2595 | | -{ |
---|
2596 | | -} |
---|
2597 | | - |
---|
2598 | | -static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, |
---|
2599 | | - unsigned long old) |
---|
2600 | | -{ |
---|
2601 | | - return NULL; |
---|
2602 | | -} |
---|
2603 | | - |
---|
2604 | 2734 | static struct dst_ops ipv4_dst_blackhole_ops = { |
---|
2605 | | - .family = AF_INET, |
---|
2606 | | - .check = ipv4_blackhole_dst_check, |
---|
2607 | | - .mtu = ipv4_blackhole_mtu, |
---|
2608 | | - .default_advmss = ipv4_default_advmss, |
---|
2609 | | - .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
---|
2610 | | - .redirect = ipv4_rt_blackhole_redirect, |
---|
2611 | | - .cow_metrics = ipv4_rt_blackhole_cow_metrics, |
---|
2612 | | - .neigh_lookup = ipv4_neigh_lookup, |
---|
| 2735 | + .family = AF_INET, |
---|
| 2736 | + .default_advmss = ipv4_default_advmss, |
---|
| 2737 | + .neigh_lookup = ipv4_neigh_lookup, |
---|
| 2738 | + .check = dst_blackhole_check, |
---|
| 2739 | + .cow_metrics = dst_blackhole_cow_metrics, |
---|
| 2740 | + .update_pmtu = dst_blackhole_update_pmtu, |
---|
| 2741 | + .redirect = dst_blackhole_redirect, |
---|
| 2742 | + .mtu = dst_blackhole_mtu, |
---|
2613 | 2743 | }; |
---|
2614 | 2744 | |
---|
2615 | 2745 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
---|
.. | .. |
---|
2637 | 2767 | rt->rt_genid = rt_genid_ipv4(net); |
---|
2638 | 2768 | rt->rt_flags = ort->rt_flags; |
---|
2639 | 2769 | rt->rt_type = ort->rt_type; |
---|
2640 | | - rt->rt_gateway = ort->rt_gateway; |
---|
2641 | 2770 | rt->rt_uses_gateway = ort->rt_uses_gateway; |
---|
| 2771 | + rt->rt_gw_family = ort->rt_gw_family; |
---|
| 2772 | + if (rt->rt_gw_family == AF_INET) |
---|
| 2773 | + rt->rt_gw4 = ort->rt_gw4; |
---|
| 2774 | + else if (rt->rt_gw_family == AF_INET6) |
---|
| 2775 | + rt->rt_gw6 = ort->rt_gw6; |
---|
2642 | 2776 | |
---|
2643 | 2777 | INIT_LIST_HEAD(&rt->rt_uncached); |
---|
2644 | 2778 | } |
---|
.. | .. |
---|
2667 | 2801 | } |
---|
2668 | 2802 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
---|
2669 | 2803 | |
---|
| 2804 | +struct rtable *ip_route_output_tunnel(struct sk_buff *skb, |
---|
| 2805 | + struct net_device *dev, |
---|
| 2806 | + struct net *net, __be32 *saddr, |
---|
| 2807 | + const struct ip_tunnel_info *info, |
---|
| 2808 | + u8 protocol, bool use_cache) |
---|
| 2809 | +{ |
---|
| 2810 | +#ifdef CONFIG_DST_CACHE |
---|
| 2811 | + struct dst_cache *dst_cache; |
---|
| 2812 | +#endif |
---|
| 2813 | + struct rtable *rt = NULL; |
---|
| 2814 | + struct flowi4 fl4; |
---|
| 2815 | + __u8 tos; |
---|
| 2816 | + |
---|
| 2817 | +#ifdef CONFIG_DST_CACHE |
---|
| 2818 | + dst_cache = (struct dst_cache *)&info->dst_cache; |
---|
| 2819 | + if (use_cache) { |
---|
| 2820 | + rt = dst_cache_get_ip4(dst_cache, saddr); |
---|
| 2821 | + if (rt) |
---|
| 2822 | + return rt; |
---|
| 2823 | + } |
---|
| 2824 | +#endif |
---|
| 2825 | + memset(&fl4, 0, sizeof(fl4)); |
---|
| 2826 | + fl4.flowi4_mark = skb->mark; |
---|
| 2827 | + fl4.flowi4_proto = protocol; |
---|
| 2828 | + fl4.daddr = info->key.u.ipv4.dst; |
---|
| 2829 | + fl4.saddr = info->key.u.ipv4.src; |
---|
| 2830 | + tos = info->key.tos; |
---|
| 2831 | + fl4.flowi4_tos = RT_TOS(tos); |
---|
| 2832 | + |
---|
| 2833 | + rt = ip_route_output_key(net, &fl4); |
---|
| 2834 | + if (IS_ERR(rt)) { |
---|
| 2835 | + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); |
---|
| 2836 | + return ERR_PTR(-ENETUNREACH); |
---|
| 2837 | + } |
---|
| 2838 | + if (rt->dst.dev == dev) { /* is this necessary? */ |
---|
| 2839 | + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); |
---|
| 2840 | + ip_rt_put(rt); |
---|
| 2841 | + return ERR_PTR(-ELOOP); |
---|
| 2842 | + } |
---|
| 2843 | +#ifdef CONFIG_DST_CACHE |
---|
| 2844 | + if (use_cache) |
---|
| 2845 | + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); |
---|
| 2846 | +#endif |
---|
| 2847 | + *saddr = fl4.saddr; |
---|
| 2848 | + return rt; |
---|
| 2849 | +} |
---|
| 2850 | +EXPORT_SYMBOL_GPL(ip_route_output_tunnel); |
---|
| 2851 | + |
---|
2670 | 2852 | /* called with rcu_read_lock held */ |
---|
2671 | 2853 | static int rt_fill_info(struct net *net, __be32 dst, __be32 src, |
---|
2672 | 2854 | struct rtable *rt, u32 table_id, struct flowi4 *fl4, |
---|
2673 | | - struct sk_buff *skb, u32 portid, u32 seq) |
---|
| 2855 | + struct sk_buff *skb, u32 portid, u32 seq, |
---|
| 2856 | + unsigned int flags) |
---|
2674 | 2857 | { |
---|
2675 | 2858 | struct rtmsg *r; |
---|
2676 | 2859 | struct nlmsghdr *nlh; |
---|
.. | .. |
---|
2678 | 2861 | u32 error; |
---|
2679 | 2862 | u32 metrics[RTAX_MAX]; |
---|
2680 | 2863 | |
---|
2681 | | - nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); |
---|
| 2864 | + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags); |
---|
2682 | 2865 | if (!nlh) |
---|
2683 | 2866 | return -EMSGSIZE; |
---|
2684 | 2867 | |
---|
.. | .. |
---|
2686 | 2869 | r->rtm_family = AF_INET; |
---|
2687 | 2870 | r->rtm_dst_len = 32; |
---|
2688 | 2871 | r->rtm_src_len = 0; |
---|
2689 | | - r->rtm_tos = fl4->flowi4_tos; |
---|
| 2872 | + r->rtm_tos = fl4 ? fl4->flowi4_tos : 0; |
---|
2690 | 2873 | r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; |
---|
2691 | 2874 | if (nla_put_u32(skb, RTA_TABLE, table_id)) |
---|
2692 | 2875 | goto nla_put_failure; |
---|
.. | .. |
---|
2714 | 2897 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) |
---|
2715 | 2898 | goto nla_put_failure; |
---|
2716 | 2899 | #endif |
---|
2717 | | - if (!rt_is_input_route(rt) && |
---|
| 2900 | + if (fl4 && !rt_is_input_route(rt) && |
---|
2718 | 2901 | fl4->saddr != src) { |
---|
2719 | 2902 | if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) |
---|
2720 | 2903 | goto nla_put_failure; |
---|
2721 | 2904 | } |
---|
2722 | | - if (rt->rt_uses_gateway && |
---|
2723 | | - nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) |
---|
2724 | | - goto nla_put_failure; |
---|
| 2905 | + if (rt->rt_uses_gateway) { |
---|
| 2906 | + if (rt->rt_gw_family == AF_INET && |
---|
| 2907 | + nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { |
---|
| 2908 | + goto nla_put_failure; |
---|
| 2909 | + } else if (rt->rt_gw_family == AF_INET6) { |
---|
| 2910 | + int alen = sizeof(struct in6_addr); |
---|
| 2911 | + struct nlattr *nla; |
---|
| 2912 | + struct rtvia *via; |
---|
| 2913 | + |
---|
| 2914 | + nla = nla_reserve(skb, RTA_VIA, alen + 2); |
---|
| 2915 | + if (!nla) |
---|
| 2916 | + goto nla_put_failure; |
---|
| 2917 | + |
---|
| 2918 | + via = nla_data(nla); |
---|
| 2919 | + via->rtvia_family = AF_INET6; |
---|
| 2920 | + memcpy(via->rtvia_addr, &rt->rt_gw6, alen); |
---|
| 2921 | + } |
---|
| 2922 | + } |
---|
2725 | 2923 | |
---|
2726 | 2924 | expires = rt->dst.expires; |
---|
2727 | 2925 | if (expires) { |
---|
.. | .. |
---|
2741 | 2939 | if (rtnetlink_put_metrics(skb, metrics) < 0) |
---|
2742 | 2940 | goto nla_put_failure; |
---|
2743 | 2941 | |
---|
2744 | | - if (fl4->flowi4_mark && |
---|
2745 | | - nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) |
---|
2746 | | - goto nla_put_failure; |
---|
| 2942 | + if (fl4) { |
---|
| 2943 | + if (fl4->flowi4_mark && |
---|
| 2944 | + nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) |
---|
| 2945 | + goto nla_put_failure; |
---|
2747 | 2946 | |
---|
2748 | | - if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && |
---|
2749 | | - nla_put_u32(skb, RTA_UID, |
---|
2750 | | - from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) |
---|
2751 | | - goto nla_put_failure; |
---|
| 2947 | + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && |
---|
| 2948 | + nla_put_u32(skb, RTA_UID, |
---|
| 2949 | + from_kuid_munged(current_user_ns(), |
---|
| 2950 | + fl4->flowi4_uid))) |
---|
| 2951 | + goto nla_put_failure; |
---|
| 2952 | + |
---|
| 2953 | + if (rt_is_input_route(rt)) { |
---|
| 2954 | +#ifdef CONFIG_IP_MROUTE |
---|
| 2955 | + if (ipv4_is_multicast(dst) && |
---|
| 2956 | + !ipv4_is_local_multicast(dst) && |
---|
| 2957 | + IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
---|
| 2958 | + int err = ipmr_get_route(net, skb, |
---|
| 2959 | + fl4->saddr, fl4->daddr, |
---|
| 2960 | + r, portid); |
---|
| 2961 | + |
---|
| 2962 | + if (err <= 0) { |
---|
| 2963 | + if (err == 0) |
---|
| 2964 | + return 0; |
---|
| 2965 | + goto nla_put_failure; |
---|
| 2966 | + } |
---|
| 2967 | + } else |
---|
| 2968 | +#endif |
---|
| 2969 | + if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) |
---|
| 2970 | + goto nla_put_failure; |
---|
| 2971 | + } |
---|
| 2972 | + } |
---|
2752 | 2973 | |
---|
2753 | 2974 | error = rt->dst.error; |
---|
2754 | | - |
---|
2755 | | - if (rt_is_input_route(rt)) { |
---|
2756 | | -#ifdef CONFIG_IP_MROUTE |
---|
2757 | | - if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && |
---|
2758 | | - IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
---|
2759 | | - int err = ipmr_get_route(net, skb, |
---|
2760 | | - fl4->saddr, fl4->daddr, |
---|
2761 | | - r, portid); |
---|
2762 | | - |
---|
2763 | | - if (err <= 0) { |
---|
2764 | | - if (err == 0) |
---|
2765 | | - return 0; |
---|
2766 | | - goto nla_put_failure; |
---|
2767 | | - } |
---|
2768 | | - } else |
---|
2769 | | -#endif |
---|
2770 | | - if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) |
---|
2771 | | - goto nla_put_failure; |
---|
2772 | | - } |
---|
2773 | 2975 | |
---|
2774 | 2976 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) |
---|
2775 | 2977 | goto nla_put_failure; |
---|
.. | .. |
---|
2780 | 2982 | nla_put_failure: |
---|
2781 | 2983 | nlmsg_cancel(skb, nlh); |
---|
2782 | 2984 | return -EMSGSIZE; |
---|
| 2985 | +} |
---|
| 2986 | + |
---|
| 2987 | +static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, |
---|
| 2988 | + struct netlink_callback *cb, u32 table_id, |
---|
| 2989 | + struct fnhe_hash_bucket *bucket, int genid, |
---|
| 2990 | + int *fa_index, int fa_start, unsigned int flags) |
---|
| 2991 | +{ |
---|
| 2992 | + int i; |
---|
| 2993 | + |
---|
| 2994 | + for (i = 0; i < FNHE_HASH_SIZE; i++) { |
---|
| 2995 | + struct fib_nh_exception *fnhe; |
---|
| 2996 | + |
---|
| 2997 | + for (fnhe = rcu_dereference(bucket[i].chain); fnhe; |
---|
| 2998 | + fnhe = rcu_dereference(fnhe->fnhe_next)) { |
---|
| 2999 | + struct rtable *rt; |
---|
| 3000 | + int err; |
---|
| 3001 | + |
---|
| 3002 | + if (*fa_index < fa_start) |
---|
| 3003 | + goto next; |
---|
| 3004 | + |
---|
| 3005 | + if (fnhe->fnhe_genid != genid) |
---|
| 3006 | + goto next; |
---|
| 3007 | + |
---|
| 3008 | + if (fnhe->fnhe_expires && |
---|
| 3009 | + time_after(jiffies, fnhe->fnhe_expires)) |
---|
| 3010 | + goto next; |
---|
| 3011 | + |
---|
| 3012 | + rt = rcu_dereference(fnhe->fnhe_rth_input); |
---|
| 3013 | + if (!rt) |
---|
| 3014 | + rt = rcu_dereference(fnhe->fnhe_rth_output); |
---|
| 3015 | + if (!rt) |
---|
| 3016 | + goto next; |
---|
| 3017 | + |
---|
| 3018 | + err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, |
---|
| 3019 | + table_id, NULL, skb, |
---|
| 3020 | + NETLINK_CB(cb->skb).portid, |
---|
| 3021 | + cb->nlh->nlmsg_seq, flags); |
---|
| 3022 | + if (err) |
---|
| 3023 | + return err; |
---|
| 3024 | +next: |
---|
| 3025 | + (*fa_index)++; |
---|
| 3026 | + } |
---|
| 3027 | + } |
---|
| 3028 | + |
---|
| 3029 | + return 0; |
---|
| 3030 | +} |
---|
| 3031 | + |
---|
| 3032 | +int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, |
---|
| 3033 | + u32 table_id, struct fib_info *fi, |
---|
| 3034 | + int *fa_index, int fa_start, unsigned int flags) |
---|
| 3035 | +{ |
---|
| 3036 | + struct net *net = sock_net(cb->skb->sk); |
---|
| 3037 | + int nhsel, genid = fnhe_genid(net); |
---|
| 3038 | + |
---|
| 3039 | + for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { |
---|
| 3040 | + struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); |
---|
| 3041 | + struct fnhe_hash_bucket *bucket; |
---|
| 3042 | + int err; |
---|
| 3043 | + |
---|
| 3044 | + if (nhc->nhc_flags & RTNH_F_DEAD) |
---|
| 3045 | + continue; |
---|
| 3046 | + |
---|
| 3047 | + rcu_read_lock(); |
---|
| 3048 | + bucket = rcu_dereference(nhc->nhc_exceptions); |
---|
| 3049 | + err = 0; |
---|
| 3050 | + if (bucket) |
---|
| 3051 | + err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, |
---|
| 3052 | + genid, fa_index, fa_start, |
---|
| 3053 | + flags); |
---|
| 3054 | + rcu_read_unlock(); |
---|
| 3055 | + if (err) |
---|
| 3056 | + return err; |
---|
| 3057 | + } |
---|
| 3058 | + |
---|
| 3059 | + return 0; |
---|
2783 | 3060 | } |
---|
2784 | 3061 | |
---|
2785 | 3062 | static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, |
---|
.. | .. |
---|
2843 | 3120 | return skb; |
---|
2844 | 3121 | } |
---|
2845 | 3122 | |
---|
| 3123 | +static int inet_rtm_valid_getroute_req(struct sk_buff *skb, |
---|
| 3124 | + const struct nlmsghdr *nlh, |
---|
| 3125 | + struct nlattr **tb, |
---|
| 3126 | + struct netlink_ext_ack *extack) |
---|
| 3127 | +{ |
---|
| 3128 | + struct rtmsg *rtm; |
---|
| 3129 | + int i, err; |
---|
| 3130 | + |
---|
| 3131 | + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { |
---|
| 3132 | + NL_SET_ERR_MSG(extack, |
---|
| 3133 | + "ipv4: Invalid header for route get request"); |
---|
| 3134 | + return -EINVAL; |
---|
| 3135 | + } |
---|
| 3136 | + |
---|
| 3137 | + if (!netlink_strict_get_check(skb)) |
---|
| 3138 | + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, |
---|
| 3139 | + rtm_ipv4_policy, extack); |
---|
| 3140 | + |
---|
| 3141 | + rtm = nlmsg_data(nlh); |
---|
| 3142 | + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || |
---|
| 3143 | + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || |
---|
| 3144 | + rtm->rtm_table || rtm->rtm_protocol || |
---|
| 3145 | + rtm->rtm_scope || rtm->rtm_type) { |
---|
| 3146 | + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); |
---|
| 3147 | + return -EINVAL; |
---|
| 3148 | + } |
---|
| 3149 | + |
---|
| 3150 | + if (rtm->rtm_flags & ~(RTM_F_NOTIFY | |
---|
| 3151 | + RTM_F_LOOKUP_TABLE | |
---|
| 3152 | + RTM_F_FIB_MATCH)) { |
---|
| 3153 | + NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); |
---|
| 3154 | + return -EINVAL; |
---|
| 3155 | + } |
---|
| 3156 | + |
---|
| 3157 | + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, |
---|
| 3158 | + rtm_ipv4_policy, extack); |
---|
| 3159 | + if (err) |
---|
| 3160 | + return err; |
---|
| 3161 | + |
---|
| 3162 | + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || |
---|
| 3163 | + (tb[RTA_DST] && !rtm->rtm_dst_len)) { |
---|
| 3164 | + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); |
---|
| 3165 | + return -EINVAL; |
---|
| 3166 | + } |
---|
| 3167 | + |
---|
| 3168 | + for (i = 0; i <= RTA_MAX; i++) { |
---|
| 3169 | + if (!tb[i]) |
---|
| 3170 | + continue; |
---|
| 3171 | + |
---|
| 3172 | + switch (i) { |
---|
| 3173 | + case RTA_IIF: |
---|
| 3174 | + case RTA_OIF: |
---|
| 3175 | + case RTA_SRC: |
---|
| 3176 | + case RTA_DST: |
---|
| 3177 | + case RTA_IP_PROTO: |
---|
| 3178 | + case RTA_SPORT: |
---|
| 3179 | + case RTA_DPORT: |
---|
| 3180 | + case RTA_MARK: |
---|
| 3181 | + case RTA_UID: |
---|
| 3182 | + break; |
---|
| 3183 | + default: |
---|
| 3184 | + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); |
---|
| 3185 | + return -EINVAL; |
---|
| 3186 | + } |
---|
| 3187 | + } |
---|
| 3188 | + |
---|
| 3189 | + return 0; |
---|
| 3190 | +} |
---|
| 3191 | + |
---|
2846 | 3192 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, |
---|
2847 | 3193 | struct netlink_ext_ack *extack) |
---|
2848 | 3194 | { |
---|
.. | .. |
---|
2855 | 3201 | struct rtable *rt = NULL; |
---|
2856 | 3202 | struct sk_buff *skb; |
---|
2857 | 3203 | struct rtmsg *rtm; |
---|
2858 | | - struct flowi4 fl4; |
---|
| 3204 | + struct flowi4 fl4 = {}; |
---|
2859 | 3205 | __be32 dst = 0; |
---|
2860 | 3206 | __be32 src = 0; |
---|
2861 | 3207 | kuid_t uid; |
---|
.. | .. |
---|
2863 | 3209 | int err; |
---|
2864 | 3210 | int mark; |
---|
2865 | 3211 | |
---|
2866 | | - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, |
---|
2867 | | - extack); |
---|
| 3212 | + err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); |
---|
2868 | 3213 | if (err < 0) |
---|
2869 | 3214 | return err; |
---|
2870 | 3215 | |
---|
.. | .. |
---|
2895 | 3240 | if (!skb) |
---|
2896 | 3241 | return -ENOBUFS; |
---|
2897 | 3242 | |
---|
2898 | | - memset(&fl4, 0, sizeof(fl4)); |
---|
2899 | 3243 | fl4.daddr = dst; |
---|
2900 | 3244 | fl4.saddr = src; |
---|
2901 | 3245 | fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; |
---|
.. | .. |
---|
2931 | 3275 | err = -rt->dst.error; |
---|
2932 | 3276 | } else { |
---|
2933 | 3277 | fl4.flowi4_iif = LOOPBACK_IFINDEX; |
---|
| 3278 | + skb->dev = net->loopback_dev; |
---|
2934 | 3279 | rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); |
---|
2935 | 3280 | err = 0; |
---|
2936 | 3281 | if (IS_ERR(rt)) |
---|
.. | .. |
---|
2955 | 3300 | skb_reset_mac_header(skb); |
---|
2956 | 3301 | |
---|
2957 | 3302 | if (rtm->rtm_flags & RTM_F_FIB_MATCH) { |
---|
| 3303 | + struct fib_rt_info fri; |
---|
| 3304 | + |
---|
2958 | 3305 | if (!res.fi) { |
---|
2959 | 3306 | err = fib_props[res.type].error; |
---|
2960 | 3307 | if (!err) |
---|
2961 | 3308 | err = -EHOSTUNREACH; |
---|
2962 | 3309 | goto errout_rcu; |
---|
2963 | 3310 | } |
---|
| 3311 | + fri.fi = res.fi; |
---|
| 3312 | + fri.tb_id = table_id; |
---|
| 3313 | + fri.dst = res.prefix; |
---|
| 3314 | + fri.dst_len = res.prefixlen; |
---|
| 3315 | + fri.tos = fl4.flowi4_tos; |
---|
| 3316 | + fri.type = rt->rt_type; |
---|
| 3317 | + fri.offload = 0; |
---|
| 3318 | + fri.trap = 0; |
---|
| 3319 | + if (res.fa_head) { |
---|
| 3320 | + struct fib_alias *fa; |
---|
| 3321 | + |
---|
| 3322 | + hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) { |
---|
| 3323 | + u8 slen = 32 - fri.dst_len; |
---|
| 3324 | + |
---|
| 3325 | + if (fa->fa_slen == slen && |
---|
| 3326 | + fa->tb_id == fri.tb_id && |
---|
| 3327 | + fa->fa_tos == fri.tos && |
---|
| 3328 | + fa->fa_info == res.fi && |
---|
| 3329 | + fa->fa_type == fri.type) { |
---|
| 3330 | + fri.offload = fa->offload; |
---|
| 3331 | + fri.trap = fa->trap; |
---|
| 3332 | + break; |
---|
| 3333 | + } |
---|
| 3334 | + } |
---|
| 3335 | + } |
---|
2964 | 3336 | err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, |
---|
2965 | | - nlh->nlmsg_seq, RTM_NEWROUTE, table_id, |
---|
2966 | | - rt->rt_type, res.prefix, res.prefixlen, |
---|
2967 | | - fl4.flowi4_tos, res.fi, 0); |
---|
| 3337 | + nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0); |
---|
2968 | 3338 | } else { |
---|
2969 | 3339 | err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, |
---|
2970 | | - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); |
---|
| 3340 | + NETLINK_CB(in_skb).portid, |
---|
| 3341 | + nlh->nlmsg_seq, 0); |
---|
2971 | 3342 | } |
---|
2972 | 3343 | if (err < 0) |
---|
2973 | 3344 | goto errout_rcu; |
---|
.. | .. |
---|
2996 | 3367 | static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; |
---|
2997 | 3368 | |
---|
2998 | 3369 | static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, |
---|
2999 | | - void __user *buffer, |
---|
3000 | | - size_t *lenp, loff_t *ppos) |
---|
| 3370 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
3001 | 3371 | { |
---|
3002 | 3372 | struct net *net = (struct net *)__ctl->extra1; |
---|
3003 | 3373 | |
---|
.. | .. |
---|
3122 | 3492 | { } |
---|
3123 | 3493 | }; |
---|
3124 | 3494 | |
---|
| 3495 | +static const char ipv4_route_flush_procname[] = "flush"; |
---|
| 3496 | + |
---|
3125 | 3497 | static struct ctl_table ipv4_route_flush_table[] = { |
---|
3126 | 3498 | { |
---|
3127 | | - .procname = "flush", |
---|
| 3499 | + .procname = ipv4_route_flush_procname, |
---|
3128 | 3500 | .maxlen = sizeof(int), |
---|
3129 | 3501 | .mode = 0200, |
---|
3130 | 3502 | .proc_handler = ipv4_sysctl_rtcache_flush, |
---|
.. | .. |
---|
3142 | 3514 | if (!tbl) |
---|
3143 | 3515 | goto err_dup; |
---|
3144 | 3516 | |
---|
3145 | | - /* Don't export sysctls to unprivileged users */ |
---|
3146 | | - if (net->user_ns != &init_user_ns) |
---|
3147 | | - tbl[0].procname = NULL; |
---|
| 3517 | + /* Don't export non-whitelisted sysctls to unprivileged users */ |
---|
| 3518 | + if (net->user_ns != &init_user_ns) { |
---|
| 3519 | + if (tbl[0].procname != ipv4_route_flush_procname) |
---|
| 3520 | + tbl[0].procname = NULL; |
---|
| 3521 | + } |
---|
3148 | 3522 | } |
---|
3149 | 3523 | tbl[0].extra1 = net; |
---|
3150 | 3524 | |
---|