.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
---|
3 | 4 | * operating system. INET is implemented using the BSD Socket |
---|
.. | .. |
---|
13 | 14 | * Stefan Becker, <stefanb@yello.ping.de> |
---|
14 | 15 | * Jorge Cwik, <jorge@laser.satlink.net> |
---|
15 | 16 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
---|
16 | | - * |
---|
17 | 17 | * |
---|
18 | 18 | * Fixes: |
---|
19 | 19 | * Alan Cox : Commented a couple of minor bits of surplus code |
---|
.. | .. |
---|
96 | 96 | * Jos Vos : Do accounting *before* call_in_firewall |
---|
97 | 97 | * Willy Konynenberg : Transparent proxying support |
---|
98 | 98 | * |
---|
99 | | - * |
---|
100 | | - * |
---|
101 | 99 | * To Fix: |
---|
102 | 100 | * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient |
---|
103 | 101 | * and could be made very efficient with the addition of some virtual memory hacks to permit |
---|
.. | .. |
---|
106 | 104 | * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet |
---|
107 | 105 | * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause |
---|
108 | 106 | * fragmentation anyway. |
---|
109 | | - * |
---|
110 | | - * This program is free software; you can redistribute it and/or |
---|
111 | | - * modify it under the terms of the GNU General Public License |
---|
112 | | - * as published by the Free Software Foundation; either version |
---|
113 | | - * 2 of the License, or (at your option) any later version. |
---|
114 | 107 | */ |
---|
115 | 108 | |
---|
116 | 109 | #define pr_fmt(fmt) "IPv4: " fmt |
---|
.. | .. |
---|
130 | 123 | #include <linux/inetdevice.h> |
---|
131 | 124 | #include <linux/netdevice.h> |
---|
132 | 125 | #include <linux/etherdevice.h> |
---|
| 126 | +#include <linux/indirect_call_wrapper.h> |
---|
133 | 127 | |
---|
134 | 128 | #include <net/snmp.h> |
---|
135 | 129 | #include <net/ip.h> |
---|
.. | .. |
---|
188 | 182 | return false; |
---|
189 | 183 | } |
---|
190 | 184 | |
---|
| 185 | +INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); |
---|
| 186 | +INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); |
---|
| 187 | +void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) |
---|
| 188 | +{ |
---|
| 189 | + const struct net_protocol *ipprot; |
---|
| 190 | + int raw, ret; |
---|
| 191 | + |
---|
| 192 | +resubmit: |
---|
| 193 | + raw = raw_local_deliver(skb, protocol); |
---|
| 194 | + |
---|
| 195 | + ipprot = rcu_dereference(inet_protos[protocol]); |
---|
| 196 | + if (ipprot) { |
---|
| 197 | + if (!ipprot->no_policy) { |
---|
| 198 | + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
---|
| 199 | + kfree_skb(skb); |
---|
| 200 | + return; |
---|
| 201 | + } |
---|
| 202 | + nf_reset_ct(skb); |
---|
| 203 | + } |
---|
| 204 | + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, |
---|
| 205 | + skb); |
---|
| 206 | + if (ret < 0) { |
---|
| 207 | + protocol = -ret; |
---|
| 208 | + goto resubmit; |
---|
| 209 | + } |
---|
| 210 | + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); |
---|
| 211 | + } else { |
---|
| 212 | + if (!raw) { |
---|
| 213 | + if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
---|
| 214 | + __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); |
---|
| 215 | + icmp_send(skb, ICMP_DEST_UNREACH, |
---|
| 216 | + ICMP_PROT_UNREACH, 0); |
---|
| 217 | + } |
---|
| 218 | + kfree_skb(skb); |
---|
| 219 | + } else { |
---|
| 220 | + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); |
---|
| 221 | + consume_skb(skb); |
---|
| 222 | + } |
---|
| 223 | + } |
---|
| 224 | +} |
---|
| 225 | + |
---|
191 | 226 | static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) |
---|
192 | 227 | { |
---|
193 | 228 | __skb_pull(skb, skb_network_header_len(skb)); |
---|
194 | 229 | |
---|
195 | 230 | rcu_read_lock(); |
---|
196 | | - { |
---|
197 | | - int protocol = ip_hdr(skb)->protocol; |
---|
198 | | - const struct net_protocol *ipprot; |
---|
199 | | - int raw; |
---|
200 | | - |
---|
201 | | - resubmit: |
---|
202 | | - raw = raw_local_deliver(skb, protocol); |
---|
203 | | - |
---|
204 | | - ipprot = rcu_dereference(inet_protos[protocol]); |
---|
205 | | - if (ipprot) { |
---|
206 | | - int ret; |
---|
207 | | - |
---|
208 | | - if (!ipprot->no_policy) { |
---|
209 | | - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
---|
210 | | - kfree_skb(skb); |
---|
211 | | - goto out; |
---|
212 | | - } |
---|
213 | | - nf_reset(skb); |
---|
214 | | - } |
---|
215 | | - ret = ipprot->handler(skb); |
---|
216 | | - if (ret < 0) { |
---|
217 | | - protocol = -ret; |
---|
218 | | - goto resubmit; |
---|
219 | | - } |
---|
220 | | - __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); |
---|
221 | | - } else { |
---|
222 | | - if (!raw) { |
---|
223 | | - if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
---|
224 | | - __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); |
---|
225 | | - icmp_send(skb, ICMP_DEST_UNREACH, |
---|
226 | | - ICMP_PROT_UNREACH, 0); |
---|
227 | | - } |
---|
228 | | - kfree_skb(skb); |
---|
229 | | - } else { |
---|
230 | | - __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); |
---|
231 | | - consume_skb(skb); |
---|
232 | | - } |
---|
233 | | - } |
---|
234 | | - } |
---|
235 | | - out: |
---|
| 231 | + ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol); |
---|
236 | 232 | rcu_read_unlock(); |
---|
237 | 233 | |
---|
238 | 234 | return 0; |
---|
.. | .. |
---|
306 | 302 | return true; |
---|
307 | 303 | } |
---|
308 | 304 | |
---|
| 305 | +static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, |
---|
| 306 | + const struct sk_buff *hint) |
---|
| 307 | +{ |
---|
| 308 | + return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr && |
---|
| 309 | + ip_hdr(hint)->tos == iph->tos; |
---|
| 310 | +} |
---|
| 311 | + |
---|
| 312 | +int tcp_v4_early_demux(struct sk_buff *skb); |
---|
| 313 | +int udp_v4_early_demux(struct sk_buff *skb); |
---|
309 | 314 | static int ip_rcv_finish_core(struct net *net, struct sock *sk, |
---|
310 | | - struct sk_buff *skb, struct net_device *dev) |
---|
| 315 | + struct sk_buff *skb, struct net_device *dev, |
---|
| 316 | + const struct sk_buff *hint) |
---|
311 | 317 | { |
---|
312 | 318 | const struct iphdr *iph = ip_hdr(skb); |
---|
313 | | - int (*edemux)(struct sk_buff *skb); |
---|
314 | 319 | struct rtable *rt; |
---|
315 | 320 | int err; |
---|
316 | 321 | |
---|
317 | | - if (net->ipv4.sysctl_ip_early_demux && |
---|
| 322 | + if (ip_can_use_hint(skb, iph, hint)) { |
---|
| 323 | + err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, |
---|
| 324 | + dev, hint); |
---|
| 325 | + if (unlikely(err)) |
---|
| 326 | + goto drop_error; |
---|
| 327 | + } |
---|
| 328 | + |
---|
| 329 | + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && |
---|
318 | 330 | !skb_dst(skb) && |
---|
319 | 331 | !skb->sk && |
---|
320 | 332 | !ip_is_fragment(iph)) { |
---|
321 | | - const struct net_protocol *ipprot; |
---|
322 | | - int protocol = iph->protocol; |
---|
| 333 | + switch (iph->protocol) { |
---|
| 334 | + case IPPROTO_TCP: |
---|
| 335 | + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { |
---|
| 336 | + tcp_v4_early_demux(skb); |
---|
323 | 337 | |
---|
324 | | - ipprot = rcu_dereference(inet_protos[protocol]); |
---|
325 | | - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { |
---|
326 | | - err = edemux(skb); |
---|
327 | | - if (unlikely(err)) |
---|
328 | | - goto drop_error; |
---|
329 | | - /* must reload iph, skb->head might have changed */ |
---|
330 | | - iph = ip_hdr(skb); |
---|
| 338 | + /* must reload iph, skb->head might have changed */ |
---|
| 339 | + iph = ip_hdr(skb); |
---|
| 340 | + } |
---|
| 341 | + break; |
---|
| 342 | + case IPPROTO_UDP: |
---|
| 343 | + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { |
---|
| 344 | + err = udp_v4_early_demux(skb); |
---|
| 345 | + if (unlikely(err)) |
---|
| 346 | + goto drop_error; |
---|
| 347 | + |
---|
| 348 | + /* must reload iph, skb->head might have changed */ |
---|
| 349 | + iph = ip_hdr(skb); |
---|
| 350 | + } |
---|
| 351 | + break; |
---|
331 | 352 | } |
---|
332 | 353 | } |
---|
333 | 354 | |
---|
.. | .. |
---|
409 | 430 | if (!skb) |
---|
410 | 431 | return NET_RX_SUCCESS; |
---|
411 | 432 | |
---|
412 | | - ret = ip_rcv_finish_core(net, sk, skb, dev); |
---|
| 433 | + ret = ip_rcv_finish_core(net, sk, skb, dev, NULL); |
---|
413 | 434 | if (ret != NET_RX_DROP) |
---|
414 | 435 | ret = dst_input(skb); |
---|
415 | 436 | return ret; |
---|
.. | .. |
---|
428 | 449 | */ |
---|
429 | 450 | if (skb->pkt_type == PACKET_OTHERHOST) |
---|
430 | 451 | goto drop; |
---|
431 | | - |
---|
432 | 452 | |
---|
433 | 453 | __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); |
---|
434 | 454 | |
---|
.. | .. |
---|
496 | 516 | IPCB(skb)->iif = skb->skb_iif; |
---|
497 | 517 | |
---|
498 | 518 | /* Must drop socket now because of tproxy. */ |
---|
499 | | - skb_orphan(skb); |
---|
| 519 | + if (!skb_sk_is_prefetched(skb)) |
---|
| 520 | + skb_orphan(skb); |
---|
500 | 521 | |
---|
501 | 522 | return skb; |
---|
502 | 523 | |
---|
.. | .. |
---|
521 | 542 | skb = ip_rcv_core(skb, net); |
---|
522 | 543 | if (skb == NULL) |
---|
523 | 544 | return NET_RX_DROP; |
---|
| 545 | + |
---|
524 | 546 | return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, |
---|
525 | 547 | net, NULL, skb, dev, NULL, |
---|
526 | 548 | ip_rcv_finish); |
---|
.. | .. |
---|
531 | 553 | struct sk_buff *skb, *next; |
---|
532 | 554 | |
---|
533 | 555 | list_for_each_entry_safe(skb, next, head, list) { |
---|
534 | | - list_del(&skb->list); |
---|
535 | | - /* Handle ip{6}_forward case, as sch_direct_xmit have |
---|
536 | | - * another kind of SKB-list usage (see validate_xmit_skb_list) |
---|
537 | | - */ |
---|
538 | | - skb->next = NULL; |
---|
| 556 | + skb_list_del_init(skb); |
---|
539 | 557 | dst_input(skb); |
---|
540 | 558 | } |
---|
| 559 | +} |
---|
| 560 | + |
---|
| 561 | +static struct sk_buff *ip_extract_route_hint(const struct net *net, |
---|
| 562 | + struct sk_buff *skb, int rt_type) |
---|
| 563 | +{ |
---|
| 564 | + if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST || |
---|
| 565 | + IPCB(skb)->flags & IPSKB_MULTIPATH) |
---|
| 566 | + return NULL; |
---|
| 567 | + |
---|
| 568 | + return skb; |
---|
541 | 569 | } |
---|
542 | 570 | |
---|
543 | 571 | static void ip_list_rcv_finish(struct net *net, struct sock *sk, |
---|
544 | 572 | struct list_head *head) |
---|
545 | 573 | { |
---|
| 574 | + struct sk_buff *skb, *next, *hint = NULL; |
---|
546 | 575 | struct dst_entry *curr_dst = NULL; |
---|
547 | | - struct sk_buff *skb, *next; |
---|
548 | 576 | struct list_head sublist; |
---|
549 | 577 | |
---|
550 | 578 | INIT_LIST_HEAD(&sublist); |
---|
.. | .. |
---|
559 | 587 | skb = l3mdev_ip_rcv(skb); |
---|
560 | 588 | if (!skb) |
---|
561 | 589 | continue; |
---|
562 | | - if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP) |
---|
| 590 | + if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP) |
---|
563 | 591 | continue; |
---|
564 | 592 | |
---|
565 | 593 | dst = skb_dst(skb); |
---|
566 | 594 | if (curr_dst != dst) { |
---|
| 595 | + hint = ip_extract_route_hint(net, skb, |
---|
| 596 | + ((struct rtable *)dst)->rt_type); |
---|
| 597 | + |
---|
567 | 598 | /* dispatch old sublist */ |
---|
568 | 599 | if (!list_empty(&sublist)) |
---|
569 | 600 | ip_sublist_rcv_finish(&sublist); |
---|
.. | .. |
---|
616 | 647 | list_add_tail(&skb->list, &sublist); |
---|
617 | 648 | } |
---|
618 | 649 | /* dispatch final sublist */ |
---|
619 | | - ip_sublist_rcv(&sublist, curr_dev, curr_net); |
---|
| 650 | + if (!list_empty(&sublist)) |
---|
| 651 | + ip_sublist_rcv(&sublist, curr_dev, curr_net); |
---|
620 | 652 | } |
---|