hc
2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/net/ipv4/ip_input.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -13,7 +14,6 @@
1314 * Stefan Becker, <stefanb@yello.ping.de>
1415 * Jorge Cwik, <jorge@laser.satlink.net>
1516 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
16
- *
1717 *
1818 * Fixes:
1919 * Alan Cox : Commented a couple of minor bits of surplus code
....@@ -96,8 +96,6 @@
9696 * Jos Vos : Do accounting *before* call_in_firewall
9797 * Willy Konynenberg : Transparent proxying support
9898 *
99
- *
100
- *
10199 * To Fix:
102100 * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
103101 * and could be made very efficient with the addition of some virtual memory hacks to permit
....@@ -106,11 +104,6 @@
106104 * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
107105 * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
108106 * fragmentation anyway.
109
- *
110
- * This program is free software; you can redistribute it and/or
111
- * modify it under the terms of the GNU General Public License
112
- * as published by the Free Software Foundation; either version
113
- * 2 of the License, or (at your option) any later version.
114107 */
115108
116109 #define pr_fmt(fmt) "IPv4: " fmt
....@@ -130,6 +123,7 @@
130123 #include <linux/inetdevice.h>
131124 #include <linux/netdevice.h>
132125 #include <linux/etherdevice.h>
126
+#include <linux/indirect_call_wrapper.h>
133127
134128 #include <net/snmp.h>
135129 #include <net/ip.h>
....@@ -188,51 +182,53 @@
188182 return false;
189183 }
190184
185
+INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
186
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
187
+void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
188
+{
189
+ const struct net_protocol *ipprot;
190
+ int raw, ret;
191
+
192
+resubmit:
193
+ raw = raw_local_deliver(skb, protocol);
194
+
195
+ ipprot = rcu_dereference(inet_protos[protocol]);
196
+ if (ipprot) {
197
+ if (!ipprot->no_policy) {
198
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
199
+ kfree_skb(skb);
200
+ return;
201
+ }
202
+ nf_reset_ct(skb);
203
+ }
204
+ ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
205
+ skb);
206
+ if (ret < 0) {
207
+ protocol = -ret;
208
+ goto resubmit;
209
+ }
210
+ __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
211
+ } else {
212
+ if (!raw) {
213
+ if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
214
+ __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
215
+ icmp_send(skb, ICMP_DEST_UNREACH,
216
+ ICMP_PROT_UNREACH, 0);
217
+ }
218
+ kfree_skb(skb);
219
+ } else {
220
+ __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
221
+ consume_skb(skb);
222
+ }
223
+ }
224
+}
225
+
191226 static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
192227 {
193228 __skb_pull(skb, skb_network_header_len(skb));
194229
195230 rcu_read_lock();
196
- {
197
- int protocol = ip_hdr(skb)->protocol;
198
- const struct net_protocol *ipprot;
199
- int raw;
200
-
201
- resubmit:
202
- raw = raw_local_deliver(skb, protocol);
203
-
204
- ipprot = rcu_dereference(inet_protos[protocol]);
205
- if (ipprot) {
206
- int ret;
207
-
208
- if (!ipprot->no_policy) {
209
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
210
- kfree_skb(skb);
211
- goto out;
212
- }
213
- nf_reset(skb);
214
- }
215
- ret = ipprot->handler(skb);
216
- if (ret < 0) {
217
- protocol = -ret;
218
- goto resubmit;
219
- }
220
- __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
221
- } else {
222
- if (!raw) {
223
- if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
224
- __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
225
- icmp_send(skb, ICMP_DEST_UNREACH,
226
- ICMP_PROT_UNREACH, 0);
227
- }
228
- kfree_skb(skb);
229
- } else {
230
- __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
231
- consume_skb(skb);
232
- }
233
- }
234
- }
235
- out:
231
+ ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
236232 rcu_read_unlock();
237233
238234 return 0;
....@@ -306,28 +302,53 @@
306302 return true;
307303 }
308304
305
+static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
306
+ const struct sk_buff *hint)
307
+{
308
+ return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
309
+ ip_hdr(hint)->tos == iph->tos;
310
+}
311
+
312
+int tcp_v4_early_demux(struct sk_buff *skb);
313
+int udp_v4_early_demux(struct sk_buff *skb);
309314 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
310
- struct sk_buff *skb, struct net_device *dev)
315
+ struct sk_buff *skb, struct net_device *dev,
316
+ const struct sk_buff *hint)
311317 {
312318 const struct iphdr *iph = ip_hdr(skb);
313
- int (*edemux)(struct sk_buff *skb);
314319 struct rtable *rt;
315320 int err;
316321
317
- if (net->ipv4.sysctl_ip_early_demux &&
322
+ if (ip_can_use_hint(skb, iph, hint)) {
323
+ err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
324
+ dev, hint);
325
+ if (unlikely(err))
326
+ goto drop_error;
327
+ }
328
+
329
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
318330 !skb_dst(skb) &&
319331 !skb->sk &&
320332 !ip_is_fragment(iph)) {
321
- const struct net_protocol *ipprot;
322
- int protocol = iph->protocol;
333
+ switch (iph->protocol) {
334
+ case IPPROTO_TCP:
335
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
336
+ tcp_v4_early_demux(skb);
323337
324
- ipprot = rcu_dereference(inet_protos[protocol]);
325
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
326
- err = edemux(skb);
327
- if (unlikely(err))
328
- goto drop_error;
329
- /* must reload iph, skb->head might have changed */
330
- iph = ip_hdr(skb);
338
+ /* must reload iph, skb->head might have changed */
339
+ iph = ip_hdr(skb);
340
+ }
341
+ break;
342
+ case IPPROTO_UDP:
343
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
344
+ err = udp_v4_early_demux(skb);
345
+ if (unlikely(err))
346
+ goto drop_error;
347
+
348
+ /* must reload iph, skb->head might have changed */
349
+ iph = ip_hdr(skb);
350
+ }
351
+ break;
331352 }
332353 }
333354
....@@ -409,7 +430,7 @@
409430 if (!skb)
410431 return NET_RX_SUCCESS;
411432
412
- ret = ip_rcv_finish_core(net, sk, skb, dev);
433
+ ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
413434 if (ret != NET_RX_DROP)
414435 ret = dst_input(skb);
415436 return ret;
....@@ -428,7 +449,6 @@
428449 */
429450 if (skb->pkt_type == PACKET_OTHERHOST)
430451 goto drop;
431
-
432452
433453 __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
434454
....@@ -496,7 +516,8 @@
496516 IPCB(skb)->iif = skb->skb_iif;
497517
498518 /* Must drop socket now because of tproxy. */
499
- skb_orphan(skb);
519
+ if (!skb_sk_is_prefetched(skb))
520
+ skb_orphan(skb);
500521
501522 return skb;
502523
....@@ -521,6 +542,7 @@
521542 skb = ip_rcv_core(skb, net);
522543 if (skb == NULL)
523544 return NET_RX_DROP;
545
+
524546 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
525547 net, NULL, skb, dev, NULL,
526548 ip_rcv_finish);
....@@ -531,20 +553,26 @@
531553 struct sk_buff *skb, *next;
532554
533555 list_for_each_entry_safe(skb, next, head, list) {
534
- list_del(&skb->list);
535
- /* Handle ip{6}_forward case, as sch_direct_xmit have
536
- * another kind of SKB-list usage (see validate_xmit_skb_list)
537
- */
538
- skb->next = NULL;
556
+ skb_list_del_init(skb);
539557 dst_input(skb);
540558 }
559
+}
560
+
561
+static struct sk_buff *ip_extract_route_hint(const struct net *net,
562
+ struct sk_buff *skb, int rt_type)
563
+{
564
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
565
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
566
+ return NULL;
567
+
568
+ return skb;
541569 }
542570
543571 static void ip_list_rcv_finish(struct net *net, struct sock *sk,
544572 struct list_head *head)
545573 {
574
+ struct sk_buff *skb, *next, *hint = NULL;
546575 struct dst_entry *curr_dst = NULL;
547
- struct sk_buff *skb, *next;
548576 struct list_head sublist;
549577
550578 INIT_LIST_HEAD(&sublist);
....@@ -559,11 +587,14 @@
559587 skb = l3mdev_ip_rcv(skb);
560588 if (!skb)
561589 continue;
562
- if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
590
+ if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
563591 continue;
564592
565593 dst = skb_dst(skb);
566594 if (curr_dst != dst) {
595
+ hint = ip_extract_route_hint(net, skb,
596
+ ((struct rtable *)dst)->rt_type);
597
+
567598 /* dispatch old sublist */
568599 if (!list_empty(&sublist))
569600 ip_sublist_rcv_finish(&sublist);
....@@ -616,5 +647,6 @@
616647 list_add_tail(&skb->list, &sublist);
617648 }
618649 /* dispatch final sublist */
619
- ip_sublist_rcv(&sublist, curr_dev, curr_net);
650
+ if (!list_empty(&sublist))
651
+ ip_sublist_rcv(&sublist, curr_dev, curr_net);
620652 }