hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/ipv4/udp.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -69,19 +70,13 @@
6970 * a single port at the same time.
7071 * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
7172 * James Chapman : Add L2TP encapsulation type.
72
- *
73
- *
74
- * This program is free software; you can redistribute it and/or
75
- * modify it under the terms of the GNU General Public License
76
- * as published by the Free Software Foundation; either version
77
- * 2 of the License, or (at your option) any later version.
7873 */
7974
8075 #define pr_fmt(fmt) "UDP: " fmt
8176
8277 #include <linux/uaccess.h>
8378 #include <asm/ioctls.h>
84
-#include <linux/bootmem.h>
79
+#include <linux/memblock.h>
8580 #include <linux/highmem.h>
8681 #include <linux/swap.h>
8782 #include <linux/types.h>
....@@ -105,16 +100,23 @@
105100 #include <net/net_namespace.h>
106101 #include <net/icmp.h>
107102 #include <net/inet_hashtables.h>
103
+#include <net/ip_tunnels.h>
108104 #include <net/route.h>
109105 #include <net/checksum.h>
110106 #include <net/xfrm.h>
111107 #include <trace/events/udp.h>
112108 #include <linux/static_key.h>
109
+#include <linux/btf_ids.h>
113110 #include <trace/events/skb.h>
114111 #include <net/busy_poll.h>
115112 #include "udp_impl.h"
116113 #include <net/sock_reuseport.h>
117114 #include <net/addrconf.h>
115
+#include <net/udp_tunnel.h>
116
+#if IS_ENABLED(CONFIG_IPV6)
117
+#include <net/ipv6_stubs.h>
118
+#endif
119
+#include <trace/hooks/ipv4.h>
118120
119121 struct udp_table udp_table __read_mostly;
120122 EXPORT_SYMBOL(udp_table);
....@@ -127,17 +129,6 @@
127129
128130 #define MAX_UDP_PORTS 65536
129131 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
130
-
131
-/* IPCB reference means this can not be used from early demux */
132
-static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
133
-{
134
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
135
- if (!net->ipv4.sysctl_udp_l3mdev_accept &&
136
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
137
- return true;
138
-#endif
139
- return false;
140
-}
141132
142133 static int udp_lib_lport_inuse(struct net *net, __u16 num,
143134 const struct udp_hslot *hslot,
....@@ -367,25 +358,23 @@
367358 static int compute_score(struct sock *sk, struct net *net,
368359 __be32 saddr, __be16 sport,
369360 __be32 daddr, unsigned short hnum,
370
- int dif, int sdif, bool exact_dif)
361
+ int dif, int sdif)
371362 {
372363 int score;
373364 struct inet_sock *inet;
365
+ bool dev_match;
374366
375367 if (!net_eq(sock_net(sk), net) ||
376368 udp_sk(sk)->udp_port_hash != hnum ||
377369 ipv6_only_sock(sk))
378370 return -1;
379371
372
+ if (sk->sk_rcv_saddr != daddr)
373
+ return -1;
374
+
380375 score = (sk->sk_family == PF_INET) ? 2 : 1;
376
+
381377 inet = inet_sk(sk);
382
-
383
- if (inet->inet_rcv_saddr) {
384
- if (inet->inet_rcv_saddr != daddr)
385
- return -1;
386
- score += 4;
387
- }
388
-
389378 if (inet->inet_daddr) {
390379 if (inet->inet_daddr != saddr)
391380 return -1;
....@@ -398,15 +387,12 @@
398387 score += 4;
399388 }
400389
401
- if (sk->sk_bound_dev_if || exact_dif) {
402
- bool dev_match = (sk->sk_bound_dev_if == dif ||
403
- sk->sk_bound_dev_if == sdif);
404
-
405
- if (!dev_match)
406
- return -1;
407
- if (sk->sk_bound_dev_if)
408
- score += 4;
409
- }
390
+ dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
391
+ dif, sdif);
392
+ if (!dev_match)
393
+ return -1;
394
+ if (sk->sk_bound_dev_if)
395
+ score += 4;
410396
411397 if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
412398 score++;
....@@ -425,41 +411,83 @@
425411 udp_ehash_secret + net_hash_mix(net));
426412 }
427413
414
+static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
415
+ struct sk_buff *skb,
416
+ __be32 saddr, __be16 sport,
417
+ __be32 daddr, unsigned short hnum)
418
+{
419
+ struct sock *reuse_sk = NULL;
420
+ u32 hash;
421
+
422
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
423
+ hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
424
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
425
+ sizeof(struct udphdr));
426
+ }
427
+ return reuse_sk;
428
+}
429
+
428430 /* called with rcu_read_lock() */
429431 static struct sock *udp4_lib_lookup2(struct net *net,
430432 __be32 saddr, __be16 sport,
431433 __be32 daddr, unsigned int hnum,
432
- int dif, int sdif, bool exact_dif,
434
+ int dif, int sdif,
433435 struct udp_hslot *hslot2,
434436 struct sk_buff *skb)
435437 {
436
- struct sock *sk, *result, *reuseport_result;
438
+ struct sock *sk, *result;
437439 int score, badness;
438
- u32 hash = 0;
439440
440441 result = NULL;
441442 badness = 0;
442443 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
443444 score = compute_score(sk, net, saddr, sport,
444
- daddr, hnum, dif, sdif, exact_dif);
445
+ daddr, hnum, dif, sdif);
445446 if (score > badness) {
446
- reuseport_result = NULL;
447
-
448
- if (sk->sk_reuseport &&
449
- sk->sk_state != TCP_ESTABLISHED) {
450
- hash = udp_ehashfn(net, daddr, hnum,
451
- saddr, sport);
452
- reuseport_result = reuseport_select_sock(sk, hash, skb,
453
- sizeof(struct udphdr));
454
- if (reuseport_result && !reuseport_has_conns(sk, false))
455
- return reuseport_result;
447
+ badness = score;
448
+ result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
449
+ if (!result) {
450
+ result = sk;
451
+ continue;
456452 }
457453
458
- result = reuseport_result ? : sk;
459
- badness = score;
454
+ /* Fall back to scoring if group has connections */
455
+ if (!reuseport_has_conns(sk))
456
+ return result;
457
+
458
+ /* Reuseport logic returned an error, keep original score. */
459
+ if (IS_ERR(result))
460
+ continue;
461
+
462
+ badness = compute_score(result, net, saddr, sport,
463
+ daddr, hnum, dif, sdif);
464
+
460465 }
461466 }
462467 return result;
468
+}
469
+
470
+static struct sock *udp4_lookup_run_bpf(struct net *net,
471
+ struct udp_table *udptable,
472
+ struct sk_buff *skb,
473
+ __be32 saddr, __be16 sport,
474
+ __be32 daddr, u16 hnum)
475
+{
476
+ struct sock *sk, *reuse_sk;
477
+ bool no_reuseport;
478
+
479
+ if (udptable != &udp_table)
480
+ return NULL; /* only UDP is supported */
481
+
482
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
483
+ saddr, sport, daddr, hnum, &sk);
484
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
485
+ return sk;
486
+
487
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
488
+ if (reuse_sk)
489
+ sk = reuse_sk;
490
+ return sk;
463491 }
464492
465493 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
....@@ -469,65 +497,47 @@
469497 __be16 sport, __be32 daddr, __be16 dport, int dif,
470498 int sdif, struct udp_table *udptable, struct sk_buff *skb)
471499 {
472
- struct sock *sk, *result;
473500 unsigned short hnum = ntohs(dport);
474
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
475
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
476
- bool exact_dif = udp_lib_exact_dif_match(net, skb);
477
- int score, badness;
478
- u32 hash = 0;
501
+ unsigned int hash2, slot2;
502
+ struct udp_hslot *hslot2;
503
+ struct sock *result, *sk;
479504
480
- if (hslot->count > 10) {
481
- hash2 = ipv4_portaddr_hash(net, daddr, hnum);
482
- slot2 = hash2 & udptable->mask;
483
- hslot2 = &udptable->hash2[slot2];
484
- if (hslot->count < hslot2->count)
485
- goto begin;
505
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
506
+ slot2 = hash2 & udptable->mask;
507
+ hslot2 = &udptable->hash2[slot2];
486508
487
- result = udp4_lib_lookup2(net, saddr, sport,
488
- daddr, hnum, dif, sdif,
489
- exact_dif, hslot2, skb);
490
- if (!result) {
491
- unsigned int old_slot2 = slot2;
492
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
493
- slot2 = hash2 & udptable->mask;
494
- /* avoid searching the same slot again. */
495
- if (unlikely(slot2 == old_slot2))
496
- return result;
509
+ /* Lookup connected or non-wildcard socket */
510
+ result = udp4_lib_lookup2(net, saddr, sport,
511
+ daddr, hnum, dif, sdif,
512
+ hslot2, skb);
513
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
514
+ goto done;
497515
498
- hslot2 = &udptable->hash2[slot2];
499
- if (hslot->count < hslot2->count)
500
- goto begin;
501
-
502
- result = udp4_lib_lookup2(net, saddr, sport,
503
- daddr, hnum, dif, sdif,
504
- exact_dif, hslot2, skb);
505
- }
506
- if (unlikely(IS_ERR(result)))
507
- return NULL;
508
- return result;
509
- }
510
-begin:
511
- result = NULL;
512
- badness = 0;
513
- sk_for_each_rcu(sk, &hslot->head) {
514
- score = compute_score(sk, net, saddr, sport,
515
- daddr, hnum, dif, sdif, exact_dif);
516
- if (score > badness) {
517
- if (sk->sk_reuseport) {
518
- hash = udp_ehashfn(net, daddr, hnum,
519
- saddr, sport);
520
- result = reuseport_select_sock(sk, hash, skb,
521
- sizeof(struct udphdr));
522
- if (unlikely(IS_ERR(result)))
523
- return NULL;
524
- if (result)
525
- return result;
526
- }
516
+ /* Lookup redirect from BPF */
517
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
518
+ sk = udp4_lookup_run_bpf(net, udptable, skb,
519
+ saddr, sport, daddr, hnum);
520
+ if (sk) {
527521 result = sk;
528
- badness = score;
522
+ goto done;
529523 }
530524 }
525
+
526
+ /* Got non-wildcard socket or error on first lookup */
527
+ if (result)
528
+ goto done;
529
+
530
+ /* Lookup wildcard sockets */
531
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
532
+ slot2 = hash2 & udptable->mask;
533
+ hslot2 = &udptable->hash2[slot2];
534
+
535
+ result = udp4_lib_lookup2(net, saddr, sport,
536
+ htonl(INADDR_ANY), hnum, dif, sdif,
537
+ hslot2, skb);
538
+done:
539
+ if (IS_ERR(result))
540
+ return NULL;
531541 return result;
532542 }
533543 EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
....@@ -585,12 +595,102 @@
585595 (inet->inet_dport != rmt_port && inet->inet_dport) ||
586596 (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
587597 ipv6_only_sock(sk) ||
588
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
589
- sk->sk_bound_dev_if != sdif))
598
+ !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
590599 return false;
591600 if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
592601 return false;
593602 return true;
603
+}
604
+
605
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
606
+void udp_encap_enable(void)
607
+{
608
+ static_branch_inc(&udp_encap_needed_key);
609
+}
610
+EXPORT_SYMBOL(udp_encap_enable);
611
+
612
+void udp_encap_disable(void)
613
+{
614
+ static_branch_dec(&udp_encap_needed_key);
615
+}
616
+EXPORT_SYMBOL(udp_encap_disable);
617
+
618
+/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
619
+ * through error handlers in encapsulations looking for a match.
620
+ */
621
+static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
622
+{
623
+ int i;
624
+
625
+ for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
626
+ int (*handler)(struct sk_buff *skb, u32 info);
627
+ const struct ip_tunnel_encap_ops *encap;
628
+
629
+ encap = rcu_dereference(iptun_encaps[i]);
630
+ if (!encap)
631
+ continue;
632
+ handler = encap->err_handler;
633
+ if (handler && !handler(skb, info))
634
+ return 0;
635
+ }
636
+
637
+ return -ENOENT;
638
+}
639
+
640
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
641
+ * reversing source and destination port: this will match tunnels that force the
642
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
643
+ * lwtunnels might actually break this assumption by being configured with
644
+ * different destination ports on endpoints, in this case we won't be able to
645
+ * trace ICMP messages back to them.
646
+ *
647
+ * If this doesn't match any socket, probe tunnels with arbitrary destination
648
+ * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
649
+ * we've sent packets to won't necessarily match the local destination port.
650
+ *
651
+ * Then ask the tunnel implementation to match the error against a valid
652
+ * association.
653
+ *
654
+ * Return an error if we can't find a match, the socket if we need further
655
+ * processing, zero otherwise.
656
+ */
657
+static struct sock *__udp4_lib_err_encap(struct net *net,
658
+ const struct iphdr *iph,
659
+ struct udphdr *uh,
660
+ struct udp_table *udptable,
661
+ struct sk_buff *skb, u32 info)
662
+{
663
+ int network_offset, transport_offset;
664
+ struct sock *sk;
665
+
666
+ network_offset = skb_network_offset(skb);
667
+ transport_offset = skb_transport_offset(skb);
668
+
669
+ /* Network header needs to point to the outer IPv4 header inside ICMP */
670
+ skb_reset_network_header(skb);
671
+
672
+ /* Transport header needs to point to the UDP header */
673
+ skb_set_transport_header(skb, iph->ihl << 2);
674
+
675
+ sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
676
+ iph->saddr, uh->dest, skb->dev->ifindex, 0,
677
+ udptable, NULL);
678
+ if (sk) {
679
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
680
+ struct udp_sock *up = udp_sk(sk);
681
+
682
+ lookup = READ_ONCE(up->encap_err_lookup);
683
+ if (!lookup || lookup(sk, skb))
684
+ sk = NULL;
685
+ }
686
+
687
+ if (!sk)
688
+ sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
689
+
690
+ skb_set_transport_header(skb, transport_offset);
691
+ skb_set_network_header(skb, network_offset);
692
+
693
+ return sk;
594694 }
595695
596696 /*
....@@ -604,24 +704,38 @@
604704 * to find the appropriate port.
605705 */
606706
607
-void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
707
+int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
608708 {
609709 struct inet_sock *inet;
610710 const struct iphdr *iph = (const struct iphdr *)skb->data;
611711 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
612712 const int type = icmp_hdr(skb)->type;
613713 const int code = icmp_hdr(skb)->code;
714
+ bool tunnel = false;
614715 struct sock *sk;
615716 int harderr;
616717 int err;
617718 struct net *net = dev_net(skb->dev);
618719
619720 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
620
- iph->saddr, uh->source, skb->dev->ifindex, 0,
621
- udptable, NULL);
721
+ iph->saddr, uh->source, skb->dev->ifindex,
722
+ inet_sdif(skb), udptable, NULL);
622723 if (!sk) {
623
- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
624
- return; /* No socket for error */
724
+ /* No socket for error: try tunnels before discarding */
725
+ sk = ERR_PTR(-ENOENT);
726
+ if (static_branch_unlikely(&udp_encap_needed_key)) {
727
+ sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
728
+ info);
729
+ if (!sk)
730
+ return 0;
731
+ }
732
+
733
+ if (IS_ERR(sk)) {
734
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
735
+ return PTR_ERR(sk);
736
+ }
737
+
738
+ tunnel = true;
625739 }
626740
627741 err = 0;
....@@ -664,6 +778,10 @@
664778 * RFC1122: OK. Passes ICMP errors back to application, as per
665779 * 4.1.3.3.
666780 */
781
+ if (tunnel) {
782
+ /* ...not for tunnels though: we don't have a sending socket */
783
+ goto out;
784
+ }
667785 if (!inet->recverr) {
668786 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
669787 goto out;
....@@ -673,12 +791,12 @@
673791 sk->sk_err = err;
674792 sk->sk_error_report(sk);
675793 out:
676
- return;
794
+ return 0;
677795 }
678796
679
-void udp_err(struct sk_buff *skb, u32 info)
797
+int udp_err(struct sk_buff *skb, u32 info)
680798 {
681
- __udp4_lib_err(skb, info, &udp_table);
799
+ return __udp4_lib_err(skb, info, &udp_table);
682800 }
683801
684802 /*
....@@ -949,6 +1067,7 @@
9491067
9501068 if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
9511069 return -EOPNOTSUPP;
1070
+ trace_android_rvh_udp_sendmsg(sk);
9521071
9531072 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
9541073
....@@ -1061,7 +1180,7 @@
10611180 }
10621181
10631182 if (ipv4_is_multicast(daddr)) {
1064
- if (!ipc.oif)
1183
+ if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
10651184 ipc.oif = inet->mc_index;
10661185 if (!saddr)
10671186 saddr = inet->mc_addr;
....@@ -1070,7 +1189,7 @@
10701189 ipc.oif = inet->uc_index;
10711190 } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
10721191 /* oif is set, packet is to local broadcast and
1073
- * and uc_index is set. oif is most likely set
1192
+ * uc_index is set. oif is most likely set
10741193 * by sk_bound_dev_if. If uc_index != oif check if the
10751194 * oif is an L3 master and uc_index is an L3 slave.
10761195 * If so, we want to allow the send using the uc_index.
....@@ -1091,13 +1210,13 @@
10911210
10921211 fl4 = &fl4_stack;
10931212
1094
- flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
1213
+ flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
10951214 RT_SCOPE_UNIVERSE, sk->sk_protocol,
10961215 flow_flags,
10971216 faddr, saddr, dport, inet->inet_sport,
10981217 sk->sk_uid);
10991218
1100
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
1219
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
11011220 rt = ip_route_output_flow(net, fl4, sk);
11021221 if (IS_ERR(rt)) {
11031222 err = PTR_ERR(rt);
....@@ -1254,6 +1373,27 @@
12541373
12551374 #define UDP_SKB_IS_STATELESS 0x80000000
12561375
1376
+/* all head states (dst, sk, nf conntrack) except skb extensions are
1377
+ * cleared by udp_rcv().
1378
+ *
1379
+ * We need to preserve secpath, if present, to eventually process
1380
+ * IP_CMSG_PASSSEC at recvmsg() time.
1381
+ *
1382
+ * Other extensions can be cleared.
1383
+ */
1384
+static bool udp_try_make_stateless(struct sk_buff *skb)
1385
+{
1386
+ if (!skb_has_extensions(skb))
1387
+ return true;
1388
+
1389
+ if (!secpath_exists(skb)) {
1390
+ skb_ext_reset(skb);
1391
+ return true;
1392
+ }
1393
+
1394
+ return false;
1395
+}
1396
+
12571397 static void udp_set_dev_scratch(struct sk_buff *skb)
12581398 {
12591399 struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
....@@ -1265,11 +1405,7 @@
12651405 scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
12661406 scratch->is_linear = !skb_is_nonlinear(skb);
12671407 #endif
1268
- /* all head states execept sp (dst, sk, nf) are always cleared by
1269
- * udp_rcv() and we need to preserve secpath, if present, to eventually
1270
- * process IP_CMSG_PASSSEC at recvmsg() time
1271
- */
1272
- if (likely(!skb_sec_path(skb)))
1408
+ if (udp_try_make_stateless(skb))
12731409 scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
12741410 }
12751411
....@@ -1458,7 +1594,7 @@
14581594 }
14591595 EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
14601596
1461
-void udp_destruct_sock(struct sock *sk)
1597
+void udp_destruct_common(struct sock *sk)
14621598 {
14631599 /* reclaim completely the forward allocated memory */
14641600 struct udp_sock *up = udp_sk(sk);
....@@ -1471,10 +1607,14 @@
14711607 kfree_skb(skb);
14721608 }
14731609 udp_rmem_release(sk, total, 0, true);
1610
+}
1611
+EXPORT_SYMBOL_GPL(udp_destruct_common);
14741612
1613
+static void udp_destruct_sock(struct sock *sk)
1614
+{
1615
+ udp_destruct_common(sk);
14751616 inet_sock_destruct(sk);
14761617 }
1477
-EXPORT_SYMBOL_GPL(udp_destruct_sock);
14781618
14791619 int udp_init_sock(struct sock *sk)
14801620 {
....@@ -1482,7 +1622,6 @@
14821622 sk->sk_destruct = udp_destruct_sock;
14831623 return 0;
14841624 }
1485
-EXPORT_SYMBOL_GPL(udp_init_sock);
14861625
14871626 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
14881627 {
....@@ -1590,7 +1729,7 @@
15901729 EXPORT_SYMBOL(udp_ioctl);
15911730
15921731 struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
1593
- int noblock, int *peeked, int *off, int *err)
1732
+ int noblock, int *off, int *err)
15941733 {
15951734 struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
15961735 struct sk_buff_head *queue;
....@@ -1609,14 +1748,13 @@
16091748 break;
16101749
16111750 error = -EAGAIN;
1612
- *peeked = 0;
16131751 do {
16141752 spin_lock_bh(&queue->lock);
1615
- skb = __skb_try_recv_from_queue(sk, queue, flags,
1616
- udp_skb_destructor,
1617
- peeked, off, err,
1618
- &last);
1753
+ skb = __skb_try_recv_from_queue(sk, queue, flags, off,
1754
+ err, &last);
16191755 if (skb) {
1756
+ if (!(flags & MSG_PEEK))
1757
+ udp_skb_destructor(sk, skb);
16201758 spin_unlock_bh(&queue->lock);
16211759 return skb;
16221760 }
....@@ -1634,10 +1772,10 @@
16341772 spin_lock(&sk_queue->lock);
16351773 skb_queue_splice_tail_init(sk_queue, queue);
16361774
1637
- skb = __skb_try_recv_from_queue(sk, queue, flags,
1638
- udp_skb_dtor_locked,
1639
- peeked, off, err,
1640
- &last);
1775
+ skb = __skb_try_recv_from_queue(sk, queue, flags, off,
1776
+ err, &last);
1777
+ if (skb && !(flags & MSG_PEEK))
1778
+ udp_skb_dtor_locked(sk, skb);
16411779 spin_unlock(&sk_queue->lock);
16421780 spin_unlock_bh(&queue->lock);
16431781 if (skb)
....@@ -1652,7 +1790,8 @@
16521790
16531791 /* sk_queue is empty, reader_queue may contain peeked packets */
16541792 } while (timeo &&
1655
- !__skb_wait_for_more_packets(sk, &error, &timeo,
1793
+ !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
1794
+ &error, &timeo,
16561795 (struct sk_buff *)sk_queue));
16571796
16581797 *err = error;
....@@ -1672,8 +1811,7 @@
16721811 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
16731812 struct sk_buff *skb;
16741813 unsigned int ulen, copied;
1675
- int peeked, peeking, off;
1676
- int err;
1814
+ int off, err, peeking = flags & MSG_PEEK;
16771815 int is_udplite = IS_UDPLITE(sk);
16781816 bool checksum_valid = false;
16791817
....@@ -1681,11 +1819,11 @@
16811819 return ip_recv_error(sk, msg, len, addr_len);
16821820
16831821 try_again:
1684
- peeking = flags & MSG_PEEK;
16851822 off = sk_peek_offset(sk, flags);
1686
- skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
1823
+ skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
16871824 if (!skb)
16881825 return err;
1826
+ trace_android_rvh_udp_recvmsg(sk);
16891827
16901828 ulen = udp_skb_len(skb);
16911829 copied = len;
....@@ -1721,7 +1859,7 @@
17211859 }
17221860
17231861 if (unlikely(err)) {
1724
- if (!peeked) {
1862
+ if (!peeking) {
17251863 atomic_inc(&sk->sk_drops);
17261864 UDP_INC_STATS(sock_net(sk),
17271865 UDP_MIB_INERRORS, is_udplite);
....@@ -1730,7 +1868,7 @@
17301868 return err;
17311869 }
17321870
1733
- if (!peeked)
1871
+ if (!peeking)
17341872 UDP_INC_STATS(sock_net(sk),
17351873 UDP_MIB_INDATAGRAMS, is_udplite);
17361874
....@@ -1748,6 +1886,10 @@
17481886 BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
17491887 (struct sockaddr *)sin);
17501888 }
1889
+
1890
+ if (udp_sk(sk)->gro_enabled)
1891
+ udp_cmsg_recv(msg, sk, skb);
1892
+
17511893 if (inet->cmsg_flags)
17521894 ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
17531895
....@@ -1797,8 +1939,12 @@
17971939 inet->inet_dport = 0;
17981940 sock_rps_reset_rxhash(sk);
17991941 sk->sk_bound_dev_if = 0;
1800
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1942
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
18011943 inet_reset_saddr(sk);
1944
+ if (sk->sk_prot->rehash &&
1945
+ (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1946
+ sk->sk_prot->rehash(sk);
1947
+ }
18021948
18031949 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
18041950 sk->sk_prot->unhash(sk);
....@@ -1887,7 +2033,7 @@
18872033 }
18882034 EXPORT_SYMBOL(udp_lib_rehash);
18892035
1890
-static void udp_v4_rehash(struct sock *sk)
2036
+void udp_v4_rehash(struct sock *sk)
18912037 {
18922038 u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
18932039 inet_sk(sk)->inet_rcv_saddr,
....@@ -1924,13 +2070,6 @@
19242070 return 0;
19252071 }
19262072
1927
-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
1928
-void udp_encap_enable(void)
1929
-{
1930
- static_branch_enable(&udp_encap_needed_key);
1931
-}
1932
-EXPORT_SYMBOL(udp_encap_enable);
1933
-
19342073 /* returns:
19352074 * -1: error
19362075 * 0: success
....@@ -1939,7 +2078,7 @@
19392078 * Note that in the success and error cases, the skb is assumed to
19402079 * have either been requeued or freed.
19412080 */
1942
-static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
2081
+static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
19432082 {
19442083 struct udp_sock *up = udp_sk(sk);
19452084 int is_udplite = IS_UDPLITE(sk);
....@@ -1949,7 +2088,7 @@
19492088 */
19502089 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
19512090 goto drop;
1952
- nf_reset(skb);
2091
+ nf_reset_ct(skb);
19532092
19542093 if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
19552094 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
....@@ -2042,6 +2181,26 @@
20422181 return -1;
20432182 }
20442183
2184
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
2185
+{
2186
+ struct sk_buff *next, *segs;
2187
+ int ret;
2188
+
2189
+ if (likely(!udp_unexpected_gso(sk, skb)))
2190
+ return udp_queue_rcv_one_skb(sk, skb);
2191
+
2192
+ BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
2193
+ __skb_push(skb, -skb_mac_offset(skb));
2194
+ segs = udp_rcv_segment(sk, skb, true);
2195
+ skb_list_walk_safe(segs, skb, next) {
2196
+ __skb_pull(skb, skb_transport_offset(skb));
2197
+ ret = udp_queue_rcv_one_skb(sk, skb);
2198
+ if (ret > 0)
2199
+ ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
2200
+ }
2201
+ return 0;
2202
+}
2203
+
20452204 /* For TCP sockets, sk_rx_dst is protected by socket lock
20462205 * For UDP, we use xchg() to guard against concurrent changes.
20472206 */
....@@ -2050,7 +2209,7 @@
20502209 struct dst_entry *old;
20512210
20522211 if (dst_hold_safe(dst)) {
2053
- old = xchg(&sk->sk_rx_dst, dst);
2212
+ old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
20542213 dst_release(old);
20552214 return old != dst;
20562215 }
....@@ -2130,7 +2289,7 @@
21302289
21312290 /* Initialize UDP checksum. If exited with zero value (success),
21322291 * CHECKSUM_UNNECESSARY means, that no more checks are required.
2133
- * Otherwise, csum completion requires chacksumming packet body,
2292
+ * Otherwise, csum completion requires checksumming packet body,
21342293 * including udp header and folding it to skb->csum.
21352294 */
21362295 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
....@@ -2184,8 +2343,7 @@
21842343 int ret;
21852344
21862345 if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
2187
- skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
2188
- inet_compute_pseudo);
2346
+ skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo);
21892347
21902348 ret = udp_queue_rcv_skb(sk, skb);
21912349
....@@ -2210,6 +2368,7 @@
22102368 struct rtable *rt = skb_rtable(skb);
22112369 __be32 saddr, daddr;
22122370 struct net *net = dev_net(skb->dev);
2371
+ bool refcounted;
22132372
22142373 /*
22152374 * Validate the packet.
....@@ -2235,16 +2394,17 @@
22352394 if (udp4_csum_init(skb, uh, proto))
22362395 goto csum_error;
22372396
2238
- sk = skb_steal_sock(skb);
2397
+ sk = skb_steal_sock(skb, &refcounted);
22392398 if (sk) {
22402399 struct dst_entry *dst = skb_dst(skb);
22412400 int ret;
22422401
2243
- if (unlikely(sk->sk_rx_dst != dst))
2402
+ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
22442403 udp_sk_rx_dst_set(sk, dst);
22452404
22462405 ret = udp_unicast_rcv_skb(sk, skb, uh);
2247
- sock_put(sk);
2406
+ if (refcounted)
2407
+ sock_put(sk);
22482408 return ret;
22492409 }
22502410
....@@ -2258,7 +2418,7 @@
22582418
22592419 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
22602420 goto drop;
2261
- nf_reset(skb);
2421
+ nf_reset_ct(skb);
22622422
22632423 /* No socket. Drop packet silently, if checksum is wrong */
22642424 if (udp_lib_checksum_complete(skb))
....@@ -2346,8 +2506,7 @@
23462506 struct sock *sk;
23472507
23482508 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
2349
- if (INET_MATCH(sk, net, acookie, rmt_addr,
2350
- loc_addr, ports, dif, sdif))
2509
+ if (INET_MATCH(net, sk, acookie, ports, dif, sdif))
23512510 return sk;
23522511 /* Only check first socket in chain */
23532512 break;
....@@ -2398,7 +2557,7 @@
23982557
23992558 skb->sk = sk;
24002559 skb->destructor = sock_efree;
2401
- dst = READ_ONCE(sk->sk_rx_dst);
2560
+ dst = rcu_dereference(sk->sk_rx_dst);
24022561
24032562 if (dst)
24042563 dst = dst_check(dst, 0);
....@@ -2437,11 +2596,15 @@
24372596 sock_set_flag(sk, SOCK_DEAD);
24382597 udp_flush_pending_frames(sk);
24392598 unlock_sock_fast(sk, slow);
2440
- if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
2441
- void (*encap_destroy)(struct sock *sk);
2442
- encap_destroy = READ_ONCE(up->encap_destroy);
2443
- if (encap_destroy)
2444
- encap_destroy(sk);
2599
+ if (static_branch_unlikely(&udp_encap_needed_key)) {
2600
+ if (up->encap_type) {
2601
+ void (*encap_destroy)(struct sock *sk);
2602
+ encap_destroy = READ_ONCE(up->encap_destroy);
2603
+ if (encap_destroy)
2604
+ encap_destroy(sk);
2605
+ }
2606
+ if (up->encap_enabled)
2607
+ static_branch_dec(&udp_encap_needed_key);
24452608 }
24462609 }
24472610
....@@ -2449,7 +2612,7 @@
24492612 * Socket option code for UDP
24502613 */
24512614 int udp_lib_setsockopt(struct sock *sk, int level, int optname,
2452
- char __user *optval, unsigned int optlen,
2615
+ sockptr_t optval, unsigned int optlen,
24532616 int (*push_pending_frames)(struct sock *))
24542617 {
24552618 struct udp_sock *up = udp_sk(sk);
....@@ -2460,7 +2623,7 @@
24602623 if (optlen < sizeof(int))
24612624 return -EINVAL;
24622625
2463
- if (get_user(val, (int __user *)optval))
2626
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
24642627 return -EFAULT;
24652628
24662629 valbool = val ? 1 : 0;
....@@ -2480,13 +2643,22 @@
24802643 case UDP_ENCAP:
24812644 switch (val) {
24822645 case 0:
2646
+#ifdef CONFIG_XFRM
24832647 case UDP_ENCAP_ESPINUDP:
24842648 case UDP_ENCAP_ESPINUDP_NON_IKE:
2485
- up->encap_rcv = xfrm4_udp_encap_rcv;
2486
- /* FALLTHROUGH */
2649
+#if IS_ENABLED(CONFIG_IPV6)
2650
+ if (sk->sk_family == AF_INET6)
2651
+ up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
2652
+ else
2653
+#endif
2654
+ up->encap_rcv = xfrm4_udp_encap_rcv;
2655
+#endif
2656
+ fallthrough;
24872657 case UDP_ENCAP_L2TPINUDP:
24882658 up->encap_type = val;
2489
- udp_encap_enable();
2659
+ lock_sock(sk);
2660
+ udp_tunnel_encap_enable(sk->sk_socket);
2661
+ release_sock(sk);
24902662 break;
24912663 default:
24922664 err = -ENOPROTOOPT;
....@@ -2506,6 +2678,17 @@
25062678 if (val < 0 || val > USHRT_MAX)
25072679 return -EINVAL;
25082680 WRITE_ONCE(up->gso_size, val);
2681
+ break;
2682
+
2683
+ case UDP_GRO:
2684
+ lock_sock(sk);
2685
+
2686
+ /* when enabling GRO, accept the related GSO packet type */
2687
+ if (valbool)
2688
+ udp_tunnel_encap_enable(sk->sk_socket);
2689
+ up->gro_enabled = valbool;
2690
+ up->accept_udp_l4 = valbool;
2691
+ release_sock(sk);
25092692 break;
25102693
25112694 /*
....@@ -2547,25 +2730,15 @@
25472730 }
25482731 EXPORT_SYMBOL(udp_lib_setsockopt);
25492732
2550
-int udp_setsockopt(struct sock *sk, int level, int optname,
2551
- char __user *optval, unsigned int optlen)
2733
+int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
2734
+ unsigned int optlen)
25522735 {
25532736 if (level == SOL_UDP || level == SOL_UDPLITE)
2554
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
2737
+ return udp_lib_setsockopt(sk, level, optname,
2738
+ optval, optlen,
25552739 udp_push_pending_frames);
25562740 return ip_setsockopt(sk, level, optname, optval, optlen);
25572741 }
2558
-
2559
-#ifdef CONFIG_COMPAT
2560
-int compat_udp_setsockopt(struct sock *sk, int level, int optname,
2561
- char __user *optval, unsigned int optlen)
2562
-{
2563
- if (level == SOL_UDP || level == SOL_UDPLITE)
2564
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
2565
- udp_push_pending_frames);
2566
- return compat_ip_setsockopt(sk, level, optname, optval, optlen);
2567
-}
2568
-#endif
25692742
25702743 int udp_lib_getsockopt(struct sock *sk, int level, int optname,
25712744 char __user *optval, int __user *optlen)
....@@ -2602,6 +2775,10 @@
26022775 val = READ_ONCE(up->gso_size);
26032776 break;
26042777
2778
+ case UDP_GRO:
2779
+ val = up->gro_enabled;
2780
+ break;
2781
+
26052782 /* The following two cannot be changed on UDP sockets, the return is
26062783 * always 0 (which corresponds to the full checksum coverage of UDP). */
26072784 case UDPLITE_SEND_CSCOV:
....@@ -2632,20 +2809,11 @@
26322809 return ip_getsockopt(sk, level, optname, optval, optlen);
26332810 }
26342811
2635
-#ifdef CONFIG_COMPAT
2636
-int compat_udp_getsockopt(struct sock *sk, int level, int optname,
2637
- char __user *optval, int __user *optlen)
2638
-{
2639
- if (level == SOL_UDP || level == SOL_UDPLITE)
2640
- return udp_lib_getsockopt(sk, level, optname, optval, optlen);
2641
- return compat_ip_getsockopt(sk, level, optname, optval, optlen);
2642
-}
2643
-#endif
26442812 /**
26452813 * udp_poll - wait for a UDP event.
2646
- * @file - file struct
2647
- * @sock - socket
2648
- * @wait - poll table
2814
+ * @file: - file struct
2815
+ * @sock: - socket
2816
+ * @wait: - poll table
26492817 *
26502818 * This is same as datagram poll, except for the special case of
26512819 * blocking sockets. If application is using a blocking fd
....@@ -2719,10 +2887,6 @@
27192887 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
27202888 .obj_size = sizeof(struct udp_sock),
27212889 .h.udp_table = &udp_table,
2722
-#ifdef CONFIG_COMPAT
2723
- .compat_setsockopt = compat_udp_setsockopt,
2724
- .compat_getsockopt = compat_udp_getsockopt,
2725
-#endif
27262890 .diag_destroy = udp_abort,
27272891 };
27282892 EXPORT_SYMBOL(udp_prot);
....@@ -2733,9 +2897,14 @@
27332897 static struct sock *udp_get_first(struct seq_file *seq, int start)
27342898 {
27352899 struct sock *sk;
2736
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2900
+ struct udp_seq_afinfo *afinfo;
27372901 struct udp_iter_state *state = seq->private;
27382902 struct net *net = seq_file_net(seq);
2903
+
2904
+ if (state->bpf_seq_afinfo)
2905
+ afinfo = state->bpf_seq_afinfo;
2906
+ else
2907
+ afinfo = PDE_DATA(file_inode(seq->file));
27392908
27402909 for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
27412910 ++state->bucket) {
....@@ -2748,7 +2917,8 @@
27482917 sk_for_each(sk, &hslot->head) {
27492918 if (!net_eq(sock_net(sk), net))
27502919 continue;
2751
- if (sk->sk_family == afinfo->family)
2920
+ if (afinfo->family == AF_UNSPEC ||
2921
+ sk->sk_family == afinfo->family)
27522922 goto found;
27532923 }
27542924 spin_unlock_bh(&hslot->lock);
....@@ -2760,13 +2930,20 @@
27602930
27612931 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
27622932 {
2763
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2933
+ struct udp_seq_afinfo *afinfo;
27642934 struct udp_iter_state *state = seq->private;
27652935 struct net *net = seq_file_net(seq);
27662936
2937
+ if (state->bpf_seq_afinfo)
2938
+ afinfo = state->bpf_seq_afinfo;
2939
+ else
2940
+ afinfo = PDE_DATA(file_inode(seq->file));
2941
+
27672942 do {
27682943 sk = sk_next(sk);
2769
- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
2944
+ } while (sk && (!net_eq(sock_net(sk), net) ||
2945
+ (afinfo->family != AF_UNSPEC &&
2946
+ sk->sk_family != afinfo->family)));
27702947
27712948 if (!sk) {
27722949 if (state->bucket <= afinfo->udp_table->mask)
....@@ -2811,8 +2988,13 @@
28112988
28122989 void udp_seq_stop(struct seq_file *seq, void *v)
28132990 {
2814
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2991
+ struct udp_seq_afinfo *afinfo;
28152992 struct udp_iter_state *state = seq->private;
2993
+
2994
+ if (state->bpf_seq_afinfo)
2995
+ afinfo = state->bpf_seq_afinfo;
2996
+ else
2997
+ afinfo = PDE_DATA(file_inode(seq->file));
28162998
28172999 if (state->bucket <= afinfo->udp_table->mask)
28183000 spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
....@@ -2830,7 +3012,7 @@
28303012 __u16 srcp = ntohs(inet->inet_sport);
28313013
28323014 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
2833
- " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
3015
+ " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
28343016 bucket, src, srcp, dest, destp, sp->sk_state,
28353017 sk_wmem_alloc_get(sp),
28363018 udp_rqueue_get(sp),
....@@ -2856,6 +3038,67 @@
28563038 seq_pad(seq, '\n');
28573039 return 0;
28583040 }
3041
+
3042
+#ifdef CONFIG_BPF_SYSCALL
3043
+struct bpf_iter__udp {
3044
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
3045
+ __bpf_md_ptr(struct udp_sock *, udp_sk);
3046
+ uid_t uid __aligned(8);
3047
+ int bucket __aligned(8);
3048
+};
3049
+
3050
+static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3051
+ struct udp_sock *udp_sk, uid_t uid, int bucket)
3052
+{
3053
+ struct bpf_iter__udp ctx;
3054
+
3055
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
3056
+ ctx.meta = meta;
3057
+ ctx.udp_sk = udp_sk;
3058
+ ctx.uid = uid;
3059
+ ctx.bucket = bucket;
3060
+ return bpf_iter_run_prog(prog, &ctx);
3061
+}
3062
+
3063
+static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
3064
+{
3065
+ struct udp_iter_state *state = seq->private;
3066
+ struct bpf_iter_meta meta;
3067
+ struct bpf_prog *prog;
3068
+ struct sock *sk = v;
3069
+ uid_t uid;
3070
+
3071
+ if (v == SEQ_START_TOKEN)
3072
+ return 0;
3073
+
3074
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3075
+ meta.seq = seq;
3076
+ prog = bpf_iter_get_info(&meta, false);
3077
+ return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
3078
+}
3079
+
3080
+static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
3081
+{
3082
+ struct bpf_iter_meta meta;
3083
+ struct bpf_prog *prog;
3084
+
3085
+ if (!v) {
3086
+ meta.seq = seq;
3087
+ prog = bpf_iter_get_info(&meta, true);
3088
+ if (prog)
3089
+ (void)udp_prog_seq_show(prog, &meta, v, 0, 0);
3090
+ }
3091
+
3092
+ udp_seq_stop(seq, v);
3093
+}
3094
+
3095
+static const struct seq_operations bpf_iter_udp_seq_ops = {
3096
+ .start = udp_seq_start,
3097
+ .next = udp_seq_next,
3098
+ .stop = bpf_iter_udp_seq_stop,
3099
+ .show = bpf_iter_udp_seq_show,
3100
+};
3101
+#endif
28593102
28603103 const struct seq_operations udp_seq_ops = {
28613104 .start = udp_seq_start,
....@@ -2974,6 +3217,62 @@
29743217 .init = udp_sysctl_init,
29753218 };
29763219
3220
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3221
+DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
3222
+ struct udp_sock *udp_sk, uid_t uid, int bucket)
3223
+
3224
+static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
3225
+{
3226
+ struct udp_iter_state *st = priv_data;
3227
+ struct udp_seq_afinfo *afinfo;
3228
+ int ret;
3229
+
3230
+ afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
3231
+ if (!afinfo)
3232
+ return -ENOMEM;
3233
+
3234
+ afinfo->family = AF_UNSPEC;
3235
+ afinfo->udp_table = &udp_table;
3236
+ st->bpf_seq_afinfo = afinfo;
3237
+ ret = bpf_iter_init_seq_net(priv_data, aux);
3238
+ if (ret)
3239
+ kfree(afinfo);
3240
+ return ret;
3241
+}
3242
+
3243
+static void bpf_iter_fini_udp(void *priv_data)
3244
+{
3245
+ struct udp_iter_state *st = priv_data;
3246
+
3247
+ kfree(st->bpf_seq_afinfo);
3248
+ bpf_iter_fini_seq_net(priv_data);
3249
+}
3250
+
3251
+static const struct bpf_iter_seq_info udp_seq_info = {
3252
+ .seq_ops = &bpf_iter_udp_seq_ops,
3253
+ .init_seq_private = bpf_iter_init_udp,
3254
+ .fini_seq_private = bpf_iter_fini_udp,
3255
+ .seq_priv_size = sizeof(struct udp_iter_state),
3256
+};
3257
+
3258
+static struct bpf_iter_reg udp_reg_info = {
3259
+ .target = "udp",
3260
+ .ctx_arg_info_size = 1,
3261
+ .ctx_arg_info = {
3262
+ { offsetof(struct bpf_iter__udp, udp_sk),
3263
+ PTR_TO_BTF_ID_OR_NULL },
3264
+ },
3265
+ .seq_info = &udp_seq_info,
3266
+};
3267
+
3268
+static void __init bpf_iter_register(void)
3269
+{
3270
+ udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
3271
+ if (bpf_iter_reg_target(&udp_reg_info))
3272
+ pr_warn("Warning: could not register bpf iterator udp\n");
3273
+}
3274
+#endif
3275
+
29773276 void __init udp_init(void)
29783277 {
29793278 unsigned long limit;
....@@ -2999,4 +3298,8 @@
29993298
30003299 if (register_pernet_subsys(&udp_sysctl_ops))
30013300 panic("UDP: failed to init sysctl parameters.\n");
3301
+
3302
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3303
+ bpf_iter_register();
3304
+#endif
30023305 }