.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
---|
3 | 4 | * operating system. INET is implemented using the BSD Socket |
---|
.. | .. |
---|
69 | 70 | * a single port at the same time. |
---|
70 | 71 | * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support |
---|
71 | 72 | * James Chapman : Add L2TP encapsulation type. |
---|
72 | | - * |
---|
73 | | - * |
---|
74 | | - * This program is free software; you can redistribute it and/or |
---|
75 | | - * modify it under the terms of the GNU General Public License |
---|
76 | | - * as published by the Free Software Foundation; either version |
---|
77 | | - * 2 of the License, or (at your option) any later version. |
---|
78 | 73 | */ |
---|
79 | 74 | |
---|
80 | 75 | #define pr_fmt(fmt) "UDP: " fmt |
---|
81 | 76 | |
---|
82 | 77 | #include <linux/uaccess.h> |
---|
83 | 78 | #include <asm/ioctls.h> |
---|
84 | | -#include <linux/bootmem.h> |
---|
| 79 | +#include <linux/memblock.h> |
---|
85 | 80 | #include <linux/highmem.h> |
---|
86 | 81 | #include <linux/swap.h> |
---|
87 | 82 | #include <linux/types.h> |
---|
.. | .. |
---|
105 | 100 | #include <net/net_namespace.h> |
---|
106 | 101 | #include <net/icmp.h> |
---|
107 | 102 | #include <net/inet_hashtables.h> |
---|
| 103 | +#include <net/ip_tunnels.h> |
---|
108 | 104 | #include <net/route.h> |
---|
109 | 105 | #include <net/checksum.h> |
---|
110 | 106 | #include <net/xfrm.h> |
---|
111 | 107 | #include <trace/events/udp.h> |
---|
112 | 108 | #include <linux/static_key.h> |
---|
| 109 | +#include <linux/btf_ids.h> |
---|
113 | 110 | #include <trace/events/skb.h> |
---|
114 | 111 | #include <net/busy_poll.h> |
---|
115 | 112 | #include "udp_impl.h" |
---|
116 | 113 | #include <net/sock_reuseport.h> |
---|
117 | 114 | #include <net/addrconf.h> |
---|
| 115 | +#include <net/udp_tunnel.h> |
---|
| 116 | +#if IS_ENABLED(CONFIG_IPV6) |
---|
| 117 | +#include <net/ipv6_stubs.h> |
---|
| 118 | +#endif |
---|
| 119 | +#include <trace/hooks/ipv4.h> |
---|
118 | 120 | |
---|
119 | 121 | struct udp_table udp_table __read_mostly; |
---|
120 | 122 | EXPORT_SYMBOL(udp_table); |
---|
.. | .. |
---|
127 | 129 | |
---|
128 | 130 | #define MAX_UDP_PORTS 65536 |
---|
129 | 131 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) |
---|
130 | | - |
---|
131 | | -/* IPCB reference means this can not be used from early demux */ |
---|
132 | | -static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb) |
---|
133 | | -{ |
---|
134 | | -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) |
---|
135 | | - if (!net->ipv4.sysctl_udp_l3mdev_accept && |
---|
136 | | - skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) |
---|
137 | | - return true; |
---|
138 | | -#endif |
---|
139 | | - return false; |
---|
140 | | -} |
---|
141 | 132 | |
---|
142 | 133 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
---|
143 | 134 | const struct udp_hslot *hslot, |
---|
.. | .. |
---|
367 | 358 | static int compute_score(struct sock *sk, struct net *net, |
---|
368 | 359 | __be32 saddr, __be16 sport, |
---|
369 | 360 | __be32 daddr, unsigned short hnum, |
---|
370 | | - int dif, int sdif, bool exact_dif) |
---|
| 361 | + int dif, int sdif) |
---|
371 | 362 | { |
---|
372 | 363 | int score; |
---|
373 | 364 | struct inet_sock *inet; |
---|
| 365 | + bool dev_match; |
---|
374 | 366 | |
---|
375 | 367 | if (!net_eq(sock_net(sk), net) || |
---|
376 | 368 | udp_sk(sk)->udp_port_hash != hnum || |
---|
377 | 369 | ipv6_only_sock(sk)) |
---|
378 | 370 | return -1; |
---|
379 | 371 | |
---|
| 372 | + if (sk->sk_rcv_saddr != daddr) |
---|
| 373 | + return -1; |
---|
| 374 | + |
---|
380 | 375 | score = (sk->sk_family == PF_INET) ? 2 : 1; |
---|
| 376 | + |
---|
381 | 377 | inet = inet_sk(sk); |
---|
382 | | - |
---|
383 | | - if (inet->inet_rcv_saddr) { |
---|
384 | | - if (inet->inet_rcv_saddr != daddr) |
---|
385 | | - return -1; |
---|
386 | | - score += 4; |
---|
387 | | - } |
---|
388 | | - |
---|
389 | 378 | if (inet->inet_daddr) { |
---|
390 | 379 | if (inet->inet_daddr != saddr) |
---|
391 | 380 | return -1; |
---|
.. | .. |
---|
398 | 387 | score += 4; |
---|
399 | 388 | } |
---|
400 | 389 | |
---|
401 | | - if (sk->sk_bound_dev_if || exact_dif) { |
---|
402 | | - bool dev_match = (sk->sk_bound_dev_if == dif || |
---|
403 | | - sk->sk_bound_dev_if == sdif); |
---|
404 | | - |
---|
405 | | - if (!dev_match) |
---|
406 | | - return -1; |
---|
407 | | - if (sk->sk_bound_dev_if) |
---|
408 | | - score += 4; |
---|
409 | | - } |
---|
| 390 | + dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, |
---|
| 391 | + dif, sdif); |
---|
| 392 | + if (!dev_match) |
---|
| 393 | + return -1; |
---|
| 394 | + if (sk->sk_bound_dev_if) |
---|
| 395 | + score += 4; |
---|
410 | 396 | |
---|
411 | 397 | if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) |
---|
412 | 398 | score++; |
---|
.. | .. |
---|
425 | 411 | udp_ehash_secret + net_hash_mix(net)); |
---|
426 | 412 | } |
---|
427 | 413 | |
---|
| 414 | +static struct sock *lookup_reuseport(struct net *net, struct sock *sk, |
---|
| 415 | + struct sk_buff *skb, |
---|
| 416 | + __be32 saddr, __be16 sport, |
---|
| 417 | + __be32 daddr, unsigned short hnum) |
---|
| 418 | +{ |
---|
| 419 | + struct sock *reuse_sk = NULL; |
---|
| 420 | + u32 hash; |
---|
| 421 | + |
---|
| 422 | + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { |
---|
| 423 | + hash = udp_ehashfn(net, daddr, hnum, saddr, sport); |
---|
| 424 | + reuse_sk = reuseport_select_sock(sk, hash, skb, |
---|
| 425 | + sizeof(struct udphdr)); |
---|
| 426 | + } |
---|
| 427 | + return reuse_sk; |
---|
| 428 | +} |
---|
| 429 | + |
---|
428 | 430 | /* called with rcu_read_lock() */ |
---|
429 | 431 | static struct sock *udp4_lib_lookup2(struct net *net, |
---|
430 | 432 | __be32 saddr, __be16 sport, |
---|
431 | 433 | __be32 daddr, unsigned int hnum, |
---|
432 | | - int dif, int sdif, bool exact_dif, |
---|
| 434 | + int dif, int sdif, |
---|
433 | 435 | struct udp_hslot *hslot2, |
---|
434 | 436 | struct sk_buff *skb) |
---|
435 | 437 | { |
---|
436 | | - struct sock *sk, *result, *reuseport_result; |
---|
| 438 | + struct sock *sk, *result; |
---|
437 | 439 | int score, badness; |
---|
438 | | - u32 hash = 0; |
---|
439 | 440 | |
---|
440 | 441 | result = NULL; |
---|
441 | 442 | badness = 0; |
---|
442 | 443 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { |
---|
443 | 444 | score = compute_score(sk, net, saddr, sport, |
---|
444 | | - daddr, hnum, dif, sdif, exact_dif); |
---|
| 445 | + daddr, hnum, dif, sdif); |
---|
445 | 446 | if (score > badness) { |
---|
446 | | - reuseport_result = NULL; |
---|
447 | | - |
---|
448 | | - if (sk->sk_reuseport && |
---|
449 | | - sk->sk_state != TCP_ESTABLISHED) { |
---|
450 | | - hash = udp_ehashfn(net, daddr, hnum, |
---|
451 | | - saddr, sport); |
---|
452 | | - reuseport_result = reuseport_select_sock(sk, hash, skb, |
---|
453 | | - sizeof(struct udphdr)); |
---|
454 | | - if (reuseport_result && !reuseport_has_conns(sk, false)) |
---|
455 | | - return reuseport_result; |
---|
| 447 | + badness = score; |
---|
| 448 | + result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); |
---|
| 449 | + if (!result) { |
---|
| 450 | + result = sk; |
---|
| 451 | + continue; |
---|
456 | 452 | } |
---|
457 | 453 | |
---|
458 | | - result = reuseport_result ? : sk; |
---|
459 | | - badness = score; |
---|
| 454 | + /* Fall back to scoring if group has connections */ |
---|
| 455 | + if (!reuseport_has_conns(sk)) |
---|
| 456 | + return result; |
---|
| 457 | + |
---|
| 458 | + /* Reuseport logic returned an error, keep original score. */ |
---|
| 459 | + if (IS_ERR(result)) |
---|
| 460 | + continue; |
---|
| 461 | + |
---|
| 462 | + badness = compute_score(result, net, saddr, sport, |
---|
| 463 | + daddr, hnum, dif, sdif); |
---|
| 464 | + |
---|
460 | 465 | } |
---|
461 | 466 | } |
---|
462 | 467 | return result; |
---|
| 468 | +} |
---|
| 469 | + |
---|
| 470 | +static struct sock *udp4_lookup_run_bpf(struct net *net, |
---|
| 471 | + struct udp_table *udptable, |
---|
| 472 | + struct sk_buff *skb, |
---|
| 473 | + __be32 saddr, __be16 sport, |
---|
| 474 | + __be32 daddr, u16 hnum) |
---|
| 475 | +{ |
---|
| 476 | + struct sock *sk, *reuse_sk; |
---|
| 477 | + bool no_reuseport; |
---|
| 478 | + |
---|
| 479 | + if (udptable != &udp_table) |
---|
| 480 | + return NULL; /* only UDP is supported */ |
---|
| 481 | + |
---|
| 482 | + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, |
---|
| 483 | + saddr, sport, daddr, hnum, &sk); |
---|
| 484 | + if (no_reuseport || IS_ERR_OR_NULL(sk)) |
---|
| 485 | + return sk; |
---|
| 486 | + |
---|
| 487 | + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); |
---|
| 488 | + if (reuse_sk) |
---|
| 489 | + sk = reuse_sk; |
---|
| 490 | + return sk; |
---|
463 | 491 | } |
---|
464 | 492 | |
---|
465 | 493 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
---|
.. | .. |
---|
469 | 497 | __be16 sport, __be32 daddr, __be16 dport, int dif, |
---|
470 | 498 | int sdif, struct udp_table *udptable, struct sk_buff *skb) |
---|
471 | 499 | { |
---|
472 | | - struct sock *sk, *result; |
---|
473 | 500 | unsigned short hnum = ntohs(dport); |
---|
474 | | - unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); |
---|
475 | | - struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; |
---|
476 | | - bool exact_dif = udp_lib_exact_dif_match(net, skb); |
---|
477 | | - int score, badness; |
---|
478 | | - u32 hash = 0; |
---|
| 501 | + unsigned int hash2, slot2; |
---|
| 502 | + struct udp_hslot *hslot2; |
---|
| 503 | + struct sock *result, *sk; |
---|
479 | 504 | |
---|
480 | | - if (hslot->count > 10) { |
---|
481 | | - hash2 = ipv4_portaddr_hash(net, daddr, hnum); |
---|
482 | | - slot2 = hash2 & udptable->mask; |
---|
483 | | - hslot2 = &udptable->hash2[slot2]; |
---|
484 | | - if (hslot->count < hslot2->count) |
---|
485 | | - goto begin; |
---|
| 505 | + hash2 = ipv4_portaddr_hash(net, daddr, hnum); |
---|
| 506 | + slot2 = hash2 & udptable->mask; |
---|
| 507 | + hslot2 = &udptable->hash2[slot2]; |
---|
486 | 508 | |
---|
487 | | - result = udp4_lib_lookup2(net, saddr, sport, |
---|
488 | | - daddr, hnum, dif, sdif, |
---|
489 | | - exact_dif, hslot2, skb); |
---|
490 | | - if (!result) { |
---|
491 | | - unsigned int old_slot2 = slot2; |
---|
492 | | - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); |
---|
493 | | - slot2 = hash2 & udptable->mask; |
---|
494 | | - /* avoid searching the same slot again. */ |
---|
495 | | - if (unlikely(slot2 == old_slot2)) |
---|
496 | | - return result; |
---|
| 509 | + /* Lookup connected or non-wildcard socket */ |
---|
| 510 | + result = udp4_lib_lookup2(net, saddr, sport, |
---|
| 511 | + daddr, hnum, dif, sdif, |
---|
| 512 | + hslot2, skb); |
---|
| 513 | + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) |
---|
| 514 | + goto done; |
---|
497 | 515 | |
---|
498 | | - hslot2 = &udptable->hash2[slot2]; |
---|
499 | | - if (hslot->count < hslot2->count) |
---|
500 | | - goto begin; |
---|
501 | | - |
---|
502 | | - result = udp4_lib_lookup2(net, saddr, sport, |
---|
503 | | - daddr, hnum, dif, sdif, |
---|
504 | | - exact_dif, hslot2, skb); |
---|
505 | | - } |
---|
506 | | - if (unlikely(IS_ERR(result))) |
---|
507 | | - return NULL; |
---|
508 | | - return result; |
---|
509 | | - } |
---|
510 | | -begin: |
---|
511 | | - result = NULL; |
---|
512 | | - badness = 0; |
---|
513 | | - sk_for_each_rcu(sk, &hslot->head) { |
---|
514 | | - score = compute_score(sk, net, saddr, sport, |
---|
515 | | - daddr, hnum, dif, sdif, exact_dif); |
---|
516 | | - if (score > badness) { |
---|
517 | | - if (sk->sk_reuseport) { |
---|
518 | | - hash = udp_ehashfn(net, daddr, hnum, |
---|
519 | | - saddr, sport); |
---|
520 | | - result = reuseport_select_sock(sk, hash, skb, |
---|
521 | | - sizeof(struct udphdr)); |
---|
522 | | - if (unlikely(IS_ERR(result))) |
---|
523 | | - return NULL; |
---|
524 | | - if (result) |
---|
525 | | - return result; |
---|
526 | | - } |
---|
| 516 | + /* Lookup redirect from BPF */ |
---|
| 517 | + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { |
---|
| 518 | + sk = udp4_lookup_run_bpf(net, udptable, skb, |
---|
| 519 | + saddr, sport, daddr, hnum); |
---|
| 520 | + if (sk) { |
---|
527 | 521 | result = sk; |
---|
528 | | - badness = score; |
---|
| 522 | + goto done; |
---|
529 | 523 | } |
---|
530 | 524 | } |
---|
| 525 | + |
---|
| 526 | + /* Got non-wildcard socket or error on first lookup */ |
---|
| 527 | + if (result) |
---|
| 528 | + goto done; |
---|
| 529 | + |
---|
| 530 | + /* Lookup wildcard sockets */ |
---|
| 531 | + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); |
---|
| 532 | + slot2 = hash2 & udptable->mask; |
---|
| 533 | + hslot2 = &udptable->hash2[slot2]; |
---|
| 534 | + |
---|
| 535 | + result = udp4_lib_lookup2(net, saddr, sport, |
---|
| 536 | + htonl(INADDR_ANY), hnum, dif, sdif, |
---|
| 537 | + hslot2, skb); |
---|
| 538 | +done: |
---|
| 539 | + if (IS_ERR(result)) |
---|
| 540 | + return NULL; |
---|
531 | 541 | return result; |
---|
532 | 542 | } |
---|
533 | 543 | EXPORT_SYMBOL_GPL(__udp4_lib_lookup); |
---|
.. | .. |
---|
585 | 595 | (inet->inet_dport != rmt_port && inet->inet_dport) || |
---|
586 | 596 | (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || |
---|
587 | 597 | ipv6_only_sock(sk) || |
---|
588 | | - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && |
---|
589 | | - sk->sk_bound_dev_if != sdif)) |
---|
| 598 | + !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) |
---|
590 | 599 | return false; |
---|
591 | 600 | if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif)) |
---|
592 | 601 | return false; |
---|
593 | 602 | return true; |
---|
| 603 | +} |
---|
| 604 | + |
---|
| 605 | +DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); |
---|
| 606 | +void udp_encap_enable(void) |
---|
| 607 | +{ |
---|
| 608 | + static_branch_inc(&udp_encap_needed_key); |
---|
| 609 | +} |
---|
| 610 | +EXPORT_SYMBOL(udp_encap_enable); |
---|
| 611 | + |
---|
| 612 | +void udp_encap_disable(void) |
---|
| 613 | +{ |
---|
| 614 | + static_branch_dec(&udp_encap_needed_key); |
---|
| 615 | +} |
---|
| 616 | +EXPORT_SYMBOL(udp_encap_disable); |
---|
| 617 | + |
---|
| 618 | +/* Handler for tunnels with arbitrary destination ports: no socket lookup, go |
---|
| 619 | + * through error handlers in encapsulations looking for a match. |
---|
| 620 | + */ |
---|
| 621 | +static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info) |
---|
| 622 | +{ |
---|
| 623 | + int i; |
---|
| 624 | + |
---|
| 625 | + for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { |
---|
| 626 | + int (*handler)(struct sk_buff *skb, u32 info); |
---|
| 627 | + const struct ip_tunnel_encap_ops *encap; |
---|
| 628 | + |
---|
| 629 | + encap = rcu_dereference(iptun_encaps[i]); |
---|
| 630 | + if (!encap) |
---|
| 631 | + continue; |
---|
| 632 | + handler = encap->err_handler; |
---|
| 633 | + if (handler && !handler(skb, info)) |
---|
| 634 | + return 0; |
---|
| 635 | + } |
---|
| 636 | + |
---|
| 637 | + return -ENOENT; |
---|
| 638 | +} |
---|
| 639 | + |
---|
| 640 | +/* Try to match ICMP errors to UDP tunnels by looking up a socket without |
---|
| 641 | + * reversing source and destination port: this will match tunnels that force the |
---|
| 642 | + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that |
---|
| 643 | + * lwtunnels might actually break this assumption by being configured with |
---|
| 644 | + * different destination ports on endpoints, in this case we won't be able to |
---|
| 645 | + * trace ICMP messages back to them. |
---|
| 646 | + * |
---|
| 647 | + * If this doesn't match any socket, probe tunnels with arbitrary destination |
---|
| 648 | + * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port |
---|
| 649 | + * we've sent packets to won't necessarily match the local destination port. |
---|
| 650 | + * |
---|
| 651 | + * Then ask the tunnel implementation to match the error against a valid |
---|
| 652 | + * association. |
---|
| 653 | + * |
---|
| 654 | + * Return an error if we can't find a match, the socket if we need further |
---|
| 655 | + * processing, zero otherwise. |
---|
| 656 | + */ |
---|
| 657 | +static struct sock *__udp4_lib_err_encap(struct net *net, |
---|
| 658 | + const struct iphdr *iph, |
---|
| 659 | + struct udphdr *uh, |
---|
| 660 | + struct udp_table *udptable, |
---|
| 661 | + struct sk_buff *skb, u32 info) |
---|
| 662 | +{ |
---|
| 663 | + int network_offset, transport_offset; |
---|
| 664 | + struct sock *sk; |
---|
| 665 | + |
---|
| 666 | + network_offset = skb_network_offset(skb); |
---|
| 667 | + transport_offset = skb_transport_offset(skb); |
---|
| 668 | + |
---|
| 669 | + /* Network header needs to point to the outer IPv4 header inside ICMP */ |
---|
| 670 | + skb_reset_network_header(skb); |
---|
| 671 | + |
---|
| 672 | + /* Transport header needs to point to the UDP header */ |
---|
| 673 | + skb_set_transport_header(skb, iph->ihl << 2); |
---|
| 674 | + |
---|
| 675 | + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, |
---|
| 676 | + iph->saddr, uh->dest, skb->dev->ifindex, 0, |
---|
| 677 | + udptable, NULL); |
---|
| 678 | + if (sk) { |
---|
| 679 | + int (*lookup)(struct sock *sk, struct sk_buff *skb); |
---|
| 680 | + struct udp_sock *up = udp_sk(sk); |
---|
| 681 | + |
---|
| 682 | + lookup = READ_ONCE(up->encap_err_lookup); |
---|
| 683 | + if (!lookup || lookup(sk, skb)) |
---|
| 684 | + sk = NULL; |
---|
| 685 | + } |
---|
| 686 | + |
---|
| 687 | + if (!sk) |
---|
| 688 | + sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); |
---|
| 689 | + |
---|
| 690 | + skb_set_transport_header(skb, transport_offset); |
---|
| 691 | + skb_set_network_header(skb, network_offset); |
---|
| 692 | + |
---|
| 693 | + return sk; |
---|
594 | 694 | } |
---|
595 | 695 | |
---|
596 | 696 | /* |
---|
.. | .. |
---|
604 | 704 | * to find the appropriate port. |
---|
605 | 705 | */ |
---|
606 | 706 | |
---|
607 | | -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
---|
| 707 | +int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
---|
608 | 708 | { |
---|
609 | 709 | struct inet_sock *inet; |
---|
610 | 710 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
---|
611 | 711 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); |
---|
612 | 712 | const int type = icmp_hdr(skb)->type; |
---|
613 | 713 | const int code = icmp_hdr(skb)->code; |
---|
| 714 | + bool tunnel = false; |
---|
614 | 715 | struct sock *sk; |
---|
615 | 716 | int harderr; |
---|
616 | 717 | int err; |
---|
617 | 718 | struct net *net = dev_net(skb->dev); |
---|
618 | 719 | |
---|
619 | 720 | sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, |
---|
620 | | - iph->saddr, uh->source, skb->dev->ifindex, 0, |
---|
621 | | - udptable, NULL); |
---|
| 721 | + iph->saddr, uh->source, skb->dev->ifindex, |
---|
| 722 | + inet_sdif(skb), udptable, NULL); |
---|
622 | 723 | if (!sk) { |
---|
623 | | - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); |
---|
624 | | - return; /* No socket for error */ |
---|
| 724 | + /* No socket for error: try tunnels before discarding */ |
---|
| 725 | + sk = ERR_PTR(-ENOENT); |
---|
| 726 | + if (static_branch_unlikely(&udp_encap_needed_key)) { |
---|
| 727 | + sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, |
---|
| 728 | + info); |
---|
| 729 | + if (!sk) |
---|
| 730 | + return 0; |
---|
| 731 | + } |
---|
| 732 | + |
---|
| 733 | + if (IS_ERR(sk)) { |
---|
| 734 | + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); |
---|
| 735 | + return PTR_ERR(sk); |
---|
| 736 | + } |
---|
| 737 | + |
---|
| 738 | + tunnel = true; |
---|
625 | 739 | } |
---|
626 | 740 | |
---|
627 | 741 | err = 0; |
---|
.. | .. |
---|
664 | 778 | * RFC1122: OK. Passes ICMP errors back to application, as per |
---|
665 | 779 | * 4.1.3.3. |
---|
666 | 780 | */ |
---|
| 781 | + if (tunnel) { |
---|
| 782 | + /* ...not for tunnels though: we don't have a sending socket */ |
---|
| 783 | + goto out; |
---|
| 784 | + } |
---|
667 | 785 | if (!inet->recverr) { |
---|
668 | 786 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
---|
669 | 787 | goto out; |
---|
.. | .. |
---|
673 | 791 | sk->sk_err = err; |
---|
674 | 792 | sk->sk_error_report(sk); |
---|
675 | 793 | out: |
---|
676 | | - return; |
---|
| 794 | + return 0; |
---|
677 | 795 | } |
---|
678 | 796 | |
---|
679 | | -void udp_err(struct sk_buff *skb, u32 info) |
---|
| 797 | +int udp_err(struct sk_buff *skb, u32 info) |
---|
680 | 798 | { |
---|
681 | | - __udp4_lib_err(skb, info, &udp_table); |
---|
| 799 | + return __udp4_lib_err(skb, info, &udp_table); |
---|
682 | 800 | } |
---|
683 | 801 | |
---|
684 | 802 | /* |
---|
.. | .. |
---|
949 | 1067 | |
---|
950 | 1068 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ |
---|
951 | 1069 | return -EOPNOTSUPP; |
---|
| 1070 | + trace_android_rvh_udp_sendmsg(sk); |
---|
952 | 1071 | |
---|
953 | 1072 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
---|
954 | 1073 | |
---|
.. | .. |
---|
1061 | 1180 | } |
---|
1062 | 1181 | |
---|
1063 | 1182 | if (ipv4_is_multicast(daddr)) { |
---|
1064 | | - if (!ipc.oif) |
---|
| 1183 | + if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) |
---|
1065 | 1184 | ipc.oif = inet->mc_index; |
---|
1066 | 1185 | if (!saddr) |
---|
1067 | 1186 | saddr = inet->mc_addr; |
---|
.. | .. |
---|
1070 | 1189 | ipc.oif = inet->uc_index; |
---|
1071 | 1190 | } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { |
---|
1072 | 1191 | /* oif is set, packet is to local broadcast and |
---|
1073 | | - * and uc_index is set. oif is most likely set |
---|
| 1192 | + * uc_index is set. oif is most likely set |
---|
1074 | 1193 | * by sk_bound_dev_if. If uc_index != oif check if the |
---|
1075 | 1194 | * oif is an L3 master and uc_index is an L3 slave. |
---|
1076 | 1195 | * If so, we want to allow the send using the uc_index. |
---|
.. | .. |
---|
1091 | 1210 | |
---|
1092 | 1211 | fl4 = &fl4_stack; |
---|
1093 | 1212 | |
---|
1094 | | - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, |
---|
| 1213 | + flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, |
---|
1095 | 1214 | RT_SCOPE_UNIVERSE, sk->sk_protocol, |
---|
1096 | 1215 | flow_flags, |
---|
1097 | 1216 | faddr, saddr, dport, inet->inet_sport, |
---|
1098 | 1217 | sk->sk_uid); |
---|
1099 | 1218 | |
---|
1100 | | - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); |
---|
| 1219 | + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); |
---|
1101 | 1220 | rt = ip_route_output_flow(net, fl4, sk); |
---|
1102 | 1221 | if (IS_ERR(rt)) { |
---|
1103 | 1222 | err = PTR_ERR(rt); |
---|
.. | .. |
---|
1254 | 1373 | |
---|
1255 | 1374 | #define UDP_SKB_IS_STATELESS 0x80000000 |
---|
1256 | 1375 | |
---|
| 1376 | +/* all head states (dst, sk, nf conntrack) except skb extensions are |
---|
| 1377 | + * cleared by udp_rcv(). |
---|
| 1378 | + * |
---|
| 1379 | + * We need to preserve secpath, if present, to eventually process |
---|
| 1380 | + * IP_CMSG_PASSSEC at recvmsg() time. |
---|
| 1381 | + * |
---|
| 1382 | + * Other extensions can be cleared. |
---|
| 1383 | + */ |
---|
| 1384 | +static bool udp_try_make_stateless(struct sk_buff *skb) |
---|
| 1385 | +{ |
---|
| 1386 | + if (!skb_has_extensions(skb)) |
---|
| 1387 | + return true; |
---|
| 1388 | + |
---|
| 1389 | + if (!secpath_exists(skb)) { |
---|
| 1390 | + skb_ext_reset(skb); |
---|
| 1391 | + return true; |
---|
| 1392 | + } |
---|
| 1393 | + |
---|
| 1394 | + return false; |
---|
| 1395 | +} |
---|
| 1396 | + |
---|
1257 | 1397 | static void udp_set_dev_scratch(struct sk_buff *skb) |
---|
1258 | 1398 | { |
---|
1259 | 1399 | struct udp_dev_scratch *scratch = udp_skb_scratch(skb); |
---|
.. | .. |
---|
1265 | 1405 | scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); |
---|
1266 | 1406 | scratch->is_linear = !skb_is_nonlinear(skb); |
---|
1267 | 1407 | #endif |
---|
1268 | | - /* all head states execept sp (dst, sk, nf) are always cleared by |
---|
1269 | | - * udp_rcv() and we need to preserve secpath, if present, to eventually |
---|
1270 | | - * process IP_CMSG_PASSSEC at recvmsg() time |
---|
1271 | | - */ |
---|
1272 | | - if (likely(!skb_sec_path(skb))) |
---|
| 1408 | + if (udp_try_make_stateless(skb)) |
---|
1273 | 1409 | scratch->_tsize_state |= UDP_SKB_IS_STATELESS; |
---|
1274 | 1410 | } |
---|
1275 | 1411 | |
---|
.. | .. |
---|
1458 | 1594 | } |
---|
1459 | 1595 | EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); |
---|
1460 | 1596 | |
---|
1461 | | -void udp_destruct_sock(struct sock *sk) |
---|
| 1597 | +void udp_destruct_common(struct sock *sk) |
---|
1462 | 1598 | { |
---|
1463 | 1599 | /* reclaim completely the forward allocated memory */ |
---|
1464 | 1600 | struct udp_sock *up = udp_sk(sk); |
---|
.. | .. |
---|
1471 | 1607 | kfree_skb(skb); |
---|
1472 | 1608 | } |
---|
1473 | 1609 | udp_rmem_release(sk, total, 0, true); |
---|
| 1610 | +} |
---|
| 1611 | +EXPORT_SYMBOL_GPL(udp_destruct_common); |
---|
1474 | 1612 | |
---|
| 1613 | +static void udp_destruct_sock(struct sock *sk) |
---|
| 1614 | +{ |
---|
| 1615 | + udp_destruct_common(sk); |
---|
1475 | 1616 | inet_sock_destruct(sk); |
---|
1476 | 1617 | } |
---|
1477 | | -EXPORT_SYMBOL_GPL(udp_destruct_sock); |
---|
1478 | 1618 | |
---|
1479 | 1619 | int udp_init_sock(struct sock *sk) |
---|
1480 | 1620 | { |
---|
.. | .. |
---|
1482 | 1622 | sk->sk_destruct = udp_destruct_sock; |
---|
1483 | 1623 | return 0; |
---|
1484 | 1624 | } |
---|
1485 | | -EXPORT_SYMBOL_GPL(udp_init_sock); |
---|
1486 | 1625 | |
---|
1487 | 1626 | void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) |
---|
1488 | 1627 | { |
---|
.. | .. |
---|
1590 | 1729 | EXPORT_SYMBOL(udp_ioctl); |
---|
1591 | 1730 | |
---|
1592 | 1731 | struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, |
---|
1593 | | - int noblock, int *peeked, int *off, int *err) |
---|
| 1732 | + int noblock, int *off, int *err) |
---|
1594 | 1733 | { |
---|
1595 | 1734 | struct sk_buff_head *sk_queue = &sk->sk_receive_queue; |
---|
1596 | 1735 | struct sk_buff_head *queue; |
---|
.. | .. |
---|
1609 | 1748 | break; |
---|
1610 | 1749 | |
---|
1611 | 1750 | error = -EAGAIN; |
---|
1612 | | - *peeked = 0; |
---|
1613 | 1751 | do { |
---|
1614 | 1752 | spin_lock_bh(&queue->lock); |
---|
1615 | | - skb = __skb_try_recv_from_queue(sk, queue, flags, |
---|
1616 | | - udp_skb_destructor, |
---|
1617 | | - peeked, off, err, |
---|
1618 | | - &last); |
---|
| 1753 | + skb = __skb_try_recv_from_queue(sk, queue, flags, off, |
---|
| 1754 | + err, &last); |
---|
1619 | 1755 | if (skb) { |
---|
| 1756 | + if (!(flags & MSG_PEEK)) |
---|
| 1757 | + udp_skb_destructor(sk, skb); |
---|
1620 | 1758 | spin_unlock_bh(&queue->lock); |
---|
1621 | 1759 | return skb; |
---|
1622 | 1760 | } |
---|
.. | .. |
---|
1634 | 1772 | spin_lock(&sk_queue->lock); |
---|
1635 | 1773 | skb_queue_splice_tail_init(sk_queue, queue); |
---|
1636 | 1774 | |
---|
1637 | | - skb = __skb_try_recv_from_queue(sk, queue, flags, |
---|
1638 | | - udp_skb_dtor_locked, |
---|
1639 | | - peeked, off, err, |
---|
1640 | | - &last); |
---|
| 1775 | + skb = __skb_try_recv_from_queue(sk, queue, flags, off, |
---|
| 1776 | + err, &last); |
---|
| 1777 | + if (skb && !(flags & MSG_PEEK)) |
---|
| 1778 | + udp_skb_dtor_locked(sk, skb); |
---|
1641 | 1779 | spin_unlock(&sk_queue->lock); |
---|
1642 | 1780 | spin_unlock_bh(&queue->lock); |
---|
1643 | 1781 | if (skb) |
---|
.. | .. |
---|
1652 | 1790 | |
---|
1653 | 1791 | /* sk_queue is empty, reader_queue may contain peeked packets */ |
---|
1654 | 1792 | } while (timeo && |
---|
1655 | | - !__skb_wait_for_more_packets(sk, &error, &timeo, |
---|
| 1793 | + !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, |
---|
| 1794 | + &error, &timeo, |
---|
1656 | 1795 | (struct sk_buff *)sk_queue)); |
---|
1657 | 1796 | |
---|
1658 | 1797 | *err = error; |
---|
.. | .. |
---|
1672 | 1811 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); |
---|
1673 | 1812 | struct sk_buff *skb; |
---|
1674 | 1813 | unsigned int ulen, copied; |
---|
1675 | | - int peeked, peeking, off; |
---|
1676 | | - int err; |
---|
| 1814 | + int off, err, peeking = flags & MSG_PEEK; |
---|
1677 | 1815 | int is_udplite = IS_UDPLITE(sk); |
---|
1678 | 1816 | bool checksum_valid = false; |
---|
1679 | 1817 | |
---|
.. | .. |
---|
1681 | 1819 | return ip_recv_error(sk, msg, len, addr_len); |
---|
1682 | 1820 | |
---|
1683 | 1821 | try_again: |
---|
1684 | | - peeking = flags & MSG_PEEK; |
---|
1685 | 1822 | off = sk_peek_offset(sk, flags); |
---|
1686 | | - skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); |
---|
| 1823 | + skb = __skb_recv_udp(sk, flags, noblock, &off, &err); |
---|
1687 | 1824 | if (!skb) |
---|
1688 | 1825 | return err; |
---|
| 1826 | + trace_android_rvh_udp_recvmsg(sk); |
---|
1689 | 1827 | |
---|
1690 | 1828 | ulen = udp_skb_len(skb); |
---|
1691 | 1829 | copied = len; |
---|
.. | .. |
---|
1721 | 1859 | } |
---|
1722 | 1860 | |
---|
1723 | 1861 | if (unlikely(err)) { |
---|
1724 | | - if (!peeked) { |
---|
| 1862 | + if (!peeking) { |
---|
1725 | 1863 | atomic_inc(&sk->sk_drops); |
---|
1726 | 1864 | UDP_INC_STATS(sock_net(sk), |
---|
1727 | 1865 | UDP_MIB_INERRORS, is_udplite); |
---|
.. | .. |
---|
1730 | 1868 | return err; |
---|
1731 | 1869 | } |
---|
1732 | 1870 | |
---|
1733 | | - if (!peeked) |
---|
| 1871 | + if (!peeking) |
---|
1734 | 1872 | UDP_INC_STATS(sock_net(sk), |
---|
1735 | 1873 | UDP_MIB_INDATAGRAMS, is_udplite); |
---|
1736 | 1874 | |
---|
.. | .. |
---|
1748 | 1886 | BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, |
---|
1749 | 1887 | (struct sockaddr *)sin); |
---|
1750 | 1888 | } |
---|
| 1889 | + |
---|
| 1890 | + if (udp_sk(sk)->gro_enabled) |
---|
| 1891 | + udp_cmsg_recv(msg, sk, skb); |
---|
| 1892 | + |
---|
1751 | 1893 | if (inet->cmsg_flags) |
---|
1752 | 1894 | ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); |
---|
1753 | 1895 | |
---|
.. | .. |
---|
1797 | 1939 | inet->inet_dport = 0; |
---|
1798 | 1940 | sock_rps_reset_rxhash(sk); |
---|
1799 | 1941 | sk->sk_bound_dev_if = 0; |
---|
1800 | | - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
---|
| 1942 | + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { |
---|
1801 | 1943 | inet_reset_saddr(sk); |
---|
| 1944 | + if (sk->sk_prot->rehash && |
---|
| 1945 | + (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) |
---|
| 1946 | + sk->sk_prot->rehash(sk); |
---|
| 1947 | + } |
---|
1802 | 1948 | |
---|
1803 | 1949 | if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { |
---|
1804 | 1950 | sk->sk_prot->unhash(sk); |
---|
.. | .. |
---|
1887 | 2033 | } |
---|
1888 | 2034 | EXPORT_SYMBOL(udp_lib_rehash); |
---|
1889 | 2035 | |
---|
1890 | | -static void udp_v4_rehash(struct sock *sk) |
---|
| 2036 | +void udp_v4_rehash(struct sock *sk) |
---|
1891 | 2037 | { |
---|
1892 | 2038 | u16 new_hash = ipv4_portaddr_hash(sock_net(sk), |
---|
1893 | 2039 | inet_sk(sk)->inet_rcv_saddr, |
---|
.. | .. |
---|
1924 | 2070 | return 0; |
---|
1925 | 2071 | } |
---|
1926 | 2072 | |
---|
1927 | | -static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); |
---|
1928 | | -void udp_encap_enable(void) |
---|
1929 | | -{ |
---|
1930 | | - static_branch_enable(&udp_encap_needed_key); |
---|
1931 | | -} |
---|
1932 | | -EXPORT_SYMBOL(udp_encap_enable); |
---|
1933 | | - |
---|
1934 | 2073 | /* returns: |
---|
1935 | 2074 | * -1: error |
---|
1936 | 2075 | * 0: success |
---|
.. | .. |
---|
1939 | 2078 | * Note that in the success and error cases, the skb is assumed to |
---|
1940 | 2079 | * have either been requeued or freed. |
---|
1941 | 2080 | */ |
---|
1942 | | -static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
---|
| 2081 | +static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) |
---|
1943 | 2082 | { |
---|
1944 | 2083 | struct udp_sock *up = udp_sk(sk); |
---|
1945 | 2084 | int is_udplite = IS_UDPLITE(sk); |
---|
.. | .. |
---|
1949 | 2088 | */ |
---|
1950 | 2089 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
---|
1951 | 2090 | goto drop; |
---|
1952 | | - nf_reset(skb); |
---|
| 2091 | + nf_reset_ct(skb); |
---|
1953 | 2092 | |
---|
1954 | 2093 | if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { |
---|
1955 | 2094 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); |
---|
.. | .. |
---|
2042 | 2181 | return -1; |
---|
2043 | 2182 | } |
---|
2044 | 2183 | |
---|
| 2184 | +static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Queue skb on sk; an skb flagged by udp_unexpected_gso() is segmented first and every segment is queued on its own. */ |
---|
| 2185 | +{ |
---|
| 2186 | + struct sk_buff *next, *segs; |
---|
| 2187 | + int ret; |
---|
| 2188 | + /* Expected case: hand the skb to udp_queue_rcv_one_skb() unchanged and return its result. */ |
---|
| 2189 | + if (likely(!udp_unexpected_gso(sk, skb))) |
---|
| 2190 | + return udp_queue_rcv_one_skb(sk, skb); |
---|
| 2191 | + |
---|
| 2192 | + BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET); /* compile-time check: struct udp_skb_cb must not exceed SKB_GSO_CB_OFFSET */ |
---|
| 2193 | + __skb_push(skb, -skb_mac_offset(skb)); /* presumably moves data back to the MAC header before segmenting; TODO confirm */ |
---|
| 2194 | + segs = udp_rcv_segment(sk, skb, true); |
---|
| 2195 | + skb_list_walk_safe(segs, skb, next) { |
---|
| 2196 | + __skb_pull(skb, skb_transport_offset(skb)); /* advance data by the transport offset before queueing this segment */ |
---|
| 2197 | + ret = udp_queue_rcv_one_skb(sk, skb); |
---|
| 2198 | + if (ret > 0) /* a positive result is forwarded as the last argument below; presumably an encap resubmission, confirm against udp_queue_rcv_one_skb() */ |
---|
| 2199 | + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); |
---|
| 2200 | + } |
---|
| 2201 | + return 0; /* always 0 on the segmented path; per-segment results are not propagated to the caller */ |
---|
| 2202 | +} |
---|
| 2203 | + |
---|
2045 | 2204 | /* For TCP sockets, sk_rx_dst is protected by socket lock |
---|
2046 | 2205 | * For UDP, we use xchg() to guard against concurrent changes. |
---|
2047 | 2206 | */ |
---|
.. | .. |
---|
2050 | 2209 | struct dst_entry *old; |
---|
2051 | 2210 | |
---|
2052 | 2211 | if (dst_hold_safe(dst)) { |
---|
2053 | | - old = xchg(&sk->sk_rx_dst, dst); |
---|
| 2212 | + old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); |
---|
2054 | 2213 | dst_release(old); |
---|
2055 | 2214 | return old != dst; |
---|
2056 | 2215 | } |
---|
.. | .. |
---|
2130 | 2289 | |
---|
2131 | 2290 | /* Initialize UDP checksum. If exited with zero value (success), |
---|
2132 | 2291 | * CHECKSUM_UNNECESSARY means, that no more checks are required. |
---|
2133 | | - * Otherwise, csum completion requires chacksumming packet body, |
---|
| 2292 | + * Otherwise, csum completion requires checksumming packet body, |
---|
2134 | 2293 | * including udp header and folding it to skb->csum. |
---|
2135 | 2294 | */ |
---|
2136 | 2295 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, |
---|
.. | .. |
---|
2184 | 2343 | int ret; |
---|
2185 | 2344 | |
---|
2186 | 2345 | if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) |
---|
2187 | | - skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, |
---|
2188 | | - inet_compute_pseudo); |
---|
| 2346 | + skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo); |
---|
2189 | 2347 | |
---|
2190 | 2348 | ret = udp_queue_rcv_skb(sk, skb); |
---|
2191 | 2349 | |
---|
.. | .. |
---|
2210 | 2368 | struct rtable *rt = skb_rtable(skb); |
---|
2211 | 2369 | __be32 saddr, daddr; |
---|
2212 | 2370 | struct net *net = dev_net(skb->dev); |
---|
| 2371 | + bool refcounted; |
---|
2213 | 2372 | |
---|
2214 | 2373 | /* |
---|
2215 | 2374 | * Validate the packet. |
---|
.. | .. |
---|
2235 | 2394 | if (udp4_csum_init(skb, uh, proto)) |
---|
2236 | 2395 | goto csum_error; |
---|
2237 | 2396 | |
---|
2238 | | - sk = skb_steal_sock(skb); |
---|
| 2397 | + sk = skb_steal_sock(skb, &refcounted); |
---|
2239 | 2398 | if (sk) { |
---|
2240 | 2399 | struct dst_entry *dst = skb_dst(skb); |
---|
2241 | 2400 | int ret; |
---|
2242 | 2401 | |
---|
2243 | | - if (unlikely(sk->sk_rx_dst != dst)) |
---|
| 2402 | + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) |
---|
2244 | 2403 | udp_sk_rx_dst_set(sk, dst); |
---|
2245 | 2404 | |
---|
2246 | 2405 | ret = udp_unicast_rcv_skb(sk, skb, uh); |
---|
2247 | | - sock_put(sk); |
---|
| 2406 | + if (refcounted) |
---|
| 2407 | + sock_put(sk); |
---|
2248 | 2408 | return ret; |
---|
2249 | 2409 | } |
---|
2250 | 2410 | |
---|
.. | .. |
---|
2258 | 2418 | |
---|
2259 | 2419 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
---|
2260 | 2420 | goto drop; |
---|
2261 | | - nf_reset(skb); |
---|
| 2421 | + nf_reset_ct(skb); |
---|
2262 | 2422 | |
---|
2263 | 2423 | /* No socket. Drop packet silently, if checksum is wrong */ |
---|
2264 | 2424 | if (udp_lib_checksum_complete(skb)) |
---|
.. | .. |
---|
2346 | 2506 | struct sock *sk; |
---|
2347 | 2507 | |
---|
2348 | 2508 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { |
---|
2349 | | - if (INET_MATCH(sk, net, acookie, rmt_addr, |
---|
2350 | | - loc_addr, ports, dif, sdif)) |
---|
| 2509 | + if (INET_MATCH(net, sk, acookie, ports, dif, sdif)) |
---|
2351 | 2510 | return sk; |
---|
2352 | 2511 | /* Only check first socket in chain */ |
---|
2353 | 2512 | break; |
---|
.. | .. |
---|
2398 | 2557 | |
---|
2399 | 2558 | skb->sk = sk; |
---|
2400 | 2559 | skb->destructor = sock_efree; |
---|
2401 | | - dst = READ_ONCE(sk->sk_rx_dst); |
---|
| 2560 | + dst = rcu_dereference(sk->sk_rx_dst); |
---|
2402 | 2561 | |
---|
2403 | 2562 | if (dst) |
---|
2404 | 2563 | dst = dst_check(dst, 0); |
---|
.. | .. |
---|
2437 | 2596 | sock_set_flag(sk, SOCK_DEAD); |
---|
2438 | 2597 | udp_flush_pending_frames(sk); |
---|
2439 | 2598 | unlock_sock_fast(sk, slow); |
---|
2440 | | - if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { |
---|
2441 | | - void (*encap_destroy)(struct sock *sk); |
---|
2442 | | - encap_destroy = READ_ONCE(up->encap_destroy); |
---|
2443 | | - if (encap_destroy) |
---|
2444 | | - encap_destroy(sk); |
---|
| 2599 | + if (static_branch_unlikely(&udp_encap_needed_key)) { |
---|
| 2600 | + if (up->encap_type) { |
---|
| 2601 | + void (*encap_destroy)(struct sock *sk); |
---|
| 2602 | + encap_destroy = READ_ONCE(up->encap_destroy); |
---|
| 2603 | + if (encap_destroy) |
---|
| 2604 | + encap_destroy(sk); |
---|
| 2605 | + } |
---|
| 2606 | + if (up->encap_enabled) |
---|
| 2607 | + static_branch_dec(&udp_encap_needed_key); |
---|
2445 | 2608 | } |
---|
2446 | 2609 | } |
---|
2447 | 2610 | |
---|
.. | .. |
---|
2449 | 2612 | * Socket option code for UDP |
---|
2450 | 2613 | */ |
---|
2451 | 2614 | int udp_lib_setsockopt(struct sock *sk, int level, int optname, |
---|
2452 | | - char __user *optval, unsigned int optlen, |
---|
| 2615 | + sockptr_t optval, unsigned int optlen, |
---|
2453 | 2616 | int (*push_pending_frames)(struct sock *)) |
---|
2454 | 2617 | { |
---|
2455 | 2618 | struct udp_sock *up = udp_sk(sk); |
---|
.. | .. |
---|
2460 | 2623 | if (optlen < sizeof(int)) |
---|
2461 | 2624 | return -EINVAL; |
---|
2462 | 2625 | |
---|
2463 | | - if (get_user(val, (int __user *)optval)) |
---|
| 2626 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
2464 | 2627 | return -EFAULT; |
---|
2465 | 2628 | |
---|
2466 | 2629 | valbool = val ? 1 : 0; |
---|
.. | .. |
---|
2480 | 2643 | case UDP_ENCAP: |
---|
2481 | 2644 | switch (val) { |
---|
2482 | 2645 | case 0: |
---|
| 2646 | +#ifdef CONFIG_XFRM |
---|
2483 | 2647 | case UDP_ENCAP_ESPINUDP: |
---|
2484 | 2648 | case UDP_ENCAP_ESPINUDP_NON_IKE: |
---|
2485 | | - up->encap_rcv = xfrm4_udp_encap_rcv; |
---|
2486 | | - /* FALLTHROUGH */ |
---|
| 2649 | +#if IS_ENABLED(CONFIG_IPV6) |
---|
| 2650 | + if (sk->sk_family == AF_INET6) |
---|
| 2651 | + up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; |
---|
| 2652 | + else |
---|
| 2653 | +#endif |
---|
| 2654 | + up->encap_rcv = xfrm4_udp_encap_rcv; |
---|
| 2655 | +#endif |
---|
| 2656 | + fallthrough; |
---|
2487 | 2657 | case UDP_ENCAP_L2TPINUDP: |
---|
2488 | 2658 | up->encap_type = val; |
---|
2489 | | - udp_encap_enable(); |
---|
| 2659 | + lock_sock(sk); |
---|
| 2660 | + udp_tunnel_encap_enable(sk->sk_socket); |
---|
| 2661 | + release_sock(sk); |
---|
2490 | 2662 | break; |
---|
2491 | 2663 | default: |
---|
2492 | 2664 | err = -ENOPROTOOPT; |
---|
.. | .. |
---|
2506 | 2678 | if (val < 0 || val > USHRT_MAX) |
---|
2507 | 2679 | return -EINVAL; |
---|
2508 | 2680 | WRITE_ONCE(up->gso_size, val); |
---|
| 2681 | + break; |
---|
| 2682 | + |
---|
| 2683 | + case UDP_GRO: |
---|
| 2684 | + lock_sock(sk); |
---|
| 2685 | + |
---|
| 2686 | + /* when enabling GRO, accept the related GSO packet type */ |
---|
| 2687 | + if (valbool) |
---|
| 2688 | + udp_tunnel_encap_enable(sk->sk_socket); |
---|
| 2689 | + up->gro_enabled = valbool; |
---|
| 2690 | + up->accept_udp_l4 = valbool; |
---|
| 2691 | + release_sock(sk); |
---|
2509 | 2692 | break; |
---|
2510 | 2693 | |
---|
2511 | 2694 | /* |
---|
.. | .. |
---|
2547 | 2730 | } |
---|
2548 | 2731 | EXPORT_SYMBOL(udp_lib_setsockopt); |
---|
2549 | 2732 | |
---|
2550 | | -int udp_setsockopt(struct sock *sk, int level, int optname, |
---|
2551 | | - char __user *optval, unsigned int optlen) |
---|
| 2733 | +int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, |
---|
| 2734 | + unsigned int optlen) /* setsockopt entry point: UDP-level options are handled by udp_lib_setsockopt(), every other level by ip_setsockopt(). */ |
---|
2552 | 2735 | { |
---|
2553 | 2736 | if (level == SOL_UDP || level == SOL_UDPLITE) |
---|
2554 | | - return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
---|
| 2737 | + return udp_lib_setsockopt(sk, level, optname, |
---|
| 2738 | + optval, optlen, |
---|
2555 | 2739 | udp_push_pending_frames); /* callback handed to udp_lib_setsockopt() as its push_pending_frames argument */ |
---|
2556 | 2740 | return ip_setsockopt(sk, level, optname, optval, optlen); /* not SOL_UDP or SOL_UDPLITE: defer to the IP layer */ |
---|
2557 | 2741 | } |
---|
2558 | | - |
---|
2559 | | -#ifdef CONFIG_COMPAT |
---|
2560 | | -int compat_udp_setsockopt(struct sock *sk, int level, int optname, |
---|
2561 | | - char __user *optval, unsigned int optlen) |
---|
2562 | | -{ |
---|
2563 | | - if (level == SOL_UDP || level == SOL_UDPLITE) |
---|
2564 | | - return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
---|
2565 | | - udp_push_pending_frames); |
---|
2566 | | - return compat_ip_setsockopt(sk, level, optname, optval, optlen); |
---|
2567 | | -} |
---|
2568 | | -#endif |
---|
2569 | 2742 | |
---|
2570 | 2743 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, |
---|
2571 | 2744 | char __user *optval, int __user *optlen) |
---|
.. | .. |
---|
2602 | 2775 | val = READ_ONCE(up->gso_size); |
---|
2603 | 2776 | break; |
---|
2604 | 2777 | |
---|
| 2778 | + case UDP_GRO: |
---|
| 2779 | + val = up->gro_enabled; |
---|
| 2780 | + break; |
---|
| 2781 | + |
---|
2605 | 2782 | /* The following two cannot be changed on UDP sockets, the return is |
---|
2606 | 2783 | * always 0 (which corresponds to the full checksum coverage of UDP). */ |
---|
2607 | 2784 | case UDPLITE_SEND_CSCOV: |
---|
.. | .. |
---|
2632 | 2809 | return ip_getsockopt(sk, level, optname, optval, optlen); |
---|
2633 | 2810 | } |
---|
2634 | 2811 | |
---|
2635 | | -#ifdef CONFIG_COMPAT |
---|
2636 | | -int compat_udp_getsockopt(struct sock *sk, int level, int optname, |
---|
2637 | | - char __user *optval, int __user *optlen) |
---|
2638 | | -{ |
---|
2639 | | - if (level == SOL_UDP || level == SOL_UDPLITE) |
---|
2640 | | - return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
---|
2641 | | - return compat_ip_getsockopt(sk, level, optname, optval, optlen); |
---|
2642 | | -} |
---|
2643 | | -#endif |
---|
2644 | 2812 | /** |
---|
2645 | 2813 | * udp_poll - wait for a UDP event. |
---|
2646 | | - * @file - file struct |
---|
2647 | | - * @sock - socket |
---|
2648 | | - * @wait - poll table |
---|
| 2814 | + * @file: - file struct |
---|
| 2815 | + * @sock: - socket |
---|
| 2816 | + * @wait: - poll table |
---|
2649 | 2817 | * |
---|
2650 | 2818 | * This is same as datagram poll, except for the special case of |
---|
2651 | 2819 | * blocking sockets. If application is using a blocking fd |
---|
.. | .. |
---|
2719 | 2887 | .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), |
---|
2720 | 2888 | .obj_size = sizeof(struct udp_sock), |
---|
2721 | 2889 | .h.udp_table = &udp_table, |
---|
2722 | | -#ifdef CONFIG_COMPAT |
---|
2723 | | - .compat_setsockopt = compat_udp_setsockopt, |
---|
2724 | | - .compat_getsockopt = compat_udp_getsockopt, |
---|
2725 | | -#endif |
---|
2726 | 2890 | .diag_destroy = udp_abort, |
---|
2727 | 2891 | }; |
---|
2728 | 2892 | EXPORT_SYMBOL(udp_prot); |
---|
.. | .. |
---|
2733 | 2897 | static struct sock *udp_get_first(struct seq_file *seq, int start) |
---|
2734 | 2898 | { |
---|
2735 | 2899 | struct sock *sk; |
---|
2736 | | - struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); |
---|
| 2900 | + struct udp_seq_afinfo *afinfo; |
---|
2737 | 2901 | struct udp_iter_state *state = seq->private; |
---|
2738 | 2902 | struct net *net = seq_file_net(seq); |
---|
| 2903 | + |
---|
| 2904 | + if (state->bpf_seq_afinfo) |
---|
| 2905 | + afinfo = state->bpf_seq_afinfo; |
---|
| 2906 | + else |
---|
| 2907 | + afinfo = PDE_DATA(file_inode(seq->file)); |
---|
2739 | 2908 | |
---|
2740 | 2909 | for (state->bucket = start; state->bucket <= afinfo->udp_table->mask; |
---|
2741 | 2910 | ++state->bucket) { |
---|
.. | .. |
---|
2748 | 2917 | sk_for_each(sk, &hslot->head) { |
---|
2749 | 2918 | if (!net_eq(sock_net(sk), net)) |
---|
2750 | 2919 | continue; |
---|
2751 | | - if (sk->sk_family == afinfo->family) |
---|
| 2920 | + if (afinfo->family == AF_UNSPEC || |
---|
| 2921 | + sk->sk_family == afinfo->family) |
---|
2752 | 2922 | goto found; |
---|
2753 | 2923 | } |
---|
2754 | 2924 | spin_unlock_bh(&hslot->lock); |
---|
.. | .. |
---|
2760 | 2930 | |
---|
2761 | 2931 | static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) |
---|
2762 | 2932 | { |
---|
2763 | | - struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); |
---|
| 2933 | + struct udp_seq_afinfo *afinfo; |
---|
2764 | 2934 | struct udp_iter_state *state = seq->private; |
---|
2765 | 2935 | struct net *net = seq_file_net(seq); |
---|
2766 | 2936 | |
---|
| 2937 | + if (state->bpf_seq_afinfo) |
---|
| 2938 | + afinfo = state->bpf_seq_afinfo; |
---|
| 2939 | + else |
---|
| 2940 | + afinfo = PDE_DATA(file_inode(seq->file)); |
---|
| 2941 | + |
---|
2767 | 2942 | do { |
---|
2768 | 2943 | sk = sk_next(sk); |
---|
2769 | | - } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family)); |
---|
| 2944 | + } while (sk && (!net_eq(sock_net(sk), net) || |
---|
| 2945 | + (afinfo->family != AF_UNSPEC && |
---|
| 2946 | + sk->sk_family != afinfo->family))); |
---|
2770 | 2947 | |
---|
2771 | 2948 | if (!sk) { |
---|
2772 | 2949 | if (state->bucket <= afinfo->udp_table->mask) |
---|
.. | .. |
---|
2811 | 2988 | |
---|
2812 | 2989 | void udp_seq_stop(struct seq_file *seq, void *v) |
---|
2813 | 2990 | { |
---|
2814 | | - struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); |
---|
| 2991 | + struct udp_seq_afinfo *afinfo; |
---|
2815 | 2992 | struct udp_iter_state *state = seq->private; |
---|
| 2993 | + |
---|
| 2994 | + if (state->bpf_seq_afinfo) |
---|
| 2995 | + afinfo = state->bpf_seq_afinfo; |
---|
| 2996 | + else |
---|
| 2997 | + afinfo = PDE_DATA(file_inode(seq->file)); |
---|
2816 | 2998 | |
---|
2817 | 2999 | if (state->bucket <= afinfo->udp_table->mask) |
---|
2818 | 3000 | spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); |
---|
.. | .. |
---|
2830 | 3012 | __u16 srcp = ntohs(inet->inet_sport); |
---|
2831 | 3013 | |
---|
2832 | 3014 | seq_printf(f, "%5d: %08X:%04X %08X:%04X" |
---|
2833 | | - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", |
---|
| 3015 | + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u", |
---|
2834 | 3016 | bucket, src, srcp, dest, destp, sp->sk_state, |
---|
2835 | 3017 | sk_wmem_alloc_get(sp), |
---|
2836 | 3018 | udp_rqueue_get(sp), |
---|
.. | .. |
---|
2856 | 3038 | seq_pad(seq, '\n'); |
---|
2857 | 3039 | return 0; |
---|
2858 | 3040 | } |
---|
| 3041 | + |
---|
| 3042 | +#ifdef CONFIG_BPF_SYSCALL |
---|
| 3043 | +struct bpf_iter__udp { /* Context object that udp_prog_seq_show() fills in and passes to bpf_iter_run_prog(). */ |
---|
| 3044 | + __bpf_md_ptr(struct bpf_iter_meta *, meta); /* iterator metadata supplied by the caller */ |
---|
| 3045 | + __bpf_md_ptr(struct udp_sock *, udp_sk); /* socket being shown; registered as PTR_TO_BTF_ID_OR_NULL in udp_reg_info */ |
---|
| 3046 | + uid_t uid __aligned(8); /* uid value passed in by the show/stop callbacks */ |
---|
| 3047 | + int bucket __aligned(8); /* bucket value passed in by the show/stop callbacks */ |
---|
| 3048 | +}; |
---|
| 3049 | + |
---|
| 3050 | +static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, |
---|
| 3051 | + struct udp_sock *udp_sk, uid_t uid, int bucket) |
---|
| 3052 | +{ |
---|
| 3053 | + struct bpf_iter__udp ctx; |
---|
| 3054 | + /* Pack the arguments into a bpf_iter__udp context, run prog on it and return bpf_iter_run_prog()'s result. */ |
---|
| 3055 | + meta->seq_num--; /* skip SEQ_START_TOKEN */ |
---|
| 3056 | + ctx.meta = meta; |
---|
| 3057 | + ctx.udp_sk = udp_sk; |
---|
| 3058 | + ctx.uid = uid; |
---|
| 3059 | + ctx.bucket = bucket; |
---|
| 3060 | + return bpf_iter_run_prog(prog, &ctx); |
---|
| 3061 | +} |
---|
| 3062 | + |
---|
| 3063 | +static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v) /* seq_file show callback: run the attached BPF program on the socket v. */ |
---|
| 3064 | +{ |
---|
| 3065 | + struct udp_iter_state *state = seq->private; |
---|
| 3066 | + struct bpf_iter_meta meta; |
---|
| 3067 | + struct bpf_prog *prog; |
---|
| 3068 | + struct sock *sk = v; |
---|
| 3069 | + uid_t uid; |
---|
| 3070 | + /* The start token is not a socket: nothing to show, report success. */ |
---|
| 3071 | + if (v == SEQ_START_TOKEN) |
---|
| 3072 | + return 0; |
---|
| 3073 | + |
---|
| 3074 | + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); /* socket owner uid mapped through the seq_file's user namespace */ |
---|
| 3075 | + meta.seq = seq; |
---|
| 3076 | + prog = bpf_iter_get_info(&meta, false); /* NOTE(review): prog is used without a NULL check here, unlike bpf_iter_udp_seq_stop(); confirm this lookup cannot return NULL */ |
---|
| 3077 | + return udp_prog_seq_show(prog, &meta, v, uid, state->bucket); /* v is passed on as the udp_sk argument */ |
---|
| 3078 | +} |
---|
| 3079 | + |
---|
| 3080 | +static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) /* seq_file stop callback: gives the BPF program one last call when v is NULL, then calls udp_seq_stop(). */ |
---|
| 3081 | +{ |
---|
| 3082 | + struct bpf_iter_meta meta; |
---|
| 3083 | + struct bpf_prog *prog; |
---|
| 3084 | + |
---|
| 3085 | + if (!v) { /* presumably v == NULL means the walk ran to completion; TODO confirm against the seq_file contract */ |
---|
| 3086 | + meta.seq = seq; |
---|
| 3087 | + prog = bpf_iter_get_info(&meta, true); |
---|
| 3088 | + if (prog) |
---|
| 3089 | + (void)udp_prog_seq_show(prog, &meta, v, 0, 0); /* final call: udp_sk is v (NULL here), uid and bucket are 0; result deliberately ignored */ |
---|
| 3090 | + } |
---|
| 3091 | + /* Always chain to the regular stop handler, whether or not the program ran. */ |
---|
| 3092 | + udp_seq_stop(seq, v); |
---|
| 3093 | +} |
---|
| 3094 | + |
---|
| 3095 | +static const struct seq_operations bpf_iter_udp_seq_ops = { /* seq_file ops for the BPF iterator: shared start/next, BPF-specific stop/show. NOTE(review): guarded only by CONFIG_BPF_SYSCALL while its visible user udp_seq_info also requires CONFIG_PROC_FS. */ |
---|
| 3096 | + .start = udp_seq_start, |
---|
| 3097 | + .next = udp_seq_next, |
---|
| 3098 | + .stop = bpf_iter_udp_seq_stop, |
---|
| 3099 | + .show = bpf_iter_udp_seq_show, |
---|
| 3100 | +}; |
---|
| 3101 | +#endif |
---|
2859 | 3102 | |
---|
2860 | 3103 | const struct seq_operations udp_seq_ops = { |
---|
2861 | 3104 | .start = udp_seq_start, |
---|
.. | .. |
---|
2974 | 3217 | .init = udp_sysctl_init, |
---|
2975 | 3218 | }; |
---|
2976 | 3219 | |
---|
| 3220 | +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) |
---|
| 3221 | +DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, |
---|
| 3222 | + struct udp_sock *udp_sk, uid_t uid, int bucket) |
---|
| 3223 | + |
---|
| 3224 | +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) /* Set up the iterator's private state: allocate its afinfo, then initialise the net part. Returns 0, -ENOMEM, or the error from bpf_iter_init_seq_net(). */ |
---|
| 3225 | +{ |
---|
| 3226 | + struct udp_iter_state *st = priv_data; |
---|
| 3227 | + struct udp_seq_afinfo *afinfo; |
---|
| 3228 | + int ret; |
---|
| 3229 | + |
---|
| 3230 | + afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); |
---|
| 3231 | + if (!afinfo) |
---|
| 3232 | + return -ENOMEM; |
---|
| 3233 | + |
---|
| 3234 | + afinfo->family = AF_UNSPEC; /* AF_UNSPEC makes udp_get_first()/udp_get_next() accept sockets of any family */ |
---|
| 3235 | + afinfo->udp_table = &udp_table; |
---|
| 3236 | + st->bpf_seq_afinfo = afinfo; /* a non-NULL bpf_seq_afinfo is what the seq helpers use instead of PDE_DATA() */ |
---|
| 3237 | + ret = bpf_iter_init_seq_net(priv_data, aux); |
---|
| 3238 | + if (ret) |
---|
| 3239 | + kfree(afinfo); /* NOTE(review): st->bpf_seq_afinfo still points at the freed object here; confirm nothing reads it after a failed init */ |
---|
| 3240 | + return ret; |
---|
| 3241 | +} |
---|
| 3242 | + |
---|
| 3243 | +static void bpf_iter_fini_udp(void *priv_data) /* Tear down what bpf_iter_init_udp() set up: free the afinfo, then release the net part. */ |
---|
| 3244 | +{ |
---|
| 3245 | + struct udp_iter_state *st = priv_data; |
---|
| 3246 | + |
---|
| 3247 | + kfree(st->bpf_seq_afinfo); /* the object allocated with kmalloc() in bpf_iter_init_udp() */ |
---|
| 3248 | + bpf_iter_fini_seq_net(priv_data); |
---|
| 3249 | +} |
---|
| 3250 | + |
---|
| 3251 | +static const struct bpf_iter_seq_info udp_seq_info = { /* Ties the iterator's seq_file ops to its private-state init/fini hooks; referenced from udp_reg_info. */ |
---|
| 3252 | + .seq_ops = &bpf_iter_udp_seq_ops, |
---|
| 3253 | + .init_seq_private = bpf_iter_init_udp, |
---|
| 3254 | + .fini_seq_private = bpf_iter_fini_udp, |
---|
| 3255 | + .seq_priv_size = sizeof(struct udp_iter_state), /* private area is a struct udp_iter_state, matching the casts in init/fini */ |
---|
| 3256 | +}; |
---|
| 3257 | + |
---|
| 3258 | +static struct bpf_iter_reg udp_reg_info = { /* Registration record for the "udp" iterator target. Not const: bpf_iter_register() fills in btf_id before registering. */ |
---|
| 3259 | + .target = "udp", |
---|
| 3260 | + .ctx_arg_info_size = 1, /* one entry in ctx_arg_info below */ |
---|
| 3261 | + .ctx_arg_info = { |
---|
| 3262 | + { offsetof(struct bpf_iter__udp, udp_sk), /* describes the udp_sk member of the context struct */ |
---|
| 3263 | + PTR_TO_BTF_ID_OR_NULL }, |
---|
| 3264 | + }, |
---|
| 3265 | + .seq_info = &udp_seq_info, |
---|
| 3266 | +}; |
---|
| 3267 | + |
---|
| 3268 | +static void __init bpf_iter_register(void) /* Register the "udp" BPF iterator target; called from udp_init(). A failure is only logged. */ |
---|
| 3269 | +{ |
---|
| 3270 | + udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP]; /* filled in here at init time, not in the static initialiser */ |
---|
| 3271 | + if (bpf_iter_reg_target(&udp_reg_info)) |
---|
| 3272 | + pr_warn("Warning: could not register bpf iterator udp\n"); |
---|
| 3273 | +} |
---|
| 3274 | +#endif |
---|
| 3275 | + |
---|
2977 | 3276 | void __init udp_init(void) |
---|
2978 | 3277 | { |
---|
2979 | 3278 | unsigned long limit; |
---|
.. | .. |
---|
2999 | 3298 | |
---|
3000 | 3299 | if (register_pernet_subsys(&udp_sysctl_ops)) |
---|
3001 | 3300 | panic("UDP: failed to init sysctl parameters.\n"); |
---|
| 3301 | + |
---|
| 3302 | +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) |
---|
| 3303 | + bpf_iter_register(); |
---|
| 3304 | +#endif |
---|
3002 | 3305 | } |
---|