...
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
...
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #ifndef _TCP_H
 #define _TCP_H
...
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <linux/skbuff.h>
-#include <linux/cryptohash.h>
 #include <linux/kref.h>
 #include <linux/ktime.h>
+#include <linux/indirect_call_wrapper.h>

 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
...
 #include <net/tcp_states.h>
 #include <net/inet_ecn.h>
 #include <net/dst.h>
+#include <net/mptcp.h>

 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/siphash.h>

 extern struct inet_hashinfo tcp_hashinfo;

...
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U

-/* The least MTU to use for probing */
+/* The initial MTU to use for probing */
 #define TCP_BASE_MSS		1024

 /* probing interval, default to 10 minutes as per RFC4821 */
...
				  * to combine FIN-WAIT-2 timeout with
				  * TIME-WAIT timer.
				  */
+#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */

 #define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
 #if HZ >= 100
...
 #define TCPOPT_SACK		5	/* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
 #define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
...

 void tcp_tasklet_init(void);

-void tcp_v4_err(struct sk_buff *skb, u32);
+int tcp_v4_err(struct sk_buff *skb, u32);

 void tcp_shutdown(struct sock *sk, int how);

...
			 size_t size, int flags);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+	      int size_goal);
 void tcp_release_cb(struct sock *sk);
 void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
...
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
 __poll_t tcp_poll(struct file *file, struct socket *sock,
		   struct poll_table_struct *wait);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
-		   char __user *optval, unsigned int optlen);
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
-			  char __user *optval, int __user *optlen);
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
-			  char __user *optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+		   unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
 void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len);
 int tcp_set_rcvlowat(struct sock *sk, int val);
 void tcp_data_ready(struct sock *sk);
+#ifdef CONFIG_MMU
 int tcp_mmap(struct file *file, struct socket *sock,
	      struct vm_area_struct *vma);
+#endif
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx,
		       int estab, struct tcp_fastopen_cookie *foc);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

+/*
+ * BPF SKB-less helpers
+ */
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th);
 /*
  * TCP v4 functions exported for the inet6 API
  */
...
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
 void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
				      struct request_sock *req,
...
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct tcp_fastopen_cookie *foc,
-				enum tcp_synack_type synack_type);
+				enum tcp_synack_type synack_type,
+				struct sk_buff *syn_skb);
 int tcp_disconnect(struct sock *sk, int flags);

 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
...
 int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
		       u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+					    const struct tcp_request_sock_ops *af_ops,
+					    struct sock *sk, struct sk_buff *skb);
 #ifdef CONFIG_SYN_COOKIES

 /* Syncookies use a monotonic timer which increments every 60 seconds.
...
 u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
			      u16 *mssp);
 __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-u64 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req, u64 now);
 bool cookie_timestamp_decode(const struct net *net,
			     struct tcp_options_received *opt);
 bool cookie_ecn_ok(const struct tcp_options_received *opt,
...
 void tcp_reset(struct sock *sk);
 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);

 /* tcp_timer.c */
 void tcp_init_xmit_timers(struct sock *);
...

 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
 unsigned int tcp_current_mss(struct sock *sk);
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);

 /* Bound MSS / TSO packet size with the half of the window */
 static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
...
 int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 int tcp_mss_to_mtu(struct sock *sk, int mss);
 void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);

 static inline void tcp_bound_rto(const struct sock *sk)
 {
...

 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 {
+	/* mptcp hooks are only on the slow path */
+	if (sk_is_mptcp((struct sock *)tp))
+		return;
+
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
...
 static inline u32 tcp_rto_min(struct sock *sk)
 {
	const struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = TCP_RTO_MIN;
+	u32 rto_min = inet_csk(sk)->icsk_rto_min;

	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
...

 static inline u64 tcp_clock_ns(void)
 {
-	return local_clock();
+	return ktime_get_ns();
 }

 static inline u64 tcp_clock_us(void)
...
	return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
 }

+/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
+static inline u32 tcp_ns_to_ts(u64 ns)
+{
+	return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+}
+
 /* Could use tcp_clock_us() / 1000, but this version uses a single divide */
 static inline u32 tcp_time_stamp_raw(void)
 {
-	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+	return tcp_ns_to_ts(tcp_clock_ns());
 }

-
-/* Refresh 1us clock of a TCP socket,
- * ensuring monotically increasing values.
- */
-static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
-{
-	u64 val = tcp_clock_us();
-
-	if (val > tp->tcp_mstamp)
-		tp->tcp_mstamp = val;
-}
+void tcp_mstamp_refresh(struct tcp_sock *tp);

 static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 {
...
 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
-	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+	return tcp_ns_to_ts(skb->skb_mstamp_ns);
+}
+
+/* provide the departure time in us unit */
+static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
+{
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
 }

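The helpers above centralize unit handling around the nanosecond clock: TCP_TS_HZ is 1000, so tcp_ns_to_ts() yields a millisecond-granularity TSval, while tcp_skb_timestamp_us() divides the same skb_mstamp_ns down to microseconds. A minimal standalone sketch of the same arithmetic (the constants mirror the kernel's values, but this is illustration, not kernel code):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_USEC	1000ULL
#define TCP_TS_HZ	1000ULL		/* TSval ticks per second (ms based) */

/* same math as tcp_ns_to_ts(): nanosecond clock -> TSval ticks */
static uint32_t ns_to_ts(uint64_t ns)
{
	return (uint32_t)(ns / (NSEC_PER_SEC / TCP_TS_HZ));
}

/* same math as tcp_skb_timestamp_us(): nanosecond clock -> microseconds */
static uint64_t ns_to_us(uint64_t ns)
{
	return ns / NSEC_PER_USEC;
}

int main(void)
{
	uint64_t now_ns = 1234567890123ULL;	/* ~1234.57 s after boot */

	printf("TSval: %u ms-ticks\n", ns_to_ts(now_ns));	/* 1234567 */
	printf("usecs: %llu\n", (unsigned long long)ns_to_us(now_ns));	/* 1234567890 */
	return 0;
}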
...
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
 #define TCPCB_LOST		0x04	/* SKB is lost			*/
 #define TCPCB_TAGBITS		0x07	/* All tag bits			*/
-#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp)	*/
+#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp_ns)	*/
 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
				TCPCB_REPAIRED)
...
	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }

+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
+extern const struct inet_connection_sock_af_ops ipv4_specific;
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
  * as TCP moves IP6CB into a different location in skb->cb[]
...
 #endif
	return 0;
 }
-#endif

-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
-		return true;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
+
 #endif
-	return false;
-}

 /* TCP_SKB_CB reference means this can not be used from early demux */
 static inline int tcp_v4_sdif(struct sk_buff *skb)
...
	return likely(!TCP_SKB_CB(skb)->eor);
 }

+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+					const struct sk_buff *from)
+{
+	return likely(tcp_skb_can_collapse_to(to) &&
+		      mptcp_skb_can_collapse(to, from));
+}
+
 /* Events passed to congestion control interface */
 enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
...
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN	0x2
+#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)

 union tcp_cc_info;

...
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
-			       bool reinit, bool cap_net_admin);
+			       bool cap_net_admin);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);

...
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;

+struct tcp_congestion_ops *tcp_ca_find(const char *name);
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
...
  */
 static inline int tcp_is_sack(const struct tcp_sock *tp)
 {
-	return tp->rx_opt.sack_ok;
+	return likely(tp->rx_opt.sack_ok);
 }

 static inline bool tcp_is_reno(const struct tcp_sock *tp)
...
 {
	const struct tcp_sock *tp = tcp_sk(sk);

+	if (tp->is_cwnd_limited)
+		return true;
+
	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

-	return tp->is_cwnd_limited;
+	return false;
 }

 /* BBR congestion control needs pacing.
...
	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
 }

+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
+ */
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
+{
+	s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
+
+	return delay > 0 ? nsecs_to_jiffies(delay) : 0;
+}
+
+static inline void tcp_reset_xmit_timer(struct sock *sk,
+					const int what,
+					unsigned long when,
+					const unsigned long max_when)
+{
+	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
+				  max_when);
+}
+
 /* Something is really bad, we could not queue an additional packet,
- * because qdisc is full or receiver sent a 0 window.
+ * because qdisc is full or receiver sent a 0 window, or we are paced.
  * We do not want to add fuel to the fire, or abort too early,
  * so make sure the timer we arm now is at least 200ms in the future,
  * regardless of current icsk_rto value (as it could be ~2ms)
...
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  tcp_probe0_base(sk), TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				     tcp_probe0_base(sk), TCP_RTO_MAX);
 }

 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
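The new tcp_reset_xmit_timer() wrapper folds the pacing gap into every timer it arms: when Earliest Departure Time pacing says the flow cannot transmit for another N nanoseconds, firing an RTO or probe timer before then would be pointless. A rough userspace sketch of the jiffies conversion (the HZ value and inputs are assumptions for illustration; the division is what nsecs_to_jiffies() does, modulo rounding):

#include <stdint.h>
#include <stdio.h>

#define HZ 250ULL			/* assumed jiffies rate */

/* same idea as tcp_pacing_delay(): clamp negative gaps to 0,
 * otherwise convert a nanosecond gap into whole jiffies
 */
static unsigned long pacing_delay_jiffies(int64_t wstamp_ns, int64_t clock_ns)
{
	int64_t delay = wstamp_ns - clock_ns;	/* time until EDT allows sending */

	if (delay <= 0)
		return 0;
	return (unsigned long)((uint64_t)delay * HZ / 1000000000ULL);
}

int main(void)
{
	/* EDT 40 ms in the future -> timer pushed out by 10 jiffies at HZ=250 */
	printf("%lu\n", pacing_delay_jiffies(40000000, 0));
	return 0;
}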
...
 static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
 {
-	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
-}
-
-static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
-{
-	return __skb_checksum_complete(skb);
+	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
 }

 static inline bool tcp_checksum_complete(struct sk_buff *skb)
 {
	return !skb_csum_unnecessary(skb) &&
-		__tcp_checksum_complete(skb);
+		__skb_checksum_complete(skb);
 }

 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 int tcp_filter(struct sock *sk, struct sk_buff *skb);
-
-#undef STATE_TRACE
-
-#ifdef STATE_TRACE
-static const char *statename[]={
-	"Unused","Established","Syn Sent","Syn Recv",
-	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
-	"Close Wait","Last ACK","Listen","Closing"
-};
-#endif
 void tcp_set_state(struct sock *sk, int state);
-
 void tcp_done(struct sock *sk);
-
 int tcp_abort(struct sock *sk, int err);

 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
...
	rx_opt->num_sacks = 0;
 }

-u32 tcp_default_init_rwnd(u32 mss);
 void tcp_cwnd_restart(struct sock *sk, s32 delta);

 static inline void tcp_slow_start_after_idle_check(struct sock *sk)
...
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta;

-	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
-	    ca_ops->cong_control)
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+	    tp->packets_out || ca_ops->cong_control)
		return;
	delta = tcp_jiffies32 - tp->lsndtime;
	if (delta > inet_csk(sk)->icsk_rto)
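This hunk, and several more below, wrap sysctl reads in READ_ONCE(). These per-netns sysctls can be rewritten at any moment from /proc with no lock the fast path shares, so the annotation documents the data race and forces the compiler to emit exactly one untorn load instead of caching or refetching the value. A minimal model of the pattern (the struct and field here are stand-ins, not kernel definitions):

/* Minimal model of the kernel's READ_ONCE() for an int-sized field:
 * a volatile access forces a single load of the declared width.
 */
#define READ_ONCE_INT(x)	(*(const volatile int *)&(x))

struct netns_demo {
	int sysctl_tcp_slow_start_after_idle;	/* updated concurrently via /proc */
};

static inline int slow_start_after_idle_enabled(const struct netns_demo *ns)
{
	return READ_ONCE_INT(ns->sysctl_tcp_slow_start_after_idle);
}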
...

 static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+	int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);

	return tcp_adv_win_scale <= 0 ?
		(space>>(-tcp_adv_win_scale)) :
...
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+				  READ_ONCE(sk->sk_backlog.len) -
				  atomic_read(&sk->sk_rmem_alloc));
 }

 static inline int tcp_full_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf);
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
+
+void tcp_cleanup_rbuf(struct sock *sk, int copied);

 /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
  * If 87.5 % (7/8) of the space has been consumed, we want to override
...
 {
	struct net *net = sock_net((struct sock *)tp);

-	return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+	return tp->keepalive_intvl ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
 }

 static inline int keepalive_time_when(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);

-	return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+	return tp->keepalive_time ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }

 static inline int keepalive_probes(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);

-	return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+	return tp->keepalive_probes ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
 }

 static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
...

 static inline int tcp_fin_time(const struct sock *sk)
 {
-	int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+	int fin_timeout = tcp_sk(sk)->linger2 ? :
+		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
...
	struct hlist_node	node;
	u8			keylen;
	u8			family; /* AF_INET or AF_INET6 */
-	union tcp_md5_addr	addr;
	u8			prefixlen;
+	union tcp_md5_addr	addr;
+	int			l3index; /* set if key added with L3 scope */
	u8			key[TCP_MD5SIG_MAXKEYLEN];
	struct rcu_head		rcu;
 };
...
 int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk, const struct sk_buff *skb);
 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
-		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
-		   gfp_t gfp);
+		   int family, u8 prefixlen, int l3index,
+		   const u8 *newkey, u8 newkeylen, gfp_t gfp);
 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
-		   int family, u8 prefixlen);
+		   int family, u8 prefixlen, int l3index);
 struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk);

 #ifdef CONFIG_TCP_MD5SIG
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family);
+#include <linux/jump_label.h>
+extern struct static_key_false tcp_md5_needed;
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
+					   const union tcp_md5_addr *addr,
+					   int family);
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+		  const union tcp_md5_addr *addr, int family)
+{
+	if (!static_branch_unlikely(&tcp_md5_needed))
+		return NULL;
+	return __tcp_md5_do_lookup(sk, l3index, addr, family);
+}
+
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
 #else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+		  const union tcp_md5_addr *addr, int family)
 {
	return NULL;
 }
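Turning tcp_md5_do_lookup() into an inline gated by the tcp_md5_needed static key means sockets that never configured an MD5 key pay no per-packet lookup: static_branch_unlikely() compiles to a patched no-op jump until the first key is installed. As a loose userspace analogy only (real static keys rewrite the instruction stream at runtime; a plain flag shows just the control flow):

#include <stdbool.h>
#include <stddef.h>

struct md5_key;

/* stand-in for 'struct static_key_false tcp_md5_needed':
 * flipped to true once the first key is added, never back
 */
static bool md5_needed;

static struct md5_key *slow_md5_lookup(int l3index, const void *addr, int family)
{
	(void)l3index; (void)addr; (void)family;
	return NULL;	/* stub: a real hash-table walk would go here */
}

static inline struct md5_key *md5_lookup(int l3index, const void *addr,
					 int family)
{
	if (!md5_needed)	/* kernel: static_branch_unlikely() */
		return NULL;	/* fast path: no key ever configured */
	return slow_md5_lookup(l3index, addr, family);
}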
...
	struct msghdr			*data;  /* data in MSG_FASTOPEN */
	size_t				size;
	int				copied;	/* queued in tcp_connect() */
+	struct ubuf_info		*uarg;
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 void tcp_fastopen_destroy_cipher(struct sock *sk);
 void tcp_fastopen_ctx_destroy(struct net *net);
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
-			      void *key, unsigned int len);
+			      void *primary_key, void *backup_key);
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+			    u64 *key);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
...
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
-#define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t)
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+	(TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)

 /* Fastopen key context */
 struct tcp_fastopen_context {
-	struct crypto_cipher	*tfm;
-	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
-	struct rcu_head		rcu;
+	siphash_key_t	key[TCP_FASTOPEN_KEY_MAX];
+	int		num;
+	struct rcu_head	rcu;
 };

 extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
...
 bool tcp_fastopen_active_should_disable(struct sock *sk);
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
 void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
+
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+	struct tcp_fastopen_context *ctx;
+
+	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+	if (!ctx)
+		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+	return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+			       const struct tcp_fastopen_cookie *orig)
+{
+	if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    orig->len == foc->len &&
+	    !memcmp(orig->val, foc->val, foc->len))
+		return true;
+	return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+	return ctx->num;
+}

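tcp_fastopen_get_ctx() prefers the listener's own key context and falls back to the per-netns one; both are RCU pointers, so, as its comment says, callers must hold rcu_read_lock() across the dereference and must not keep the pointer after unlocking. A hedged sketch of the calling convention (example_use_fastopen_key() is invented for illustration; the real callers live in net/ipv4/tcp_fastopen.c):

/* sketch of the calling convention, not a verbatim kernel excerpt */
static int example_use_fastopen_key(const struct sock *sk)
{
	struct tcp_fastopen_context *ctx;
	int nkeys = 0;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (ctx)
		nkeys = tcp_fastopen_context_len(ctx);	/* 1 primary, maybe 1 backup */
	rcu_read_unlock();
	/* ctx must not be dereferenced past this point */
	return nkeys;
}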
 /* Latencies incurred by various limits for a sender. They are
  * chronograph-like stats that are mutually exclusive.
...
	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }

+/**
+ * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
+ * @sk: socket
+ *
+ * Since the write queue can have a temporary empty skb in it,
+ * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
+ */
 static inline bool tcp_write_queue_empty(const struct sock *sk)
 {
-	return skb_queue_empty(&sk->sk_write_queue);
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->write_seq == tp->snd_nxt;
 }

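The rewritten tcp_write_queue_empty() compares sequence numbers instead of inspecting the queue: everything the application has queued lies below tp->write_seq and everything already sent lies below tp->snd_nxt, so the two are equal exactly when no unsent payload (or FIN) remains, even while an empty skb is transiently linked in. A toy check of that equivalence (field values invented):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct tcp_seq_demo {
	uint32_t write_seq;	/* next seq the app will queue at */
	uint32_t snd_nxt;	/* next seq the stack will send */
};

static bool write_queue_empty(const struct tcp_seq_demo *tp)
{
	return tp->write_seq == tp->snd_nxt;	/* nothing queued but unsent */
}

int main(void)
{
	struct tcp_seq_demo tp = { .write_seq = 1000, .snd_nxt = 1000 };

	assert(write_queue_empty(&tp));		/* fully caught up */
	tp.write_seq += 512;			/* app queues 512 bytes */
	assert(!write_queue_empty(&tp));	/* payload awaiting transmit */
	return 0;
}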
 static inline bool tcp_rtx_queue_empty(const struct sock *sk)
...
	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }

-static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
-{
-	if (tcp_write_queue_empty(sk))
-		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-}
-
-static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
-{
-	__skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
 static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
-	__tcp_add_write_queue_tail(sk, skb);
+	__skb_queue_tail(&sk->sk_write_queue, skb);

	/* Queue it, remembering where we must start sending. */
	if (sk->sk_write_queue.next == skb)
...
	struct seq_net_private	p;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
+	struct tcp_seq_afinfo	*bpf_seq_afinfo;
	int			bucket, offset, sbucket, num;
	loff_t			last_pos;
 };
...
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
				netdev_features_t features);
 struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
 int tcp_gro_complete(struct sk_buff *skb);

 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
...
 static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);
-	return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+	return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
 }

-static inline bool tcp_stream_memory_free(const struct sock *sk)
+/* @wake is one when sk_stream_write_space() calls us.
+ * This sends EPOLLOUT only if notsent_bytes is half the limit.
+ * This mimics the strategy used in sock_def_write_space().
+ */
+static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
 {
	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt;
+	u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+			    READ_ONCE(tp->snd_nxt);

-	return notsent_bytes < tcp_notsent_lowat(tp);
+	return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
 }

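The new wake argument doubles the unsent byte count when the check is made from sk_stream_write_space(), so EPOLLOUT is only signalled once the backlog drops below half of notsent_lowat, sparing wakeups that would let the writer add just a trickle of data. Worked numbers under an assumed lowat of 16 KB:

/* lowat = 16384 bytes
 *
 * direct check (wake = 0):     writable iff     notsent < 16384
 * from write_space (wake = 1): writable iff 2 * notsent < 16384,
 *                              i.e.             notsent <  8192
 *
 * e.g. notsent = 12000: sendmsg() may still queue more (12000 < 16384),
 * but no EPOLLOUT is generated yet (24000 >= 16384).
 */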
 #ifdef CONFIG_PROC_FS
...
					const struct sk_buff *skb);
	int		(*md5_parse)(struct sock *sk,
				     int optname,
-				     char __user *optval,
+				     sockptr_t optval,
				     int optlen);
 #endif
 };
...
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   struct tcp_fastopen_cookie *foc,
-			   enum tcp_synack_type synack_type);
+			   enum tcp_synack_type synack_type,
+			   struct sk_buff *syn_skb);
 };
+
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif

 #ifdef CONFIG_SYN_COOKIES
 static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
...
 {
	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
	u32 rto = inet_csk(sk)->icsk_rto;
-	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+	u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);

	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
 }
...
 #define TCP_ULP_MAX		128
 #define TCP_ULP_BUF_MAX		(TCP_ULP_NAME_MAX*TCP_ULP_MAX)

-enum {
-	TCP_ULP_TLS,
-	TCP_ULP_BPF,
-};
-
 struct tcp_ulp_ops {
	struct list_head	list;

	/* initialize ulp */
	int (*init)(struct sock *sk);
+	/* update ulp */
+	void (*update)(struct sock *sk, struct proto *p,
+		       void (*write_space)(struct sock *sk));
	/* cleanup ulp */
	void (*release)(struct sock *sk);
+	/* diagnostic */
+	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+	size_t (*get_info_size)(const struct sock *sk);
+	/* clone ulp */
+	void (*clone)(const struct request_sock *req, struct sock *newsk,
+		      const gfp_t priority);

-	int		uid;
	char		name[TCP_ULP_NAME_MAX];
-	bool		user_visible;
	struct module	*owner;
 };
 int tcp_register_ulp(struct tcp_ulp_ops *type);
 void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
-int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+		    void (*write_space)(struct sock *sk));

 #define MODULE_ALIAS_TCP_ULP(name)				\
	__MODULE_INFO(alias, alias_userspace, name);		\
	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
+struct sk_msg;
+struct sk_psock;
+
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
+#ifdef CONFIG_NET_SOCK_MSG
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+			  int flags);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len, int flags);
+#endif /* CONFIG_NET_SOCK_MSG */
+
+#ifdef CONFIG_CGROUP_BPF
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+	skops->skb = skb;
+	skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+}
+#endif

 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
| 2197 | 2353 | return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); |
|---|
| 2198 | 2354 | } |
|---|
| 2199 | 2355 | |
|---|
| 2356 | +static inline void tcp_bpf_rtt(struct sock *sk) |
|---|
| 2357 | +{ |
|---|
| 2358 | + if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) |
|---|
| 2359 | + tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); |
|---|
| 2360 | +} |
|---|
| 2361 | + |
|---|
| 2200 | 2362 | #if IS_ENABLED(CONFIG_SMC) |
|---|
| 2201 | 2363 | extern struct static_key_false tcp_have_smc; |
|---|
| 2202 | 2364 | #endif |
|---|
| .. | .. |
|---|
| 2205 | 2367 | void clean_acked_data_enable(struct inet_connection_sock *icsk, |
|---|
| 2206 | 2368 | void (*cad)(struct sock *sk, u32 ack_seq)); |
|---|
| 2207 | 2369 | void clean_acked_data_disable(struct inet_connection_sock *icsk); |
|---|
| 2208 | | - |
|---|
| 2370 | +void clean_acked_data_flush(void); |
|---|
| 2209 | 2371 | #endif |
|---|
| 2210 | 2372 | |
|---|
| 2373 | +DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); |
|---|
| 2374 | +static inline void tcp_add_tx_delay(struct sk_buff *skb, |
|---|
| 2375 | + const struct tcp_sock *tp) |
|---|
| 2376 | +{ |
|---|
| 2377 | + if (static_branch_unlikely(&tcp_tx_delay_enabled)) |
|---|
| 2378 | + skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC; |
|---|
| 2379 | +} |
|---|
| 2380 | + |
|---|
| 2381 | +/* Compute Earliest Departure Time for some control packets |
|---|
| 2382 | + * like ACK or RST for TIME_WAIT or non ESTABLISHED sockets. |
|---|
| 2383 | + */ |
|---|
| 2384 | +static inline u64 tcp_transmit_time(const struct sock *sk) |
|---|
| 2385 | +{ |
|---|
| 2386 | + if (static_branch_unlikely(&tcp_tx_delay_enabled)) { |
|---|
| 2387 | + u32 delay = (sk->sk_state == TCP_TIME_WAIT) ? |
|---|
| 2388 | + tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay; |
|---|
| 2389 | + |
|---|
| 2390 | + return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC; |
|---|
| 2391 | + } |
|---|
| 2392 | + return 0; |
|---|
| 2393 | +} |
|---|
| 2394 | + |
|---|
| 2211 | 2395 | #endif /* _TCP_H */ |
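Closing note on the tx-delay additions: tcp_transmit_time() extends the per-socket delay knob to control packets sent without a full socket context (TIME_WAIT ACKs, RSTs) by returning an Earliest Departure Time in nanoseconds that a pacing qdisc such as sch_fq can honor; 0 means send immediately. Worked numbers with an assumed 500 usec configured delay:

/* Assume tcp_clock_ns() == 10,000,000,000 (10 s after boot) and
 * tcp_tx_delay == 500 (usec):
 *
 *   tcp_transmit_time() = 10,000,000,000 + 500 * 1,000
 *                       = 10,000,500,000 ns
 *
 * so the ACK/RST is handed to the qdisc with an EDT 500 us in the
 * future; with the static key off, the function returns 0 and the
 * packet is sent immediately.
 */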
|---|