...
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
...
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #ifndef _TCP_H
 #define _TCP_H
...
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <linux/skbuff.h>
-#include <linux/cryptohash.h>
 #include <linux/kref.h>
 #include <linux/ktime.h>
+#include <linux/indirect_call_wrapper.h>

 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
...
 #include <net/tcp_states.h>
 #include <net/inet_ecn.h>
 #include <net/dst.h>
+#include <net/mptcp.h>

 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/siphash.h>

 extern struct inet_hashinfo tcp_hashinfo;

...
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS	88U

-/* The least MTU to use for probing */
+/* The initial MTU to use for probing */
 #define TCP_BASE_MSS	1024

 /* probing interval, default to 10 minutes as per RFC4821 */
...
				 * to combine FIN-WAIT-2 timeout with
				 * TIME-WAIT timer.
				 */
+#define TCP_FIN_TIMEOUT_MAX	(120 * HZ) /* max TCP_LINGER2 value (two minutes) */

 #define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
 #if HZ >= 100
...
 #define TCPOPT_SACK		5	/* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
 #define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
...

 void tcp_tasklet_init(void);

-void tcp_v4_err(struct sk_buff *skb, u32);
+int tcp_v4_err(struct sk_buff *skb, u32);

 void tcp_shutdown(struct sock *sk, int how);

...
			 size_t size, int flags);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+	      int size_goal);
 void tcp_release_cb(struct sock *sk);
 void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
...
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
 __poll_t tcp_poll(struct file *file, struct socket *sock,
		   struct poll_table_struct *wait);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
-		   char __user *optval, unsigned int optlen);
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
-			  char __user *optval, int __user *optlen);
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
-			  char __user *optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+		   unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
 void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len);
 int tcp_set_rcvlowat(struct sock *sk, int val);
 void tcp_data_ready(struct sock *sk);
+#ifdef CONFIG_MMU
 int tcp_mmap(struct file *file, struct socket *sock,
	      struct vm_area_struct *vma);
+#endif
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx,
		       int estab, struct tcp_fastopen_cookie *foc);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

+/*
+ * BPF SKB-less helpers
+ */
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th);
 /*
  * TCP v4 functions exported for the inet6 API
  */
...
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
 void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
				       struct request_sock *req,
...
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
				 struct request_sock *req,
				 struct tcp_fastopen_cookie *foc,
-				 enum tcp_synack_type synack_type);
+				 enum tcp_synack_type synack_type,
+				 struct sk_buff *syn_skb);
 int tcp_disconnect(struct sock *sk, int flags);

 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
...
 int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
		       u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+					    const struct tcp_request_sock_ops *af_ops,
+					    struct sock *sk, struct sk_buff *skb);
 #ifdef CONFIG_SYN_COOKIES

 /* Syncookies use a monotonic timer which increments every 60 seconds.
...
 u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
			       u16 *mssp);
 __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-u64 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req, u64 now);
 bool cookie_timestamp_decode(const struct net *net,
			      struct tcp_options_received *opt);
 bool cookie_ecn_ok(const struct tcp_options_received *opt,
...
 void tcp_reset(struct sock *sk);
 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);

 /* tcp_timer.c */
 void tcp_init_xmit_timers(struct sock *);
...

 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
 unsigned int tcp_current_mss(struct sock *sk);
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);

 /* Bound MSS / TSO packet size with the half of the window */
 static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
...
 int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 int tcp_mss_to_mtu(struct sock *sk, int mss);
 void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);

 static inline void tcp_bound_rto(const struct sock *sk)
 {
...

 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 {
+	/* mptcp hooks are only on the slow path */
+	if (sk_is_mptcp((struct sock *)tp))
+		return;
+
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
...
 static inline u32 tcp_rto_min(struct sock *sk)
 {
	const struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = TCP_RTO_MIN;
+	u32 rto_min = inet_csk(sk)->icsk_rto_min;

	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
...

 static inline u64 tcp_clock_ns(void)
 {
-	return local_clock();
+	return ktime_get_ns();
 }

 static inline u64 tcp_clock_us(void)
...
	return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
 }

+/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
+static inline u32 tcp_ns_to_ts(u64 ns)
+{
+	return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+}
+
 /* Could use tcp_clock_us() / 1000, but this version uses a single divide */
 static inline u32 tcp_time_stamp_raw(void)
 {
-	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+	return tcp_ns_to_ts(tcp_clock_ns());
 }
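
With TCP_TS_HZ at 1000, as it is defined in this era of the tree, tcp_ns_to_ts() is just a nanoseconds-to-milliseconds divide, and tcp_time_stamp_raw() becomes the single-divide TSval clock the comment promises. A minimal userspace sketch of the same arithmetic (the constant values are assumptions, not part of this hunk):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define TCP_TS_HZ    1000ULL    /* assumed: TSval ticks once per ms */

static uint32_t tcp_ns_to_ts(uint64_t ns)
{
        /* same divide as the kernel helper: ns -> TSval ticks */
        return (uint32_t)(ns / (NSEC_PER_SEC / TCP_TS_HZ));
}

int main(void)
{
        /* 2.5 s worth of nanoseconds -> 2500 TSval ticks */
        printf("%u\n", tcp_ns_to_ts(2500ULL * 1000 * 1000));
        return 0;
}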

-
-/* Refresh 1us clock of a TCP socket,
- * ensuring monotically increasing values.
- */
-static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
-{
-	u64 val = tcp_clock_us();
-
-	if (val > tp->tcp_mstamp)
-		tp->tcp_mstamp = val;
-}
+void tcp_mstamp_refresh(struct tcp_sock *tp);

 static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 {
...

 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
-	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+	return tcp_ns_to_ts(skb->skb_mstamp_ns);
+}
+
+/* provide the departure time in us unit */
+static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
+{
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
 }


...
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted */
 #define TCPCB_LOST		0x04	/* SKB is lost */
 #define TCPCB_TAGBITS		0x07	/* All tag bits */
-#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp) */
+#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp_ns) */
 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame */
 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
				TCPCB_REPAIRED)
...
	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }

+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
+extern const struct inet_connection_sock_af_ops ipv4_specific;
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
  * as TCP moves IP6CB into a different location in skb->cb[]
...
 #endif
	return 0;
 }
-#endif

-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
-		return true;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
+
 #endif
-	return false;
-}

 /* TCP_SKB_CB reference means this can not be used from early demux */
 static inline int tcp_v4_sdif(struct sk_buff *skb)
...
	return likely(!TCP_SKB_CB(skb)->eor);
 }

+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+					const struct sk_buff *from)
+{
+	return likely(tcp_skb_can_collapse_to(to) &&
+		      mptcp_skb_can_collapse(to, from));
+}
+
 /* Events passed to congestion control interface */
 enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
...
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN	0x2
+#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)

 union tcp_cc_info;

...
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
-			       bool reinit, bool cap_net_admin);
+			       bool cap_net_admin);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);

...
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;

+struct tcp_congestion_ops *tcp_ca_find(const char *name);
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
...
  */
 static inline int tcp_is_sack(const struct tcp_sock *tp)
 {
-	return tp->rx_opt.sack_ok;
+	return likely(tp->rx_opt.sack_ok);
 }

 static inline bool tcp_is_reno(const struct tcp_sock *tp)
...
 {
	const struct tcp_sock *tp = tcp_sk(sk);

+	if (tp->is_cwnd_limited)
+		return true;
+
	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

-	return tp->is_cwnd_limited;
+	return false;
 }
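
Reordering this predicate makes the is_cwnd_limited flag authoritative and keeps the slow-start allowance (cwnd may grow until it is twice what was ACKed) as the only other way to report being limited. Congestion modules gate their window growth on it; a sketch of that Reno-style calling pattern (the callback name and body are illustrative, not part of this diff):

static void example_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Growing a window the sender is not even filling would only
	 * store up a burst for later, so bail out early.
	 */
	if (!tcp_is_cwnd_limited(sk))
		return;

	if (tcp_in_slow_start(tp))
		acked = tcp_slow_start(tp, acked);
	if (acked)
		tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}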

 /* BBR congestion control needs pacing.
...
	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
 }

+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
+ */
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
+{
+	s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
+
+	return delay > 0 ? nsecs_to_jiffies(delay) : 0;
+}
+
+static inline void tcp_reset_xmit_timer(struct sock *sk,
+					const int what,
+					unsigned long when,
+					const unsigned long max_when)
+{
+	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
+				  max_when);
+}
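
tcp_wstamp_ns is the earliest departure time the pacer has stamped on the next transmit and tcp_clock_cache is a cached "now", so the clamped difference is how far the rearmed timer can safely be pushed out. Worked numbers, assuming HZ = 1000 so one jiffy is 1 ms (values illustrative):

/* Illustration only:
 *
 *   tcp_wstamp_ns   = 5,000,000 ns   (pacer's next allowed send)
 *   tcp_clock_cache = 3,000,000 ns   (cached "now")
 *   delay           = 2,000,000 ns -> nsecs_to_jiffies() -> 2 jiffies
 *
 * tcp_reset_xmit_timer() arms the timer 2 jiffies later than asked,
 * so it cannot fire before the pacer would even release a packet.
 * A tcp_wstamp_ns in the past yields a negative delta and adds 0.
 */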
+
 /* Something is really bad, we could not queue an additional packet,
- * because qdisc is full or receiver sent a 0 window.
+ * because qdisc is full or receiver sent a 0 window, or we are paced.
  * We do not want to add fuel to the fire, or abort too early,
  * so make sure the timer we arm now is at least 200ms in the future,
  * regardless of current icsk_rto value (as it could be ~2ms)
...
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  tcp_probe0_base(sk), TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				     tcp_probe0_base(sk), TCP_RTO_MAX);
 }

 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
...
 static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				    __be32 daddr, __wsum base)
 {
-	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
-}
-
-static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
-{
-	return __skb_checksum_complete(skb);
+	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
 }

 static inline bool tcp_checksum_complete(struct sk_buff *skb)
 {
	return !skb_csum_unnecessary(skb) &&
-		__tcp_checksum_complete(skb);
+		__skb_checksum_complete(skb);
 }

 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 int tcp_filter(struct sock *sk, struct sk_buff *skb);
-
-#undef STATE_TRACE
-
-#ifdef STATE_TRACE
-static const char *statename[]={
-	"Unused","Established","Syn Sent","Syn Recv",
-	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
-	"Close Wait","Last ACK","Listen","Closing"
-};
-#endif
 void tcp_set_state(struct sock *sk, int state);
-
 void tcp_done(struct sock *sk);
-
 int tcp_abort(struct sock *sk, int err);

 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
...
	rx_opt->num_sacks = 0;
 }

-u32 tcp_default_init_rwnd(u32 mss);
 void tcp_cwnd_restart(struct sock *sk, s32 delta);

 static inline void tcp_slow_start_after_idle_check(struct sock *sk)
...
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta;

-	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
-	    ca_ops->cong_control)
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+	    tp->packets_out || ca_ops->cong_control)
		return;
	delta = tcp_jiffies32 - tp->lsndtime;
	if (delta > inet_csk(sk)->icsk_rto)
...

 static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+	int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);

	return tcp_adv_win_scale <= 0 ?
		(space>>(-tcp_adv_win_scale)) :
...
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+				  READ_ONCE(sk->sk_backlog.len) -
				  atomic_read(&sk->sk_rmem_alloc));
 }
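
For orientation while reading the READ_ONCE() conversion: tcp_win_from_space() turns free receive-buffer space into an advertisable window using the tcp_adv_win_scale sysctl, reserving a power-of-two fraction for skb overhead. A worked example (buffer size illustrative; the positive-scale branch of the function is in the part elided above):

/* Illustration only, space = 65536 bytes free:
 *
 *   tcp_adv_win_scale = 1  -> space - (space >> 1) = 32768
 *   tcp_adv_win_scale = 2  -> space - (space >> 2) = 49152
 *   tcp_adv_win_scale = -1 -> space >> 1           = 32768
 *
 * A positive scale reserves space/2^scale for overhead; a scale
 * <= 0 advertises space/2^-scale directly.
 */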

 static inline int tcp_full_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf);
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
+
+void tcp_cleanup_rbuf(struct sock *sk, int copied);

 /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
  * If 87.5 % (7/8) of the space has been consumed, we want to override
...
 {
	struct net *net = sock_net((struct sock *)tp);

-	return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+	return tp->keepalive_intvl ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
 }

 static inline int keepalive_time_when(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);

-	return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+	return tp->keepalive_time ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }

 static inline int keepalive_probes(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);

-	return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+	return tp->keepalive_probes ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
 }
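
All three keepalive getters follow the same convention as the other sysctl reads converted in this patch: the per-socket value wins if set, and the net-namespace fallback is sampled with READ_ONCE() because a writer can change it through /proc at any moment. The pattern in isolation (names illustrative):

static int example_tunable;	/* updated from a sysctl handler */

static void example_writer(int val)
{
	WRITE_ONCE(example_tunable, val);	/* paired store */
}

static int example_reader(void)
{
	/* exactly one load: no tearing, no compiler re-reads */
	return READ_ONCE(example_tunable);
}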

 static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
...

 static inline int tcp_fin_time(const struct sock *sk)
 {
-	int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+	int fin_timeout = tcp_sk(sk)->linger2 ? :
+		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
...
	struct hlist_node	node;
	u8			keylen;
	u8			family; /* AF_INET or AF_INET6 */
-	union tcp_md5_addr	addr;
	u8			prefixlen;
+	union tcp_md5_addr	addr;
+	int			l3index; /* set if key added with L3 scope */
	u8			key[TCP_MD5SIG_MAXKEYLEN];
	struct rcu_head		rcu;
 };
...
 int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			 const struct sock *sk, const struct sk_buff *skb);
 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
-		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
-		   gfp_t gfp);
+		   int family, u8 prefixlen, int l3index,
+		   const u8 *newkey, u8 newkeylen, gfp_t gfp);
 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
-		   int family, u8 prefixlen);
+		   int family, u8 prefixlen, int l3index);
 struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					  const struct sock *addr_sk);

 #ifdef CONFIG_TCP_MD5SIG
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family);
+#include <linux/jump_label.h>
+extern struct static_key_false tcp_md5_needed;
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
+					   const union tcp_md5_addr *addr,
+					   int family);
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+		  const union tcp_md5_addr *addr, int family)
+{
+	if (!static_branch_unlikely(&tcp_md5_needed))
+		return NULL;
+	return __tcp_md5_do_lookup(sk, l3index, addr, family);
+}
+
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
 #else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+		  const union tcp_md5_addr *addr, int family)
 {
	return NULL;
 }
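
The new tcp_md5_needed static key makes the no-MD5 case, which is virtually every socket, a patched-out branch: until a first key is installed and the key is flipped, tcp_md5_do_lookup() returns NULL without a single memory load. The mechanism in miniature (names illustrative; this sketch assumes the add path bumps the key with static_branch_inc(), which is not shown in this hunk):

int example_slow_lookup(void);		/* illustrative */

DEFINE_STATIC_KEY_FALSE(example_needed);

void example_enable(void)
{
	static_branch_inc(&example_needed);	/* first user: patch branch in */
}

int example_lookup(void)
{
	/* compiles to a NOP-covered jump while the key is false */
	if (!static_branch_unlikely(&example_needed))
		return 0;
	return example_slow_lookup();
}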
...
	struct msghdr			*data;  /* data in MSG_FASTOPEN */
	size_t				size;
	int				copied;	/* queued in tcp_connect() */
+	struct ubuf_info		*uarg;
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 void tcp_fastopen_destroy_cipher(struct sock *sk);
 void tcp_fastopen_ctx_destroy(struct net *net);
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
-			      void *key, unsigned int len);
+			      void *primary_key, void *backup_key);
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+			    u64 *key);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
...
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
-#define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t)
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+	(TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)

 /* Fastopen key context */
 struct tcp_fastopen_context {
-	struct crypto_cipher	*tfm;
-	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
-	struct rcu_head		rcu;
+	siphash_key_t	key[TCP_FASTOPEN_KEY_MAX];
+	int		num;
+	struct rcu_head	rcu;
 };

 extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
...
 bool tcp_fastopen_active_should_disable(struct sock *sk);
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
 void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
+
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+	struct tcp_fastopen_context *ctx;
+
+	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+	if (!ctx)
+		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+	return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+			       const struct tcp_fastopen_cookie *orig)
+{
+	if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    orig->len == foc->len &&
+	    !memcmp(orig->val, foc->val, foc->len))
+		return true;
+	return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+	return ctx->num;
+}
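
Replacing the single crypto_cipher with an array of up to two siphash keys is what makes key rotation invisible to clients: new cookies are minted with key[0] while cookies minted under the previous key can still validate against key[1]. A sketch of that check order (only the struct layout and the two helpers above come from this diff; example_gen_cookie() is a stand-in for the real siphash computation):

static bool example_cookie_valid(struct tcp_fastopen_context *ctx,
				 const struct tcp_fastopen_cookie *foc,
				 const void *peer_addr)
{
	struct tcp_fastopen_cookie expect;
	int i;

	for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
		/* hash the peer address under primary, then backup */
		example_gen_cookie(&ctx->key[i], peer_addr, &expect);
		if (tcp_fastopen_cookie_match(foc, &expect))
			return true;
	}
	return false;
}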

 /* Latencies incurred by various limits for a sender. They are
  * chronograph-like stats that are mutually exclusive.
...
	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }

+/**
+ * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
+ * @sk: socket
+ *
+ * Since the write queue can have a temporary empty skb in it,
+ * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
+ */
 static inline bool tcp_write_queue_empty(const struct sock *sk)
 {
-	return skb_queue_empty(&sk->sk_write_queue);
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->write_seq == tp->snd_nxt;
 }
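
The new predicate asks the sequence space instead of the queue: write_seq is the highest byte the application has queued, snd_nxt the next byte to be transmitted, so equality means nothing is pending even while a temporary zero-payload skb is still linked. In numbers (illustrative):

/* Illustration only:
 *
 *   snd_nxt   = 1500  (next sequence to send)
 *   write_seq = 1500  -> "empty": all queued data already sent,
 *                        even with an empty skb on the list
 *   write_seq = 1700  -> 200 bytes queued, not yet sent
 */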

 static inline bool tcp_rtx_queue_empty(const struct sock *sk)
...
	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }

-static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
-{
-	if (tcp_write_queue_empty(sk))
-		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-}
-
-static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
-{
-	__skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
 static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
-	__tcp_add_write_queue_tail(sk, skb);
+	__skb_queue_tail(&sk->sk_write_queue, skb);

	/* Queue it, remembering where we must start sending. */
	if (sk->sk_write_queue.next == skb)
...
	struct seq_net_private	p;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
+	struct tcp_seq_afinfo	*bpf_seq_afinfo;
	int			bucket, offset, sbucket, num;
	loff_t			last_pos;
 };
...
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
				netdev_features_t features);
 struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
 int tcp_gro_complete(struct sk_buff *skb);

 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
...
 static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);
-	return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+	return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
 }

-static inline bool tcp_stream_memory_free(const struct sock *sk)
+/* @wake is one when sk_stream_write_space() calls us.
+ * This sends EPOLLOUT only if notsent_bytes is half the limit.
+ * This mimics the strategy used in sock_def_write_space().
+ */
+static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
 {
	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt;
+	u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+			    READ_ONCE(tp->snd_nxt);

-	return notsent_bytes < tcp_notsent_lowat(tp);
+	return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
 }
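
The `<< wake` adds hysteresis on the wakeup path: when called from sk_stream_write_space() with wake == 1, notsent_bytes is doubled before the comparison, so EPOLLOUT is raised only once the unsent backlog has drained below half of notsent_lowat instead of oscillating at the exact threshold. In numbers (lowat value illustrative):

/* Illustration only, notsent_lowat = 131072 (128 KB):
 *
 *   poll path   (wake = 0):  writable when notsent_bytes < 131072
 *   wakeup path (wake = 1):  (notsent_bytes << 1) < 131072
 *                            -> writers woken only below 65536
 *
 * A woken writer therefore starts with at least half the budget
 * free, rather than being tickled for every few drained bytes.
 */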

 #ifdef CONFIG_PROC_FS
...
				const struct sk_buff *skb);
	int		(*md5_parse)(struct sock *sk,
				     int optname,
-				     char __user *optval,
+				     sockptr_t optval,
				     int optlen);
 #endif
 };
...
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   struct tcp_fastopen_cookie *foc,
-			   enum tcp_synack_type synack_type);
+			   enum tcp_synack_type synack_type,
+			   struct sk_buff *syn_skb);
 };
+
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif

 #ifdef CONFIG_SYN_COOKIES
 static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
...
 {
	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
	u32 rto = inet_csk(sk)->icsk_rto;
-	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+	u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);

	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
 }
...
 #define TCP_ULP_MAX		128
 #define TCP_ULP_BUF_MAX	(TCP_ULP_NAME_MAX*TCP_ULP_MAX)

-enum {
-	TCP_ULP_TLS,
-	TCP_ULP_BPF,
-};
-
 struct tcp_ulp_ops {
	struct list_head	list;

	/* initialize ulp */
	int (*init)(struct sock *sk);
+	/* update ulp */
+	void (*update)(struct sock *sk, struct proto *p,
+		       void (*write_space)(struct sock *sk));
	/* cleanup ulp */
	void (*release)(struct sock *sk);
+	/* diagnostic */
+	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+	size_t (*get_info_size)(const struct sock *sk);
+	/* clone ulp */
+	void (*clone)(const struct request_sock *req, struct sock *newsk,
+		      const gfp_t priority);

-	int		uid;
	char		name[TCP_ULP_NAME_MAX];
-	bool		user_visible;
	struct module	*owner;
 };
 int tcp_register_ulp(struct tcp_ulp_ops *type);
 void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
-int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+		    void (*write_space)(struct sock *sk));

 #define MODULE_ALIAS_TCP_ULP(name)				\
	__MODULE_INFO(alias, alias_userspace, name);		\
	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
+struct sk_msg;
+struct sk_psock;
+
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
+#ifdef CONFIG_NET_SOCK_MSG
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+			  int flags);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len, int flags);
+#endif /* CONFIG_NET_SOCK_MSG */
+
+#ifdef CONFIG_CGROUP_BPF
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+	skops->skb = skb;
+	skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+}
+#endif

 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
...
	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 }

+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
+		tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
...
 void clean_acked_data_enable(struct inet_connection_sock *icsk,
			      void (*cad)(struct sock *sk, u32 ack_seq));
 void clean_acked_data_disable(struct inet_connection_sock *icsk);
-
+void clean_acked_data_flush(void);
 #endif

+DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
+static inline void tcp_add_tx_delay(struct sk_buff *skb,
+				    const struct tcp_sock *tp)
+{
+	if (static_branch_unlikely(&tcp_tx_delay_enabled))
+		skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC;
+}
+
+/* Compute Earliest Departure Time for some control packets
+ * like ACK or RST for TIME_WAIT or non ESTABLISHED sockets.
+ */
+static inline u64 tcp_transmit_time(const struct sock *sk)
+{
+	if (static_branch_unlikely(&tcp_tx_delay_enabled)) {
+		u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
+			tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;
+
+		return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC;
+	}
+	return 0;
+}
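
Both helpers sit behind the tcp_tx_delay_enabled static key, so sockets that never set the TCP_TX_DELAY option pay nothing; when enabled, the per-socket delay in microseconds is folded straight into the earliest departure time that EDT-aware pacers such as sch_fq honor. In numbers (delay value illustrative):

/* Illustration only, tcp_tx_delay = 20000 usec (20 ms):
 *
 *   data skbs:  skb->skb_mstamp_ns += 20000 * NSEC_PER_USEC,
 *               so the pacing qdisc releases them 20 ms later.
 *   ACK/RST:    tcp_transmit_time() = tcp_clock_ns() + 20 ms,
 *               giving control packets the same artificial delay.
 */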
+
 #endif	/* _TCP_H */
---|