...
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
...
  *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #ifndef _TCP_H
 #define _TCP_H
...
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <linux/skbuff.h>
-#include <linux/cryptohash.h>
 #include <linux/kref.h>
 #include <linux/ktime.h>
+#include <linux/indirect_call_wrapper.h>

 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
...
 #include <net/tcp_states.h>
 #include <net/inet_ecn.h>
 #include <net/dst.h>
+#include <net/mptcp.h>

 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/siphash.h>

 extern struct inet_hashinfo tcp_hashinfo;

...
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U

-/* The least MTU to use for probing */
+/* The initial MTU to use for probing */
 #define TCP_BASE_MSS		1024

 /* probing interval, default to 10 minutes as per RFC4821 */
...
				  * to combine FIN-WAIT-2 timeout with
				  * TIME-WAIT timer.
				  */
+#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */

 #define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
 #if HZ >= 100
...
 #define TCPOPT_SACK		5	/* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
 #define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
...

 void tcp_tasklet_init(void);

-void tcp_v4_err(struct sk_buff *skb, u32);
+int tcp_v4_err(struct sk_buff *skb, u32);

 void tcp_shutdown(struct sock *sk, int how);

...
			size_t size, int flags);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+	      int size_goal);
 void tcp_release_cb(struct sock *sk);
 void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
...
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags);

-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
-static inline void tcp_dec_quickack_mode(struct sock *sk,
-					 const unsigned int pkts)
+static inline void tcp_dec_quickack_mode(struct sock *sk)
 {
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ack.quick) {
+		/* How many ACKs S/ACKing new data have we sent? */
+		const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0;
+
		if (pkts >= icsk->icsk_ack.quick) {
			icsk->icsk_ack.quick = 0;
			/* Leaving quickack mode we deflate ATO. */
...
 void tcp_init_metrics(struct sock *sk);
 void tcp_metrics_init(void);
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
+void __tcp_close(struct sock *sk, long timeout);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
 __poll_t tcp_poll(struct file *file, struct socket *sock,
		  struct poll_table_struct *wait);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
-		   char __user *optval, unsigned int optlen);
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
-			  char __user *optval, int __user *optlen);
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
-			  char __user *optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+		   unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
 void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len);
 int tcp_set_rcvlowat(struct sock *sk, int val);
 void tcp_data_ready(struct sock *sk);
+#ifdef CONFIG_MMU
 int tcp_mmap(struct file *file, struct socket *sock,
	     struct vm_area_struct *vma);
+#endif
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx,
		       int estab, struct tcp_fastopen_cookie *foc);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

+/*
+ *	BPF SKB-less helpers
+ */
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th);
 /*
  *	TCP v4 functions exported for the inet6 API
  */
...
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
 void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
				      struct request_sock *req,
...
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct tcp_fastopen_cookie *foc,
-				enum tcp_synack_type synack_type);
+				enum tcp_synack_type synack_type,
+				struct sk_buff *syn_skb);
 int tcp_disconnect(struct sock *sk, int flags);

 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
...
 int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
		      u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+					    const struct tcp_request_sock_ops *af_ops,
+					    struct sock *sk, struct sk_buff *skb);
 #ifdef CONFIG_SYN_COOKIES

 /* Syncookies use a monotonic timer which increments every 60 seconds.
...
 u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
			      u16 *mssp);
 __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-u64 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req, u64 now);
 bool cookie_timestamp_decode(const struct net *net,
			     struct tcp_options_received *opt);
 bool cookie_ecn_ok(const struct tcp_options_received *opt,
...
 void tcp_reset(struct sock *sk);
 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);

 /* tcp_timer.c */
 void tcp_init_xmit_timers(struct sock *);
...

 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
 unsigned int tcp_current_mss(struct sock *sk);
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);

 /* Bound MSS / TSO packet size with the half of the window */
 static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
...
 int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 int tcp_mss_to_mtu(struct sock *sk, int mss);
 void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);

 static inline void tcp_bound_rto(const struct sock *sk)
 {
...

 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 {
+	/* mptcp hooks are only on the slow path */
+	if (sk_is_mptcp((struct sock *)tp))
+		return;
+
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
...
 static inline u32 tcp_rto_min(struct sock *sk)
 {
	const struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = TCP_RTO_MIN;
+	u32 rto_min = inet_csk(sk)->icsk_rto_min;

	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
...

 static inline u64 tcp_clock_ns(void)
 {
-	return local_clock();
+	return ktime_get_ns();
 }

 static inline u64 tcp_clock_us(void)
...
	return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
 }

+/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
+static inline u32 tcp_ns_to_ts(u64 ns)
+{
+	return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+}
+
 /* Could use tcp_clock_us() / 1000, but this version uses a single divide */
 static inline u32 tcp_time_stamp_raw(void)
 {
-	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+	return tcp_ns_to_ts(tcp_clock_ns());
 }

-
-/* Refresh 1us clock of a TCP socket,
- * ensuring monotically increasing values.
- */
-static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
-{
-	u64 val = tcp_clock_us();
-
-	if (val > tp->tcp_mstamp)
-		tp->tcp_mstamp = val;
-}
+void tcp_mstamp_refresh(struct tcp_sock *tp);

 static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 {
...

 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
-	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+	return tcp_ns_to_ts(skb->skb_mstamp_ns);
+}
+
+/* provide the departure time in us unit */
+static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
+{
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
 }


...
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
 #define TCPCB_LOST		0x04	/* SKB is lost			*/
 #define TCPCB_TAGBITS		0x07	/* All tag bits			*/
-#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp)	*/
+#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp_ns)	*/
 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
				TCPCB_REPAIRED)
...
	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }

+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
+extern const struct inet_connection_sock_af_ops ipv4_specific;
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
  * as TCP moves IP6CB into a different location in skb->cb[]
...
 #endif
	return 0;
 }
-#endif

-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
-		return true;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
+
 #endif
-	return false;
-}

 /* TCP_SKB_CB reference means this can not be used from early demux */
 static inline int tcp_v4_sdif(struct sk_buff *skb)
...
	return likely(!TCP_SKB_CB(skb)->eor);
 }

+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+					const struct sk_buff *from)
+{
+	return likely(tcp_skb_can_collapse_to(to) &&
+		      mptcp_skb_can_collapse(to, from));
+}
+
 /* Events passed to congestion control interface */
 enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
...
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN	0x2
+#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)

 union tcp_cc_info;

...
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
-			       bool reinit, bool cap_net_admin);
+			       bool cap_net_admin);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);

...
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;

+struct tcp_congestion_ops *tcp_ca_find(const char *name);
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
...
  */
 static inline int tcp_is_sack(const struct tcp_sock *tp)
 {
-	return tp->rx_opt.sack_ok;
+	return likely(tp->rx_opt.sack_ok);
 }

 static inline bool tcp_is_reno(const struct tcp_sock *tp)
...
 {
	const struct tcp_sock *tp = tcp_sk(sk);

+	if (tp->is_cwnd_limited)
+		return true;
+
	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

-	return tp->is_cwnd_limited;
+	return false;
 }

 /* BBR congestion control needs pacing.
...
	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
 }

+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
+ */
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
+{
+	s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
+
+	return delay > 0 ? nsecs_to_jiffies(delay) : 0;
+}
+
+static inline void tcp_reset_xmit_timer(struct sock *sk,
+					const int what,
+					unsigned long when,
+					const unsigned long max_when)
+{
+	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
+				  max_when);
+}
+
 /* Something is really bad, we could not queue an additional packet,
- * because qdisc is full or receiver sent a 0 window.
+ * because qdisc is full or receiver sent a 0 window, or we are paced.
  * We do not want to add fuel to the fire, or abort too early,
  * so make sure the timer we arm now is at least 200ms in the future,
  * regardless of current icsk_rto value (as it could be ~2ms)
...
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  tcp_probe0_base(sk), TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				     tcp_probe0_base(sk), TCP_RTO_MAX);
 }

 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
...
 static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
 {
-	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
-}
-
-static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
-{
-	return __skb_checksum_complete(skb);
+	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
 }

 static inline bool tcp_checksum_complete(struct sk_buff *skb)
 {
	return !skb_csum_unnecessary(skb) &&
-		__tcp_checksum_complete(skb);
+		__skb_checksum_complete(skb);
 }

 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 int tcp_filter(struct sock *sk, struct sk_buff *skb);
-
-#undef STATE_TRACE
-
-#ifdef STATE_TRACE
-static const char *statename[]={
-	"Unused","Established","Syn Sent","Syn Recv",
-	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
-	"Close Wait","Last ACK","Listen","Closing"
-};
-#endif
 void tcp_set_state(struct sock *sk, int state);
-
 void tcp_done(struct sock *sk);
-
 int tcp_abort(struct sock *sk, int err);

 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
...
	rx_opt->num_sacks = 0;
 }

-u32 tcp_default_init_rwnd(u32 mss);
 void tcp_cwnd_restart(struct sock *sk, s32 delta);

 static inline void tcp_slow_start_after_idle_check(struct sock *sk)
...
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta;

-	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
-	    ca_ops->cong_control)
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+	    tp->packets_out || ca_ops->cong_control)
		return;
	delta = tcp_jiffies32 - tp->lsndtime;
	if (delta > inet_csk(sk)->icsk_rto)
...

 static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+	int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);

	return tcp_adv_win_scale <= 0 ?
		(space>>(-tcp_adv_win_scale)) :
...
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+				  READ_ONCE(sk->sk_backlog.len) -
				  atomic_read(&sk->sk_rmem_alloc));
 }

 static inline int tcp_full_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk, sk->sk_rcvbuf);
+	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
+
+void tcp_cleanup_rbuf(struct sock *sk, int copied);

 /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
  * If 87.5 % (7/8) of the space has been consumed, we want to override
...
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);
+	int val;

-	return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+	/* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
+	 * and do_tcp_setsockopt().
+	 */
+	val = READ_ONCE(tp->keepalive_intvl);
+
+	return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
 }

 static inline int keepalive_time_when(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);
+	int val;

-	return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+	/* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
+	val = READ_ONCE(tp->keepalive_time);
+
+	return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }

 static inline int keepalive_probes(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);
+	int val;

-	return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+	/* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
+	 * and do_tcp_setsockopt().
+	 */
+	val = READ_ONCE(tp->keepalive_probes);
+
+	return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
 }

 static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
...

 static inline int tcp_fin_time(const struct sock *sk)
 {
-	int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+	int fin_timeout = tcp_sk(sk)->linger2 ? :
+		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
...
	struct hlist_node	node;
	u8			keylen;
	u8			family; /* AF_INET or AF_INET6 */
-	union tcp_md5_addr	addr;
	u8			prefixlen;
+	union tcp_md5_addr	addr;
+	int			l3index; /* set if key added with L3 scope */
	u8			key[TCP_MD5SIG_MAXKEYLEN];
	struct rcu_head		rcu;
 };
...
 int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk, const struct sk_buff *skb);
 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
-		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
-		   gfp_t gfp);
+		   int family, u8 prefixlen, int l3index,
+		   const u8 *newkey, u8 newkeylen, gfp_t gfp);
 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
-		   int family, u8 prefixlen);
+		   int family, u8 prefixlen, int l3index);
 struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk);

 #ifdef CONFIG_TCP_MD5SIG
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family);
+#include <linux/jump_label.h>
+extern struct static_key_false tcp_md5_needed;
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
+					   const union tcp_md5_addr *addr,
+					   int family);
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+		  const union tcp_md5_addr *addr, int family)
+{
+	if (!static_branch_unlikely(&tcp_md5_needed))
+		return NULL;
+	return __tcp_md5_do_lookup(sk, l3index, addr, family);
+}
+
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
 #else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+		  const union tcp_md5_addr *addr, int family)
 {
	return NULL;
 }
...
	struct msghdr			*data;  /* data in MSG_FASTOPEN */
	size_t				size;
	int				copied;	/* queued in tcp_connect() */
+	struct ubuf_info		*uarg;
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 void tcp_fastopen_destroy_cipher(struct sock *sk);
 void tcp_fastopen_ctx_destroy(struct net *net);
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
-			      void *key, unsigned int len);
+			      void *primary_key, void *backup_key);
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+			    u64 *key);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
...
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
-#define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t)
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+	(TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)

 /* Fastopen key context */
 struct tcp_fastopen_context {
-	struct crypto_cipher	*tfm;
-	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
-	struct rcu_head		rcu;
+	siphash_key_t	key[TCP_FASTOPEN_KEY_MAX];
+	int		num;
+	struct rcu_head	rcu;
 };

 extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
...
 bool tcp_fastopen_active_should_disable(struct sock *sk);
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
 void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
+
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+	struct tcp_fastopen_context *ctx;
+
+	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+	if (!ctx)
+		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+	return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+			       const struct tcp_fastopen_cookie *orig)
+{
+	if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    orig->len == foc->len &&
+	    !memcmp(orig->val, foc->val, foc->len))
+		return true;
+	return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+	return ctx->num;
+}

 /* Latencies incurred by various limits for a sender. They are
  * chronograph-like stats that are mutually exclusive.
...
	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }

+/**
+ * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
+ * @sk: socket
+ *
+ * Since the write queue can have a temporary empty skb in it,
+ * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
+ */
 static inline bool tcp_write_queue_empty(const struct sock *sk)
 {
-	return skb_queue_empty(&sk->sk_write_queue);
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->write_seq == tp->snd_nxt;
 }

 static inline bool tcp_rtx_queue_empty(const struct sock *sk)
...
	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }

-static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
-{
-	if (tcp_write_queue_empty(sk))
-		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-}
-
-static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
-{
-	__skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
 static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
-	__tcp_add_write_queue_tail(sk, skb);
+	__skb_queue_tail(&sk->sk_write_queue, skb);

	/* Queue it, remembering where we must start sending. */
	if (sk->sk_write_queue.next == skb)
...
	struct seq_net_private	p;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
+	struct tcp_seq_afinfo	*bpf_seq_afinfo;
	int			bucket, offset, sbucket, num;
	loff_t			last_pos;
 };
...
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
				netdev_features_t features);
 struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
 int tcp_gro_complete(struct sk_buff *skb);

 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
...
 static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 {
	struct net *net = sock_net((struct sock *)tp);
-	return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+	u32 val;
+
+	val = READ_ONCE(tp->notsent_lowat);
+
+	return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
 }

-static inline bool tcp_stream_memory_free(const struct sock *sk)
+/* @wake is one when sk_stream_write_space() calls us.
+ * This sends EPOLLOUT only if notsent_bytes is half the limit.
+ * This mimics the strategy used in sock_def_write_space().
+ */
+static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
 {
	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt;
+	u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+			    READ_ONCE(tp->snd_nxt);

-	return notsent_bytes < tcp_notsent_lowat(tp);
+	return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
 }

 #ifdef CONFIG_PROC_FS
...
					  const struct sk_buff *skb);
	int		(*md5_parse)(struct sock *sk,
				     int optname,
-				     char __user *optval,
+				     sockptr_t optval,
				     int optlen);
 #endif
 };
...
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   struct tcp_fastopen_cookie *foc,
-			   enum tcp_synack_type synack_type);
+			   enum tcp_synack_type synack_type,
+			   struct sk_buff *syn_skb);
 };
+
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif

 #ifdef CONFIG_SYN_COOKIES
 static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
...
 {
	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
	u32 rto = inet_csk(sk)->icsk_rto;
-	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+	u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);

	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
 }
...
 #define TCP_ULP_MAX		128
 #define TCP_ULP_BUF_MAX		(TCP_ULP_NAME_MAX*TCP_ULP_MAX)

-enum {
-	TCP_ULP_TLS,
-	TCP_ULP_BPF,
-};
-
 struct tcp_ulp_ops {
	struct list_head	list;

	/* initialize ulp */
	int (*init)(struct sock *sk);
+	/* update ulp */
+	void (*update)(struct sock *sk, struct proto *p,
+		       void (*write_space)(struct sock *sk));
	/* cleanup ulp */
	void (*release)(struct sock *sk);
+	/* diagnostic */
+	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+	size_t (*get_info_size)(const struct sock *sk);
+	/* clone ulp */
+	void (*clone)(const struct request_sock *req, struct sock *newsk,
+		      const gfp_t priority);

-	int		uid;
	char		name[TCP_ULP_NAME_MAX];
-	bool		user_visible;
	struct module	*owner;
 };
 int tcp_register_ulp(struct tcp_ulp_ops *type);
 void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
-int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+		    void (*write_space)(struct sock *sk));

 #define MODULE_ALIAS_TCP_ULP(name)				\
	__MODULE_INFO(alias, alias_userspace, name);		\
	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
+struct sk_msg;
+struct sk_psock;
+
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
+#ifdef CONFIG_NET_SOCK_MSG
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+			  int flags);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len, int flags);
+#endif /* CONFIG_NET_SOCK_MSG */
+
+#ifdef CONFIG_CGROUP_BPF
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+	skops->skb = skb;
+	skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+}
+#endif

 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
...
	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 }

+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
+		tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
...
 void clean_acked_data_enable(struct inet_connection_sock *icsk,
			     void (*cad)(struct sock *sk, u32 ack_seq));
 void clean_acked_data_disable(struct inet_connection_sock *icsk);
-
+void clean_acked_data_flush(void);
 #endif

+DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
+static inline void tcp_add_tx_delay(struct sk_buff *skb,
+				    const struct tcp_sock *tp)
+{
+	if (static_branch_unlikely(&tcp_tx_delay_enabled))
+		skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC;
+}
+
+/* Compute Earliest Departure Time for some control packets
+ * like ACK or RST for TIME_WAIT or non ESTABLISHED sockets.
+ */
+static inline u64 tcp_transmit_time(const struct sock *sk)
+{
+	if (static_branch_unlikely(&tcp_tx_delay_enabled)) {
+		u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
+			    tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;
+
+		return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC;
+	}
+	return 0;
+}
+
 #endif	/* _TCP_H */