From b22da3d8526a935aa31e086e63f60ff3246cb61c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 07:24:11 +0000
Subject: [PATCH] tcp: backport upstream include/net/tcp.h changes
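
Update kernel/include/net/tcp.h with TCP core changes carried back from
newer upstream kernels:

- replace the GPL license boilerplate with an SPDX identifier
- add the MPTCP option kind (TCPOPT_MPTCP), the <net/mptcp.h> include
  and slow-path hooks such as sk_is_mptcp()/mptcp_skb_can_collapse()
- convert tcp_setsockopt() to sockptr_t and drop the compat_* variants
- rework TCP Fast Open around siphash keys, with primary/backup key
  support (TCP_FASTOPEN_KEY_MAX)
- scope TCP-MD5 keys by L3 device via the new l3index argument, behind
  the tcp_md5_needed static branch
- add earliest-departure-time helpers (tcp_pacing_delay(),
  tcp_reset_xmit_timer()) and tcp_tx_delay support
- annotate lockless sysctl and socket field reads with READ_ONCE()
- add INDIRECT_CALLABLE_DECLARE() wrappers for the GRO and IPv6 handlers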
---
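Reviewer notes (below the "---" cut line, so git am drops them): a
self-contained sketch of the new tcp_ns_to_ts() helper. This is
illustrative userspace C, not code from the patch; the TCP_TS_HZ value
of 1000 (millisecond TSval ticks) is assumed from this tree's tcp.h.

/*
 * tcp_ns_to_ts() turns a nanosecond clock into a TCP TSval tick:
 * ns / (NSEC_PER_SEC / TCP_TS_HZ).  With TCP_TS_HZ == 1000 (assumed),
 * one tick is one millisecond.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define TCP_TS_HZ	1000ULL		/* assumed: 1 TSval tick == 1 ms */

static uint32_t tcp_ns_to_ts(uint64_t ns)
{
	return (uint32_t)(ns / (NSEC_PER_SEC / TCP_TS_HZ));
}

int main(void)
{
	/* 3.123456789 s after boot -> 3123 ms -> TSval 3123 */
	printf("TSval = %u\n", tcp_ns_to_ts(3123456789ULL));
	return 0;
}

tcp_time_stamp_raw() and tcp_skb_timestamp() in the diff become thin
wrappers around this single conversion.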
kernel/include/net/tcp.h | 428 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 306 insertions(+), 122 deletions(-)
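A second sketch, for the tcp_stream_memory_free() change: when called
from sk_stream_write_space() (@wake == 1), the unsent byte count is
doubled before the notsent_lowat comparison, so EPOLLOUT is signalled
only once less than half of the limit remains queued. Hypothetical
values; illustration only, not part of the change.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* mirrors the "(notsent_bytes << wake) < lowat" test from the patch */
static bool stream_memory_free(uint32_t notsent_bytes, uint32_t lowat,
			       int wake)
{
	return ((uint64_t)notsent_bytes << wake) < lowat;
}

int main(void)
{
	uint32_t lowat = 16384;	/* example TCP_NOTSENT_LOWAT setting */

	/* 10000 queued bytes: writable on a direct check (wake=0)... */
	printf("wake=0: %d\n", stream_memory_free(10000, lowat, 0)); /* 1 */
	/* ...but no EPOLLOUT wakeup yet (wake=1): 20000 >= 16384 */
	printf("wake=1: %d\n", stream_memory_free(10000, lowat, 1)); /* 0 */
	return 0;
}
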
diff --git a/kernel/include/net/tcp.h b/kernel/include/net/tcp.h
index 3f0d654..f936fcb 100644
--- a/kernel/include/net/tcp.h
+++ b/kernel/include/net/tcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -9,11 +10,6 @@
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _TCP_H
#define _TCP_H
@@ -27,9 +23,9 @@
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
-#include <linux/cryptohash.h>
#include <linux/kref.h>
#include <linux/ktime.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -43,10 +39,12 @@
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
+#include <net/mptcp.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/bpf-cgroup.h>
+#include <linux/siphash.h>
extern struct inet_hashinfo tcp_hashinfo;
@@ -67,7 +65,7 @@
/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS 88U
-/* The least MTU to use for probing */
+/* The initial MTU to use for probing */
#define TCP_BASE_MSS 1024
/* probing interval, default to 10 minutes as per RFC4821 */
@@ -128,6 +126,7 @@
* to combine FIN-WAIT-2 timeout with
* TIME-WAIT timer.
*/
+#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
#if HZ >= 100
@@ -185,6 +184,7 @@
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
@@ -315,7 +315,7 @@
void tcp_tasklet_init(void);
-void tcp_v4_err(struct sk_buff *skb, u32);
+int tcp_v4_err(struct sk_buff *skb, u32);
void tcp_shutdown(struct sock *sk, int how);
@@ -331,6 +331,9 @@
size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+ int size_goal);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
@@ -391,30 +394,38 @@
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
int tcp_set_rcvlowat(struct sock *sk, int val);
void tcp_data_ready(struct sock *sk);
+#ifdef CONFIG_MMU
int tcp_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
+#endif
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
+/*
+ * BPF SKB-less helpers
+ */
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+ struct tcphdr *th, u32 *cookie);
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+ struct tcphdr *th, u32 *cookie);
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk, struct tcphdr *th);
/*
* TCP v4 functions exported for the inet6 API
*/
@@ -422,6 +433,7 @@
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
struct request_sock *req,
@@ -443,7 +455,8 @@
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
int tcp_disconnect(struct sock *sk, int flags);
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -457,6 +470,9 @@
int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -539,7 +555,7 @@
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-u64 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req, u64 now);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
@@ -594,6 +610,7 @@
void tcp_reset(struct sock *sk);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@ -610,6 +627,7 @@
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
unsigned int tcp_current_mss(struct sock *sk);
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);
/* Bound MSS / TSO packet size with the half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
@@ -646,7 +664,6 @@
int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);
static inline void tcp_bound_rto(const struct sock *sk)
{
@@ -661,6 +678,10 @@
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
+ /* mptcp hooks are only on the slow path */
+ if (sk_is_mptcp((struct sock *)tp))
+ return;
+
tp->pred_flags = htonl((tp->tcp_header_len << 26) |
ntohl(TCP_FLAG_ACK) |
snd_wnd);
@@ -686,7 +707,7 @@
static inline u32 tcp_rto_min(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
+ u32 rto_min = inet_csk(sk)->icsk_rto_min;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -745,7 +766,7 @@
static inline u64 tcp_clock_ns(void)
{
- return local_clock();
+ return ktime_get_ns();
}
static inline u64 tcp_clock_us(void)
@@ -759,23 +780,19 @@
return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
}
+/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
+static inline u32 tcp_ns_to_ts(u64 ns)
+{
+ return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+}
+
/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
static inline u32 tcp_time_stamp_raw(void)
{
- return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+ return tcp_ns_to_ts(tcp_clock_ns());
}
-
-/* Refresh 1us clock of a TCP socket,
- * ensuring monotically increasing values.
- */
-static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
-{
- u64 val = tcp_clock_us();
-
- if (val > tp->tcp_mstamp)
- tp->tcp_mstamp = val;
-}
+void tcp_mstamp_refresh(struct tcp_sock *tp);
static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
{
@@ -784,7 +801,13 @@
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
- return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+ return tcp_ns_to_ts(skb->skb_mstamp_ns);
+}
+
+/* provide the departure time in us unit */
+static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
+{
+ return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
}
@@ -830,7 +853,7 @@
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
#define TCPCB_LOST 0x04 /* SKB is lost */
#define TCPCB_TAGBITS 0x07 /* All tag bits */
-#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */
+#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */
#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
TCPCB_REPAIRED)
@@ -875,6 +898,23 @@
TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
}
+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+ return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+ return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+ TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
+extern const struct inet_connection_sock_af_ops ipv4_specific;
+
#if IS_ENABLED(CONFIG_IPV6)
/* This is the variant of inet6_iif() that must be used by TCP,
* as TCP moves IP6CB into a different location in skb->cb[]
@@ -900,17 +940,14 @@
#endif
return 0;
}
-#endif
-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
- return true;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
+
#endif
- return false;
-}
/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v4_sdif(struct sk_buff *skb)
@@ -951,6 +988,13 @@
return likely(!TCP_SKB_CB(skb)->eor);
}
+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return likely(tcp_skb_can_collapse_to(to) &&
+ mptcp_skb_can_collapse(to, from));
+}
+
/* Events passed to congestion control interface */
enum tcp_ca_event {
CA_EVENT_TX_START, /* first transmit when no packets in flight */
@@ -981,6 +1025,7 @@
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
union tcp_cc_info;
@@ -1066,7 +1111,7 @@
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
- bool reinit, bool cap_net_admin);
+ bool cap_net_admin);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
@@ -1075,6 +1120,7 @@
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;
+struct tcp_congestion_ops *tcp_ca_find(const char *name);
struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
@@ -1127,7 +1173,7 @@
*/
static inline int tcp_is_sack(const struct tcp_sock *tp)
{
- return tp->rx_opt.sack_ok;
+ return likely(tp->rx_opt.sack_ok);
}
static inline bool tcp_is_reno(const struct tcp_sock *tp)
@@ -1230,11 +1276,14 @@
{
const struct tcp_sock *tp = tcp_sk(sk);
+ if (tp->is_cwnd_limited)
+ return true;
+
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
if (tcp_in_slow_start(tp))
return tp->snd_cwnd < 2 * tp->max_packets_out;
- return tp->is_cwnd_limited;
+ return false;
}
/* BBR congestion control needs pacing.
@@ -1248,8 +1297,27 @@
return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
}
+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
+ */
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
+{
+ s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
+
+ return delay > 0 ? nsecs_to_jiffies(delay) : 0;
+}
+
+static inline void tcp_reset_xmit_timer(struct sock *sk,
+ const int what,
+ unsigned long when,
+ const unsigned long max_when)
+{
+ inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
+ max_when);
+}
+
/* Something is really bad, we could not queue an additional packet,
- * because qdisc is full or receiver sent a 0 window.
+ * because qdisc is full or receiver sent a 0 window, or we are paced.
* We do not want to add fuel to the fire, or abort too early,
* so make sure the timer we arm now is at least 200ms in the future,
* regardless of current icsk_rto value (as it could be ~2ms)
@@ -1271,8 +1339,8 @@
static inline void tcp_check_probe_timer(struct sock *sk)
{
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_base(sk), TCP_RTO_MAX);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+ tcp_probe0_base(sk), TCP_RTO_MAX);
}
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
@@ -1291,36 +1359,19 @@
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
__be32 daddr, __wsum base)
{
- return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
-}
-
-static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
-{
- return __skb_checksum_complete(skb);
+ return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
static inline bool tcp_checksum_complete(struct sk_buff *skb)
{
return !skb_csum_unnecessary(skb) &&
- __tcp_checksum_complete(skb);
+ __skb_checksum_complete(skb);
}
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
int tcp_filter(struct sock *sk, struct sk_buff *skb);
-
-#undef STATE_TRACE
-
-#ifdef STATE_TRACE
-static const char *statename[]={
- "Unused","Established","Syn Sent","Syn Recv",
- "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
- "Close Wait","Last ACK","Listen","Closing"
-};
-#endif
void tcp_set_state(struct sock *sk, int state);
-
void tcp_done(struct sock *sk);
-
int tcp_abort(struct sock *sk, int err);
static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
@@ -1329,7 +1380,6 @@
rx_opt->num_sacks = 0;
}
-u32 tcp_default_init_rwnd(u32 mss);
void tcp_cwnd_restart(struct sock *sk, s32 delta);
static inline void tcp_slow_start_after_idle_check(struct sock *sk)
@@ -1338,8 +1388,8 @@
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
@@ -1354,7 +1404,7 @@
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1364,14 +1414,17 @@
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+ READ_ONCE(sk->sk_backlog.len) -
atomic_read(&sk->sk_rmem_alloc));
}
static inline int tcp_full_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
+
+void tcp_cleanup_rbuf(struct sock *sk, int copied);
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
@@ -1402,21 +1455,24 @@
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1429,7 +1485,8 @@
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -1516,8 +1573,9 @@
struct hlist_node node;
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
- union tcp_md5_addr addr;
u8 prefixlen;
+ union tcp_md5_addr addr;
+ int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
struct rcu_head rcu;
};
@@ -1561,22 +1619,33 @@
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp);
+ int family, u8 prefixlen, int l3index,
+ const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen);
+ int family, u8 prefixlen, int l3index);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
#ifdef CONFIG_TCP_MD5SIG
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family);
+#include <linux/jump_label.h>
+extern struct static_key_false tcp_md5_needed;
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr,
+ int family);
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr, int family)
+{
+ if (!static_branch_unlikely(&tcp_md5_needed))
+ return NULL;
+ return __tcp_md5_do_lookup(sk, l3index, addr, family);
+}
+
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
#else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr, int family)
{
return NULL;
}
@@ -1608,12 +1677,15 @@
struct msghdr *data; /* data in MSG_FASTOPEN */
size_t size;
int copied; /* queued in tcp_connect() */
+ struct ubuf_info *uarg;
};
void tcp_free_fastopen_req(struct tcp_sock *tp);
void tcp_fastopen_destroy_cipher(struct sock *sk);
void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
- void *key, unsigned int len);
+ void *primary_key, void *backup_key);
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+ u64 *key);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -1623,13 +1695,16 @@
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
-#define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t)
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+ (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)
/* Fastopen key context */
struct tcp_fastopen_context {
- struct crypto_cipher *tfm;
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
- struct rcu_head rcu;
+ siphash_key_t key[TCP_FASTOPEN_KEY_MAX];
+ int num;
+ struct rcu_head rcu;
};
extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
@@ -1637,6 +1712,35 @@
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
+
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+ struct tcp_fastopen_context *ctx;
+
+ ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+ if (!ctx)
+ ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+ return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+ const struct tcp_fastopen_cookie *orig)
+{
+ if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+ orig->len == foc->len &&
+ !memcmp(orig->val, foc->val, foc->len))
+ return true;
+ return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+ return ctx->num;
+}
/* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive.
@@ -1705,9 +1809,18 @@
return skb_queue_is_last(&sk->sk_write_queue, skb);
}
+/**
+ * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
+ * @sk: socket
+ *
+ * Since the write queue can have a temporary empty skb in it,
+ * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
+ */
static inline bool tcp_write_queue_empty(const struct sock *sk)
{
- return skb_queue_empty(&sk->sk_write_queue);
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return tp->write_seq == tp->snd_nxt;
}
static inline bool tcp_rtx_queue_empty(const struct sock *sk)
@@ -1720,20 +1833,9 @@
return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
}
-static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
-{
- if (tcp_write_queue_empty(sk))
- tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-}
-
-static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
-{
- __skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
- __tcp_add_write_queue_tail(sk, skb);
+ __skb_queue_tail(&sk->sk_write_queue, skb);
/* Queue it, remembering where we must start sending. */
if (sk->sk_write_queue.next == skb)
@@ -1855,6 +1957,7 @@
struct seq_net_private p;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
+ struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num;
loff_t last_pos;
};
@@ -1867,6 +1970,10 @@
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
int tcp_gro_complete(struct sk_buff *skb);
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
@@ -1874,15 +1981,20 @@
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
-static inline bool tcp_stream_memory_free(const struct sock *sk)
+/* @wake is one when sk_stream_write_space() calls us.
+ * This sends EPOLLOUT only if notsent_bytes is half the limit.
+ * This mimics the strategy used in sock_def_write_space().
+ */
+static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt;
+ u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
- return notsent_bytes < tcp_notsent_lowat(tp);
+ return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
}
#ifdef CONFIG_PROC_FS
@@ -1906,7 +2018,7 @@
const struct sk_buff *skb);
int (*md5_parse)(struct sock *sk,
int optname,
- char __user *optval,
+ sockptr_t optval,
int optlen);
#endif
};
@@ -1935,8 +2047,14 @@
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
};
+
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif
#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
@@ -1977,7 +2095,7 @@
{
const struct sk_buff *skb = tcp_rtx_queue_head(sk);
u32 rto = inet_csk(sk)->icsk_rto;
- u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+ u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
}
@@ -2077,34 +2195,72 @@
#define TCP_ULP_MAX 128
#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX)
-enum {
- TCP_ULP_TLS,
- TCP_ULP_BPF,
-};
-
struct tcp_ulp_ops {
struct list_head list;
/* initialize ulp */
int (*init)(struct sock *sk);
+ /* update ulp */
+ void (*update)(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk));
/* cleanup ulp */
void (*release)(struct sock *sk);
+ /* diagnostic */
+ int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+ size_t (*get_info_size)(const struct sock *sk);
+ /* clone ulp */
+ void (*clone)(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority);
- int uid;
char name[TCP_ULP_NAME_MAX];
- bool user_visible;
struct module *owner;
};
int tcp_register_ulp(struct tcp_ulp_ops *type);
void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
-int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk));
#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
+struct sk_msg;
+struct sk_psock;
+
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
+#ifdef CONFIG_NET_SOCK_MSG
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+ int flags);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+ struct msghdr *msg, int len, int flags);
+#endif /* CONFIG_NET_SOCK_MSG */
+
+#ifdef CONFIG_CGROUP_BPF
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+ skops->skb = skb;
+ skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+}
+#endif
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
@@ -2197,6 +2353,12 @@
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+ if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
+ tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
@@ -2205,7 +2367,29 @@
void clean_acked_data_enable(struct inet_connection_sock *icsk,
void (*cad)(struct sock *sk, u32 ack_seq));
void clean_acked_data_disable(struct inet_connection_sock *icsk);
-
+void clean_acked_data_flush(void);
#endif
+DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
+static inline void tcp_add_tx_delay(struct sk_buff *skb,
+ const struct tcp_sock *tp)
+{
+ if (static_branch_unlikely(&tcp_tx_delay_enabled))
+ skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC;
+}
+
+/* Compute Earliest Departure Time for some control packets
+ * like ACK or RST for TIME_WAIT or non ESTABLISHED sockets.
+ */
+static inline u64 tcp_transmit_time(const struct sock *sk)
+{
+ if (static_branch_unlikely(&tcp_tx_delay_enabled)) {
+ u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
+ tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;
+
+ return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC;
+ }
+ return 0;
+}
+
#endif /* _TCP_H */
--
Gitblit v1.6.2