forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/net/ipv4/tcp_output.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -37,6 +38,7 @@
3738 #define pr_fmt(fmt) "TCP: " fmt
3839
3940 #include <net/tcp.h>
41
+#include <net/mptcp.h>
4042
4143 #include <linux/compiler.h>
4244 #include <linux/gfp.h>
....@@ -44,6 +46,17 @@
4446 #include <linux/static_key.h>
4547
4648 #include <trace/events/tcp.h>
49
+
50
+/* Refresh clocks of a TCP socket,
51
+ * ensuring monotonically increasing values.
52
+ */
53
+void tcp_mstamp_refresh(struct tcp_sock *tp)
54
+{
55
+ u64 val = tcp_clock_ns();
56
+
57
+ tp->tcp_clock_cache = val;
58
+ tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC);
59
+}
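
A minimal user-space sketch of the single-sample caching idea above: one monotonic clock read feeds both the nanosecond cache and the derived microsecond stamp, so the two can never disagree. clock_gettime() stands in for tcp_clock_ns(); the struct and helper names are invented for illustration, not kernel code.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_USEC 1000ULL

struct clock_cache {
	uint64_t clock_cache_ns;	/* last raw nanosecond sample */
	uint64_t mstamp_us;		/* same instant, in microseconds */
};

/* Refresh both cached values from one clock read, mirroring how
 * tcp_mstamp_refresh() derives tcp_mstamp from tcp_clock_cache.
 */
static void cache_refresh(struct clock_cache *c)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	c->clock_cache_ns = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
	c->mstamp_us = c->clock_cache_ns / NSEC_PER_USEC;
}

int main(void)
{
	struct clock_cache c;

	cache_refresh(&c);
	printf("ns=%llu us=%llu\n",
	       (unsigned long long)c.clock_cache_ns,
	       (unsigned long long)c.mstamp_us);
	return 0;
}
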
4760
4861 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
4962 int push_one, gfp_t gfp);
....@@ -55,7 +68,7 @@
5568 struct tcp_sock *tp = tcp_sk(sk);
5669 unsigned int prior_packets = tp->packets_out;
5770
58
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
71
+ WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq);
5972
6073 __skb_unlink(skb, &sk->sk_write_queue);
6174 tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
....@@ -69,6 +82,7 @@
6982
7083 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
7184 tcp_skb_pcount(skb));
85
+ tcp_check_space(sk);
7286 }
7387
7488 /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
....@@ -159,7 +173,7 @@
159173 * packet, enter pingpong mode.
160174 */
161175 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
162
- icsk->icsk_ack.pingpong = 1;
176
+ inet_csk_enter_pingpong_mode(sk);
163177 }
164178
165179 /* Account for an ACK we sent. */
....@@ -168,10 +182,10 @@
168182 {
169183 struct tcp_sock *tp = tcp_sk(sk);
170184
171
- if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
185
+ if (unlikely(tp->compressed_ack)) {
172186 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
173
- tp->compressed_ack - TCP_FASTRETRANS_THRESH);
174
- tp->compressed_ack = TCP_FASTRETRANS_THRESH;
187
+ tp->compressed_ack);
188
+ tp->compressed_ack = 0;
175189 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
176190 __sock_put(sk);
177191 }
....@@ -221,16 +235,14 @@
221235 if (init_rcv_wnd)
222236 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
223237
224
- (*rcv_wscale) = 0;
238
+ *rcv_wscale = 0;
225239 if (wscale_ok) {
226240 /* Set window scaling on max possible window */
227
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
228
- space = max_t(u32, space, sysctl_rmem_max);
241
+ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
242
+ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
229243 space = min_t(u32, space, *window_clamp);
230
- while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
231
- space >>= 1;
232
- (*rcv_wscale)++;
233
- }
244
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
245
+ 0, TCP_MAX_WSCALE);
234246 }
235247 /* Set the clamp no higher than max representable value */
236248 (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
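
The shift loop is replaced by a closed form: the window scale is the number of right shifts needed to bring the advertised space below 64 KiB, i.e. ilog2(space) - 15, clamped to [0, TCP_MAX_WSCALE]. A small sketch comparing the two computations; ilog2() is emulated here with a compiler builtin and the helper names are illustrative.

#include <stdio.h>

#define TCP_MAX_WSCALE 14
#define U16_MAX 0xffffU

/* User-space stand-in for the kernel's ilog2() on a non-zero value. */
static int ilog2_u32(unsigned int v)
{
	return 31 - __builtin_clz(v);
}

/* Old computation: shift until the space fits in 16 bits. */
static int wscale_loop(unsigned int space)
{
	int ws = 0;

	while (space > U16_MAX && ws < TCP_MAX_WSCALE) {
		space >>= 1;
		ws++;
	}
	return ws;
}

/* New computation from the patch: clamp(ilog2(space) - 15, 0, 14). */
static int wscale_formula(unsigned int space)
{
	int ws = ilog2_u32(space) - 15;

	if (ws < 0)
		ws = 0;
	if (ws > TCP_MAX_WSCALE)
		ws = TCP_MAX_WSCALE;
	return ws;
}

int main(void)
{
	unsigned int samples[] = { 4096, 65535, 65536, 1u << 20, 1u << 28 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("space=%-10u loop=%d formula=%d\n",
		       samples[i], wscale_loop(samples[i]),
		       wscale_formula(samples[i]));
	return 0;
}
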
....@@ -401,6 +413,7 @@
401413 #define OPTION_WSCALE (1 << 3)
402414 #define OPTION_FAST_OPEN_COOKIE (1 << 8)
403415 #define OPTION_SMC (1 << 9)
416
+#define OPTION_MPTCP (1 << 10)
404417
405418 static void smc_options_write(__be32 *ptr, u16 *options)
406419 {
....@@ -423,10 +436,159 @@
423436 u8 ws; /* window scale, 0 to disable */
424437 u8 num_sack_blocks; /* number of SACK blocks to include */
425438 u8 hash_size; /* bytes in hash_location */
439
+ u8 bpf_opt_len; /* length of BPF hdr option */
426440 __u8 *hash_location; /* temporary pointer, overloaded */
427441 __u32 tsval, tsecr; /* need to include OPTION_TS */
428442 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
443
+ struct mptcp_out_options mptcp;
429444 };
445
+
446
+static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
447
+{
448
+#if IS_ENABLED(CONFIG_MPTCP)
449
+ if (unlikely(OPTION_MPTCP & opts->options))
450
+ mptcp_write_options(ptr, &opts->mptcp);
451
+#endif
452
+}
453
+
454
+#ifdef CONFIG_CGROUP_BPF
455
+static int bpf_skops_write_hdr_opt_arg0(struct sk_buff *skb,
456
+ enum tcp_synack_type synack_type)
457
+{
458
+ if (unlikely(!skb))
459
+ return BPF_WRITE_HDR_TCP_CURRENT_MSS;
460
+
461
+ if (unlikely(synack_type == TCP_SYNACK_COOKIE))
462
+ return BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
463
+
464
+ return 0;
465
+}
466
+
467
+/* req, syn_skb and synack_type are used when writing synack */
468
+static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
469
+ struct request_sock *req,
470
+ struct sk_buff *syn_skb,
471
+ enum tcp_synack_type synack_type,
472
+ struct tcp_out_options *opts,
473
+ unsigned int *remaining)
474
+{
475
+ struct bpf_sock_ops_kern sock_ops;
476
+ int err;
477
+
478
+ if (likely(!BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
479
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) ||
480
+ !*remaining)
481
+ return;
482
+
483
+ /* *remaining has already been aligned to 4 bytes, so *remaining >= 4 */
484
+
485
+ /* init sock_ops */
486
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
487
+
488
+ sock_ops.op = BPF_SOCK_OPS_HDR_OPT_LEN_CB;
489
+
490
+ if (req) {
491
+ /* The listen "sk" cannot be passed here because
492
+ * it is not locked. It would not make too much
493
+ * sense to do bpf_setsockopt(listen_sk) based
494
+ * on individual connection request also.
495
+ *
496
+ * Thus, "req" is passed here and the cgroup-bpf-progs
497
+ * of the listen "sk" will be run.
498
+ *
499
+ * "req" is also used here for fastopen even when the "sk" here is
500
+ * a fullsock "child" sk. It is to keep the behavior
501
+ * consistent between fastopen and non-fastopen on
502
+ * the bpf programming side.
503
+ */
504
+ sock_ops.sk = (struct sock *)req;
505
+ sock_ops.syn_skb = syn_skb;
506
+ } else {
507
+ sock_owned_by_me(sk);
508
+
509
+ sock_ops.is_fullsock = 1;
510
+ sock_ops.sk = sk;
511
+ }
512
+
513
+ sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
514
+ sock_ops.remaining_opt_len = *remaining;
515
+ /* tcp_current_mss() does not pass a skb */
516
+ if (skb)
517
+ bpf_skops_init_skb(&sock_ops, skb, 0);
518
+
519
+ err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk);
520
+
521
+ if (err || sock_ops.remaining_opt_len == *remaining)
522
+ return;
523
+
524
+ opts->bpf_opt_len = *remaining - sock_ops.remaining_opt_len;
525
+ /* round up to 4 bytes */
526
+ opts->bpf_opt_len = (opts->bpf_opt_len + 3) & ~3;
527
+
528
+ *remaining -= opts->bpf_opt_len;
529
+}
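
The reserved BPF option length is rounded up to a multiple of 4 so it always occupies whole 32-bit option words; (len + 3) & ~3 is the idiom used in the hunk above. A tiny standalone sketch of that rounding:

#include <stdio.h>

/* Round a requested TCP option length up to a 4-byte boundary,
 * as done for opts->bpf_opt_len above.
 */
static unsigned int round_up_4(unsigned int len)
{
	return (len + 3) & ~3u;
}

int main(void)
{
	for (unsigned int len = 0; len <= 10; len++)
		printf("requested=%2u reserved=%2u\n", len, round_up_4(len));
	return 0;
}
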
530
+
531
+static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
532
+ struct request_sock *req,
533
+ struct sk_buff *syn_skb,
534
+ enum tcp_synack_type synack_type,
535
+ struct tcp_out_options *opts)
536
+{
537
+ u8 first_opt_off, nr_written, max_opt_len = opts->bpf_opt_len;
538
+ struct bpf_sock_ops_kern sock_ops;
539
+ int err;
540
+
541
+ if (likely(!max_opt_len))
542
+ return;
543
+
544
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
545
+
546
+ sock_ops.op = BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
547
+
548
+ if (req) {
549
+ sock_ops.sk = (struct sock *)req;
550
+ sock_ops.syn_skb = syn_skb;
551
+ } else {
552
+ sock_owned_by_me(sk);
553
+
554
+ sock_ops.is_fullsock = 1;
555
+ sock_ops.sk = sk;
556
+ }
557
+
558
+ sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
559
+ sock_ops.remaining_opt_len = max_opt_len;
560
+ first_opt_off = tcp_hdrlen(skb) - max_opt_len;
561
+ bpf_skops_init_skb(&sock_ops, skb, first_opt_off);
562
+
563
+ err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk);
564
+
565
+ if (err)
566
+ nr_written = 0;
567
+ else
568
+ nr_written = max_opt_len - sock_ops.remaining_opt_len;
569
+
570
+ if (nr_written < max_opt_len)
571
+ memset(skb->data + first_opt_off + nr_written, TCPOPT_NOP,
572
+ max_opt_len - nr_written);
573
+}
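
When the BPF program writes fewer bytes than it reserved, the unused tail is filled with TCPOPT_NOP so the receiver still sees a valid option list. A minimal sketch of that padding step, using a local buffer in place of skb->data and an invented experimental option as the already-written part:

#include <stdio.h>
#include <string.h>

#define TCPOPT_NOP 1

/* Pad the unused tail of a reserved option area with NOPs,
 * mirroring the memset() at the end of the function above.
 */
static void pad_unwritten(unsigned char *opt, unsigned int reserved,
			  unsigned int written)
{
	if (written < reserved)
		memset(opt + written, TCPOPT_NOP, reserved - written);
}

int main(void)
{
	/* 8 bytes reserved, 4 bytes written by the (hypothetical) program. */
	unsigned char opt[8] = { 254, 4, 0xab, 0xcd };

	pad_unwritten(opt, sizeof(opt), 4);
	for (unsigned int i = 0; i < sizeof(opt); i++)
		printf("%02x ", opt[i]);
	printf("\n");
	return 0;
}
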
574
+#else
575
+static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
576
+ struct request_sock *req,
577
+ struct sk_buff *syn_skb,
578
+ enum tcp_synack_type synack_type,
579
+ struct tcp_out_options *opts,
580
+ unsigned int *remaining)
581
+{
582
+}
583
+
584
+static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
585
+ struct request_sock *req,
586
+ struct sk_buff *syn_skb,
587
+ enum tcp_synack_type synack_type,
588
+ struct tcp_out_options *opts)
589
+{
590
+}
591
+#endif
430592
431593 /* Write previously computed TCP options to the packet.
432594 *
....@@ -536,6 +698,8 @@
536698 }
537699
538700 smc_options_write(ptr, &options);
701
+
702
+ mptcp_options_write(ptr, opts);
539703 }
540704
541705 static void smc_set_option(const struct tcp_sock *tp,
....@@ -571,6 +735,22 @@
571735 #endif
572736 }
573737
738
+static void mptcp_set_option_cond(const struct request_sock *req,
739
+ struct tcp_out_options *opts,
740
+ unsigned int *remaining)
741
+{
742
+ if (rsk_is_mptcp(req)) {
743
+ unsigned int size;
744
+
745
+ if (mptcp_synack_options(req, &size, &opts->mptcp)) {
746
+ if (*remaining >= size) {
747
+ opts->options |= OPTION_MPTCP;
748
+ *remaining -= size;
749
+ }
750
+ }
751
+ }
752
+}
753
+
574754 /* Compute TCP options for SYN packets. This is not the final
575755 * network wire format yet.
576756 */
....@@ -584,7 +764,8 @@
584764
585765 *md5 = NULL;
586766 #ifdef CONFIG_TCP_MD5SIG
587
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
767
+ if (static_branch_unlikely(&tcp_md5_needed) &&
768
+ rcu_access_pointer(tp->md5sig_info)) {
588769 *md5 = tp->af_specific->md5_lookup(sk, sk);
589770 if (*md5) {
590771 opts->options |= OPTION_MD5;
....@@ -605,18 +786,18 @@
605786 opts->mss = tcp_advertise_mss(sk);
606787 remaining -= TCPOLEN_MSS_ALIGNED;
607788
608
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
789
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
609790 opts->options |= OPTION_TS;
610791 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
611792 opts->tsecr = tp->rx_opt.ts_recent;
612793 remaining -= TCPOLEN_TSTAMP_ALIGNED;
613794 }
614
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
795
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
615796 opts->ws = tp->rx_opt.rcv_wscale;
616797 opts->options |= OPTION_WSCALE;
617798 remaining -= TCPOLEN_WSCALE_ALIGNED;
618799 }
619
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
800
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
620801 opts->options |= OPTION_SACK_ADVERTISE;
621802 if (unlikely(!(OPTION_TS & opts->options)))
622803 remaining -= TCPOLEN_SACKPERM_ALIGNED;
....@@ -639,6 +820,17 @@
639820
640821 smc_set_option(tp, opts, &remaining);
641822
823
+ if (sk_is_mptcp(sk)) {
824
+ unsigned int size;
825
+
826
+ if (mptcp_syn_options(sk, skb, &size, &opts->mptcp)) {
827
+ opts->options |= OPTION_MPTCP;
828
+ remaining -= size;
829
+ }
830
+ }
831
+
832
+ bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining);
833
+
642834 return MAX_TCP_OPTION_SPACE - remaining;
643835 }
644836
....@@ -649,7 +841,8 @@
649841 struct tcp_out_options *opts,
650842 const struct tcp_md5sig_key *md5,
651843 struct tcp_fastopen_cookie *foc,
652
- enum tcp_synack_type synack_type)
844
+ enum tcp_synack_type synack_type,
845
+ struct sk_buff *syn_skb)
653846 {
654847 struct inet_request_sock *ireq = inet_rsk(req);
655848 unsigned int remaining = MAX_TCP_OPTION_SPACE;
....@@ -702,7 +895,12 @@
702895 }
703896 }
704897
898
+ mptcp_set_option_cond(req, opts, &remaining);
899
+
705900 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
901
+
902
+ bpf_skops_hdr_opt_len((struct sock *)sk, skb, req, syn_skb,
903
+ synack_type, opts, &remaining);
706904
707905 return MAX_TCP_OPTION_SPACE - remaining;
708906 }
....@@ -722,7 +920,8 @@
722920
723921 *md5 = NULL;
724922 #ifdef CONFIG_TCP_MD5SIG
725
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
923
+ if (static_branch_unlikely(&tcp_md5_needed) &&
924
+ rcu_access_pointer(tp->md5sig_info)) {
726925 *md5 = tp->af_specific->md5_lookup(sk, sk);
727926 if (*md5) {
728927 opts->options |= OPTION_MD5;
....@@ -738,16 +937,46 @@
738937 size += TCPOLEN_TSTAMP_ALIGNED;
739938 }
740939
940
+ /* MPTCP options have precedence over SACK for the limited TCP
941
+ * option space because an MPTCP connection would be forced to
942
+ * fall back to regular TCP if a required multipath option is
943
+ * missing. SACK still gets a chance to use whatever space is
944
+ * left.
945
+ */
946
+ if (sk_is_mptcp(sk)) {
947
+ unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
948
+ unsigned int opt_size = 0;
949
+
950
+ if (mptcp_established_options(sk, skb, &opt_size, remaining,
951
+ &opts->mptcp)) {
952
+ opts->options |= OPTION_MPTCP;
953
+ size += opt_size;
954
+ }
955
+ }
956
+
741957 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
742958 if (unlikely(eff_sacks)) {
743959 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
960
+ if (unlikely(remaining < TCPOLEN_SACK_BASE_ALIGNED +
961
+ TCPOLEN_SACK_PERBLOCK))
962
+ return size;
963
+
744964 opts->num_sack_blocks =
745965 min_t(unsigned int, eff_sacks,
746966 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
747967 TCPOLEN_SACK_PERBLOCK);
748
- if (likely(opts->num_sack_blocks))
749
- size += TCPOLEN_SACK_BASE_ALIGNED +
750
- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
968
+
969
+ size += TCPOLEN_SACK_BASE_ALIGNED +
970
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
971
+ }
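
With MAX_TCP_OPTION_SPACE at 40 bytes, a 12-byte timestamp option leaves 28, and each SACK block needs 8 bytes on top of a 4-byte aligned SACK header, so at most three blocks fit; the new early return drops SACK entirely once fewer than 12 bytes remain, for example after a large MPTCP option. A short sketch of that arithmetic; the constants match the TCP option sizes, while the helper and the sample MPTCP option size are illustrative.

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE      40
#define TCPOLEN_TSTAMP_ALIGNED    12
#define TCPOLEN_SACK_BASE_ALIGNED  4
#define TCPOLEN_SACK_PERBLOCK      8

/* How many SACK blocks fit into what is left of the option space,
 * following the hunk above: bail out if not even one block fits,
 * otherwise divide the leftover space per block.
 */
static unsigned int sack_blocks_that_fit(unsigned int used, unsigned int eff_sacks)
{
	unsigned int remaining = MAX_TCP_OPTION_SPACE - used;
	unsigned int max_blocks;

	if (remaining < TCPOLEN_SACK_BASE_ALIGNED + TCPOLEN_SACK_PERBLOCK)
		return 0;

	max_blocks = (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK;
	return eff_sacks < max_blocks ? eff_sacks : max_blocks;
}

int main(void)
{
	/* Timestamps enabled: 12 bytes used, up to 3 blocks fit. */
	printf("with TS:    %u blocks\n",
	       sack_blocks_that_fit(TCPOLEN_TSTAMP_ALIGNED, 4));
	/* Timestamps plus an assumed 16-byte MPTCP option: 1 block fits. */
	printf("with MPTCP: %u blocks\n",
	       sack_blocks_that_fit(TCPOLEN_TSTAMP_ALIGNED + 16, 4));
	return 0;
}
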
972
+
973
+ if (unlikely(BPF_SOCK_OPS_TEST_FLAG(tp,
974
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG))) {
975
+ unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
976
+
977
+ bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining);
978
+
979
+ size = MAX_TCP_OPTION_SPACE - remaining;
751980 }
752981
753982 return size;
....@@ -966,48 +1195,33 @@
9661195 return HRTIMER_NORESTART;
9671196 }
9681197
969
-static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
1198
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
1199
+ u64 prior_wstamp)
9701200 {
9711201 struct tcp_sock *tp = tcp_sk(sk);
972
- ktime_t expire, now;
973
- u64 len_ns;
974
- u32 rate;
9751202
976
- if (!tcp_needs_internal_pacing(sk))
977
- return;
978
- rate = sk->sk_pacing_rate;
979
- if (!rate || rate == ~0U)
980
- return;
1203
+ if (sk->sk_pacing_status != SK_PACING_NONE) {
1204
+ unsigned long rate = sk->sk_pacing_rate;
9811205
982
- len_ns = (u64)skb->len * NSEC_PER_SEC;
983
- do_div(len_ns, rate);
984
- now = ktime_get();
985
- /* If hrtimer is already armed, then our caller has not
986
- * used tcp_pacing_check().
987
- */
988
- if (unlikely(hrtimer_is_queued(&tp->pacing_timer))) {
989
- expire = hrtimer_get_softexpires(&tp->pacing_timer);
990
- if (ktime_after(expire, now))
991
- now = expire;
992
- if (hrtimer_try_to_cancel(&tp->pacing_timer) == 1)
993
- __sock_put(sk);
1206
+ /* Original sch_fq does not pace first 10 MSS
1207
+ * Note that tp->data_segs_out overflows after 2^32 packets,
1208
+ * this is a minor annoyance.
1209
+ */
1210
+ if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
1211
+ u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
1212
+ u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
1213
+
1214
+ /* take into account OS jitter */
1215
+ len_ns -= min_t(u64, len_ns / 2, credit);
1216
+ tp->tcp_wstamp_ns += len_ns;
1217
+ }
9941218 }
995
- hrtimer_start(&tp->pacing_timer, ktime_add_ns(now, len_ns),
996
- HRTIMER_MODE_ABS_PINNED_SOFT);
997
- sock_hold(sk);
998
-}
999
-
1000
-static bool tcp_pacing_check(const struct sock *sk)
1001
-{
1002
- return tcp_needs_internal_pacing(sk) &&
1003
- hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
1004
-}
1005
-
1006
-static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
1007
-{
1008
- skb->skb_mstamp = tp->tcp_mstamp;
10091219 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
10101220 }
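
The rewritten helper implements internal pacing by advancing a per-socket earliest-departure timestamp rather than arming an hrtimer per packet: the ideal slot for the skb is len / pacing_rate, and time that already elapsed since the previous departure is credited back, capped at half the slot. A compact user-space sketch of that bookkeeping with invented structure and function names; it folds the "never schedule in the past" step from tcp_transmit_skb() into the same helper.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Minimal stand-in for the socket state used by the EDT pacing math. */
struct pacer {
	uint64_t wstamp_ns;	/* earliest departure time of next packet */
	unsigned long rate_bps;	/* pacing rate in bytes per second */
};

/* Advance the departure clock after sending 'len' bytes at 'now_ns':
 * the slot length is len/rate, minus up to half of it as credit for
 * time that already passed (OS jitter, idle periods).
 */
static void pacer_account(struct pacer *p, unsigned int len, uint64_t now_ns)
{
	uint64_t prior = p->wstamp_ns;
	uint64_t len_ns, credit;

	if (!p->rate_bps)
		return;

	if (now_ns > p->wstamp_ns)	/* never schedule in the past */
		p->wstamp_ns = now_ns;

	len_ns = (uint64_t)len * NSEC_PER_SEC / p->rate_bps;
	credit = p->wstamp_ns - prior;
	len_ns -= credit < len_ns / 2 ? credit : len_ns / 2;
	p->wstamp_ns += len_ns;
}

int main(void)
{
	struct pacer p = { .wstamp_ns = 0, .rate_bps = 1250000 }; /* 10 Mbit/s */

	pacer_account(&p, 1500, 0);	/* 1500 B -> 1.2 ms slot */
	printf("next departure: %llu ns\n", (unsigned long long)p.wstamp_ns);
	return 0;
}
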
1221
+
1222
+INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl));
1223
+INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl));
1224
+INDIRECT_CALLABLE_DECLARE(void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb));
10111225
10121226 /* This routine actually transmits TCP packets queued in by
10131227 * tcp_do_sendmsg(). This is used by both the initial
....@@ -1032,11 +1246,14 @@
10321246 struct sk_buff *oskb = NULL;
10331247 struct tcp_md5sig_key *md5;
10341248 struct tcphdr *th;
1249
+ u64 prior_wstamp;
10351250 int err;
10361251
10371252 BUG_ON(!skb || !tcp_skb_pcount(skb));
10381253 tp = tcp_sk(sk);
1039
-
1254
+ prior_wstamp = tp->tcp_wstamp_ns;
1255
+ tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
1256
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
10401257 if (clone_it) {
10411258 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
10421259 - tp->snd_una;
....@@ -1051,18 +1268,32 @@
10511268
10521269 if (unlikely(!skb))
10531270 return -ENOBUFS;
1271
+ /* retransmit skbs might have a non zero value in skb->dev
1272
+ * because skb->dev is aliased with skb->rbnode.rb_left
1273
+ */
1274
+ skb->dev = NULL;
10541275 }
1055
- skb->skb_mstamp = tp->tcp_mstamp;
10561276
10571277 inet = inet_sk(sk);
10581278 tcb = TCP_SKB_CB(skb);
10591279 memset(&opts, 0, sizeof(opts));
10601280
1061
- if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
1281
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
10621282 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1063
- else
1283
+ } else {
10641284 tcp_options_size = tcp_established_options(sk, skb, &opts,
10651285 &md5);
1286
+ /* Force a PSH flag on all (GSO) packets to expedite GRO flush
1287
+ * at receiver: this slightly improves GRO performance.
1288
+ * Note that we do not force the PSH flag for non GSO packets,
1289
+ * because they might be sent under high congestion events,
1290
+ * and in this case it is better to delay the delivery of 1-MSS
1291
+ * packets and thus the corresponding ACK packet that would
1292
+ * release the following packet.
1293
+ */
1294
+ if (tcp_skb_pcount(skb) > 1)
1295
+ tcb->tcp_flags |= TCPHDR_PSH;
1296
+ }
10661297 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
10671298
10681299 /* if no packet is in qdisc/device queue, then allow XPS to select
....@@ -1135,7 +1366,12 @@
11351366 }
11361367 #endif
11371368
1138
- icsk->icsk_af_ops->send_check(sk, skb);
1369
+ /* BPF prog is the last one writing header option */
1370
+ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts);
1371
+
1372
+ INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check,
1373
+ tcp_v6_send_check, tcp_v4_send_check,
1374
+ sk, skb);
11391375
11401376 if (likely(tcb->tcp_flags & TCPHDR_ACK))
11411377 tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
....@@ -1144,7 +1380,6 @@
11441380 tcp_event_data_sent(tp, sk);
11451381 tp->data_segs_out += tcp_skb_pcount(skb);
11461382 tp->bytes_sent += skb->len - tcp_header_size;
1147
- tcp_internal_pacing(sk, skb);
11481383 }
11491384
11501385 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
....@@ -1156,21 +1391,24 @@
11561391 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
11571392 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
11581393
1159
- /* Our usage of tstamp should remain private */
1160
- skb->tstamp = 0;
1394
+ /* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
11611395
11621396 /* Cleanup our debris for IP stacks */
11631397 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
11641398 sizeof(struct inet6_skb_parm)));
11651399
1166
- err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1400
+ tcp_add_tx_delay(skb, tp);
1401
+
1402
+ err = INDIRECT_CALL_INET(icsk->icsk_af_ops->queue_xmit,
1403
+ inet6_csk_xmit, ip_queue_xmit,
1404
+ sk, skb, &inet->cork.fl);
11671405
11681406 if (unlikely(err > 0)) {
11691407 tcp_enter_cwr(sk);
11701408 err = net_xmit_eval(err);
11711409 }
11721410 if (!err && oskb) {
1173
- tcp_update_skb_after_send(tp, oskb);
1411
+ tcp_update_skb_after_send(sk, oskb, prior_wstamp);
11741412 tcp_rate_skb_sent(sk, oskb);
11751413 }
11761414 return err;
....@@ -1196,7 +1434,7 @@
11961434 WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
11971435 __skb_header_release(skb);
11981436 tcp_add_write_queue_tail(sk, skb);
1199
- sk->sk_wmem_queued += skb->truesize;
1437
+ sk_wmem_queued_add(sk, skb->truesize);
12001438 sk_mem_charge(sk, skb->truesize);
12011439 }
12021440
....@@ -1321,15 +1559,16 @@
13211559 return -ENOMEM;
13221560 }
13231561
1324
- if (skb_unclone(skb, gfp))
1562
+ if (skb_unclone_keeptruesize(skb, gfp))
13251563 return -ENOMEM;
13261564
13271565 /* Get a new skb... force flag on. */
13281566 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
13291567 if (!buff)
13301568 return -ENOMEM; /* We'll just try again later. */
1569
+ skb_copy_decrypted(buff, skb);
13311570
1332
- sk->sk_wmem_queued += buff->truesize;
1571
+ sk_wmem_queued_add(sk, buff->truesize);
13331572 sk_mem_charge(sk, buff->truesize);
13341573 nlen = skb->len - len - nsize;
13351574 buff->truesize += nlen;
....@@ -1410,7 +1649,7 @@
14101649 } else {
14111650 shinfo->frags[k] = shinfo->frags[i];
14121651 if (eat) {
1413
- shinfo->frags[k].page_offset += eat;
1652
+ skb_frag_off_add(&shinfo->frags[k], eat);
14141653 skb_frag_size_sub(&shinfo->frags[k], eat);
14151654 eat = 0;
14161655 }
....@@ -1429,7 +1668,7 @@
14291668 {
14301669 u32 delta_truesize;
14311670
1432
- if (skb_unclone(skb, GFP_ATOMIC))
1671
+ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
14331672 return -ENOMEM;
14341673
14351674 delta_truesize = __pskb_trim_head(skb, len);
....@@ -1439,9 +1678,8 @@
14391678
14401679 if (delta_truesize) {
14411680 skb->truesize -= delta_truesize;
1442
- sk->sk_wmem_queued -= delta_truesize;
1681
+ sk_wmem_queued_add(sk, -delta_truesize);
14431682 sk_mem_uncharge(sk, delta_truesize);
1444
- sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
14451683 }
14461684
14471685 /* Any change of skb->len requires recalculation of tso factor. */
....@@ -1479,7 +1717,8 @@
14791717 mss_now -= icsk->icsk_ext_hdr_len;
14801718
14811719 /* Then reserve room for full set of TCP options and 8 bytes of data */
1482
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
1720
+ mss_now = max(mss_now,
1721
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
14831722 return mss_now;
14841723 }
14851724
....@@ -1522,10 +1761,10 @@
15221761 struct inet_connection_sock *icsk = inet_csk(sk);
15231762 struct net *net = sock_net(sk);
15241763
1525
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1764
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
15261765 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
15271766 icsk->icsk_af_ops->net_header_len;
1528
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1767
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
15291768 icsk->icsk_mtup.probe_size = 0;
15301769 if (icsk->icsk_mtup.enabled)
15311770 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
....@@ -1637,15 +1876,20 @@
16371876 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
16381877 struct tcp_sock *tp = tcp_sk(sk);
16391878
1640
- /* Track the maximum number of outstanding packets in each
1641
- * window, and remember whether we were cwnd-limited then.
1879
+ /* Track the strongest available signal of the degree to which the cwnd
1880
+ * is fully utilized. If cwnd-limited then remember that fact for the
1881
+ * current window. If not cwnd-limited then track the maximum number of
1882
+ * outstanding packets in the current window. (If cwnd-limited then we
1883
+ * chose to not update tp->max_packets_out to avoid an extra else
1884
+ * clause with no functional impact.)
16421885 */
1643
- if (!before(tp->snd_una, tp->max_packets_seq) ||
1644
- tp->packets_out > tp->max_packets_out ||
1645
- is_cwnd_limited) {
1646
- tp->max_packets_out = tp->packets_out;
1647
- tp->max_packets_seq = tp->snd_nxt;
1886
+ if (!before(tp->snd_una, tp->cwnd_usage_seq) ||
1887
+ is_cwnd_limited ||
1888
+ (!tp->is_cwnd_limited &&
1889
+ tp->packets_out > tp->max_packets_out)) {
16481890 tp->is_cwnd_limited = is_cwnd_limited;
1891
+ tp->max_packets_out = tp->packets_out;
1892
+ tp->cwnd_usage_seq = tp->snd_nxt;
16491893 }
16501894
16511895 if (tcp_is_cwnd_limited(sk)) {
....@@ -1657,7 +1901,7 @@
16571901 if (tp->packets_out > tp->snd_cwnd_used)
16581902 tp->snd_cwnd_used = tp->packets_out;
16591903
1660
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1904
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
16611905 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
16621906 !ca_ops->cong_control)
16631907 tcp_cwnd_application_limited(sk);
....@@ -1721,8 +1965,9 @@
17211965 {
17221966 u32 bytes, segs;
17231967
1724
- bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
1725
- sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1968
+ bytes = min_t(unsigned long,
1969
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
1970
+ sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
17261971
17271972 /* Goal is to send at least one packet per ms,
17281973 * not one big TSO packet every 100 ms.
....@@ -1744,7 +1989,7 @@
17441989
17451990 min_tso = ca_ops->min_tso_segs ?
17461991 ca_ops->min_tso_segs(sk) :
1747
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
1992
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
17481993
17491994 tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
17501995 return min_t(u32, tso_segs, sk->sk_gso_max_segs);
....@@ -1868,23 +2113,24 @@
18682113 * know that all the data is in scatter-gather pages, and that the
18692114 * packet has never been sent out before (and thus is not cloned).
18702115 */
1871
-static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1872
- struct sk_buff *skb, unsigned int len,
2116
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
18732117 unsigned int mss_now, gfp_t gfp)
18742118 {
1875
- struct sk_buff *buff;
18762119 int nlen = skb->len - len;
2120
+ struct sk_buff *buff;
18772121 u8 flags;
18782122
18792123 /* All of a TSO frame must be composed of paged data. */
18802124 if (skb->len != skb->data_len)
1881
- return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
2125
+ return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
2126
+ skb, len, mss_now, gfp);
18822127
18832128 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
18842129 if (unlikely(!buff))
18852130 return -ENOMEM;
2131
+ skb_copy_decrypted(buff, skb);
18862132
1887
- sk->sk_wmem_queued += buff->truesize;
2133
+ sk_wmem_queued_add(sk, buff->truesize);
18882134 sk_mem_charge(sk, buff->truesize);
18892135 buff->truesize += nlen;
18902136 skb->truesize -= nlen;
....@@ -1914,7 +2160,7 @@
19142160
19152161 /* Link BUFF into the send queue. */
19162162 __skb_header_release(buff);
1917
- tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
2163
+ tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE);
19182164
19192165 return 0;
19202166 }
....@@ -1930,18 +2176,22 @@
19302176 u32 max_segs)
19312177 {
19322178 const struct inet_connection_sock *icsk = inet_csk(sk);
1933
- u32 age, send_win, cong_win, limit, in_flight;
2179
+ u32 send_win, cong_win, limit, in_flight;
19342180 struct tcp_sock *tp = tcp_sk(sk);
19352181 struct sk_buff *head;
19362182 int win_divisor;
2183
+ s64 delta;
19372184
19382185 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
19392186 goto send_now;
19402187
19412188 /* Avoid bursty behavior by allowing defer
1942
- * only if the last write was recent.
2189
+ * only if the last write was recent (1 ms).
2190
+ * Note that tp->tcp_wstamp_ns can be in the future if we have
2191
+ * packets waiting in a qdisc or device for EDT delivery.
19432192 */
1944
- if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
2193
+ delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
2194
+ if (delta > 0)
19452195 goto send_now;
19462196
19472197 in_flight = tcp_packets_in_flight(tp);
....@@ -1988,9 +2238,9 @@
19882238 head = tcp_rtx_queue_head(sk);
19892239 if (!head)
19902240 goto send_now;
1991
- age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
2241
+ delta = tp->tcp_clock_cache - head->tstamp;
19922242 /* If next ACK is likely to come too late (half srtt), do not defer */
1993
- if (age < (tp->srtt_us >> 4))
2243
+ if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
19942244 goto send_now;
19952245
19962246 /* Ok, it looks like it is advisable to defer.
....@@ -2012,7 +2262,8 @@
20122262 }
20132263
20142264 /* If this packet won't get more data, do not wait. */
2015
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
2265
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
2266
+ TCP_SKB_CB(skb)->eor)
20162267 goto send_now;
20172268
20182269 return true;
....@@ -2029,7 +2280,7 @@
20292280 u32 interval;
20302281 s32 delta;
20312282
2032
- interval = net->ipv4.sysctl_tcp_probe_interval;
2283
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
20332284 delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
20342285 if (unlikely(delta >= interval * HZ)) {
20352286 int mss = tcp_current_mss(sk);
....@@ -2111,7 +2362,7 @@
21112362 * probing process by not resetting search range to its original.
21122363 */
21132364 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
2114
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
2365
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
21152366 /* Check whether enough time has elapsed for
21162367 * another round of probing.
21172368 */
....@@ -2139,17 +2390,15 @@
21392390 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
21402391 return -1;
21412392
2142
- if (tcp_pacing_check(sk))
2143
- return -1;
2144
-
21452393 /* We're allowed to probe. Build it now. */
21462394 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
21472395 if (!nskb)
21482396 return -1;
2149
- sk->sk_wmem_queued += nskb->truesize;
2397
+ sk_wmem_queued_add(sk, nskb->truesize);
21502398 sk_mem_charge(sk, nskb->truesize);
21512399
21522400 skb = tcp_send_head(sk);
2401
+ skb_copy_decrypted(nskb, skb);
21532402
21542403 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
21552404 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
....@@ -2215,6 +2464,25 @@
22152464 return -1;
22162465 }
22172466
2467
+static bool tcp_pacing_check(struct sock *sk)
2468
+{
2469
+ struct tcp_sock *tp = tcp_sk(sk);
2470
+
2471
+ if (!tcp_needs_internal_pacing(sk))
2472
+ return false;
2473
+
2474
+ if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
2475
+ return false;
2476
+
2477
+ if (!hrtimer_is_queued(&tp->pacing_timer)) {
2478
+ hrtimer_start(&tp->pacing_timer,
2479
+ ns_to_ktime(tp->tcp_wstamp_ns),
2480
+ HRTIMER_MODE_ABS_PINNED_SOFT);
2481
+ sock_hold(sk);
2482
+ }
2483
+ return true;
2484
+}
2485
+
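
With earliest-departure pacing the "should we wait?" question reduces to comparing the cached clock with the departure stamp; only if the stamp is in the future is the single pacing timer armed for that instant, as the new tcp_pacing_check() does. A minimal sketch of that decision, all names illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Decide whether transmission must wait for the earliest departure time.
 * Nothing to do if pacing is off or the stamp is not in the future;
 * otherwise the caller arms one timer for wstamp_ns and stops sending.
 */
static bool pacing_defer(bool pacing_enabled, uint64_t wstamp_ns,
			 uint64_t clock_cache_ns, uint64_t *timer_expiry_ns)
{
	if (!pacing_enabled)
		return false;
	if (wstamp_ns <= clock_cache_ns)
		return false;

	*timer_expiry_ns = wstamp_ns;
	return true;
}

int main(void)
{
	uint64_t expiry = 0;

	if (pacing_defer(true, 2000000, 1500000, &expiry))
		printf("defer until %llu ns\n", (unsigned long long)expiry);
	else
		printf("send now\n");
	return 0;
}
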
22182486 /* TCP Small Queues :
22192487 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
22202488 * (These limits are doubled for retransmits)
....@@ -2229,13 +2497,28 @@
22292497 static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
22302498 unsigned int factor)
22312499 {
2232
- unsigned int limit;
2500
+ unsigned long limit;
22332501
2234
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
2235
- limit = min_t(u32, limit,
2236
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2502
+ limit = max_t(unsigned long,
2503
+ 2 * skb->truesize,
2504
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
2505
+ if (sk->sk_pacing_status == SK_PACING_NONE)
2506
+ limit = min_t(unsigned long, limit,
2507
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
22372508 limit <<= factor;
22382509
2510
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
2511
+ tcp_sk(sk)->tcp_tx_delay) {
2512
+ u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
2513
+
2514
+ /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
2515
+ * approximate our needs assuming an ~100% skb->truesize overhead.
2516
+ * USEC_PER_SEC is approximated by 2^20.
2517
+ * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift.
2518
+ */
2519
+ extra_bytes >>= (20 - 1);
2520
+ limit += extra_bytes;
2521
+ }
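
The TSQ budget is max(2 * skb->truesize, pacing_rate >> pacing_shift), optionally capped by the sysctl when the socket is not offloading pacing, doubled for retransmits via 'factor', and, when a TX delay is configured, grown by roughly rate * delay with the >> 19 standing in for a division by USEC_PER_SEC/2 (the factor of two covering truesize overhead). A worked numeric sketch of that budget; every value and the helper name are invented for illustration.

#include <stdio.h>

/* Rough TSQ byte budget, following the shape of tcp_small_queue_check();
 * this is an illustration, not kernel code.
 */
static unsigned long tsq_limit(unsigned long truesize,
			       unsigned long pacing_rate,	/* bytes/sec */
			       unsigned int pacing_shift,
			       unsigned long sysctl_limit,
			       int sysctl_applies,
			       unsigned int factor,		/* 1 for rtx */
			       unsigned long tx_delay_us)
{
	unsigned long limit = 2 * truesize;
	unsigned long paced = pacing_rate >> pacing_shift;

	if (paced > limit)
		limit = paced;
	if (sysctl_applies && limit > sysctl_limit)
		limit = sysctl_limit;
	limit <<= factor;

	if (tx_delay_us) {
		/* ~ rate * delay / (USEC_PER_SEC / 2): 2^19 approximates 500000 */
		unsigned long long extra = (unsigned long long)pacing_rate * tx_delay_us;

		limit += extra >> 19;
	}
	return limit;
}

int main(void)
{
	/* 1 Gbit/s pacing (125 MB/s), shift 10, 1 ms artificial TX delay. */
	printf("limit = %lu bytes\n",
	       tsq_limit(4096, 125000000UL, 10, 1048576UL, 1, 0, 1000));
	return 0;
}
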
22392522 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
22402523 /* Always send skb if rtx queue is empty.
22412524 * No need to wait for TX completion to call us back,
....@@ -2341,17 +2624,19 @@
23412624 while ((skb = tcp_send_head(sk))) {
23422625 unsigned int limit;
23432626
2627
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2628
+ /* "skb_mstamp_ns" is used as a start point for the retransmit timer */
2629
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
2630
+ list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
2631
+ tcp_init_tso_segs(skb, mss_now);
2632
+ goto repair; /* Skip network transmission */
2633
+ }
2634
+
23442635 if (tcp_pacing_check(sk))
23452636 break;
23462637
23472638 tso_segs = tcp_init_tso_segs(skb, mss_now);
23482639 BUG_ON(!tso_segs);
2349
-
2350
- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2351
- /* "skb_mstamp" is used as a start point for the retransmit timer */
2352
- tcp_update_skb_after_send(tp, skb);
2353
- goto repair; /* Skip network transmission */
2354
- }
23552640
23562641 cwnd_quota = tcp_cwnd_test(tp, skb);
23572642 if (!cwnd_quota) {
....@@ -2388,8 +2673,7 @@
23882673 nonagle);
23892674
23902675 if (skb->len > limit &&
2391
- unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
2392
- skb, limit, mss_now, gfp)))
2676
+ unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
23932677 break;
23942678
23952679 if (tcp_small_queue_check(sk, skb, 0))
....@@ -2450,10 +2734,10 @@
24502734 /* Don't do any loss probe on a Fast Open connection before 3WHS
24512735 * finishes.
24522736 */
2453
- if (tp->fastopen_rsk)
2737
+ if (rcu_access_pointer(tp->fastopen_rsk))
24542738 return false;
24552739
2456
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2740
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
24572741 /* Schedule a loss probe in 2*RTT for SACK capable connections
24582742 * not in loss recovery, that are either limited by cwnd or application.
24592743 */
....@@ -2484,8 +2768,7 @@
24842768 if (rto_delta_us > 0)
24852769 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
24862770
2487
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2488
- TCP_RTO_MAX);
2771
+ tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX);
24892772 return true;
24902773 }
24912774
....@@ -2666,8 +2949,12 @@
26662949 int mss = icsk->icsk_ack.rcv_mss;
26672950 int free_space = tcp_space(sk);
26682951 int allowed_space = tcp_full_space(sk);
2669
- int full_space = min_t(int, tp->window_clamp, allowed_space);
2670
- int window;
2952
+ int full_space, window;
2953
+
2954
+ if (sk_is_mptcp(sk))
2955
+ mptcp_space(sk, &free_space, &allowed_space);
2956
+
2957
+ full_space = min_t(int, tp->window_clamp, allowed_space);
26712958
26722959 if (unlikely(mss > full_space)) {
26732960 mss = full_space;
....@@ -2815,7 +3102,7 @@
28153102 struct sk_buff *skb = to, *tmp;
28163103 bool first = true;
28173104
2818
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
3105
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
28193106 return;
28203107 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
28213108 return;
....@@ -2824,7 +3111,7 @@
28243111 if (!tcp_can_collapse(sk, skb))
28253112 break;
28263113
2827
- if (!tcp_skb_can_collapse_to(to))
3114
+ if (!tcp_skb_can_collapse(to, skb))
28283115 break;
28293116
28303117 space -= skb->len;
....@@ -2855,7 +3142,7 @@
28553142 struct tcp_sock *tp = tcp_sk(sk);
28563143 unsigned int cur_mss;
28573144 int diff, len, err;
2858
-
3145
+ int avail_wnd;
28593146
28603147 /* Inconclusive MTU probe */
28613148 if (icsk->icsk_mtup.probe_size)
....@@ -2885,23 +3172,31 @@
28853172 return -EHOSTUNREACH; /* Routing failure or similar. */
28863173
28873174 cur_mss = tcp_current_mss(sk);
3175
+ avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
28883176
28893177 /* If receiver has shrunk his window, and skb is out of
28903178 * new window, do not retransmit it. The exception is the
28913179 * case, when window is shrunk to zero. In this case
2892
- * our retransmit serves as a zero window probe.
3180
+ * our retransmit of one segment serves as a zero window probe.
28933181 */
2894
- if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2895
- TCP_SKB_CB(skb)->seq != tp->snd_una)
2896
- return -EAGAIN;
3182
+ if (avail_wnd <= 0) {
3183
+ if (TCP_SKB_CB(skb)->seq != tp->snd_una)
3184
+ return -EAGAIN;
3185
+ avail_wnd = cur_mss;
3186
+ }
28973187
28983188 len = cur_mss * segs;
3189
+ if (len > avail_wnd) {
3190
+ len = rounddown(avail_wnd, cur_mss);
3191
+ if (!len)
3192
+ len = avail_wnd;
3193
+ }
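
When the receive window cannot cover a full cur_mss * segs burst, the retransmit length is rounded down to a whole number of MSS that still fits, falling back to whatever window is left (or to one segment as a zero-window probe). A short sketch of that sizing decision; it omits the snd_una check the kernel also performs, and the values are illustrative.

#include <stdio.h>

/* Pick how many bytes to retransmit given the available window,
 * following the rounddown() logic added above.
 */
static int rtx_len(int cur_mss, int segs, int avail_wnd)
{
	int len = cur_mss * segs;

	if (avail_wnd <= 0)
		avail_wnd = cur_mss;	/* zero-window probe: one segment */
	if (len > avail_wnd) {
		len = avail_wnd - (avail_wnd % cur_mss);	/* rounddown */
		if (!len)
			len = avail_wnd;
	}
	return len;
}

int main(void)
{
	printf("%d\n", rtx_len(1448, 3, 10000));	/* 4344: full burst fits */
	printf("%d\n", rtx_len(1448, 3, 3000));		/* 2896: two MSS fit   */
	printf("%d\n", rtx_len(1448, 3, 800));		/* 800: partial window */
	return 0;
}
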
28993194 if (skb->len > len) {
29003195 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
29013196 cur_mss, GFP_ATOMIC))
29023197 return -ENOMEM; /* We'll try again later. */
29033198 } else {
2904
- if (skb_unclone(skb, GFP_ATOMIC))
3199
+ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
29053200 return -ENOMEM;
29063201
29073202 diff = tcp_skb_pcount(skb);
....@@ -2909,8 +3204,9 @@
29093204 diff -= tcp_skb_pcount(skb);
29103205 if (diff)
29113206 tcp_adjust_pcount(sk, skb, diff);
2912
- if (skb->len < cur_mss)
2913
- tcp_retrans_try_collapse(sk, skb, cur_mss);
3207
+ avail_wnd = min_t(int, avail_wnd, cur_mss);
3208
+ if (skb->len < avail_wnd)
3209
+ tcp_retrans_try_collapse(sk, skb, avail_wnd);
29143210 }
29153211
29163212 /* RFC3168, section 6.1.1.1. ECN fallback */
....@@ -2935,24 +3231,32 @@
29353231
29363232 tcp_skb_tsorted_save(skb) {
29373233 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2938
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2939
- -ENOBUFS;
3234
+ if (nskb) {
3235
+ nskb->dev = NULL;
3236
+ err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC);
3237
+ } else {
3238
+ err = -ENOBUFS;
3239
+ }
29403240 } tcp_skb_tsorted_restore(skb);
29413241
29423242 if (!err) {
2943
- tcp_update_skb_after_send(tp, skb);
3243
+ tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
29443244 tcp_rate_skb_sent(sk, skb);
29453245 }
29463246 } else {
29473247 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
29483248 }
29493249
3250
+ /* To avoid taking spuriously low RTT samples based on a timestamp
3251
+ * for a transmit that never happened, always mark EVER_RETRANS
3252
+ */
3253
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
3254
+
29503255 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
29513256 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
29523257 TCP_SKB_CB(skb)->seq, segs, err);
29533258
29543259 if (likely(!err)) {
2955
- TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
29563260 trace_tcp_retransmit_skb(sk, skb);
29573261 } else if (err != -EBUSY) {
29583262 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
....@@ -2995,6 +3299,7 @@
29953299 const struct inet_connection_sock *icsk = inet_csk(sk);
29963300 struct sk_buff *skb, *rtx_head, *hole = NULL;
29973301 struct tcp_sock *tp = tcp_sk(sk);
3302
+ bool rearm_timer = false;
29983303 u32 max_segs;
29993304 int mib_idx;
30003305
....@@ -3017,7 +3322,7 @@
30173322
30183323 segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
30193324 if (segs <= 0)
3020
- return;
3325
+ break;
30213326 sacked = TCP_SKB_CB(skb)->sacked;
30223327 /* In case tcp_shift_skb_data() has aggregated large skbs,
30233328 * we need to make sure not to send too big TSO packets
....@@ -3042,10 +3347,10 @@
30423347 continue;
30433348
30443349 if (tcp_small_queue_check(sk, skb, 1))
3045
- return;
3350
+ break;
30463351
30473352 if (tcp_retransmit_skb(sk, skb, segs))
3048
- return;
3353
+ break;
30493354
30503355 NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
30513356
....@@ -3054,10 +3359,13 @@
30543359
30553360 if (skb == rtx_head &&
30563361 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
3057
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3058
- inet_csk(sk)->icsk_rto,
3059
- TCP_RTO_MAX);
3362
+ rearm_timer = true;
3363
+
30603364 }
3365
+ if (rearm_timer)
3366
+ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3367
+ inet_csk(sk)->icsk_rto,
3368
+ TCP_RTO_MAX);
30613369 }
30623370
30633371 /* We allow to exceed memory limits for FIN packets to expedite
....@@ -3069,11 +3377,12 @@
30693377 */
30703378 void sk_forced_mem_schedule(struct sock *sk, int size)
30713379 {
3072
- int amt;
3380
+ int delta, amt;
30733381
3074
- if (size <= sk->sk_forward_alloc)
3382
+ delta = size - sk->sk_forward_alloc;
3383
+ if (delta <= 0)
30753384 return;
3076
- amt = sk_mem_pages(size);
3385
+ amt = sk_mem_pages(delta);
30773386 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
30783387 sk_memory_allocated_add(sk, amt);
30793388
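
The fix charges memory only for the shortfall: the part of size not already covered by sk_forward_alloc is converted to pages and accounted. A small arithmetic sketch, assuming SK_MEM_QUANTUM is the usual 4 KiB page and with invented helper names:

#include <stdio.h>

#define SK_MEM_QUANTUM 4096

/* Pages needed to cover 'bytes', rounding up like sk_mem_pages(). */
static int mem_pages(int bytes)
{
	return (bytes + SK_MEM_QUANTUM - 1) / SK_MEM_QUANTUM;
}

/* Charge only the part of 'size' not already covered by forward_alloc,
 * as the patched sk_forced_mem_schedule() does.
 */
static int forced_charge_pages(int size, int forward_alloc)
{
	int delta = size - forward_alloc;

	return delta > 0 ? mem_pages(delta) : 0;
}

int main(void)
{
	/* 6000-byte FIN skb, 5000 bytes already reserved: one extra page. */
	printf("pages charged: %d\n", forced_charge_pages(6000, 5000));
	/* Fully covered: nothing charged. */
	printf("pages charged: %d\n", forced_charge_pages(3000, 4096));
	return 0;
}
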
....@@ -3086,7 +3395,7 @@
30863395 */
30873396 void tcp_send_fin(struct sock *sk)
30883397 {
3089
- struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
3398
+ struct sk_buff *skb, *tskb, *tail = tcp_write_queue_tail(sk);
30903399 struct tcp_sock *tp = tcp_sk(sk);
30913400
30923401 /* Optimization, tack on the FIN if we have one skb in write queue and
....@@ -3094,31 +3403,29 @@
30943403 * Note: in the latter case, FIN packet will be sent after a timeout,
30953404 * as TCP stack thinks it has already been transmitted.
30963405 */
3406
+ tskb = tail;
30973407 if (!tskb && tcp_under_memory_pressure(sk))
30983408 tskb = skb_rb_last(&sk->tcp_rtx_queue);
30993409
31003410 if (tskb) {
3101
-coalesce:
31023411 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
31033412 TCP_SKB_CB(tskb)->end_seq++;
31043413 tp->write_seq++;
3105
- if (tcp_write_queue_empty(sk)) {
3414
+ if (!tail) {
31063415 /* This means tskb was already sent.
31073416 * Pretend we included the FIN on previous transmit.
31083417 * We need to set tp->snd_nxt to the value it would have
31093418 * if FIN had been sent. This is because retransmit path
31103419 * does not change tp->snd_nxt.
31113420 */
3112
- tp->snd_nxt++;
3421
+ WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
31133422 return;
31143423 }
31153424 } else {
31163425 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
3117
- if (unlikely(!skb)) {
3118
- if (tskb)
3119
- goto coalesce;
3426
+ if (unlikely(!skb))
31203427 return;
3121
- }
3428
+
31223429 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
31233430 skb_reserve(skb, MAX_TCP_HEADER);
31243431 sk_forced_mem_schedule(sk, skb->truesize);
....@@ -3192,7 +3499,7 @@
31923499 tcp_rtx_queue_unlink_and_free(skb, sk);
31933500 __skb_header_release(nskb);
31943501 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3195
- sk->sk_wmem_queued += nskb->truesize;
3502
+ sk_wmem_queued_add(sk, nskb->truesize);
31963503 sk_mem_charge(sk, nskb->truesize);
31973504 skb = nskb;
31983505 }
....@@ -3204,18 +3511,20 @@
32043511 }
32053512
32063513 /**
3207
- * tcp_make_synack - Prepare a SYN-ACK.
3208
- * sk: listener socket
3209
- * dst: dst entry attached to the SYNACK
3210
- * req: request_sock pointer
3211
- *
3212
- * Allocate one skb and build a SYNACK packet.
3213
- * @dst is consumed : Caller should not use it again.
3514
+ * tcp_make_synack - Allocate one skb and build a SYNACK packet.
3515
+ * @sk: listener socket
3516
+ * @dst: dst entry attached to the SYNACK. It is consumed and caller
3517
+ * should not use it again.
3518
+ * @req: request_sock pointer
3519
+ * @foc: cookie for tcp fast open
3520
+ * @synack_type: Type of synack to prepare
3521
+ * @syn_skb: SYN packet just received. It could be NULL for rtx case.
32143522 */
32153523 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
32163524 struct request_sock *req,
32173525 struct tcp_fastopen_cookie *foc,
3218
- enum tcp_synack_type synack_type)
3526
+ enum tcp_synack_type synack_type,
3527
+ struct sk_buff *syn_skb)
32193528 {
32203529 struct inet_request_sock *ireq = inet_rsk(req);
32213530 const struct tcp_sock *tp = tcp_sk(sk);
....@@ -3225,6 +3534,7 @@
32253534 int tcp_header_size;
32263535 struct tcphdr *th;
32273536 int mss;
3537
+ u64 now;
32283538
32293539 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
32303540 if (unlikely(!skb)) {
....@@ -3256,20 +3566,28 @@
32563566 mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
32573567
32583568 memset(&opts, 0, sizeof(opts));
3569
+ now = tcp_clock_ns();
32593570 #ifdef CONFIG_SYN_COOKIES
3260
- if (unlikely(req->cookie_ts))
3261
- skb->skb_mstamp = cookie_init_timestamp(req);
3571
+ if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
3572
+ skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
32623573 else
32633574 #endif
3264
- skb->skb_mstamp = tcp_clock_us();
3575
+ {
3576
+ skb->skb_mstamp_ns = now;
3577
+ if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
3578
+ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
3579
+ }
32653580
32663581 #ifdef CONFIG_TCP_MD5SIG
32673582 rcu_read_lock();
32683583 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
32693584 #endif
32703585 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3586
+ /* bpf program will be interested in the tcp_flags */
3587
+ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
32713588 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3272
- foc, synack_type) + sizeof(*th);
3589
+ foc, synack_type,
3590
+ syn_skb) + sizeof(*th);
32733591
32743592 skb_push(skb, tcp_header_size);
32753593 skb_reset_transport_header(skb);
....@@ -3301,8 +3619,12 @@
33013619 rcu_read_unlock();
33023620 #endif
33033621
3304
- /* Do not fool tcpdump (if any), clean our debris */
3305
- skb->tstamp = 0;
3622
+ bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
3623
+ synack_type, &opts);
3624
+
3625
+ skb->skb_mstamp_ns = now;
3626
+ tcp_add_tx_delay(skb, tp);
3627
+
33063628 return skb;
33073629 }
33083630 EXPORT_SYMBOL(tcp_make_synack);
....@@ -3318,8 +3640,8 @@
33183640
33193641 rcu_read_lock();
33203642 ca = tcp_ca_find_key(ca_key);
3321
- if (likely(ca && try_module_get(ca->owner))) {
3322
- module_put(icsk->icsk_ca_ops->owner);
3643
+ if (likely(ca && bpf_try_module_get(ca, ca->owner))) {
3644
+ bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
33233645 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
33243646 icsk->icsk_ca_ops = ca;
33253647 }
....@@ -3338,7 +3660,7 @@
33383660 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
33393661 */
33403662 tp->tcp_header_len = sizeof(struct tcphdr);
3341
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
3663
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
33423664 tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
33433665
33443666 #ifdef CONFIG_TCP_MD5SIG
....@@ -3374,7 +3696,7 @@
33743696 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
33753697 &tp->rcv_wnd,
33763698 &tp->window_clamp,
3377
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
3699
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
33783700 &rcv_wscale,
33793701 rcv_wnd);
33803702
....@@ -3389,7 +3711,7 @@
33893711 tp->snd_una = tp->write_seq;
33903712 tp->snd_sml = tp->write_seq;
33913713 tp->snd_up = tp->write_seq;
3392
- tp->snd_nxt = tp->write_seq;
3714
+ WRITE_ONCE(tp->snd_nxt, tp->write_seq);
33933715
33943716 if (likely(!tp->repair))
33953717 tp->rcv_nxt = 0;
....@@ -3410,7 +3732,7 @@
34103732
34113733 tcb->end_seq += skb->len;
34123734 __skb_header_release(skb);
3413
- sk->sk_wmem_queued += skb->truesize;
3735
+ sk_wmem_queued_add(sk, skb->truesize);
34143736 sk_mem_charge(sk, skb->truesize);
34153737 WRITE_ONCE(tp->write_seq, tcb->end_seq);
34163738 tp->packets_out += tcp_skb_pcount(skb);
....@@ -3425,6 +3747,7 @@
34253747 */
34263748 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
34273749 {
3750
+ struct inet_connection_sock *icsk = inet_csk(sk);
34283751 struct tcp_sock *tp = tcp_sk(sk);
34293752 struct tcp_fastopen_request *fo = tp->fastopen_req;
34303753 int space, err = 0;
....@@ -3439,8 +3762,10 @@
34393762 * private TCP options. The cost is reduced data space in SYN :(
34403763 */
34413764 tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
3765
+ /* Sync mss_cache after updating the mss_clamp */
3766
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
34423767
3443
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3768
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
34443769 MAX_TCP_OPTION_SPACE;
34453770
34463771 space = min_t(size_t, space, fo->size);
....@@ -3465,6 +3790,7 @@
34653790 skb_trim(syn_data, copied);
34663791 space = copied;
34673792 }
3793
+ skb_zcopy_set(syn_data, fo->uarg, NULL);
34683794 }
34693795 /* No more data pending in inet_wait_for_connect() */
34703796 if (space == fo->size)
....@@ -3477,7 +3803,7 @@
34773803
34783804 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
34793805
3480
- syn->skb_mstamp = syn_data->skb_mstamp;
3806
+ syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
34813807
34823808 /* Now full SYN+DATA was cloned and sent (or not),
34833809 * remove the SYN from the original skb (syn_data)
....@@ -3548,11 +3874,11 @@
35483874 /* We change tp->snd_nxt after the tcp_transmit_skb() call
35493875 * in order to make this packet get counted in tcpOutSegs.
35503876 */
3551
- tp->snd_nxt = tp->write_seq;
3877
+ WRITE_ONCE(tp->snd_nxt, tp->write_seq);
35523878 tp->pushed_seq = tp->write_seq;
35533879 buff = tcp_send_head(sk);
35543880 if (unlikely(buff)) {
3555
- tp->snd_nxt = TCP_SKB_CB(buff)->seq;
3881
+ WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq);
35563882 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
35573883 }
35583884 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
....@@ -3578,7 +3904,7 @@
35783904 const struct tcp_sock *tp = tcp_sk(sk);
35793905 int max_ato = HZ / 2;
35803906
3581
- if (icsk->icsk_ack.pingpong ||
3907
+ if (inet_csk_in_pingpong_mode(sk) ||
35823908 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
35833909 max_ato = TCP_DELACK_MAX;
35843910
....@@ -3599,16 +3925,15 @@
35993925 ato = min(ato, max_ato);
36003926 }
36013927
3928
+ ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
3929
+
36023930 /* Stay within the limit we were given */
36033931 timeout = jiffies + ato;
36043932
36053933 /* Use new timeout only if there wasn't a older one earlier. */
36063934 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3607
- /* If delack timer was blocked or is about to expire,
3608
- * send ACK now.
3609
- */
3610
- if (icsk->icsk_ack.blocked ||
3611
- time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3935
+ /* If delack timer is about to expire, send ACK now. */
3936
+ if (time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
36123937 tcp_send_ack(sk);
36133938 return;
36143939 }
....@@ -3637,10 +3962,15 @@
36373962 buff = alloc_skb(MAX_TCP_HEADER,
36383963 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
36393964 if (unlikely(!buff)) {
3965
+ struct inet_connection_sock *icsk = inet_csk(sk);
3966
+ unsigned long delay;
3967
+
3968
+ delay = TCP_DELACK_MAX << icsk->icsk_ack.retry;
3969
+ if (delay < TCP_RTO_MAX)
3970
+ icsk->icsk_ack.retry++;
36403971 inet_csk_schedule_ack(sk);
3641
- inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3642
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3643
- TCP_DELACK_MAX, TCP_RTO_MAX);
3972
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
3973
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, TCP_RTO_MAX);
36443974 return;
36453975 }
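
If the ACK skb cannot be allocated, the delayed-ACK timer is re-armed with an exponentially growing delay: TCP_DELACK_MAX shifted by a retry counter that stops increasing once the delay reaches TCP_RTO_MAX, which also caps the armed timeout. A tiny sketch of that schedule, assuming the common HZ=1000 values for the two constants:

#include <stdio.h>

#define HZ             1000u
#define TCP_DELACK_MAX (HZ / 5)		/* 200 ms in jiffies, assuming HZ=1000 */
#define TCP_RTO_MAX    (120u * HZ)	/* 120 s */

int main(void)
{
	unsigned int retry = 0;

	/* Emulate repeated ACK-skb allocation failures: the delay doubles
	 * each time until it would reach TCP_RTO_MAX, which also acts as
	 * the cap passed to the timer.
	 */
	for (int i = 0; i < 12; i++) {
		unsigned int delay = TCP_DELACK_MAX << retry;

		if (delay < TCP_RTO_MAX)
			retry++;
		if (delay > TCP_RTO_MAX)
			delay = TCP_RTO_MAX;
		printf("attempt %2d: delay %6u ms\n", i + 1, delay);
	}
	return 0;
}
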
36463976
....@@ -3759,7 +4089,7 @@
37594089 struct inet_connection_sock *icsk = inet_csk(sk);
37604090 struct tcp_sock *tp = tcp_sk(sk);
37614091 struct net *net = sock_net(sk);
3762
- unsigned long probe_max;
4092
+ unsigned long timeout;
37634093 int err;
37644094
37654095 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
....@@ -3768,28 +4098,24 @@
37684098 /* Cancel probe timer, if it is not required. */
37694099 icsk->icsk_probes_out = 0;
37704100 icsk->icsk_backoff = 0;
4101
+ icsk->icsk_probes_tstamp = 0;
37714102 return;
37724103 }
37734104
4105
+ icsk->icsk_probes_out++;
37744106 if (err <= 0) {
3775
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
4107
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
37764108 icsk->icsk_backoff++;
3777
- icsk->icsk_probes_out++;
3778
- probe_max = TCP_RTO_MAX;
4109
+ timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
37794110 } else {
37804111 /* If packet was not sent due to local congestion,
3781
- * do not backoff and do not remember icsk_probes_out.
3782
- * Let local senders to fight for local resources.
3783
- *
3784
- * Use accumulated backoff yet.
4112
+ * Let senders fight for local resources conservatively.
37854113 */
3786
- if (!icsk->icsk_probes_out)
3787
- icsk->icsk_probes_out = 1;
3788
- probe_max = TCP_RESOURCE_PROBE_INTERVAL;
4114
+ timeout = TCP_RESOURCE_PROBE_INTERVAL;
37894115 }
3790
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3791
- tcp_probe0_when(sk, probe_max),
3792
- TCP_RTO_MAX);
4116
+
4117
+ timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout);
4118
+ tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX);
37934119 }
37944120
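
The zero-window probe timer is now always counted per probe and backed off as (probe base << icsk_backoff) capped at TCP_RTO_MAX, then clamped so it cannot outlive a configured TCP_USER_TIMEOUT. A schedule sketch loosely following tcp_probe0_when(), under the assumption that the base is the current RTO floored at TCP_RTO_MIN; the RTO and HZ values are invented for illustration.

#include <stdio.h>

#define HZ          1000u
#define TCP_RTO_MIN (HZ / 5)		/* 200 ms */
#define TCP_RTO_MAX (120u * HZ)		/* 120 s  */

/* Probe0 delay for a given backoff count: (max(rto, RTO_MIN) << backoff),
 * capped at TCP_RTO_MAX.
 */
static unsigned long probe0_when(unsigned long rto, unsigned int backoff)
{
	unsigned long long base = rto > TCP_RTO_MIN ? rto : TCP_RTO_MIN;
	unsigned long long when = base << backoff;

	return when < TCP_RTO_MAX ? (unsigned long)when : TCP_RTO_MAX;
}

int main(void)
{
	unsigned long rto = 400;	/* assumed current RTO, in ms (HZ=1000) */

	for (unsigned int backoff = 0; backoff < 12; backoff++)
		printf("backoff %2u -> probe in %6lu ms\n",
		       backoff, probe0_when(rto, backoff));
	return 0;
}
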
37954121 int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
....@@ -3799,10 +4125,11 @@
37994125 int res;
38004126
38014127 tcp_rsk(req)->txhash = net_tx_rndhash();
3802
- res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
4128
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
4129
+ NULL);
38034130 if (!res) {
3804
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3805
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
4131
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
4132
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
38064133 if (unlikely(tcp_passive_fastopen(sk)))
38074134 tcp_sk(sk)->total_retrans++;
38084135 trace_tcp_retransmit_synack(sk, req);