2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/net/ipv4/tcp_input.c
@@ -243,6 +243,19 @@
         if (unlikely(len > icsk->icsk_ack.rcv_mss +
                      MAX_TCP_OPTION_SPACE))
             tcp_gro_dev_warn(sk, skb, len);
+        /* If the skb has a len of exactly 1*MSS and has the PSH bit
+         * set then it is likely the end of an application write. So
+         * more data may not be arriving soon, and yet the data sender
+         * may be waiting for an ACK if cwnd-bound or using TX zero
+         * copy. So we set ICSK_ACK_PUSHED here so that
+         * tcp_cleanup_rbuf() will send an ACK immediately if the app
+         * reads all of the data and is not ping-pong. If len > MSS
+         * then this logic does not matter (and does not hurt) because
+         * tcp_cleanup_rbuf() will always ACK immediately if the app
+         * reads data and there is more than an MSS of unACKed data.
+         */
+        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+            icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
     } else {
         /* Otherwise, we make more careful check taking into account,
          * that SACKs block is variable.
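
The heuristic the new comment describes reduces to a small predicate. Below is a minimal userspace sketch of that decision, outside the kernel: should_ack_now(), struct rx_segment and PSH_FLAG are illustrative names rather than kernel API, and the real code only performs this check on the branch where len >= rcv_mss has already been established.

#include <stdbool.h>
#include <stdint.h>

#define PSH_FLAG 0x08   /* PSH bit in the TCP header flags */

struct rx_segment {
    uint32_t len;       /* payload length of the received segment */
    uint8_t  tcp_flags; /* TCP header flags of the segment */
};

/* True when the receiver should ACK immediately rather than delay:
 * a segment of exactly one MSS carrying PSH is likely the tail of an
 * application write, and the sender may stall (cwnd-bound or using
 * TX zero-copy) until it sees the ACK.
 */
static bool should_ack_now(const struct rx_segment *seg, uint32_t rcv_mss)
{
    return seg->len == rcv_mss && (seg->tcp_flags & PSH_FLAG);
}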
@@ -287,7 +300,7 @@
     icsk->icsk_ack.quick = quickacks;
 }
 
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
     struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -295,7 +308,6 @@
     inet_csk_exit_pingpong_mode(sk);
     icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
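
A note on the two hunks above: dropping the EXPORT_SYMBOL() and marking tcp_enter_quickack_mode() static removes the function from the module symbol table and restricts it to this translation unit. Presumably every remaining caller now lives in tcp_input.c; file-local linkage also lets the compiler inline or otherwise optimize the function freely.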
@@ -3561,8 +3573,11 @@
 static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
                                    u32 *last_oow_ack_time)
 {
-    if (*last_oow_ack_time) {
-        s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+    /* Paired with the WRITE_ONCE() in this function. */
+    u32 val = READ_ONCE(*last_oow_ack_time);
+
+    if (val) {
+        s32 elapsed = (s32)(tcp_jiffies32 - val);
 
         if (0 <= elapsed &&
             elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
@@ -3571,7 +3586,10 @@
         }
     }
 
-    *last_oow_ack_time = tcp_jiffies32;
+    /* Paired with the prior READ_ONCE() and with itself,
+     * as we might be lockless.
+     */
+    WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32);
 
     return false;   /* not rate-limited: go ahead, send dupack now! */
 }
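
The rate-limit change above is an instance of the lockless single-word pattern: take one stable snapshot of the shared timestamp with READ_ONCE(), then publish the update with a single WRITE_ONCE() store, so concurrent callers never see a torn value and the compiler cannot re-load or re-order the access. Here is a minimal userspace sketch of the same pattern, assuming a jiffies-like tick counter; rate_limited(), last_event and RATELIMIT_TICKS are hypothetical names, and the macros approximate the kernel's READ_ONCE()/WRITE_ONCE() with volatile accesses.

#include <stdbool.h>
#include <stdint.h>

/* Userspace stand-ins for the kernel macros: a volatile access forces
 * exactly one load/store and prevents compiler tearing or re-reads.
 */
#define READ_ONCE(x)       (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))

#define RATELIMIT_TICKS 500u    /* stands in for the sysctl interval */

static uint32_t last_event;     /* shared timestamp, updated locklessly */

static bool rate_limited(uint32_t now)
{
    uint32_t val = READ_ONCE(last_event);   /* one stable snapshot */

    if (val) {
        /* Signed subtraction handles tick-counter wraparound. */
        int32_t elapsed = (int32_t)(now - val);

        if (elapsed >= 0 && elapsed < (int32_t)RATELIMIT_TICKS)
            return true;    /* inside the window: suppress the event */
    }

    /* Single plain store; racing updaters may overwrite each other,
     * which is acceptable for a rate limiter.
     */
    WRITE_ONCE(last_event, now);
    return false;
}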
@@ -4324,7 +4342,7 @@
 
     inet_csk_schedule_ack(sk);
 
-    sk->sk_shutdown |= RCV_SHUTDOWN;
+    WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
     sock_set_flag(sk, SOCK_DONE);
 
     switch (sk->sk_state) {
@@ -6506,7 +6524,7 @@
             break;
 
         tcp_set_state(sk, TCP_FIN_WAIT2);
-        sk->sk_shutdown |= SEND_SHUTDOWN;
+        WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN);
 
         sk_dst_confirm(sk);
 
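
The two sk_shutdown hunks follow the same discipline: sk_shutdown is read without the socket lock (for example by poll handlers), so the read-modify-write is rewritten as one annotated, non-torn store that pairs with a READ_ONCE() on the reader side. A minimal sketch of that pairing follows; struct fake_sock and the two helpers are illustrative, not the exact kernel call sites, though RCV_SHUTDOWN/SEND_SHUTDOWN use the kernel's flag values.

#include <stdint.h>

#define READ_ONCE(x)       (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))

#define RCV_SHUTDOWN  1
#define SEND_SHUTDOWN 2

struct fake_sock {
    uint8_t shutdown;   /* stands in for sk->sk_shutdown */
};

/* Writer (runs under the socket lock): the OR is computed locally and
 * published as a single store, so lockless readers observe either the
 * old or the new flag word, never a partial update.
 */
static void mark_rcv_shutdown(struct fake_sock *sk)
{
    WRITE_ONCE(sk->shutdown, sk->shutdown | RCV_SHUTDOWN);
}

/* Lockless reader (poll-style): one annotated load, paired with the
 * writer's WRITE_ONCE().
 */
static int peer_closed(const struct fake_sock *sk)
{
    return READ_ONCE(sk->shutdown) & RCV_SHUTDOWN;
}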