hc
2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/net/ipv4/tcp.c
....@@ -434,6 +434,7 @@
434434
435435 /* There's a bubble in the pipe until at least the first ACK. */
436436 tp->app_limited = ~0U;
437
+ tp->rate_app_limited = 1;
437438
438439 /* See draft-stevens-tcpca-spec-01 for discussion of the
439440 * initialization of these values.
....@@ -507,6 +508,7 @@
507508 __poll_t mask;
508509 struct sock *sk = sock->sk;
509510 const struct tcp_sock *tp = tcp_sk(sk);
511
+ u8 shutdown;
510512 int state;
511513
512514 sock_poll_wait(file, sock, wait);
....@@ -549,9 +551,10 @@
549551 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
550552 * blocking on fresh not-connected or disconnected socket. --ANK
551553 */
552
- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
554
+ shutdown = READ_ONCE(sk->sk_shutdown);
555
+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
553556 mask |= EPOLLHUP;
554
- if (sk->sk_shutdown & RCV_SHUTDOWN)
557
+ if (shutdown & RCV_SHUTDOWN)
555558 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
556559
557560 /* Connected or passive Fast Open socket? */
....@@ -567,7 +570,7 @@
567570 if (tcp_stream_is_readable(tp, target, sk))
568571 mask |= EPOLLIN | EPOLLRDNORM;
569572
570
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
573
+ if (!(shutdown & SEND_SHUTDOWN)) {
571574 if (__sk_stream_is_writeable(sk, 1)) {
572575 mask |= EPOLLOUT | EPOLLWRNORM;
573576 } else { /* send SIGIO later */
....@@ -2488,14 +2491,13 @@
24882491 return too_many_orphans || out_of_socket_memory;
24892492 }
24902493
2491
-void tcp_close(struct sock *sk, long timeout)
2494
+void __tcp_close(struct sock *sk, long timeout)
24922495 {
24932496 struct sk_buff *skb;
24942497 int data_was_unread = 0;
24952498 int state;
24962499
2497
- lock_sock(sk);
2498
- sk->sk_shutdown = SHUTDOWN_MASK;
2500
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
24992501
25002502 if (sk->sk_state == TCP_LISTEN) {
25012503 tcp_set_state(sk, TCP_CLOSE);
....@@ -2658,6 +2660,12 @@
26582660 out:
26592661 bh_unlock_sock(sk);
26602662 local_bh_enable();
2663
+}
2664
+
2665
+void tcp_close(struct sock *sk, long timeout)
2666
+{
2667
+ lock_sock(sk);
2668
+ __tcp_close(sk, timeout);
26612669 release_sock(sk);
26622670 sock_put(sk);
26632671 }
....@@ -2755,7 +2763,7 @@
27552763 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
27562764 inet_reset_saddr(sk);
27572765
2758
- sk->sk_shutdown = 0;
2766
+ WRITE_ONCE(sk->sk_shutdown, 0);
27592767 sock_reset_flag(sk, SOCK_DONE);
27602768 tp->srtt_us = 0;
27612769 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
....@@ -2816,6 +2824,7 @@
28162824 tp->last_oow_ack_time = 0;
28172825 /* There's a bubble in the pipe until at least the first ACK. */
28182826 tp->app_limited = ~0U;
2827
+ tp->rate_app_limited = 1;
28192828 tp->rack.mstamp = 0;
28202829 tp->rack.advanced = 0;
28212830 tp->rack.reo_wnd_steps = 1;
....@@ -3045,7 +3054,7 @@
30453054 void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
30463055 {
30473056 lock_sock(sk);
3048
- inet_csk(sk)->icsk_user_timeout = val;
3057
+ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
30493058 release_sock(sk);
30503059 }
30513060 EXPORT_SYMBOL(tcp_sock_set_user_timeout);
....@@ -3057,7 +3066,8 @@
30573066 if (val < 1 || val > MAX_TCP_KEEPIDLE)
30583067 return -EINVAL;
30593068
3060
- tp->keepalive_time = val * HZ;
3069
+ /* Paired with READ_ONCE() in keepalive_time_when() */
3070
+ WRITE_ONCE(tp->keepalive_time, val * HZ);
30613071 if (sock_flag(sk, SOCK_KEEPOPEN) &&
30623072 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
30633073 u32 elapsed = keepalive_time_elapsed(tp);
....@@ -3089,7 +3099,7 @@
30893099 return -EINVAL;
30903100
30913101 lock_sock(sk);
3092
- tcp_sk(sk)->keepalive_intvl = val * HZ;
3102
+ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
30933103 release_sock(sk);
30943104 return 0;
30953105 }
....@@ -3101,7 +3111,8 @@
31013111 return -EINVAL;
31023112
31033113 lock_sock(sk);
3104
- tcp_sk(sk)->keepalive_probes = val;
3114
+ /* Paired with READ_ONCE() in keepalive_probes() */
3115
+ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
31053116 release_sock(sk);
31063117 return 0;
31073118 }
....@@ -3287,13 +3298,13 @@
32873298 if (val < 1 || val > MAX_TCP_KEEPINTVL)
32883299 err = -EINVAL;
32893300 else
3290
- tp->keepalive_intvl = val * HZ;
3301
+ WRITE_ONCE(tp->keepalive_intvl, val * HZ);
32913302 break;
32923303 case TCP_KEEPCNT:
32933304 if (val < 1 || val > MAX_TCP_KEEPCNT)
32943305 err = -EINVAL;
32953306 else
3296
- tp->keepalive_probes = val;
3307
+ WRITE_ONCE(tp->keepalive_probes, val);
32973308 break;
32983309 case TCP_SYNCNT:
32993310 if (val < 1 || val > MAX_TCP_SYNCNT)
....@@ -3312,18 +3323,18 @@
33123323
33133324 case TCP_LINGER2:
33143325 if (val < 0)
3315
- tp->linger2 = -1;
3326
+ WRITE_ONCE(tp->linger2, -1);
33163327 else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
3317
- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
3328
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
33183329 else
3319
- tp->linger2 = val * HZ;
3330
+ WRITE_ONCE(tp->linger2, val * HZ);
33203331 break;
33213332
33223333 case TCP_DEFER_ACCEPT:
33233334 /* Translate value in seconds to number of retransmits */
3324
- icsk->icsk_accept_queue.rskq_defer_accept =
3325
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
3326
- TCP_RTO_MAX / HZ);
3335
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
3336
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
3337
+ TCP_RTO_MAX / HZ));
33273338 break;
33283339
33293340 case TCP_WINDOW_CLAMP:
....@@ -3355,7 +3366,7 @@
33553366 if (val < 0)
33563367 err = -EINVAL;
33573368 else
3358
- icsk->icsk_user_timeout = val;
3369
+ WRITE_ONCE(icsk->icsk_user_timeout, val);
33593370 break;
33603371
33613372 case TCP_FASTOPEN:
....@@ -3399,7 +3410,7 @@
33993410 err = tcp_repair_set_window(tp, optval, optlen);
34003411 break;
34013412 case TCP_NOTSENT_LOWAT:
3402
- tp->notsent_lowat = val;
3413
+ WRITE_ONCE(tp->notsent_lowat, val);
34033414 sk->sk_write_space(sk);
34043415 break;
34053416 case TCP_INQ:
....@@ -3411,7 +3422,7 @@
34113422 case TCP_TX_DELAY:
34123423 if (val)
34133424 tcp_enable_tx_delay();
3414
- tp->tcp_tx_delay = val;
3425
+ WRITE_ONCE(tp->tcp_tx_delay, val);
34153426 break;
34163427 default:
34173428 err = -ENOPROTOOPT;
....@@ -3683,7 +3694,8 @@
36833694 switch (optname) {
36843695 case TCP_MAXSEG:
36853696 val = tp->mss_cache;
3686
- if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
3697
+ if (tp->rx_opt.user_mss &&
3698
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
36873699 val = tp->rx_opt.user_mss;
36883700 if (tp->repair)
36893701 val = tp->rx_opt.mss_clamp;
....@@ -3707,13 +3719,14 @@
37073719 val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
37083720 break;
37093721 case TCP_LINGER2:
3710
- val = tp->linger2;
3722
+ val = READ_ONCE(tp->linger2);
37113723 if (val >= 0)
37123724 val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
37133725 break;
37143726 case TCP_DEFER_ACCEPT:
3715
- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
3716
- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
3727
+ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
3728
+ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
3729
+ TCP_RTO_MAX / HZ);
37173730 break;
37183731 case TCP_WINDOW_CLAMP:
37193732 val = tp->window_clamp;
....@@ -3849,11 +3862,11 @@
38493862 break;
38503863
38513864 case TCP_USER_TIMEOUT:
3852
- val = icsk->icsk_user_timeout;
3865
+ val = READ_ONCE(icsk->icsk_user_timeout);
38533866 break;
38543867
38553868 case TCP_FASTOPEN:
3856
- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
3869
+ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
38573870 break;
38583871
38593872 case TCP_FASTOPEN_CONNECT:
....@@ -3865,14 +3878,14 @@
38653878 break;
38663879
38673880 case TCP_TX_DELAY:
3868
- val = tp->tcp_tx_delay;
3881
+ val = READ_ONCE(tp->tcp_tx_delay);
38693882 break;
38703883
38713884 case TCP_TIMESTAMP:
38723885 val = tcp_time_stamp_raw() + tp->tsoffset;
38733886 break;
38743887 case TCP_NOTSENT_LOWAT:
3875
- val = tp->notsent_lowat;
3888
+ val = READ_ONCE(tp->notsent_lowat);
38763889 break;
38773890 case TCP_INQ:
38783891 val = tp->recvmsg_inq;
....@@ -4141,7 +4154,7 @@
41414154 if (req)
41424155 reqsk_fastopen_remove(sk, req, false);
41434156
4144
- sk->sk_shutdown = SHUTDOWN_MASK;
4157
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
41454158
41464159 if (!sock_flag(sk, SOCK_DEAD))
41474160 sk->sk_state_change(sk);