| .. | .. |
|---|
| 434 | 434 | |
|---|
| 435 | 435 | /* There's a bubble in the pipe until at least the first ACK. */ |
|---|
| 436 | 436 | tp->app_limited = ~0U; |
|---|
| 437 | + tp->rate_app_limited = 1; |
|---|
| 437 | 438 | |
|---|
| 438 | 439 | /* See draft-stevens-tcpca-spec-01 for discussion of the |
|---|
| 439 | 440 | * initialization of these values. |
|---|
| .. | .. |
|---|
| 507 | 508 | __poll_t mask; |
|---|
| 508 | 509 | struct sock *sk = sock->sk; |
|---|
| 509 | 510 | const struct tcp_sock *tp = tcp_sk(sk); |
|---|
| 511 | + u8 shutdown; |
|---|
| 510 | 512 | int state; |
|---|
| 511 | 513 | |
|---|
| 512 | 514 | sock_poll_wait(file, sock, wait); |
|---|
| .. | .. |
|---|
| 549 | 551 | * NOTE. Check for TCP_CLOSE is added. The goal is to prevent |
|---|
| 550 | 552 | * blocking on fresh not-connected or disconnected socket. --ANK |
|---|
| 551 | 553 | */ |
|---|
| 552 | | - if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) |
|---|
| 554 | + shutdown = READ_ONCE(sk->sk_shutdown); |
|---|
| 555 | + if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) |
|---|
| 553 | 556 | mask |= EPOLLHUP; |
|---|
| 554 | | - if (sk->sk_shutdown & RCV_SHUTDOWN) |
|---|
| 557 | + if (shutdown & RCV_SHUTDOWN) |
|---|
| 555 | 558 | mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; |
|---|
| 556 | 559 | |
|---|
| 557 | 560 | /* Connected or passive Fast Open socket? */ |
|---|
| .. | .. |
|---|
| 567 | 570 | if (tcp_stream_is_readable(tp, target, sk)) |
|---|
| 568 | 571 | mask |= EPOLLIN | EPOLLRDNORM; |
|---|
| 569 | 572 | |
|---|
| 570 | | - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { |
|---|
| 573 | + if (!(shutdown & SEND_SHUTDOWN)) { |
|---|
| 571 | 574 | if (__sk_stream_is_writeable(sk, 1)) { |
|---|
| 572 | 575 | mask |= EPOLLOUT | EPOLLWRNORM; |
|---|
| 573 | 576 | } else { /* send SIGIO later */ |
|---|
| .. | .. |
|---|
| 2488 | 2491 | return too_many_orphans || out_of_socket_memory; |
|---|
| 2489 | 2492 | } |
|---|
| 2490 | 2493 | |
|---|
| 2491 | | -void tcp_close(struct sock *sk, long timeout) |
|---|
| 2494 | +void __tcp_close(struct sock *sk, long timeout) |
|---|
| 2492 | 2495 | { |
|---|
| 2493 | 2496 | struct sk_buff *skb; |
|---|
| 2494 | 2497 | int data_was_unread = 0; |
|---|
| 2495 | 2498 | int state; |
|---|
| 2496 | 2499 | |
|---|
| 2497 | | - lock_sock(sk); |
|---|
| 2498 | | - sk->sk_shutdown = SHUTDOWN_MASK; |
|---|
| 2500 | + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); |
|---|
| 2499 | 2501 | |
|---|
| 2500 | 2502 | if (sk->sk_state == TCP_LISTEN) { |
|---|
| 2501 | 2503 | tcp_set_state(sk, TCP_CLOSE); |
|---|
| .. | .. |
|---|
| 2658 | 2660 | out: |
|---|
| 2659 | 2661 | bh_unlock_sock(sk); |
|---|
| 2660 | 2662 | local_bh_enable(); |
|---|
| 2663 | +} |
|---|
| 2664 | + |
|---|
| 2665 | +void tcp_close(struct sock *sk, long timeout) |
|---|
| 2666 | +{ |
|---|
| 2667 | + lock_sock(sk); |
|---|
| 2668 | + __tcp_close(sk, timeout); |
|---|
| 2661 | 2669 | release_sock(sk); |
|---|
| 2662 | 2670 | sock_put(sk); |
|---|
| 2663 | 2671 | } |
|---|
| .. | .. |
|---|
| 2755 | 2763 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
|---|
| 2756 | 2764 | inet_reset_saddr(sk); |
|---|
| 2757 | 2765 | |
|---|
| 2758 | | - sk->sk_shutdown = 0; |
|---|
| 2766 | + WRITE_ONCE(sk->sk_shutdown, 0); |
|---|
| 2759 | 2767 | sock_reset_flag(sk, SOCK_DONE); |
|---|
| 2760 | 2768 | tp->srtt_us = 0; |
|---|
| 2761 | 2769 | tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); |
|---|
| .. | .. |
|---|
| 2816 | 2824 | tp->last_oow_ack_time = 0; |
|---|
| 2817 | 2825 | /* There's a bubble in the pipe until at least the first ACK. */ |
|---|
| 2818 | 2826 | tp->app_limited = ~0U; |
|---|
| 2827 | + tp->rate_app_limited = 1; |
|---|
| 2819 | 2828 | tp->rack.mstamp = 0; |
|---|
| 2820 | 2829 | tp->rack.advanced = 0; |
|---|
| 2821 | 2830 | tp->rack.reo_wnd_steps = 1; |
|---|
| .. | .. |
|---|
| 3045 | 3054 | void tcp_sock_set_user_timeout(struct sock *sk, u32 val) |
|---|
| 3046 | 3055 | { |
|---|
| 3047 | 3056 | lock_sock(sk); |
|---|
| 3048 | | - inet_csk(sk)->icsk_user_timeout = val; |
|---|
| 3057 | + WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val); |
|---|
| 3049 | 3058 | release_sock(sk); |
|---|
| 3050 | 3059 | } |
|---|
| 3051 | 3060 | EXPORT_SYMBOL(tcp_sock_set_user_timeout); |
|---|
| .. | .. |
|---|
| 3057 | 3066 | if (val < 1 || val > MAX_TCP_KEEPIDLE) |
|---|
| 3058 | 3067 | return -EINVAL; |
|---|
| 3059 | 3068 | |
|---|
| 3060 | | - tp->keepalive_time = val * HZ; |
|---|
| 3069 | + /* Paired with READ_ONCE() in keepalive_time_when() */ |
|---|
| 3070 | + WRITE_ONCE(tp->keepalive_time, val * HZ); |
|---|
| 3061 | 3071 | if (sock_flag(sk, SOCK_KEEPOPEN) && |
|---|
| 3062 | 3072 | !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { |
|---|
| 3063 | 3073 | u32 elapsed = keepalive_time_elapsed(tp); |
|---|
| .. | .. |
|---|
| 3089 | 3099 | return -EINVAL; |
|---|
| 3090 | 3100 | |
|---|
| 3091 | 3101 | lock_sock(sk); |
|---|
| 3092 | | - tcp_sk(sk)->keepalive_intvl = val * HZ; |
|---|
| 3102 | + WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); |
|---|
| 3093 | 3103 | release_sock(sk); |
|---|
| 3094 | 3104 | return 0; |
|---|
| 3095 | 3105 | } |
|---|
| .. | .. |
|---|
| 3101 | 3111 | return -EINVAL; |
|---|
| 3102 | 3112 | |
|---|
| 3103 | 3113 | lock_sock(sk); |
|---|
| 3104 | | - tcp_sk(sk)->keepalive_probes = val; |
|---|
| 3114 | + /* Paired with READ_ONCE() in keepalive_probes() */ |
|---|
| 3115 | + WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); |
|---|
| 3105 | 3116 | release_sock(sk); |
|---|
| 3106 | 3117 | return 0; |
|---|
| 3107 | 3118 | } |
|---|
| .. | .. |
|---|
| 3287 | 3298 | if (val < 1 || val > MAX_TCP_KEEPINTVL) |
|---|
| 3288 | 3299 | err = -EINVAL; |
|---|
| 3289 | 3300 | else |
|---|
| 3290 | | - tp->keepalive_intvl = val * HZ; |
|---|
| 3301 | + WRITE_ONCE(tp->keepalive_intvl, val * HZ); |
|---|
| 3291 | 3302 | break; |
|---|
| 3292 | 3303 | case TCP_KEEPCNT: |
|---|
| 3293 | 3304 | if (val < 1 || val > MAX_TCP_KEEPCNT) |
|---|
| 3294 | 3305 | err = -EINVAL; |
|---|
| 3295 | 3306 | else |
|---|
| 3296 | | - tp->keepalive_probes = val; |
|---|
| 3307 | + WRITE_ONCE(tp->keepalive_probes, val); |
|---|
| 3297 | 3308 | break; |
|---|
| 3298 | 3309 | case TCP_SYNCNT: |
|---|
| 3299 | 3310 | if (val < 1 || val > MAX_TCP_SYNCNT) |
|---|
| .. | .. |
|---|
| 3312 | 3323 | |
|---|
| 3313 | 3324 | case TCP_LINGER2: |
|---|
| 3314 | 3325 | if (val < 0) |
|---|
| 3315 | | - tp->linger2 = -1; |
|---|
| 3326 | + WRITE_ONCE(tp->linger2, -1); |
|---|
| 3316 | 3327 | else if (val > TCP_FIN_TIMEOUT_MAX / HZ) |
|---|
| 3317 | | - tp->linger2 = TCP_FIN_TIMEOUT_MAX; |
|---|
| 3328 | + WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); |
|---|
| 3318 | 3329 | else |
|---|
| 3319 | | - tp->linger2 = val * HZ; |
|---|
| 3330 | + WRITE_ONCE(tp->linger2, val * HZ); |
|---|
| 3320 | 3331 | break; |
|---|
| 3321 | 3332 | |
|---|
| 3322 | 3333 | case TCP_DEFER_ACCEPT: |
|---|
| 3323 | 3334 | /* Translate value in seconds to number of retransmits */ |
|---|
| 3324 | | - icsk->icsk_accept_queue.rskq_defer_accept = |
|---|
| 3325 | | - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, |
|---|
| 3326 | | - TCP_RTO_MAX / HZ); |
|---|
| 3335 | + WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, |
|---|
| 3336 | + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, |
|---|
| 3337 | + TCP_RTO_MAX / HZ)); |
|---|
| 3327 | 3338 | break; |
|---|
| 3328 | 3339 | |
|---|
| 3329 | 3340 | case TCP_WINDOW_CLAMP: |
|---|
| .. | .. |
|---|
| 3355 | 3366 | if (val < 0) |
|---|
| 3356 | 3367 | err = -EINVAL; |
|---|
| 3357 | 3368 | else |
|---|
| 3358 | | - icsk->icsk_user_timeout = val; |
|---|
| 3369 | + WRITE_ONCE(icsk->icsk_user_timeout, val); |
|---|
| 3359 | 3370 | break; |
|---|
| 3360 | 3371 | |
|---|
| 3361 | 3372 | case TCP_FASTOPEN: |
|---|
| .. | .. |
|---|
| 3399 | 3410 | err = tcp_repair_set_window(tp, optval, optlen); |
|---|
| 3400 | 3411 | break; |
|---|
| 3401 | 3412 | case TCP_NOTSENT_LOWAT: |
|---|
| 3402 | | - tp->notsent_lowat = val; |
|---|
| 3413 | + WRITE_ONCE(tp->notsent_lowat, val); |
|---|
| 3403 | 3414 | sk->sk_write_space(sk); |
|---|
| 3404 | 3415 | break; |
|---|
| 3405 | 3416 | case TCP_INQ: |
|---|
| .. | .. |
|---|
| 3411 | 3422 | case TCP_TX_DELAY: |
|---|
| 3412 | 3423 | if (val) |
|---|
| 3413 | 3424 | tcp_enable_tx_delay(); |
|---|
| 3414 | | - tp->tcp_tx_delay = val; |
|---|
| 3425 | + WRITE_ONCE(tp->tcp_tx_delay, val); |
|---|
| 3415 | 3426 | break; |
|---|
| 3416 | 3427 | default: |
|---|
| 3417 | 3428 | err = -ENOPROTOOPT; |
|---|
| .. | .. |
|---|
| 3683 | 3694 | switch (optname) { |
|---|
| 3684 | 3695 | case TCP_MAXSEG: |
|---|
| 3685 | 3696 | val = tp->mss_cache; |
|---|
| 3686 | | - if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
|---|
| 3697 | + if (tp->rx_opt.user_mss && |
|---|
| 3698 | + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
|---|
| 3687 | 3699 | val = tp->rx_opt.user_mss; |
|---|
| 3688 | 3700 | if (tp->repair) |
|---|
| 3689 | 3701 | val = tp->rx_opt.mss_clamp; |
|---|
| .. | .. |
|---|
| 3707 | 3719 | val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; |
|---|
| 3708 | 3720 | break; |
|---|
| 3709 | 3721 | case TCP_LINGER2: |
|---|
| 3710 | | - val = tp->linger2; |
|---|
| 3722 | + val = READ_ONCE(tp->linger2); |
|---|
| 3711 | 3723 | if (val >= 0) |
|---|
| 3712 | 3724 | val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; |
|---|
| 3713 | 3725 | break; |
|---|
| 3714 | 3726 | case TCP_DEFER_ACCEPT: |
|---|
| 3715 | | - val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, |
|---|
| 3716 | | - TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); |
|---|
| 3727 | + val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); |
|---|
| 3728 | + val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ, |
|---|
| 3729 | + TCP_RTO_MAX / HZ); |
|---|
| 3717 | 3730 | break; |
|---|
| 3718 | 3731 | case TCP_WINDOW_CLAMP: |
|---|
| 3719 | 3732 | val = tp->window_clamp; |
|---|
| .. | .. |
|---|
| 3849 | 3862 | break; |
|---|
| 3850 | 3863 | |
|---|
| 3851 | 3864 | case TCP_USER_TIMEOUT: |
|---|
| 3852 | | - val = icsk->icsk_user_timeout; |
|---|
| 3865 | + val = READ_ONCE(icsk->icsk_user_timeout); |
|---|
| 3853 | 3866 | break; |
|---|
| 3854 | 3867 | |
|---|
| 3855 | 3868 | case TCP_FASTOPEN: |
|---|
| 3856 | | - val = icsk->icsk_accept_queue.fastopenq.max_qlen; |
|---|
| 3869 | + val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); |
|---|
| 3857 | 3870 | break; |
|---|
| 3858 | 3871 | |
|---|
| 3859 | 3872 | case TCP_FASTOPEN_CONNECT: |
|---|
| .. | .. |
|---|
| 3865 | 3878 | break; |
|---|
| 3866 | 3879 | |
|---|
| 3867 | 3880 | case TCP_TX_DELAY: |
|---|
| 3868 | | - val = tp->tcp_tx_delay; |
|---|
| 3881 | + val = READ_ONCE(tp->tcp_tx_delay); |
|---|
| 3869 | 3882 | break; |
|---|
| 3870 | 3883 | |
|---|
| 3871 | 3884 | case TCP_TIMESTAMP: |
|---|
| 3872 | 3885 | val = tcp_time_stamp_raw() + tp->tsoffset; |
|---|
| 3873 | 3886 | break; |
|---|
| 3874 | 3887 | case TCP_NOTSENT_LOWAT: |
|---|
| 3875 | | - val = tp->notsent_lowat; |
|---|
| 3888 | + val = READ_ONCE(tp->notsent_lowat); |
|---|
| 3876 | 3889 | break; |
|---|
| 3877 | 3890 | case TCP_INQ: |
|---|
| 3878 | 3891 | val = tp->recvmsg_inq; |
|---|
| .. | .. |
|---|
| 4141 | 4154 | if (req) |
|---|
| 4142 | 4155 | reqsk_fastopen_remove(sk, req, false); |
|---|
| 4143 | 4156 | |
|---|
| 4144 | | - sk->sk_shutdown = SHUTDOWN_MASK; |
|---|
| 4157 | + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); |
|---|
| 4145 | 4158 | |
|---|
| 4146 | 4159 | if (!sock_flag(sk, SOCK_DEAD)) |
|---|
| 4147 | 4160 | sk->sk_state_change(sk); |
|---|