@@ -40,7 +40,7 @@

 struct tcp_metrics_block {
 	struct tcp_metrics_block __rcu	*tcpm_next;
-	possible_net_t			tcpm_net;
+	struct net			*tcpm_net;
 	struct inetpeer_addr		tcpm_saddr;
 	struct inetpeer_addr		tcpm_daddr;
 	unsigned long			tcpm_stamp;
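The old field was a possible_net_t, a wrapper whose struct net pointer only exists when CONFIG_NET_NS is enabled; read_pnet()/write_pnet() hide that difference. A plain struct net * costs one pointer on namespace-less builds but gives READ_ONCE()/WRITE_ONCE() a real word to target, which the later hunks rely on. For reference, the wrapper looks roughly like this upstream (a sketch from memory of include/net/net_namespace.h):

typedef struct {
#ifdef CONFIG_NET_NS
	struct net *net;
#endif
} possible_net_t;

/* read_pnet() returns pnet->net, or &init_net when CONFIG_NET_NS=n,
 * so there is no single field a READ_ONCE() could wrap in all configs.
 */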
@@ -51,34 +51,38 @@
 	struct rcu_head			rcu_head;
 };

-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
 {
-	return read_pnet(&tm->tcpm_net);
+	/* Paired with the WRITE_ONCE() in tcpm_new() */
+	return READ_ONCE(tm->tcpm_net);
 }

 static bool tcp_metric_locked(struct tcp_metrics_block *tm,
 			      enum tcp_metric_index idx)
 {
-	return tm->tcpm_lock & (1 << idx);
+	/* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+	return READ_ONCE(tm->tcpm_lock) & (1 << idx);
 }

-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
 			  enum tcp_metric_index idx)
 {
-	return tm->tcpm_vals[idx];
+	/* Paired with WRITE_ONCE() in tcp_metric_set() */
+	return READ_ONCE(tm->tcpm_vals[idx]);
 }

 static void tcp_metric_set(struct tcp_metrics_block *tm,
 			   enum tcp_metric_index idx,
 			   u32 val)
 {
-	tm->tcpm_vals[idx] = val;
+	/* Paired with READ_ONCE() in tcp_metric_get() */
+	WRITE_ONCE(tm->tcpm_vals[idx], val);
 }

 static bool addr_same(const struct inetpeer_addr *a,
 		      const struct inetpeer_addr *b)
 {
-	return inetpeer_addr_cmp(a, b) == 0;
+	return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
 }

 struct tcpm_hash_bucket {
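Three things happen in this hunk: the lookup helpers become const-correct, every lockless read of tcpm_net/tcpm_lock/tcpm_vals[] is annotated with READ_ONCE() and paired with a WRITE_ONCE() on the writer side, and addr_same() gains an explicit family check so an IPv4 key can no longer match an IPv6 entry whose raw address bytes happen to compare equal. The annotation pattern in miniature (a sketch, not the patch itself): without the pair, the compiler may tear a store or re-load a value mid-expression; with it, each access is a single untorn word access.

struct cache_entry {
	u32 val;	/* written under a lock, read locklessly via RCU */
};

static void entry_set(struct cache_entry *e, u32 v)
{
	WRITE_ONCE(e->val, v);		/* paired with READ_ONCE() below */
}

static u32 entry_get(const struct cache_entry *e)
{
	return READ_ONCE(e->val);	/* paired with WRITE_ONCE() above */
}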
@@ -89,6 +93,7 @@
 static unsigned int		tcp_metrics_hash_log __read_mostly;

 static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);

 static void tcpm_suck_dst(struct tcp_metrics_block *tm,
 			  const struct dst_entry *dst,
@@ -97,7 +102,7 @@
 	u32 msval;
 	u32 val;

-	tm->tcpm_stamp = jiffies;
+	WRITE_ONCE(tm->tcpm_stamp, jiffies);

 	val = 0;
 	if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@
 		val |= 1 << TCP_METRIC_CWND;
 	if (dst_metric_locked(dst, RTAX_REORDERING))
 		val |= 1 << TCP_METRIC_REORDERING;
-	tm->tcpm_lock = val;
+	/* Paired with READ_ONCE() in tcp_metric_locked() */
+	WRITE_ONCE(tm->tcpm_lock, val);

 	msval = dst_metric_raw(dst, RTAX_RTT);
-	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+	tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);

 	msval = dst_metric_raw(dst, RTAX_RTTVAR);
-	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
-	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
-	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
-	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+	tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+	tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+		       dst_metric_raw(dst, RTAX_SSTHRESH));
+	tcp_metric_set(tm, TCP_METRIC_CWND,
+		       dst_metric_raw(dst, RTAX_CWND));
+	tcp_metric_set(tm, TCP_METRIC_REORDERING,
+		       dst_metric_raw(dst, RTAX_REORDERING));
 	if (fastopen_clear) {
+		write_seqlock(&fastopen_seqlock);
 		tm->tcpm_fastopen.mss = 0;
 		tm->tcpm_fastopen.syn_loss = 0;
 		tm->tcpm_fastopen.try_exp = 0;
 		tm->tcpm_fastopen.cookie.exp = false;
 		tm->tcpm_fastopen.cookie.len = 0;
+		write_sequnlock(&fastopen_seqlock);
 	}
 }

 #define TCP_METRICS_TIMEOUT	(60 * 60 * HZ)

-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+			     const struct dst_entry *dst)
 {
-	if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+	unsigned long limit;
+
+	if (!tm)
+		return;
+	limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+	if (unlikely(time_after(jiffies, limit)))
 		tcpm_suck_dst(tm, dst, false);
 }

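tcpm_suck_dst() can now run on an entry that RCU readers are traversing, and tcpm_fastopen is a multi-word struct, so clearing it is bracketed by fastopen_seqlock (moved up next to tcp_metrics_lock in the earlier hunk). Readers retry when a writer raced with them; the reader side follows the usual seqlock shape, sketched below (fastopen_peek is a hypothetical name; the real reader in this file is tcp_fastopen_cache_get()):

static void fastopen_peek(struct tcp_metrics_block *tm, u16 *mss)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&fastopen_seqlock);
		if (tm->tcpm_fastopen.mss)
			*mss = tm->tcpm_fastopen.mss;
	} while (read_seqretry(&fastopen_seqlock, seq));
}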
@@ -174,20 +191,23 @@
 		oldest = deref_locked(tcp_metrics_hash[hash].chain);
 		for (tm = deref_locked(oldest->tcpm_next); tm;
 		     tm = deref_locked(tm->tcpm_next)) {
-			if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+			if (time_before(READ_ONCE(tm->tcpm_stamp),
+					READ_ONCE(oldest->tcpm_stamp)))
 				oldest = tm;
 		}
 		tm = oldest;
 	} else {
-		tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+		tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
 		if (!tm)
 			goto out_unlock;
 	}
-	write_pnet(&tm->tcpm_net, net);
+	/* Paired with the READ_ONCE() in tm_net() */
+	WRITE_ONCE(tm->tcpm_net, net);
+
 	tm->tcpm_saddr = *saddr;
 	tm->tcpm_daddr = *daddr;

-	tcpm_suck_dst(tm, dst, true);
+	tcpm_suck_dst(tm, dst, reclaim);

 	if (likely(!reclaim)) {
 		tm->tcpm_next = tcp_metrics_hash[hash].chain;
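Two coordinated changes close the race on this path: a fresh entry is now zeroed by kzalloc(), so its Fast Open state needs no explicit clearing, while a reclaimed entry, which lockless readers may still be inspecting, passes fastopen_clear = reclaim so tcpm_suck_dst() wipes the Fast Open fields under the seqlock. The net pointer is published with WRITE_ONCE() to pair with the READ_ONCE() in tm_net(). The allocation policy, condensed (a sketch of the logic above, not new code):

if (reclaim)
	tm = oldest;				/* reused entry, still visible to readers */
else
	tm = kzalloc(sizeof(*tm), GFP_ATOMIC);	/* zeroed, not yet published */

tcpm_suck_dst(tm, dst, /* fastopen_clear = */ reclaim);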
@@ -329,7 +349,7 @@
 	int m;

 	sk_dst_confirm(sk);
-	if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+	if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
 		return;

 	rcu_read_lock();
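sysctl integers are updated by the proc handler without any lock that readers share, so lockless readers annotate their loads; the same treatment is applied to sysctl_tcp_reordering and sysctl_tcp_no_ssthresh_metrics_save below. A tiny helper would make the intent explicit (hypothetical, not in the patch):

static inline bool tcp_nometrics_save(const struct net *net)
{
	/* the writer is effectively a plain store from the sysctl handler */
	return READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save);
}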
@@ -385,7 +405,8 @@

 	if (tcp_in_initial_slowstart(tp)) {
 		/* Slow start still did not finish. */
-		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+		if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+		    !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
 			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 			if (val && (tp->snd_cwnd >> 1) > val)
 				tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -400,7 +421,8 @@
 	} else if (!tcp_in_slow_start(tp) &&
 		   icsk->icsk_ca_state == TCP_CA_Open) {
 		/* Cong. avoidance phase, cwnd is reliable. */
-		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+		if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+		    !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
 			tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
 				       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
 		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
@@ -416,7 +438,8 @@
 			tcp_metric_set(tm, TCP_METRIC_CWND,
 				       (val + tp->snd_ssthresh) >> 1);
 		}
-		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+		if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+		    !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
 			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 			if (val && tp->snd_ssthresh > val)
 				tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
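The three hunks above backport the net.ipv4.tcp_no_ssthresh_metrics_save sysctl: when it is set, the slow-start threshold is not saved into the metrics cache (and, in a later hunk, not restored by tcp_init_metrics()). The guard repeats verbatim in all three save paths, so it could be captured in a helper like this (hypothetical consolidation, not part of the patch):

static bool tcp_may_save_ssthresh(const struct net *net,
				  struct tcp_metrics_block *tm)
{
	return !READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
	       !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH);
}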
@@ -425,12 +448,13 @@
 		if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
 			val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
 			if (val < tp->reordering &&
-			    tp->reordering != net->ipv4.sysctl_tcp_reordering)
+			    tp->reordering !=
+			    READ_ONCE(net->ipv4.sysctl_tcp_reordering))
 				tcp_metric_set(tm, TCP_METRIC_REORDERING,
 					       tp->reordering);
 		}
 	}
-	tm->tcpm_stamp = jiffies;
+	WRITE_ONCE(tm->tcpm_stamp, jiffies);
 out_unlock:
 	rcu_read_unlock();
 }
@@ -441,6 +465,7 @@
 {
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
 	struct tcp_metrics_block *tm;
 	u32 val, crtt = 0; /* cached RTT scaled by 8 */

@@ -458,7 +483,8 @@
 	if (tcp_metric_locked(tm, TCP_METRIC_CWND))
 		tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);

-	val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+	val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+	      0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 	if (val) {
 		tp->snd_ssthresh = val;
 		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
@@ -512,16 +538,6 @@

 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
-	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
-	 * retransmitted. In light of RFC6298 more aggressive 1sec
-	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
-	 * retransmission has occurred.
-	 */
-	if (tp->total_retrans > 1)
-		tp->snd_cwnd = 1;
-	else
-		tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_jiffies32;
 }

 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
@@ -542,8 +558,6 @@

 	return ret;
 }
-
-static DEFINE_SEQLOCK(fastopen_seqlock);

 void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
 			    struct tcp_fastopen_cookie *cookie)
@@ -651,18 +665,18 @@
 	}

 	if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
-			  jiffies - tm->tcpm_stamp,
+			  jiffies - READ_ONCE(tm->tcpm_stamp),
 			  TCP_METRICS_ATTR_PAD) < 0)
 		goto nla_put_failure;

 	{
 		int n = 0;

-		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
+		nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS);
 		if (!nest)
 			goto nla_put_failure;
 		for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
-			u32 val = tm->tcpm_vals[i];
+			u32 val = tcp_metric_get(tm, i);

 			if (!val)
 				continue;
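Two independent fixes meet in the dump path: the age is computed from a READ_ONCE() of tcpm_stamp, and the values array is read through tcp_metric_get() so the dump side uses the same annotated accessor as everyone else. Upstream, nla_nest_start() now sets NLA_F_NESTED while nla_nest_start_noflag() keeps the old behavior, so the rename preserves the wire format for existing userspace. The surrounding pattern, sketched:

struct nlattr *nest;

nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS);
if (!nest)
	goto nla_put_failure;
/* ... one nla_put_u32() per nonzero metric ... */
nla_nest_end(msg, nest);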
@@ -948,17 +962,17 @@
 	return 0;
 }

-static const struct genl_ops tcp_metrics_nl_ops[] = {
+static const struct genl_small_ops tcp_metrics_nl_ops[] = {
 	{
 		.cmd = TCP_METRICS_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = tcp_metrics_nl_cmd_get,
 		.dumpit = tcp_metrics_nl_dump,
-		.policy = tcp_metrics_nl_policy,
 	},
 	{
 		.cmd = TCP_METRICS_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = tcp_metrics_nl_cmd_del,
-		.policy = tcp_metrics_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 };
@@ -968,10 +982,11 @@
 	.name		= TCP_METRICS_GENL_NAME,
 	.version	= TCP_METRICS_GENL_VERSION,
 	.maxattr	= TCP_METRICS_ATTR_MAX,
+	.policy		= tcp_metrics_nl_policy,
 	.netnsok	= true,
 	.module		= THIS_MODULE,
-	.ops		= tcp_metrics_nl_ops,
-	.n_ops		= ARRAY_SIZE(tcp_metrics_nl_ops),
+	.small_ops	= tcp_metrics_nl_ops,
+	.n_small_ops	= ARRAY_SIZE(tcp_metrics_nl_ops),
 };

 static unsigned int	tcpmhash_entries;
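These two hunks are the generic-netlink API migration: per-op .policy moved into struct genl_family (the core now applies it uniformly), families with few commands use the compact struct genl_small_ops array referenced via .small_ops/.n_small_ops, and GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP keeps the historical, laxer validation for existing userspace. The resulting shape, on a made-up family (all example_*/EXAMPLE_* names are hypothetical):

static const struct genl_small_ops example_ops[] = {
	{
		.cmd		= EXAMPLE_CMD_GET,
		.validate	= GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit		= example_get_doit,
	},
};

static struct genl_family example_family __ro_after_init = {
	.name		= "example",
	.maxattr	= EXAMPLE_ATTR_MAX,
	.policy		= example_policy,	/* family-wide, was per-op */
	.small_ops	= example_ops,
	.n_small_ops	= ARRAY_SIZE(example_ops),
	.module		= THIS_MODULE,
};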
@@ -1000,7 +1015,7 @@

 	slots = tcpmhash_entries;
 	if (!slots) {
-		if (totalram_pages >= 128 * 1024)
+		if (totalram_pages() >= 128 * 1024)
 			slots = 16 * 1024;
 		else
 			slots = 8 * 1024;
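totalram_pages became an accessor when the global counter was converted to an atomic_long_t upstream, hence the added parentheses; the sizing rule itself is unchanged. Assuming 4 KiB pages, 128 * 1024 pages is 512 MiB, so the rule reads (a sketch, not new kernel code):

/* >= 512 MiB of RAM: 16384 hash slots, otherwise 8192 */
unsigned int slots = totalram_pages() >= 128 * 1024 ? 16 * 1024 : 8 * 1024;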