hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/ipv4/tcp_dctcp.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /* DataCenter TCP (DCTCP) congestion control.
23 *
34 * http://simula.stanford.edu/~alizade/Site/DCTCP.html
....@@ -33,24 +34,19 @@
3334 * Daniel Borkmann <dborkman@redhat.com>
3435 * Florian Westphal <fw@strlen.de>
3536 * Glenn Judd <glenn.judd@morganstanley.com>
36
- *
37
- * This program is free software; you can redistribute it and/or modify
38
- * it under the terms of the GNU General Public License as published by
39
- * the Free Software Foundation; either version 2 of the License, or (at
40
- * your option) any later version.
4137 */
4238
4339 #include <linux/module.h>
4440 #include <linux/mm.h>
4541 #include <net/tcp.h>
4642 #include <linux/inet_diag.h>
43
+#include "tcp_dctcp.h"
4744
4845 #define DCTCP_MAX_ALPHA 1024U
4946
5047 struct dctcp {
51
- u32 acked_bytes_ecn;
52
- u32 acked_bytes_total;
53
- u32 prior_snd_una;
48
+ u32 old_delivered;
49
+ u32 old_delivered_ce;
5450 u32 prior_rcv_nxt;
5551 u32 dctcp_alpha;
5652 u32 next_seq;
....@@ -72,8 +68,8 @@
7268 {
7369 ca->next_seq = tp->snd_nxt;
7470
75
- ca->acked_bytes_ecn = 0;
76
- ca->acked_bytes_total = 0;
71
+ ca->old_delivered = tp->delivered;
72
+ ca->old_delivered_ce = tp->delivered_ce;
7773 }
7874
7975 static void dctcp_init(struct sock *sk)
....@@ -85,7 +81,6 @@
8581 sk->sk_state == TCP_CLOSE)) {
8682 struct dctcp *ca = inet_csk_ca(sk);
8783
88
- ca->prior_snd_una = tp->snd_una;
8984 ca->prior_rcv_nxt = tp->rcv_nxt;
9085
9186 ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
....@@ -113,89 +108,29 @@
113108 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
114109 }
115110
116
-/* Minimal DCTP CE state machine:
117
- *
118
- * S: 0 <- last pkt was non-CE
119
- * 1 <- last pkt was CE
120
- */
121
-
122
-static void dctcp_ce_state_0_to_1(struct sock *sk)
123
-{
124
- struct dctcp *ca = inet_csk_ca(sk);
125
- struct tcp_sock *tp = tcp_sk(sk);
126
-
127
- if (!ca->ce_state) {
128
- /* State has changed from CE=0 to CE=1, force an immediate
129
- * ACK to reflect the new CE state. If an ACK was delayed,
130
- * send that first to reflect the prior CE state.
131
- */
132
- if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
133
- __tcp_send_ack(sk, ca->prior_rcv_nxt);
134
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
135
- }
136
-
137
- ca->prior_rcv_nxt = tp->rcv_nxt;
138
- ca->ce_state = 1;
139
-
140
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
141
-}
142
-
143
-static void dctcp_ce_state_1_to_0(struct sock *sk)
144
-{
145
- struct dctcp *ca = inet_csk_ca(sk);
146
- struct tcp_sock *tp = tcp_sk(sk);
147
-
148
- if (ca->ce_state) {
149
- /* State has changed from CE=1 to CE=0, force an immediate
150
- * ACK to reflect the new CE state. If an ACK was delayed,
151
- * send that first to reflect the prior CE state.
152
- */
153
- if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
154
- __tcp_send_ack(sk, ca->prior_rcv_nxt);
155
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
156
- }
157
-
158
- ca->prior_rcv_nxt = tp->rcv_nxt;
159
- ca->ce_state = 0;
160
-
161
- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
162
-}
163
-
164111 static void dctcp_update_alpha(struct sock *sk, u32 flags)
165112 {
166113 const struct tcp_sock *tp = tcp_sk(sk);
167114 struct dctcp *ca = inet_csk_ca(sk);
168
- u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
169
-
170
- /* If ack did not advance snd_una, count dupack as MSS size.
171
- * If ack did update window, do not count it at all.
172
- */
173
- if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
174
- acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
175
- if (acked_bytes) {
176
- ca->acked_bytes_total += acked_bytes;
177
- ca->prior_snd_una = tp->snd_una;
178
-
179
- if (flags & CA_ACK_ECE)
180
- ca->acked_bytes_ecn += acked_bytes;
181
- }
182115
183116 /* Expired RTT */
184117 if (!before(tp->snd_una, ca->next_seq)) {
185
- u64 bytes_ecn = ca->acked_bytes_ecn;
118
+ u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
186119 u32 alpha = ca->dctcp_alpha;
187120
188121 /* alpha = (1 - g) * alpha + g * F */
189122
190123 alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
191
- if (bytes_ecn) {
192
- /* If dctcp_shift_g == 1, a 32bit value would overflow
193
- * after 8 Mbytes.
194
- */
195
- bytes_ecn <<= (10 - dctcp_shift_g);
196
- do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
124
+ if (delivered_ce) {
125
+ u32 delivered = tp->delivered - ca->old_delivered;
197126
198
- alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
127
+ /* If dctcp_shift_g == 1, a 32bit value would overflow
128
+ * after 8 M packets.
129
+ */
130
+ delivered_ce <<= (10 - dctcp_shift_g);
131
+ delivered_ce /= max(1U, delivered);
132
+
133
+ alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
199134 }
200135 /* dctcp_alpha can be read from dctcp_get_info() without
201136 * synchro, so we ask compiler to not use dctcp_alpha
....@@ -227,12 +162,12 @@
227162
228163 static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
229164 {
165
+ struct dctcp *ca = inet_csk_ca(sk);
166
+
230167 switch (ev) {
231168 case CA_EVENT_ECN_IS_CE:
232
- dctcp_ce_state_0_to_1(sk);
233
- break;
234169 case CA_EVENT_ECN_NO_CE:
235
- dctcp_ce_state_1_to_0(sk);
170
+ dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
236171 break;
237172 case CA_EVENT_LOSS:
238173 dctcp_react_to_loss(sk);
....@@ -247,6 +182,7 @@
247182 union tcp_cc_info *info)
248183 {
249184 const struct dctcp *ca = inet_csk_ca(sk);
185
+ const struct tcp_sock *tp = tcp_sk(sk);
250186
251187 /* Fill it also in case of VEGASINFO due to req struct limits.
252188 * We can still correctly retrieve it later.
....@@ -258,8 +194,10 @@
258194 info->dctcp.dctcp_enabled = 1;
259195 info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
260196 info->dctcp.dctcp_alpha = ca->dctcp_alpha;
261
- info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
262
- info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
197
+ info->dctcp.dctcp_ab_ecn = tp->mss_cache *
198
+ (tp->delivered_ce - ca->old_delivered_ce);
199
+ info->dctcp.dctcp_ab_tot = tp->mss_cache *
200
+ (tp->delivered - ca->old_delivered);
263201 }
264202
265203 *attr = INET_DIAG_DCTCPINFO;