2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/sched/sch_pie.c
@@ -1,14 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2013 Cisco Systems, Inc, 2013.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
  *
  * Author: Vijay Subramanian <vijaynsu@cisco.com>
  * Author: Mythili Prabhu <mysuryan@cisco.com>
@@ -17,9 +8,7 @@
  * University of Oslo, Norway.
  *
  * References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
  */
 
 #include <linux/module.h>
@@ -30,110 +19,68 @@
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
-
-#define QUEUE_THRESHOLD 10000
-#define DQCOUNT_INVALID -1
-#define MAX_PROB 0xffffffff
-#define PIE_SCALE 8
-
-/* parameters used */
-struct pie_params {
-        psched_time_t target;   /* user specified target delay in pschedtime */
-        u32 tupdate;            /* timer frequency (in jiffies) */
-        u32 limit;              /* number of packets that can be enqueued */
-        u32 alpha;              /* alpha and beta are between 0 and 32 */
-        u32 beta;               /* and are used for shift relative to 1 */
-        bool ecn;               /* true if ecn is enabled */
-        bool bytemode;          /* to scale drop early prob based on pkt size */
-};
-
-/* variables used */
-struct pie_vars {
-        u32 prob;               /* probability but scaled by u32 limit. */
-        psched_time_t burst_time;
-        psched_time_t qdelay;
-        psched_time_t qdelay_old;
-        u64 dq_count;           /* measured in bytes */
-        psched_time_t dq_tstamp;        /* drain rate */
-        u32 avg_dq_rate;        /* bytes per pschedtime tick,scaled */
-        u32 qlen_old;           /* in bytes */
-};
-
-/* statistics gathering */
-struct pie_stats {
-        u32 packets_in;         /* total number of packets enqueued */
-        u32 dropped;            /* packets dropped due to pie_action */
-        u32 overlimit;          /* dropped due to lack of space in queue */
-        u32 maxq;               /* maximum queue size */
-        u32 ecn_mark;           /* packets marked with ECN */
-};
+#include <net/pie.h>
 
 /* private data for the Qdisc */
 struct pie_sched_data {
-        struct pie_params params;
         struct pie_vars vars;
+        struct pie_params params;
         struct pie_stats stats;
         struct timer_list adapt_timer;
         struct Qdisc *sch;
 };
 
-static void pie_params_init(struct pie_params *params)
+bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
+                    struct pie_vars *vars, u32 backlog, u32 packet_size)
 {
-        params->alpha = 2;
-        params->beta = 20;
-        params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */
-        params->limit = 1000;   /* default of 1000 packets */
-        params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC);   /* 20 ms */
-        params->ecn = false;
-        params->bytemode = false;
-}
-
-static void pie_vars_init(struct pie_vars *vars)
-{
-        vars->dq_count = DQCOUNT_INVALID;
-        vars->avg_dq_rate = 0;
-        /* default of 100 ms in pschedtime */
-        vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
-}
-
-static bool drop_early(struct Qdisc *sch, u32 packet_size)
-{
-        struct pie_sched_data *q = qdisc_priv(sch);
-        u32 rnd;
-        u32 local_prob = q->vars.prob;
+        u64 rnd;
+        u64 local_prob = vars->prob;
         u32 mtu = psched_mtu(qdisc_dev(sch));
 
         /* If there is still burst allowance left skip random early drop */
-        if (q->vars.burst_time > 0)
+        if (vars->burst_time > 0)
                 return false;
 
         /* If current delay is less than half of target, and
          * if drop prob is low already, disable early_drop
          */
-        if ((q->vars.qdelay < q->params.target / 2)
-            && (q->vars.prob < MAX_PROB / 5))
+        if ((vars->qdelay < params->target / 2) &&
+            (vars->prob < MAX_PROB / 5))
                 return false;
 
-        /* If we have fewer than 2 mtu-sized packets, disable drop_early,
+        /* If we have fewer than 2 mtu-sized packets, disable pie_drop_early,
          * similar to min_th in RED
          */
-        if (sch->qstats.backlog < 2 * mtu)
+        if (backlog < 2 * mtu)
                 return false;
 
         /* If bytemode is turned on, use packet size to compute new
          * probablity. Smaller packets will have lower drop prob in this case
          */
-        if (q->params.bytemode && packet_size <= mtu)
-                local_prob = (local_prob / mtu) * packet_size;
+        if (params->bytemode && packet_size <= mtu)
+                local_prob = (u64)packet_size * div_u64(local_prob, mtu);
         else
-                local_prob = q->vars.prob;
+                local_prob = vars->prob;
 
-        rnd = prandom_u32();
-        if (rnd < local_prob)
+        if (local_prob == 0)
+                vars->accu_prob = 0;
+        else
+                vars->accu_prob += local_prob;
+
+        if (vars->accu_prob < (MAX_PROB / 100) * 85)
+                return false;
+        if (vars->accu_prob >= (MAX_PROB / 2) * 17)
                 return true;
+
+        prandom_bytes(&rnd, 8);
+        if ((rnd >> BITS_PER_BYTE) < local_prob) {
+                vars->accu_prob = 0;
+                return true;
+        }
 
         return false;
 }
+EXPORT_SYMBOL_GPL(pie_drop_early);
 
 static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                              struct sk_buff **to_free)
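
The rewritten pie_drop_early() replaces the old single prandom_u32() comparison with the derandomization scheme of RFC 8033 section 5.1: the per-packet drop probability is accumulated in accu_prob, nothing is dropped while the accumulated value is below 0.85, and a drop is forced once it reaches 8.5 (note that (MAX_PROB / 2) * 17 is exactly 8.5 * MAX_PROB); only in between is a random draw taken. A minimal user-space sketch of that decision follows; MAX_PROB here assumes the 56-bit fixed point implied by the rnd >> BITS_PER_BYTE comparison (the authoritative definitions live in net/pie.h, which this diff does not show), and rand64() is merely a stand-in for prandom_bytes():

    /* Sketch of the RFC 8033 derandomized drop decision. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define MAX_PROB (UINT64_MAX >> 8)          /* fixed-point 1.0, 56 bits */

    static uint64_t accu_prob;                  /* accumulated drop probability */

    static uint64_t rand64(void)
    {
            return ((uint64_t)rand() << 32) | (uint64_t)rand();
    }

    static bool drop_early(uint64_t local_prob)
    {
            if (local_prob == 0)
                    accu_prob = 0;                      /* forget history */
            else
                    accu_prob += local_prob;

            if (accu_prob < (MAX_PROB / 100) * 85)      /* below 0.85: never drop */
                    return false;
            if (accu_prob >= (MAX_PROB / 2) * 17)       /* 8.5 or more: always drop */
                    return true;

            if ((rand64() >> 8) < local_prob) {         /* random draw in between */
                    accu_prob = 0;
                    return true;
            }
            return false;
    }
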
@@ -146,7 +93,8 @@
                 goto out;
         }
 
-        if (!drop_early(sch, skb->len)) {
+        if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog,
+                            skb->len)) {
                 enqueue = true;
         } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) &&
                    INET_ECN_set_ce(skb)) {
@@ -159,6 +107,10 @@
 
         /* we can enqueue the packet */
         if (enqueue) {
+                /* Set enqueue time only when dq_rate_estimator is disabled. */
+                if (!q->params.dq_rate_estimator)
+                        pie_set_enqueue_time(skb);
+
                 q->stats.packets_in++;
                 if (qdisc_qlen(sch) > q->stats.maxq)
                         q->stats.maxq = qdisc_qlen(sch);
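
When the drain-rate estimator is turned off, queue delay can no longer be derived from an estimated drain rate, so each packet is stamped on enqueue and pie_process_dequeue() later computes qdelay directly as dequeue time minus enqueue time. pie_set_enqueue_time() and pie_get_enqueue_time() come from the new net/pie.h, which is not part of this diff; presumably they stash a psched_time_t in the skb control block.
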
@@ -168,17 +120,19 @@
 
 out:
         q->stats.dropped++;
+        q->vars.accu_prob = 0;
         return qdisc_drop(skb, sch, to_free);
 }
 
 static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
-        [TCA_PIE_TARGET] = {.type = NLA_U32},
-        [TCA_PIE_LIMIT] = {.type = NLA_U32},
-        [TCA_PIE_TUPDATE] = {.type = NLA_U32},
-        [TCA_PIE_ALPHA] = {.type = NLA_U32},
-        [TCA_PIE_BETA] = {.type = NLA_U32},
-        [TCA_PIE_ECN] = {.type = NLA_U32},
-        [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
+        [TCA_PIE_TARGET]                = {.type = NLA_U32},
+        [TCA_PIE_LIMIT]                 = {.type = NLA_U32},
+        [TCA_PIE_TUPDATE]               = {.type = NLA_U32},
+        [TCA_PIE_ALPHA]                 = {.type = NLA_U32},
+        [TCA_PIE_BETA]                  = {.type = NLA_U32},
+        [TCA_PIE_ECN]                   = {.type = NLA_U32},
+        [TCA_PIE_BYTEMODE]              = {.type = NLA_U32},
+        [TCA_PIE_DQ_RATE_ESTIMATOR]     = {.type = NLA_U32},
 };
 
 static int pie_change(struct Qdisc *sch, struct nlattr *opt,
@@ -192,7 +146,8 @@
         if (!opt)
                 return -EINVAL;
 
-        err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL);
+        err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
+                                          NULL);
         if (err < 0)
                 return err;
 
@@ -209,7 +164,8 @@
 
         /* tupdate is in jiffies */
         if (tb[TCA_PIE_TUPDATE])
-                q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+                q->params.tupdate =
+                        usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
 
         if (tb[TCA_PIE_LIMIT]) {
                 u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
@@ -230,6 +186,10 @@
         if (tb[TCA_PIE_BYTEMODE])
                 q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
 
+        if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
+                q->params.dq_rate_estimator =
+                        nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
+
         /* Drop excess packets if new limit is lower */
         qlen = sch->q.qlen;
         while (sch->q.qlen > sch->limit) {
@@ -245,133 +205,163 @@
         return 0;
 }
 
-static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
+void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
+                         struct pie_vars *vars, u32 backlog)
 {
+        psched_time_t now = psched_get_time();
+        u32 dtime = 0;
 
-        struct pie_sched_data *q = qdisc_priv(sch);
-        int qlen = sch->qstats.backlog; /* current queue size in bytes */
+        /* If dq_rate_estimator is disabled, calculate qdelay using the
+         * packet timestamp.
+         */
+        if (!params->dq_rate_estimator) {
+                vars->qdelay = now - pie_get_enqueue_time(skb);
+
+                if (vars->dq_tstamp != DTIME_INVALID)
+                        dtime = now - vars->dq_tstamp;
+
+                vars->dq_tstamp = now;
+
+                if (backlog == 0)
+                        vars->qdelay = 0;
+
+                if (dtime == 0)
+                        return;
+
+                goto burst_allowance_reduction;
+        }
 
         /* If current queue is about 10 packets or more and dq_count is unset
          * we have enough packets to calculate the drain rate. Save
          * current time as dq_tstamp and start measurement cycle.
          */
-        if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) {
-                q->vars.dq_tstamp = psched_get_time();
-                q->vars.dq_count = 0;
+        if (backlog >= QUEUE_THRESHOLD && vars->dq_count == DQCOUNT_INVALID) {
+                vars->dq_tstamp = psched_get_time();
+                vars->dq_count = 0;
         }
 
-        /* Calculate the average drain rate from this value.  If queue length
-         * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset
+        /* Calculate the average drain rate from this value. If queue length
+         * has receded to a small value viz., <= QUEUE_THRESHOLD bytes, reset
          * the dq_count to -1 as we don't have enough packets to calculate the
-         * drain rate anymore The following if block is entered only when we
+         * drain rate anymore. The following if block is entered only when we
          * have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
         * and we calculate the drain rate for the threshold here.  dq_count is
         * in bytes, time difference in psched_time, hence rate is in
         * bytes/psched_time.
         */
-        if (q->vars.dq_count != DQCOUNT_INVALID) {
-                q->vars.dq_count += skb->len;
+        if (vars->dq_count != DQCOUNT_INVALID) {
+                vars->dq_count += skb->len;
 
-                if (q->vars.dq_count >= QUEUE_THRESHOLD) {
-                        psched_time_t now = psched_get_time();
-                        u32 dtime = now - q->vars.dq_tstamp;
-                        u32 count = q->vars.dq_count << PIE_SCALE;
+                if (vars->dq_count >= QUEUE_THRESHOLD) {
+                        u32 count = vars->dq_count << PIE_SCALE;
+
+                        dtime = now - vars->dq_tstamp;
 
                         if (dtime == 0)
                                 return;
 
                         count = count / dtime;
 
-                        if (q->vars.avg_dq_rate == 0)
-                                q->vars.avg_dq_rate = count;
+                        if (vars->avg_dq_rate == 0)
+                                vars->avg_dq_rate = count;
                         else
-                                q->vars.avg_dq_rate =
-                                    (q->vars.avg_dq_rate -
-                                     (q->vars.avg_dq_rate >> 3)) + (count >> 3);
+                                vars->avg_dq_rate =
+                                    (vars->avg_dq_rate -
+                                     (vars->avg_dq_rate >> 3)) + (count >> 3);
 
                         /* If the queue has receded below the threshold, we hold
                          * on to the last drain rate calculated, else we reset
                          * dq_count to 0 to re-enter the if block when the next
                          * packet is dequeued
                          */
-                        if (qlen < QUEUE_THRESHOLD)
-                                q->vars.dq_count = DQCOUNT_INVALID;
-                        else {
-                                q->vars.dq_count = 0;
-                                q->vars.dq_tstamp = psched_get_time();
+                        if (backlog < QUEUE_THRESHOLD) {
+                                vars->dq_count = DQCOUNT_INVALID;
+                        } else {
+                                vars->dq_count = 0;
+                                vars->dq_tstamp = psched_get_time();
                         }
 
-                        if (q->vars.burst_time > 0) {
-                                if (q->vars.burst_time > dtime)
-                                        q->vars.burst_time -= dtime;
-                                else
-                                        q->vars.burst_time = 0;
-                        }
+                        goto burst_allowance_reduction;
                 }
         }
-}
 
-static void calculate_probability(struct Qdisc *sch)
+        return;
+
+burst_allowance_reduction:
+        if (vars->burst_time > 0) {
+                if (vars->burst_time > dtime)
+                        vars->burst_time -= dtime;
+                else
+                        vars->burst_time = 0;
+        }
+}
+EXPORT_SYMBOL_GPL(pie_process_dequeue);
+
+void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
+                               u32 backlog)
 {
-        struct pie_sched_data *q = qdisc_priv(sch);
-        u32 qlen = sch->qstats.backlog; /* queue size in bytes */
         psched_time_t qdelay = 0;       /* in pschedtime */
-        psched_time_t qdelay_old = q->vars.qdelay;      /* in pschedtime */
-        s32 delta = 0;          /* determines the change in probability */
-        u32 oldprob;
-        u32 alpha, beta;
+        psched_time_t qdelay_old = 0;   /* in pschedtime */
+        s64 delta = 0;          /* determines the change in probability */
+        u64 oldprob;
+        u64 alpha, beta;
+        u32 power;
         bool update_prob = true;
 
-        q->vars.qdelay_old = q->vars.qdelay;
+        if (params->dq_rate_estimator) {
+                qdelay_old = vars->qdelay;
+                vars->qdelay_old = vars->qdelay;
 
-        if (q->vars.avg_dq_rate > 0)
-                qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
-        else
-                qdelay = 0;
+                if (vars->avg_dq_rate > 0)
+                        qdelay = (backlog << PIE_SCALE) / vars->avg_dq_rate;
+                else
+                        qdelay = 0;
+        } else {
+                qdelay = vars->qdelay;
+                qdelay_old = vars->qdelay_old;
+        }
 
-        /* If qdelay is zero and qlen is not, it means qlen is very small, less
-         * than dequeue_rate, so we do not update probabilty in this round
+        /* If qdelay is zero and backlog is not, it means backlog is very small,
+         * so we do not update probabilty in this round.
          */
-        if (qdelay == 0 && qlen != 0)
+        if (qdelay == 0 && backlog != 0)
                 update_prob = false;
 
         /* In the algorithm, alpha and beta are between 0 and 2 with typical
          * value for alpha as 0.125. In this implementation, we use values 0-32
         * passed from user space to represent this. Also, alpha and beta have
         * unit of HZ and need to be scaled before they can used to update
-         * probability. alpha/beta are updated locally below by 1) scaling them
-         * appropriately 2) scaling down by 16 to come to 0-2 range.
-         * Please see paper for details.
-         *
-         * We scale alpha and beta differently depending on whether we are in
-         * light, medium or high dropping mode.
+         * probability. alpha/beta are updated locally below by scaling down
+         * by 16 to come to 0-2 range.
         */
-        if (q->vars.prob < MAX_PROB / 100) {
-                alpha =
-                    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-                beta =
-                    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-        } else if (q->vars.prob < MAX_PROB / 10) {
-                alpha =
-                    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-                beta =
-                    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-        } else {
-                alpha =
-                    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-                beta =
-                    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+        alpha = ((u64)params->alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+        beta = ((u64)params->beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+        /* We scale alpha and beta differently depending on how heavy the
+         * congestion is. Please see RFC 8033 for details.
+         */
+        if (vars->prob < MAX_PROB / 10) {
+                alpha >>= 1;
+                beta >>= 1;
+
+                power = 100;
+                while (vars->prob < div_u64(MAX_PROB, power) &&
+                       power <= 1000000) {
+                        alpha >>= 2;
+                        beta >>= 2;
+                        power *= 10;
+                }
         }
 
         /* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-        delta += alpha * ((qdelay - q->params.target));
-        delta += beta * ((qdelay - qdelay_old));
+        delta += alpha * (qdelay - params->target);
+        delta += beta * (qdelay - qdelay_old);
 
-        oldprob = q->vars.prob;
+        oldprob = vars->prob;
 
         /* to ensure we increase probability in steps of no more than 2% */
-        if (delta > (s32) (MAX_PROB / (100 / 2)) &&
-            q->vars.prob >= MAX_PROB / 10)
+        if (delta > (s64)(MAX_PROB / (100 / 2)) &&
+            vars->prob >= MAX_PROB / 10)
                 delta = (MAX_PROB / 100) * 2;
 
         /* Non-linear drop:
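
The drain-rate path above keeps avg_dq_rate as an exponentially weighted moving average: each completed measurement contributes one eighth of the new sample while seven eighths of the old estimate are retained, i.e. avg = avg - avg/8 + count/8, and the queue delay then falls out as backlog divided by rate. A small stand-alone sketch with made-up numbers (the per-cycle rates are hypothetical, not taken from any real trace):

    /* EWMA drain-rate sketch: new_avg = 7/8 * old_avg + 1/8 * sample,
     * in the same PIE_SCALE fixed point as the kernel code above.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define PIE_SCALE 8

    int main(void)
    {
            /* hypothetical per-cycle drain rates in bytes per tick */
            uint32_t samples[] = { 1200, 1000, 900, 1100 };
            uint32_t avg_dq_rate = 0;

            for (int i = 0; i < 4; i++) {
                    uint32_t count = samples[i] << PIE_SCALE;

                    if (avg_dq_rate == 0)
                            avg_dq_rate = count;    /* first sample seeds it */
                    else
                            avg_dq_rate = (avg_dq_rate - (avg_dq_rate >> 3)) +
                                          (count >> 3);
                    printf("avg_dq_rate = %u bytes/tick\n",
                           avg_dq_rate >> PIE_SCALE);
            }

            /* qdelay = backlog / rate, both in PIE_SCALE fixed point */
            uint32_t backlog = 50000;
            printf("qdelay ~ %u ticks\n", (backlog << PIE_SCALE) / avg_dq_rate);
            return 0;
    }
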
@@ -382,12 +372,12 @@
         if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
                 delta += MAX_PROB / (100 / 2);
 
-        q->vars.prob += delta;
+        vars->prob += delta;
 
         if (delta > 0) {
                 /* prevent overflow */
-                if (q->vars.prob < oldprob) {
-                        q->vars.prob = MAX_PROB;
+                if (vars->prob < oldprob) {
+                        vars->prob = MAX_PROB;
                         /* Prevent normalization error. If probability is at
                          * maximum value already, we normalize it here, and
                          * skip the check to do a non-linear drop in the next
@@ -397,32 +387,38 @@
                 }
         } else {
                 /* prevent underflow */
-                if (q->vars.prob > oldprob)
-                        q->vars.prob = 0;
+                if (vars->prob > oldprob)
+                        vars->prob = 0;
         }
 
         /* Non-linear drop in probability: Reduce drop probability quickly if
          * delay is 0 for 2 consecutive Tupdate periods.
          */
 
-        if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
-                q->vars.prob = (q->vars.prob * 98) / 100;
+        if (qdelay == 0 && qdelay_old == 0 && update_prob)
+                /* Reduce drop probability to 98.4% */
+                vars->prob -= vars->prob / 64;
 
-        q->vars.qdelay = qdelay;
-        q->vars.qlen_old = qlen;
+        vars->qdelay = qdelay;
+        vars->backlog_old = backlog;
 
         /* We restart the measurement cycle if the following conditions are met
          * 1. If the delay has been low for 2 consecutive Tupdate periods
          * 2. Calculated drop probability is zero
-         * 3. We have atleast one estimate for the avg_dq_rate ie.,
-         *    is a non-zero value
+         * 3. If average dq_rate_estimator is enabled, we have atleast one
+         *    estimate for the avg_dq_rate ie., is a non-zero value
          */
-        if ((q->vars.qdelay < q->params.target / 2) &&
-            (q->vars.qdelay_old < q->params.target / 2) &&
-            (q->vars.prob == 0) &&
-            (q->vars.avg_dq_rate > 0))
-                pie_vars_init(&q->vars);
+        if ((vars->qdelay < params->target / 2) &&
+            (vars->qdelay_old < params->target / 2) &&
+            vars->prob == 0 &&
+            (!params->dq_rate_estimator || vars->avg_dq_rate > 0)) {
+                pie_vars_init(vars);
+        }
+
+        if (!params->dq_rate_estimator)
+                vars->qdelay_old = qdelay;
 }
+EXPORT_SYMBOL_GPL(pie_calculate_probability);
 
 static void pie_timer(struct timer_list *t)
 {
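
Two changes in pie_calculate_probability() are worth unpacking. The three hard-coded scaling bands of the old code (>>7, >>5, >>4) are generalized: alpha and beta start at the full >>4 scale and, following RFC 8033 section 5.2, are reduced while the drop probability is small, one extra >>2 for every decade below 1/100 down to 1/1000000. Likewise, the decay on idle delay becomes prob -= prob/64, leaving 63/64 ~ 98.4% of the old value, which sidesteps the overflow that (prob * 98) / 100 could cause now that prob is 64 bits wide. A sketch of the resulting effective shift (effective_shift() is an illustrative name, not a kernel helper):

    /* Effective right-shift applied to alpha/beta for a given drop
     * probability; reproduces the scaling loop above.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_PROB (UINT64_MAX >> 8)

    static unsigned int effective_shift(uint64_t prob)
    {
            unsigned int shift = 4;         /* base scale: divide by 16 */

            if (prob < MAX_PROB / 10) {
                    uint64_t power = 100;

                    shift += 1;
                    while (prob < MAX_PROB / power && power <= 1000000) {
                            shift += 2;
                            power *= 10;
                    }
            }
            return shift;
    }

    int main(void)
    {
            /* prob >= 1/10 -> 4, [1/100, 1/10) -> 5, [1/1000, 1/100) -> 7,
             * matching the old light/medium/high bands and extending them
             * to lower probabilities.
             */
            printf("%u %u %u\n", effective_shift(MAX_PROB / 5),
                   effective_shift(MAX_PROB / 50),
                   effective_shift(MAX_PROB / 500));
            return 0;
    }
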
@@ -431,13 +427,12 @@
         spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
         spin_lock(root_lock);
-        calculate_probability(sch);
+        pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog);
 
         /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */
         if (q->params.tupdate)
                 mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
         spin_unlock(root_lock);
-
 }
 
 static int pie_init(struct Qdisc *sch, struct nlattr *opt,
@@ -468,20 +463,23 @@
         struct pie_sched_data *q = qdisc_priv(sch);
         struct nlattr *opts;
 
-        opts = nla_nest_start(skb, TCA_OPTIONS);
-        if (opts == NULL)
+        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+        if (!opts)
                 goto nla_put_failure;
 
         /* convert target from pschedtime to us */
         if (nla_put_u32(skb, TCA_PIE_TARGET,
-                        ((u32) PSCHED_TICKS2NS(q->params.target)) /
+                        ((u32)PSCHED_TICKS2NS(q->params.target)) /
                         NSEC_PER_USEC) ||
             nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
-            nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) ||
+            nla_put_u32(skb, TCA_PIE_TUPDATE,
+                        jiffies_to_usecs(q->params.tupdate)) ||
             nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
             nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
             nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
-            nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode))
+            nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
+            nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
+                        q->params.dq_rate_estimator))
                 goto nla_put_failure;
 
         return nla_nest_end(skb, opts);
@@ -489,19 +487,15 @@
 nla_put_failure:
         nla_nest_cancel(skb, opts);
         return -1;
-
 }
 
 static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
         struct pie_sched_data *q = qdisc_priv(sch);
         struct tc_pie_xstats st = {
-                .prob           = q->vars.prob,
-                .delay          = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) /
+                .prob           = q->vars.prob << BITS_PER_BYTE,
+                .delay          = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
                                    NSEC_PER_USEC,
-                /* unscale and return dq_rate in bytes per sec */
-                .avg_dq_rate    = q->vars.avg_dq_rate *
-                                  (PSCHED_TICKS_PER_SEC) >> PIE_SCALE,
                 .packets_in     = q->stats.packets_in,
                 .overlimit      = q->stats.overlimit,
                 .maxq           = q->stats.maxq,
@@ -509,24 +503,33 @@
                 .ecn_mark       = q->stats.ecn_mark,
         };
 
+        /* avg_dq_rate is only valid if dq_rate_estimator is enabled */
+        st.dq_rate_estimating = q->params.dq_rate_estimator;
+
+        /* unscale and return dq_rate in bytes per sec */
+        if (q->params.dq_rate_estimator)
+                st.avg_dq_rate = q->vars.avg_dq_rate *
+                                 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
+
         return gnet_stats_copy_app(d, &st, sizeof(st));
 }
 
 static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
 {
-        struct sk_buff *skb;
-        skb = qdisc_dequeue_head(sch);
+        struct pie_sched_data *q = qdisc_priv(sch);
+        struct sk_buff *skb = qdisc_dequeue_head(sch);
 
         if (!skb)
                 return NULL;
 
-        pie_process_dequeue(sch, skb);
+        pie_process_dequeue(skb, &q->params, &q->vars, sch->qstats.backlog);
         return skb;
 }
 
 static void pie_reset(struct Qdisc *sch)
 {
         struct pie_sched_data *q = qdisc_priv(sch);
+
         qdisc_reset_queue(sch);
         pie_vars_init(&q->vars);
 }
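
Two details in the stats path follow from the wider fixed point: vars.prob now appears to occupy 56 bits (compare the rnd >> BITS_PER_BYTE draw in pie_drop_early()), so the dump shifts it left by BITS_PER_BYTE to report a full-range fraction to user space, and avg_dq_rate is copied out only when the estimator that produces it is actually running, with dq_rate_estimating telling user space which mode is active.
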
@@ -534,12 +537,13 @@
 static void pie_destroy(struct Qdisc *sch)
 {
         struct pie_sched_data *q = qdisc_priv(sch);
+
         q->params.tupdate = 0;
         del_timer_sync(&q->adapt_timer);
 }
 
 static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
-        .id = "pie",
+        .id             = "pie",
         .priv_size      = sizeof(struct pie_sched_data),
         .enqueue        = pie_qdisc_enqueue,
         .dequeue        = pie_qdisc_dequeue,