2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/sched/sch_netem.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_netem.c	Network emulator
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License.
  *
  * Many of the algorithms and ideas for this came from
  * NIST Net which is not copyrighted.
@@ -70,12 +66,16 @@

 struct disttable {
         u32  size;
-        s16 table[0];
+        s16 table[];
 };

 struct netem_sched_data {
         /* internal t(ime)fifo qdisc uses t_root and sch->limit */
         struct rb_root t_root;
+
+        /* a linear queue; reduces rbtree rebalancing when jitter is low */
+        struct sk_buff *t_head;
+        struct sk_buff *t_tail;

         /* optional qdisc for classful handling (NULL at netem init) */
         struct Qdisc    *qdisc;
@@ -369,26 +369,39 @@
                 rb_erase(&skb->rbnode, &q->t_root);
                 rtnl_kfree_skbs(skb, skb);
         }
+
+        rtnl_kfree_skbs(q->t_head, q->t_tail);
+        q->t_head = NULL;
+        q->t_tail = NULL;
 }

 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
         struct netem_sched_data *q = qdisc_priv(sch);
         u64 tnext = netem_skb_cb(nskb)->time_to_send;
-        struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

-        while (*p) {
-                struct sk_buff *skb;
-
-                parent = *p;
-                skb = rb_to_skb(parent);
-                if (tnext >= netem_skb_cb(skb)->time_to_send)
-                        p = &parent->rb_right;
+        if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+                if (q->t_tail)
+                        q->t_tail->next = nskb;
                 else
-                        p = &parent->rb_left;
+                        q->t_head = nskb;
+                q->t_tail = nskb;
+        } else {
+                struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+                while (*p) {
+                        struct sk_buff *skb;
+
+                        parent = *p;
+                        skb = rb_to_skb(parent);
+                        if (tnext >= netem_skb_cb(skb)->time_to_send)
+                                p = &parent->rb_right;
+                        else
+                                p = &parent->rb_left;
+                }
+                rb_link_node(&nskb->rbnode, parent, p);
+                rb_insert_color(&nskb->rbnode, &q->t_root);
         }
-        rb_link_node(&nskb->rbnode, parent, p);
-        rb_insert_color(&nskb->rbnode, &q->t_root);
         sch->q.qlen++;
 }

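The tfifo_enqueue() rewrite above is the core of this change. netem keeps packets ordered by time_to_send; with small or zero jitter those timestamps already arrive in non-decreasing order, so every rbtree insert (and the rebalancing behind it) is wasted work. The new fast path appends to the plain t_head/t_tail list whenever the packet's send time is not earlier than the current tail's, falling back to the rbtree only for genuinely out-of-order packets. A minimal userspace sketch of that decision rule, with a hypothetical pkt type standing in for struct sk_buff:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for sk_buff and the netem staging state. */
    struct pkt {
            uint64_t time_to_send;
            struct pkt *next;
    };

    struct staging {
            struct pkt *t_head, *t_tail;    /* ordered linear queue */
            size_t rbtree_inserts;          /* placeholder for the rbtree */
    };

    /* Mirrors the tfifo_enqueue() test: in-order packets take the O(1)
     * tail append; only out-of-order ones would touch the rbtree.
     */
    static void enqueue(struct staging *q, struct pkt *p)
    {
            if (!q->t_tail || p->time_to_send >= q->t_tail->time_to_send) {
                    if (q->t_tail)
                            q->t_tail->next = p;
                    else
                            q->t_head = p;
                    q->t_tail = p;
            } else {
                    q->rbtree_inserts++;    /* slow path, elided here */
            }
    }

    int main(void)
    {
            struct staging q = { 0 };
            struct pkt a = { .time_to_send = 100 };
            struct pkt b = { .time_to_send = 150 };
            struct pkt c = { .time_to_send = 120 };

            enqueue(&q, &a);        /* empty queue: fast path */
            enqueue(&q, &b);        /* 150 >= 100: fast path  */
            enqueue(&q, &c);        /* 120 < 150: slow path   */
            assert(q.rbtree_inserts == 1 && q.t_tail == &b);
            return 0;
    }

With a constant delay and no reordering knobs, every packet takes the list path and the rbtree stays empty, which is exactly the common tc configuration this optimizes.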
@@ -410,16 +423,6 @@
         }
         consume_skb(skb);
         return segs;
-}
-
-static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
-{
-        skb->next = qh->head;
-
-        if (!qh->head)
-                qh->tail = skb;
-        qh->head = skb;
-        qh->qlen++;
 }

 /*
@@ -490,16 +493,13 @@
          */
         if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
                 if (skb_is_gso(skb)) {
-                        segs = netem_segment(skb, sch, to_free);
-                        if (!segs)
+                        skb = netem_segment(skb, sch, to_free);
+                        if (!skb)
                                 return rc_drop;
-                        qdisc_skb_cb(segs)->pkt_len = segs->len;
-                } else {
-                        segs = skb;
+                        segs = skb->next;
+                        skb_mark_not_on_list(skb);
+                        qdisc_skb_cb(skb)->pkt_len = skb->len;
                 }
-
-                skb = segs;
-                segs = segs->next;

                 skb = skb_unshare(skb, GFP_ATOMIC);
                 if (unlikely(!skb)) {
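A note on the corruption hunk: for a GSO packet, netem_segment() hands back the segment list, and the rewritten code keeps only the first segment as skb, parking the remainder in segs for the enqueue loop further down. skb_mark_not_on_list() simply clears skb->next, so the kept head no longer points into the list, and pkt_len is refreshed because the head now accounts for just its own bytes rather than the whole GSO train. A sketch of the detach step, with a hypothetical seg type:

    #include <assert.h>
    #include <stddef.h>

    struct seg { struct seg *next; };

    int main(void)
    {
            struct seg s2 = { .next = NULL };
            struct seg s1 = { .next = &s2 };
            struct seg *skb = &s1;          /* head: will be corrupted */
            struct seg *segs = skb->next;   /* rest: enqueued later */

            skb->next = NULL;               /* skb_mark_not_on_list() */
            assert(segs == &s2 && skb->next == NULL);
            return 0;
    }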
@@ -518,6 +518,8 @@
         }

         if (unlikely(sch->q.qlen >= sch->limit)) {
+                /* re-link segs, so that qdisc_drop_all() frees them all */
+                skb->next = segs;
                 qdisc_drop_all(skb, sch, to_free);
                 return rc_drop;
         }
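The two added lines fix a leak on the over-limit path. After the corruption hunk, skb has been detached from its segment list while segs may still hold the rest of a GSO train; qdisc_drop_all() disposes of the chain linked through ->next, so re-linking skb->next = segs first lets one call free every segment instead of leaking the tail. A toy model of why the re-link matters, with hypothetical types:

    #include <assert.h>
    #include <stddef.h>

    struct seg { struct seg *next; int freed; };

    /* Stands in for qdisc_drop_all(): frees the whole ->next chain. */
    static void drop_all(struct seg *skb)
    {
            while (skb) {
                    struct seg *next = skb->next;

                    skb->freed = 1;
                    skb = next;
            }
    }

    int main(void)
    {
            struct seg tail = { 0 };
            struct seg head = { 0 };

            head.next = &tail;      /* the added re-link: skb->next = segs */
            drop_all(&head);
            assert(head.freed && tail.freed);       /* nothing leaked */
            return 0;
    }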
@@ -548,9 +550,16 @@
                         t_skb = skb_rb_last(&q->t_root);
                         t_last = netem_skb_cb(t_skb);
                         if (!last ||
-                            t_last->time_to_send > last->time_to_send) {
+                            t_last->time_to_send > last->time_to_send)
                                 last = t_last;
-                        }
+                }
+                if (q->t_tail) {
+                        struct netem_skb_cb *t_last =
+                                netem_skb_cb(q->t_tail);
+
+                        if (!last ||
+                            t_last->time_to_send > last->time_to_send)
+                                last = t_last;
                 }

                 if (last) {
@@ -578,7 +587,7 @@
                 cb->time_to_send = ktime_get_ns();
                 q->counter = 0;

-                netem_enqueue_skb_head(&sch->q, skb);
+                __qdisc_enqueue_head(skb, &sch->q);
                 sch->qstats.requeues++;
         }

@@ -592,7 +601,7 @@

                 while (segs) {
                         skb2 = segs->next;
-                        segs->next = NULL;
+                        skb_mark_not_on_list(segs);
                         qdisc_skb_cb(segs)->pkt_len = segs->len;
                         last_len = segs->len;
                         rc = qdisc_enqueue(segs, sch, to_free);
636645 q->slot.bytes_left = q->slot_config.max_bytes;
637646 }
638647
648
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
649
+{
650
+ struct sk_buff *skb = skb_rb_first(&q->t_root);
651
+ u64 t1, t2;
652
+
653
+ if (!skb)
654
+ return q->t_head;
655
+ if (!q->t_head)
656
+ return skb;
657
+
658
+ t1 = netem_skb_cb(skb)->time_to_send;
659
+ t2 = netem_skb_cb(q->t_head)->time_to_send;
660
+ if (t1 < t2)
661
+ return skb;
662
+ return q->t_head;
663
+}
664
+
665
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
666
+{
667
+ if (skb == q->t_head) {
668
+ q->t_head = skb->next;
669
+ if (!q->t_head)
670
+ q->t_tail = NULL;
671
+ } else {
672
+ rb_erase(&skb->rbnode, &q->t_root);
673
+ }
674
+}
675
+
639676 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
640677 {
641678 struct netem_sched_data *q = qdisc_priv(sch);
642679 struct sk_buff *skb;
643
- struct rb_node *p;
644680
645681 tfifo_dequeue:
646682 skb = __qdisc_dequeue_head(&sch->q);
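netem_peek() and netem_erase_head() complete the two-structure design: the staging area is now a pair of sorted sources (rbtree plus linear list), dequeue always takes whichever head is due first, and removal goes back to whichever structure the winner came from. The comparison generalizes to any two ordered sources; a compact sketch with hypothetical types:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct ent { uint64_t time_to_send; };

    /* Mirrors netem_peek(): the earlier of the rbtree minimum and the
     * list head wins; ties go to the list entry, as 't1 < t2' does.
     */
    static const struct ent *peek2(const struct ent *rb_first,
                                   const struct ent *list_head)
    {
            if (!rb_first)
                    return list_head;
            if (!list_head)
                    return rb_first;
            return rb_first->time_to_send < list_head->time_to_send ?
                   rb_first : list_head;
    }

    int main(void)
    {
            struct ent a = { .time_to_send = 7 };
            struct ent b = { .time_to_send = 9 };

            assert(peek2(&a, &b) == &a);
            assert(peek2(NULL, &b) == &b);
            assert(peek2(&a, NULL) == &a);
            return 0;
    }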
@@ -650,20 +686,18 @@
                 qdisc_bstats_update(sch, skb);
                 return skb;
         }
-        p = rb_first(&q->t_root);
-        if (p) {
+        skb = netem_peek(q);
+        if (skb) {
                 u64 time_to_send;
                 u64 now = ktime_get_ns();
-
-                skb = rb_to_skb(p);

                 /* if more time remaining? */
                 time_to_send = netem_skb_cb(skb)->time_to_send;
                 if (q->slot.slot_next && q->slot.slot_next < time_to_send)
                         get_slot_next(q, now);

-                if (time_to_send <= now && q->slot.slot_next <= now) {
-                        rb_erase(p, &q->t_root);
+                if (time_to_send <= now && q->slot.slot_next <= now) {
+                        netem_erase_head(q, skb);
                         sch->q.qlen--;
                         qdisc_qstats_backlog_dec(sch, skb);
                         skb->next = NULL;
@@ -672,15 +706,6 @@
                          * we need to restore its value.
                          */
                         skb->dev = qdisc_dev(sch);
-
-#ifdef CONFIG_NET_CLS_ACT
-                        /*
-                         * If it's at ingress let's pretend the delay is
-                         * from the network (tstamp will be updated).
-                         */
-                        if (skb->tc_redirected && skb->tc_from_ingress)
-                                skb->tstamp = 0;
-#endif

                         if (q->slot.slot_next) {
                                 q->slot.packets_left--;
@@ -748,12 +773,10 @@
  * signed 16 bit values.
  */

-static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
-                          const struct nlattr *attr)
+static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
 {
         size_t n = nla_len(attr)/sizeof(__s16);
         const __s16 *data = nla_data(attr);
-        spinlock_t *root_lock;
         struct disttable *d;
         int i;

@@ -768,13 +791,7 @@
         for (i = 0; i < n; i++)
                 d->table[i] = data[i];

-        root_lock = qdisc_root_sleeping_lock(sch);
-
-        spin_lock_bh(root_lock);
-        swap(*tbl, d);
-        spin_unlock_bh(root_lock);
-
-        dist_free(d);
+        *tbl = d;
         return 0;
 }

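The get_dist_table() change narrows the helper's contract: it now only allocates and fills a private disttable and hands it back through *tbl; publishing the table into the qdisc (and freeing the displaced one) becomes the caller's job. That split is what lets netem_change() below perform all allocation before taking sch_tree_lock(), keeping potentially sleeping allocations out of the locked section. The pattern in miniature, as a userspace sketch with a pthread mutex standing in for the qdisc tree lock:

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    struct table { size_t n; short v[]; };

    /* Like the new get_dist_table(): allocate and fill, no locking. */
    static int prepare(struct table **out, const short *data, size_t n)
    {
            struct table *t = malloc(sizeof(*t) + n * sizeof(*data));

            if (!t)
                    return -1;
            t->n = n;
            memcpy(t->v, data, n * sizeof(*data));
            *out = t;               /* the '*tbl = d' step */
            return 0;
    }

    /* Like netem_change(): swap the pointer under the lock, then free
     * the displaced table once the lock is dropped.
     */
    static void publish(pthread_mutex_t *lock, struct table **slot,
                        struct table **newt)
    {
            struct table *tmp;

            pthread_mutex_lock(lock);
            tmp = *slot;
            *slot = *newt;
            *newt = tmp;
            pthread_mutex_unlock(lock);
            free(*newt);            /* old table, now private again */
    }

    int main(void)
    {
            static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
            struct table *active = NULL, *newt = NULL;
            const short dist[] = { -100, 0, 100 };

            if (prepare(&newt, dist, 3) == 0)
                    publish(&lock, &active, &newt);
            free(active);
            return 0;
    }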
@@ -917,8 +934,9 @@
         }

         if (nested_len >= nla_attr_size(0))
-                return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
-                                 nested_len, policy, NULL);
+                return nla_parse_deprecated(tb, maxtype,
+                                            nla_data(nla) + NLA_ALIGN(len),
+                                            nested_len, policy, NULL);

         memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
         return 0;
@@ -930,6 +948,8 @@
 {
         struct netem_sched_data *q = qdisc_priv(sch);
         struct nlattr *tb[TCA_NETEM_MAX + 1];
+        struct disttable *delay_dist = NULL;
+        struct disttable *slot_dist = NULL;
         struct tc_netem_qopt *qopt;
         struct clgstate old_clg;
         int old_loss_model = CLG_RANDOM;
@@ -943,6 +963,19 @@
         if (ret < 0)
                 return ret;

+        if (tb[TCA_NETEM_DELAY_DIST]) {
+                ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
+                if (ret)
+                        goto table_free;
+        }
+
+        if (tb[TCA_NETEM_SLOT_DIST]) {
+                ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
+                if (ret)
+                        goto table_free;
+        }
+
+        sch_tree_lock(sch);
         /* backup q->clg and q->loss_model */
         old_clg = q->clg;
         old_loss_model = q->loss_model;
@@ -951,26 +984,17 @@
                 ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
                 if (ret) {
                         q->loss_model = old_loss_model;
-                        return ret;
+                        q->clg = old_clg;
+                        goto unlock;
                 }
         } else {
                 q->loss_model = CLG_RANDOM;
         }

-        if (tb[TCA_NETEM_DELAY_DIST]) {
-                ret = get_dist_table(sch, &q->delay_dist,
-                                     tb[TCA_NETEM_DELAY_DIST]);
-                if (ret)
-                        goto get_table_failure;
-        }
-
-        if (tb[TCA_NETEM_SLOT_DIST]) {
-                ret = get_dist_table(sch, &q->slot_dist,
-                                     tb[TCA_NETEM_SLOT_DIST]);
-                if (ret)
-                        goto get_table_failure;
-        }
-
+        if (delay_dist)
+                swap(q->delay_dist, delay_dist);
+        if (slot_dist)
+                swap(q->slot_dist, slot_dist);
         sch->limit = qopt->limit;

         q->latency = PSCHED_TICKS2NS(qopt->latency);
@@ -1018,15 +1042,12 @@
         /* capping jitter to the range acceptable by tabledist() */
         q->jitter = min_t(s64, abs(q->jitter), INT_MAX);

-        return ret;
+unlock:
+        sch_tree_unlock(sch);

-get_table_failure:
-        /* recover clg and loss_model, in case of
-         * q->clg and q->loss_model were modified
-         * in get_loss_clg()
-         */
-        q->clg = old_clg;
-        q->loss_model = old_loss_model;
+table_free:
+        dist_free(delay_dist);
+        dist_free(slot_dist);
         return ret;
 }

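The reworked error handling falls out of that prepare/publish split, and the success path reads straight through both labels: after the swap() calls, delay_dist and slot_dist hold the displaced tables, so the final dist_free() pair frees stale tables on success, unused fresh ones on failure before the lock, and NULL (a no-op for a kvfree-style free) when the corresponding attribute was absent. Failures after sch_tree_lock() go through "unlock" so the unlock always pairs with the lock, and the loss-generator failure path now restores q->clg inline, replacing the removed get_table_failure block. A runnable miniature of this control flow, with hypothetical names:

    #include <stdio.h>
    #include <stdlib.h>

    /* change(): prepare before the lock, publish under it, and let one
     * cleanup path handle success, failure, and the no-op case alike.
     */
    static int change(int **active, int fail_under_lock)
    {
            int *fresh = malloc(sizeof(*fresh));
            int ret = fresh ? 0 : -1;

            if (ret)
                    goto table_free;
            *fresh = 42;

            /* sch_tree_lock(sch) would be taken here */
            if (fail_under_lock) {
                    ret = -1;
                    goto unlock;
            }
            { int *old = *active; *active = fresh; fresh = old; }  /* swap */
    unlock:
            /* sch_tree_unlock(sch) */
    table_free:
            free(fresh);    /* displaced table, unused fresh one, or NULL */
            return ret;
    }

    int main(void)
    {
            int *active = NULL;

            printf("%d %d\n", change(&active, 0), *active);  /* 0 42  */
            printf("%d %d\n", change(&active, 1), *active);  /* -1 42 */
            free(active);
            return 0;
    }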
@@ -1064,7 +1085,7 @@
 {
         struct nlattr *nest;

-        nest = nla_nest_start(skb, TCA_NETEM_LOSS);
+        nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
         if (nest == NULL)
                 goto nla_put_failure;

@@ -1120,9 +1141,9 @@
         struct tc_netem_rate rate;
         struct tc_netem_slot slot;

-        qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+        qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
                              UINT_MAX);
-        qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+        qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
                              UINT_MAX);
         qopt.limit = q->limit;
         qopt.loss = q->loss;
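On the final hunk: struct tc_netem_qopt carries latency and jitter as 32-bit fields, while netem now tracks them internally as 64-bit nanosecond values, so the dump converts to scheduler ticks and clamps at UINT_MAX. Taking the min in psched_time_t, an unsigned 64-bit type, rather than the signed psched_tdiff_t keeps that clamp a plain unsigned comparison. Worked numbers, assuming the usual PSCHED_SHIFT of 6 (64 ns per tick): 5 s = 5,000,000,000 ns becomes 78,125,000 ticks and is reported as-is, while anything above roughly 274.9 s (UINT_MAX ticks) dumps as UINT_MAX. A sketch of the clamp under that assumption:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumes PSCHED_SHIFT == 6, i.e. one tick is 64 ns. */
    #define NS2TICKS(ns) ((uint64_t)(ns) >> 6)

    static uint32_t dump_field(uint64_t ns)
    {
            uint64_t ticks = NS2TICKS(ns);

            return ticks < UINT_MAX ? (uint32_t)ticks : UINT_MAX;
    }

    int main(void)
    {
            printf("%u\n", dump_field(5000000000ULL));      /* 78125000 */
            printf("%u\n", dump_field(300000000000ULL));    /* 4294967295 */
            return 0;
    }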