.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * net/sched/sch_netem.c Network emulator |
---|
3 | | - * |
---|
4 | | - * This program is free software; you can redistribute it and/or |
---|
5 | | - * modify it under the terms of the GNU General Public License |
---|
6 | | - * as published by the Free Software Foundation; either version |
---|
7 | | - * 2 of the License. |
---|
8 | 4 | * |
---|
9 | 5 | * Many of the algorithms and ideas for this came from |
---|
10 | 6 | * NIST Net which is not copyrighted. |
---|
.. | .. |
---|
70 | 66 | |
---|
/* User-supplied delay/slot distribution table: "size" signed 16-bit
 * samples used to approximate an arbitrary distribution.  Allocated as
 * one chunk with the entries in the trailing flexible array member.
 */
struct disttable {
	u32 size;	/* number of entries in table[] */
	s16 table[];	/* distribution samples, follow the header */
};
---|
75 | 71 | |
---|
76 | 72 | struct netem_sched_data { |
---|
77 | 73 | /* internal t(ime)fifo qdisc uses t_root and sch->limit */ |
---|
78 | 74 | struct rb_root t_root; |
---|
| 75 | + |
---|
| 76 | + /* a linear queue; reduces rbtree rebalancing when jitter is low */ |
---|
| 77 | + struct sk_buff *t_head; |
---|
| 78 | + struct sk_buff *t_tail; |
---|
79 | 79 | |
---|
80 | 80 | /* optional qdisc for classful handling (NULL at netem init) */ |
---|
81 | 81 | struct Qdisc *qdisc; |
---|
.. | .. |
---|
369 | 369 | rb_erase(&skb->rbnode, &q->t_root); |
---|
370 | 370 | rtnl_kfree_skbs(skb, skb); |
---|
371 | 371 | } |
---|
| 372 | + |
---|
| 373 | + rtnl_kfree_skbs(q->t_head, q->t_tail); |
---|
| 374 | + q->t_head = NULL; |
---|
| 375 | + q->t_tail = NULL; |
---|
372 | 376 | } |
---|
373 | 377 | |
---|
374 | 378 | static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) |
---|
375 | 379 | { |
---|
376 | 380 | struct netem_sched_data *q = qdisc_priv(sch); |
---|
377 | 381 | u64 tnext = netem_skb_cb(nskb)->time_to_send; |
---|
378 | | - struct rb_node **p = &q->t_root.rb_node, *parent = NULL; |
---|
379 | 382 | |
---|
380 | | - while (*p) { |
---|
381 | | - struct sk_buff *skb; |
---|
382 | | - |
---|
383 | | - parent = *p; |
---|
384 | | - skb = rb_to_skb(parent); |
---|
385 | | - if (tnext >= netem_skb_cb(skb)->time_to_send) |
---|
386 | | - p = &parent->rb_right; |
---|
| 383 | + if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) { |
---|
| 384 | + if (q->t_tail) |
---|
| 385 | + q->t_tail->next = nskb; |
---|
387 | 386 | else |
---|
388 | | - p = &parent->rb_left; |
---|
| 387 | + q->t_head = nskb; |
---|
| 388 | + q->t_tail = nskb; |
---|
| 389 | + } else { |
---|
| 390 | + struct rb_node **p = &q->t_root.rb_node, *parent = NULL; |
---|
| 391 | + |
---|
| 392 | + while (*p) { |
---|
| 393 | + struct sk_buff *skb; |
---|
| 394 | + |
---|
| 395 | + parent = *p; |
---|
| 396 | + skb = rb_to_skb(parent); |
---|
| 397 | + if (tnext >= netem_skb_cb(skb)->time_to_send) |
---|
| 398 | + p = &parent->rb_right; |
---|
| 399 | + else |
---|
| 400 | + p = &parent->rb_left; |
---|
| 401 | + } |
---|
| 402 | + rb_link_node(&nskb->rbnode, parent, p); |
---|
| 403 | + rb_insert_color(&nskb->rbnode, &q->t_root); |
---|
389 | 404 | } |
---|
390 | | - rb_link_node(&nskb->rbnode, parent, p); |
---|
391 | | - rb_insert_color(&nskb->rbnode, &q->t_root); |
---|
392 | 405 | sch->q.qlen++; |
---|
393 | 406 | } |
---|
394 | 407 | |
---|
.. | .. |
---|
410 | 423 | } |
---|
411 | 424 | consume_skb(skb); |
---|
412 | 425 | return segs; |
---|
413 | | -} |
---|
414 | | - |
---|
415 | | -static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) |
---|
416 | | -{ |
---|
417 | | - skb->next = qh->head; |
---|
418 | | - |
---|
419 | | - if (!qh->head) |
---|
420 | | - qh->tail = skb; |
---|
421 | | - qh->head = skb; |
---|
422 | | - qh->qlen++; |
---|
423 | 426 | } |
---|
424 | 427 | |
---|
425 | 428 | /* |
---|
.. | .. |
---|
490 | 493 | */ |
---|
491 | 494 | if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { |
---|
492 | 495 | if (skb_is_gso(skb)) { |
---|
493 | | - segs = netem_segment(skb, sch, to_free); |
---|
494 | | - if (!segs) |
---|
| 496 | + skb = netem_segment(skb, sch, to_free); |
---|
| 497 | + if (!skb) |
---|
495 | 498 | return rc_drop; |
---|
496 | | - qdisc_skb_cb(segs)->pkt_len = segs->len; |
---|
497 | | - } else { |
---|
498 | | - segs = skb; |
---|
| 499 | + segs = skb->next; |
---|
| 500 | + skb_mark_not_on_list(skb); |
---|
| 501 | + qdisc_skb_cb(skb)->pkt_len = skb->len; |
---|
499 | 502 | } |
---|
500 | | - |
---|
501 | | - skb = segs; |
---|
502 | | - segs = segs->next; |
---|
503 | 503 | |
---|
504 | 504 | skb = skb_unshare(skb, GFP_ATOMIC); |
---|
505 | 505 | if (unlikely(!skb)) { |
---|
.. | .. |
---|
518 | 518 | } |
---|
519 | 519 | |
---|
520 | 520 | if (unlikely(sch->q.qlen >= sch->limit)) { |
---|
| 521 | + /* re-link segs, so that qdisc_drop_all() frees them all */ |
---|
| 522 | + skb->next = segs; |
---|
521 | 523 | qdisc_drop_all(skb, sch, to_free); |
---|
522 | 524 | return rc_drop; |
---|
523 | 525 | } |
---|
.. | .. |
---|
548 | 550 | t_skb = skb_rb_last(&q->t_root); |
---|
549 | 551 | t_last = netem_skb_cb(t_skb); |
---|
550 | 552 | if (!last || |
---|
551 | | - t_last->time_to_send > last->time_to_send) { |
---|
| 553 | + t_last->time_to_send > last->time_to_send) |
---|
552 | 554 | last = t_last; |
---|
553 | | - } |
---|
| 555 | + } |
---|
| 556 | + if (q->t_tail) { |
---|
| 557 | + struct netem_skb_cb *t_last = |
---|
| 558 | + netem_skb_cb(q->t_tail); |
---|
| 559 | + |
---|
| 560 | + if (!last || |
---|
| 561 | + t_last->time_to_send > last->time_to_send) |
---|
| 562 | + last = t_last; |
---|
554 | 563 | } |
---|
555 | 564 | |
---|
556 | 565 | if (last) { |
---|
.. | .. |
---|
578 | 587 | cb->time_to_send = ktime_get_ns(); |
---|
579 | 588 | q->counter = 0; |
---|
580 | 589 | |
---|
581 | | - netem_enqueue_skb_head(&sch->q, skb); |
---|
| 590 | + __qdisc_enqueue_head(skb, &sch->q); |
---|
582 | 591 | sch->qstats.requeues++; |
---|
583 | 592 | } |
---|
584 | 593 | |
---|
.. | .. |
---|
592 | 601 | |
---|
593 | 602 | while (segs) { |
---|
594 | 603 | skb2 = segs->next; |
---|
595 | | - segs->next = NULL; |
---|
| 604 | + skb_mark_not_on_list(segs); |
---|
596 | 605 | qdisc_skb_cb(segs)->pkt_len = segs->len; |
---|
597 | 606 | last_len = segs->len; |
---|
598 | 607 | rc = qdisc_enqueue(segs, sch, to_free); |
---|
.. | .. |
---|
636 | 645 | q->slot.bytes_left = q->slot_config.max_bytes; |
---|
637 | 646 | } |
---|
638 | 647 | |
---|
| 648 | +static struct sk_buff *netem_peek(struct netem_sched_data *q) |
---|
| 649 | +{ |
---|
| 650 | + struct sk_buff *skb = skb_rb_first(&q->t_root); |
---|
| 651 | + u64 t1, t2; |
---|
| 652 | + |
---|
| 653 | + if (!skb) |
---|
| 654 | + return q->t_head; |
---|
| 655 | + if (!q->t_head) |
---|
| 656 | + return skb; |
---|
| 657 | + |
---|
| 658 | + t1 = netem_skb_cb(skb)->time_to_send; |
---|
| 659 | + t2 = netem_skb_cb(q->t_head)->time_to_send; |
---|
| 660 | + if (t1 < t2) |
---|
| 661 | + return skb; |
---|
| 662 | + return q->t_head; |
---|
| 663 | +} |
---|
| 664 | + |
---|
| 665 | +static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb) |
---|
| 666 | +{ |
---|
| 667 | + if (skb == q->t_head) { |
---|
| 668 | + q->t_head = skb->next; |
---|
| 669 | + if (!q->t_head) |
---|
| 670 | + q->t_tail = NULL; |
---|
| 671 | + } else { |
---|
| 672 | + rb_erase(&skb->rbnode, &q->t_root); |
---|
| 673 | + } |
---|
| 674 | +} |
---|
| 675 | + |
---|
639 | 676 | static struct sk_buff *netem_dequeue(struct Qdisc *sch) |
---|
640 | 677 | { |
---|
641 | 678 | struct netem_sched_data *q = qdisc_priv(sch); |
---|
642 | 679 | struct sk_buff *skb; |
---|
643 | | - struct rb_node *p; |
---|
644 | 680 | |
---|
645 | 681 | tfifo_dequeue: |
---|
646 | 682 | skb = __qdisc_dequeue_head(&sch->q); |
---|
.. | .. |
---|
650 | 686 | qdisc_bstats_update(sch, skb); |
---|
651 | 687 | return skb; |
---|
652 | 688 | } |
---|
653 | | - p = rb_first(&q->t_root); |
---|
654 | | - if (p) { |
---|
| 689 | + skb = netem_peek(q); |
---|
| 690 | + if (skb) { |
---|
655 | 691 | u64 time_to_send; |
---|
656 | 692 | u64 now = ktime_get_ns(); |
---|
657 | | - |
---|
658 | | - skb = rb_to_skb(p); |
---|
659 | 693 | |
---|
660 | 694 | /* if more time remaining? */ |
---|
661 | 695 | time_to_send = netem_skb_cb(skb)->time_to_send; |
---|
662 | 696 | if (q->slot.slot_next && q->slot.slot_next < time_to_send) |
---|
663 | 697 | get_slot_next(q, now); |
---|
664 | 698 | |
---|
665 | | - if (time_to_send <= now && q->slot.slot_next <= now) { |
---|
666 | | - rb_erase(p, &q->t_root); |
---|
| 699 | + if (time_to_send <= now && q->slot.slot_next <= now) { |
---|
| 700 | + netem_erase_head(q, skb); |
---|
667 | 701 | sch->q.qlen--; |
---|
668 | 702 | qdisc_qstats_backlog_dec(sch, skb); |
---|
669 | 703 | skb->next = NULL; |
---|
.. | .. |
---|
672 | 706 | * we need to restore its value. |
---|
673 | 707 | */ |
---|
674 | 708 | skb->dev = qdisc_dev(sch); |
---|
675 | | - |
---|
676 | | -#ifdef CONFIG_NET_CLS_ACT |
---|
677 | | - /* |
---|
678 | | - * If it's at ingress let's pretend the delay is |
---|
679 | | - * from the network (tstamp will be updated). |
---|
680 | | - */ |
---|
681 | | - if (skb->tc_redirected && skb->tc_from_ingress) |
---|
682 | | - skb->tstamp = 0; |
---|
683 | | -#endif |
---|
684 | 709 | |
---|
685 | 710 | if (q->slot.slot_next) { |
---|
686 | 711 | q->slot.packets_left--; |
---|
.. | .. |
---|
748 | 773 | * signed 16 bit values. |
---|
749 | 774 | */ |
---|
750 | 775 | |
---|
751 | | -static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, |
---|
752 | | - const struct nlattr *attr) |
---|
| 776 | +static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) |
---|
753 | 777 | { |
---|
754 | 778 | size_t n = nla_len(attr)/sizeof(__s16); |
---|
755 | 779 | const __s16 *data = nla_data(attr); |
---|
756 | | - spinlock_t *root_lock; |
---|
757 | 780 | struct disttable *d; |
---|
758 | 781 | int i; |
---|
759 | 782 | |
---|
.. | .. |
---|
768 | 791 | for (i = 0; i < n; i++) |
---|
769 | 792 | d->table[i] = data[i]; |
---|
770 | 793 | |
---|
771 | | - root_lock = qdisc_root_sleeping_lock(sch); |
---|
772 | | - |
---|
773 | | - spin_lock_bh(root_lock); |
---|
774 | | - swap(*tbl, d); |
---|
775 | | - spin_unlock_bh(root_lock); |
---|
776 | | - |
---|
777 | | - dist_free(d); |
---|
| 794 | + *tbl = d; |
---|
778 | 795 | return 0; |
---|
779 | 796 | } |
---|
780 | 797 | |
---|
.. | .. |
---|
917 | 934 | } |
---|
918 | 935 | |
---|
919 | 936 | if (nested_len >= nla_attr_size(0)) |
---|
920 | | - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), |
---|
921 | | - nested_len, policy, NULL); |
---|
| 937 | + return nla_parse_deprecated(tb, maxtype, |
---|
| 938 | + nla_data(nla) + NLA_ALIGN(len), |
---|
| 939 | + nested_len, policy, NULL); |
---|
922 | 940 | |
---|
923 | 941 | memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); |
---|
924 | 942 | return 0; |
---|
.. | .. |
---|
930 | 948 | { |
---|
931 | 949 | struct netem_sched_data *q = qdisc_priv(sch); |
---|
932 | 950 | struct nlattr *tb[TCA_NETEM_MAX + 1]; |
---|
| 951 | + struct disttable *delay_dist = NULL; |
---|
| 952 | + struct disttable *slot_dist = NULL; |
---|
933 | 953 | struct tc_netem_qopt *qopt; |
---|
934 | 954 | struct clgstate old_clg; |
---|
935 | 955 | int old_loss_model = CLG_RANDOM; |
---|
.. | .. |
---|
943 | 963 | if (ret < 0) |
---|
944 | 964 | return ret; |
---|
945 | 965 | |
---|
| 966 | + if (tb[TCA_NETEM_DELAY_DIST]) { |
---|
| 967 | + ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); |
---|
| 968 | + if (ret) |
---|
| 969 | + goto table_free; |
---|
| 970 | + } |
---|
| 971 | + |
---|
| 972 | + if (tb[TCA_NETEM_SLOT_DIST]) { |
---|
| 973 | + ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); |
---|
| 974 | + if (ret) |
---|
| 975 | + goto table_free; |
---|
| 976 | + } |
---|
| 977 | + |
---|
| 978 | + sch_tree_lock(sch); |
---|
946 | 979 | /* backup q->clg and q->loss_model */ |
---|
947 | 980 | old_clg = q->clg; |
---|
948 | 981 | old_loss_model = q->loss_model; |
---|
.. | .. |
---|
951 | 984 | ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); |
---|
952 | 985 | if (ret) { |
---|
953 | 986 | q->loss_model = old_loss_model; |
---|
954 | | - return ret; |
---|
| 987 | + q->clg = old_clg; |
---|
| 988 | + goto unlock; |
---|
955 | 989 | } |
---|
956 | 990 | } else { |
---|
957 | 991 | q->loss_model = CLG_RANDOM; |
---|
958 | 992 | } |
---|
959 | 993 | |
---|
960 | | - if (tb[TCA_NETEM_DELAY_DIST]) { |
---|
961 | | - ret = get_dist_table(sch, &q->delay_dist, |
---|
962 | | - tb[TCA_NETEM_DELAY_DIST]); |
---|
963 | | - if (ret) |
---|
964 | | - goto get_table_failure; |
---|
965 | | - } |
---|
966 | | - |
---|
967 | | - if (tb[TCA_NETEM_SLOT_DIST]) { |
---|
968 | | - ret = get_dist_table(sch, &q->slot_dist, |
---|
969 | | - tb[TCA_NETEM_SLOT_DIST]); |
---|
970 | | - if (ret) |
---|
971 | | - goto get_table_failure; |
---|
972 | | - } |
---|
973 | | - |
---|
| 994 | + if (delay_dist) |
---|
| 995 | + swap(q->delay_dist, delay_dist); |
---|
| 996 | + if (slot_dist) |
---|
| 997 | + swap(q->slot_dist, slot_dist); |
---|
974 | 998 | sch->limit = qopt->limit; |
---|
975 | 999 | |
---|
976 | 1000 | q->latency = PSCHED_TICKS2NS(qopt->latency); |
---|
.. | .. |
---|
1018 | 1042 | /* capping jitter to the range acceptable by tabledist() */ |
---|
1019 | 1043 | q->jitter = min_t(s64, abs(q->jitter), INT_MAX); |
---|
1020 | 1044 | |
---|
1021 | | - return ret; |
---|
| 1045 | +unlock: |
---|
| 1046 | + sch_tree_unlock(sch); |
---|
1022 | 1047 | |
---|
1023 | | -get_table_failure: |
---|
1024 | | - /* recover clg and loss_model, in case of |
---|
1025 | | - * q->clg and q->loss_model were modified |
---|
1026 | | - * in get_loss_clg() |
---|
1027 | | - */ |
---|
1028 | | - q->clg = old_clg; |
---|
1029 | | - q->loss_model = old_loss_model; |
---|
| 1048 | +table_free: |
---|
| 1049 | + dist_free(delay_dist); |
---|
| 1050 | + dist_free(slot_dist); |
---|
1030 | 1051 | return ret; |
---|
1031 | 1052 | } |
---|
1032 | 1053 | |
---|
.. | .. |
---|
1064 | 1085 | { |
---|
1065 | 1086 | struct nlattr *nest; |
---|
1066 | 1087 | |
---|
1067 | | - nest = nla_nest_start(skb, TCA_NETEM_LOSS); |
---|
| 1088 | + nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); |
---|
1068 | 1089 | if (nest == NULL) |
---|
1069 | 1090 | goto nla_put_failure; |
---|
1070 | 1091 | |
---|
.. | .. |
---|
1120 | 1141 | struct tc_netem_rate rate; |
---|
1121 | 1142 | struct tc_netem_slot slot; |
---|
1122 | 1143 | |
---|
1123 | | - qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), |
---|
| 1144 | + qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), |
---|
1124 | 1145 | UINT_MAX); |
---|
1125 | | - qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), |
---|
| 1146 | + qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), |
---|
1126 | 1147 | UINT_MAX); |
---|
1127 | 1148 | qopt.limit = q->limit; |
---|
1128 | 1149 | qopt.loss = q->loss; |
---|