| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * net/sched/sch_netem.c Network emulator |
|---|
| 3 | | - * |
|---|
| 4 | | - * This program is free software; you can redistribute it and/or |
|---|
| 5 | | - * modify it under the terms of the GNU General Public License |
|---|
| 6 | | - * as published by the Free Software Foundation; either version |
|---|
| 7 | | - * 2 of the License. |
|---|
| 8 | 4 | * |
|---|
| 9 | 5 | * Many of the algorithms and ideas for this came from |
|---|
| 10 | 6 | * NIST Net which is not copyrighted. |
|---|
| .. | .. |
|---|
| 70 | 66 | |
|---|
| 71 | 67 | struct disttable { |
|---|
| 72 | 68 | u32 size; |
|---|
| 73 | | - s16 table[0]; |
|---|
| 69 | + s16 table[]; |
|---|
| 74 | 70 | }; |
|---|
| 75 | 71 | |
|---|
| 76 | 72 | struct netem_sched_data { |
|---|
| 77 | 73 | /* internal t(ime)fifo qdisc uses t_root and sch->limit */ |
|---|
| 78 | 74 | struct rb_root t_root; |
|---|
| 75 | + |
|---|
| 76 | + /* a linear queue; reduces rbtree rebalancing when jitter is low */ |
|---|
| 77 | + struct sk_buff *t_head; |
|---|
| 78 | + struct sk_buff *t_tail; |
|---|
| 79 | 79 | |
|---|
| 80 | 80 | /* optional qdisc for classful handling (NULL at netem init) */ |
|---|
| 81 | 81 | struct Qdisc *qdisc; |
|---|
| .. | .. |
|---|
| 369 | 369 | rb_erase(&skb->rbnode, &q->t_root); |
|---|
| 370 | 370 | rtnl_kfree_skbs(skb, skb); |
|---|
| 371 | 371 | } |
|---|
| 372 | + |
|---|
| 373 | + rtnl_kfree_skbs(q->t_head, q->t_tail); |
|---|
| 374 | + q->t_head = NULL; |
|---|
| 375 | + q->t_tail = NULL; |
|---|
| 372 | 376 | } |
|---|
| 373 | 377 | |
|---|
| 374 | 378 | static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) |
|---|
| 375 | 379 | { |
|---|
| 376 | 380 | struct netem_sched_data *q = qdisc_priv(sch); |
|---|
| 377 | 381 | u64 tnext = netem_skb_cb(nskb)->time_to_send; |
|---|
| 378 | | - struct rb_node **p = &q->t_root.rb_node, *parent = NULL; |
|---|
| 379 | 382 | |
|---|
| 380 | | - while (*p) { |
|---|
| 381 | | - struct sk_buff *skb; |
|---|
| 382 | | - |
|---|
| 383 | | - parent = *p; |
|---|
| 384 | | - skb = rb_to_skb(parent); |
|---|
| 385 | | - if (tnext >= netem_skb_cb(skb)->time_to_send) |
|---|
| 386 | | - p = &parent->rb_right; |
|---|
| 383 | + if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) { |
|---|
| 384 | + if (q->t_tail) |
|---|
| 385 | + q->t_tail->next = nskb; |
|---|
| 387 | 386 | else |
|---|
| 388 | | - p = &parent->rb_left; |
|---|
| 387 | + q->t_head = nskb; |
|---|
| 388 | + q->t_tail = nskb; |
|---|
| 389 | + } else { |
|---|
| 390 | + struct rb_node **p = &q->t_root.rb_node, *parent = NULL; |
|---|
| 391 | + |
|---|
| 392 | + while (*p) { |
|---|
| 393 | + struct sk_buff *skb; |
|---|
| 394 | + |
|---|
| 395 | + parent = *p; |
|---|
| 396 | + skb = rb_to_skb(parent); |
|---|
| 397 | + if (tnext >= netem_skb_cb(skb)->time_to_send) |
|---|
| 398 | + p = &parent->rb_right; |
|---|
| 399 | + else |
|---|
| 400 | + p = &parent->rb_left; |
|---|
| 401 | + } |
|---|
| 402 | + rb_link_node(&nskb->rbnode, parent, p); |
|---|
| 403 | + rb_insert_color(&nskb->rbnode, &q->t_root); |
|---|
| 389 | 404 | } |
|---|
| 390 | | - rb_link_node(&nskb->rbnode, parent, p); |
|---|
| 391 | | - rb_insert_color(&nskb->rbnode, &q->t_root); |
|---|
| 392 | 405 | sch->q.qlen++; |
|---|
| 393 | 406 | } |
|---|
| 394 | 407 | |
|---|
| .. | .. |
|---|
| 410 | 423 | } |
|---|
| 411 | 424 | consume_skb(skb); |
|---|
| 412 | 425 | return segs; |
|---|
| 413 | | -} |
|---|
| 414 | | - |
|---|
| 415 | | -static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) |
|---|
| 416 | | -{ |
|---|
| 417 | | - skb->next = qh->head; |
|---|
| 418 | | - |
|---|
| 419 | | - if (!qh->head) |
|---|
| 420 | | - qh->tail = skb; |
|---|
| 421 | | - qh->head = skb; |
|---|
| 422 | | - qh->qlen++; |
|---|
| 423 | 426 | } |
|---|
| 424 | 427 | |
|---|
| 425 | 428 | /* |
|---|
| .. | .. |
|---|
| 490 | 493 | */ |
|---|
| 491 | 494 | if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { |
|---|
| 492 | 495 | if (skb_is_gso(skb)) { |
|---|
| 493 | | - segs = netem_segment(skb, sch, to_free); |
|---|
| 494 | | - if (!segs) |
|---|
| 496 | + skb = netem_segment(skb, sch, to_free); |
|---|
| 497 | + if (!skb) |
|---|
| 495 | 498 | return rc_drop; |
|---|
| 496 | | - qdisc_skb_cb(segs)->pkt_len = segs->len; |
|---|
| 497 | | - } else { |
|---|
| 498 | | - segs = skb; |
|---|
| 499 | + segs = skb->next; |
|---|
| 500 | + skb_mark_not_on_list(skb); |
|---|
| 501 | + qdisc_skb_cb(skb)->pkt_len = skb->len; |
|---|
| 499 | 502 | } |
|---|
| 500 | | - |
|---|
| 501 | | - skb = segs; |
|---|
| 502 | | - segs = segs->next; |
|---|
| 503 | 503 | |
|---|
| 504 | 504 | skb = skb_unshare(skb, GFP_ATOMIC); |
|---|
| 505 | 505 | if (unlikely(!skb)) { |
|---|
| .. | .. |
|---|
| 518 | 518 | } |
|---|
| 519 | 519 | |
|---|
| 520 | 520 | if (unlikely(sch->q.qlen >= sch->limit)) { |
|---|
| 521 | + /* re-link segs, so that qdisc_drop_all() frees them all */ |
|---|
| 522 | + skb->next = segs; |
|---|
| 521 | 523 | qdisc_drop_all(skb, sch, to_free); |
|---|
| 522 | 524 | return rc_drop; |
|---|
| 523 | 525 | } |
|---|
| .. | .. |
|---|
| 548 | 550 | t_skb = skb_rb_last(&q->t_root); |
|---|
| 549 | 551 | t_last = netem_skb_cb(t_skb); |
|---|
| 550 | 552 | if (!last || |
|---|
| 551 | | - t_last->time_to_send > last->time_to_send) { |
|---|
| 553 | + t_last->time_to_send > last->time_to_send) |
|---|
| 552 | 554 | last = t_last; |
|---|
| 553 | | - } |
|---|
| 555 | + } |
|---|
| 556 | + if (q->t_tail) { |
|---|
| 557 | + struct netem_skb_cb *t_last = |
|---|
| 558 | + netem_skb_cb(q->t_tail); |
|---|
| 559 | + |
|---|
| 560 | + if (!last || |
|---|
| 561 | + t_last->time_to_send > last->time_to_send) |
|---|
| 562 | + last = t_last; |
|---|
| 554 | 563 | } |
|---|
| 555 | 564 | |
|---|
| 556 | 565 | if (last) { |
|---|
| .. | .. |
|---|
| 578 | 587 | cb->time_to_send = ktime_get_ns(); |
|---|
| 579 | 588 | q->counter = 0; |
|---|
| 580 | 589 | |
|---|
| 581 | | - netem_enqueue_skb_head(&sch->q, skb); |
|---|
| 590 | + __qdisc_enqueue_head(skb, &sch->q); |
|---|
| 582 | 591 | sch->qstats.requeues++; |
|---|
| 583 | 592 | } |
|---|
| 584 | 593 | |
|---|
| .. | .. |
|---|
| 592 | 601 | |
|---|
| 593 | 602 | while (segs) { |
|---|
| 594 | 603 | skb2 = segs->next; |
|---|
| 595 | | - segs->next = NULL; |
|---|
| 604 | + skb_mark_not_on_list(segs); |
|---|
| 596 | 605 | qdisc_skb_cb(segs)->pkt_len = segs->len; |
|---|
| 597 | 606 | last_len = segs->len; |
|---|
| 598 | 607 | rc = qdisc_enqueue(segs, sch, to_free); |
|---|
| .. | .. |
|---|
| 636 | 645 | q->slot.bytes_left = q->slot_config.max_bytes; |
|---|
| 637 | 646 | } |
|---|
| 638 | 647 | |
|---|
| 648 | +static struct sk_buff *netem_peek(struct netem_sched_data *q) |
|---|
| 649 | +{ |
|---|
| 650 | + struct sk_buff *skb = skb_rb_first(&q->t_root); |
|---|
| 651 | + u64 t1, t2; |
|---|
| 652 | + |
|---|
| 653 | + if (!skb) |
|---|
| 654 | + return q->t_head; |
|---|
| 655 | + if (!q->t_head) |
|---|
| 656 | + return skb; |
|---|
| 657 | + |
|---|
| 658 | + t1 = netem_skb_cb(skb)->time_to_send; |
|---|
| 659 | + t2 = netem_skb_cb(q->t_head)->time_to_send; |
|---|
| 660 | + if (t1 < t2) |
|---|
| 661 | + return skb; |
|---|
| 662 | + return q->t_head; |
|---|
| 663 | +} |
|---|
| 664 | + |
|---|
| 665 | +static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb) |
|---|
| 666 | +{ |
|---|
| 667 | + if (skb == q->t_head) { |
|---|
| 668 | + q->t_head = skb->next; |
|---|
| 669 | + if (!q->t_head) |
|---|
| 670 | + q->t_tail = NULL; |
|---|
| 671 | + } else { |
|---|
| 672 | + rb_erase(&skb->rbnode, &q->t_root); |
|---|
| 673 | + } |
|---|
| 674 | +} |
|---|
| 675 | + |
|---|
| 639 | 676 | static struct sk_buff *netem_dequeue(struct Qdisc *sch) |
|---|
| 640 | 677 | { |
|---|
| 641 | 678 | struct netem_sched_data *q = qdisc_priv(sch); |
|---|
| 642 | 679 | struct sk_buff *skb; |
|---|
| 643 | | - struct rb_node *p; |
|---|
| 644 | 680 | |
|---|
| 645 | 681 | tfifo_dequeue: |
|---|
| 646 | 682 | skb = __qdisc_dequeue_head(&sch->q); |
|---|
| .. | .. |
|---|
| 650 | 686 | qdisc_bstats_update(sch, skb); |
|---|
| 651 | 687 | return skb; |
|---|
| 652 | 688 | } |
|---|
| 653 | | - p = rb_first(&q->t_root); |
|---|
| 654 | | - if (p) { |
|---|
| 689 | + skb = netem_peek(q); |
|---|
| 690 | + if (skb) { |
|---|
| 655 | 691 | u64 time_to_send; |
|---|
| 656 | 692 | u64 now = ktime_get_ns(); |
|---|
| 657 | | - |
|---|
| 658 | | - skb = rb_to_skb(p); |
|---|
| 659 | 693 | |
|---|
| 660 | 694 | /* if more time remaining? */ |
|---|
| 661 | 695 | time_to_send = netem_skb_cb(skb)->time_to_send; |
|---|
| 662 | 696 | if (q->slot.slot_next && q->slot.slot_next < time_to_send) |
|---|
| 663 | 697 | get_slot_next(q, now); |
|---|
| 664 | 698 | |
|---|
| 665 | | - if (time_to_send <= now && q->slot.slot_next <= now) { |
|---|
| 666 | | - rb_erase(p, &q->t_root); |
|---|
| 699 | + if (time_to_send <= now && q->slot.slot_next <= now) { |
|---|
| 700 | + netem_erase_head(q, skb); |
|---|
| 667 | 701 | sch->q.qlen--; |
|---|
| 668 | 702 | qdisc_qstats_backlog_dec(sch, skb); |
|---|
| 669 | 703 | skb->next = NULL; |
|---|
| .. | .. |
|---|
| 672 | 706 | * we need to restore its value. |
|---|
| 673 | 707 | */ |
|---|
| 674 | 708 | skb->dev = qdisc_dev(sch); |
|---|
| 675 | | - |
|---|
| 676 | | -#ifdef CONFIG_NET_CLS_ACT |
|---|
| 677 | | - /* |
|---|
| 678 | | - * If it's at ingress let's pretend the delay is |
|---|
| 679 | | - * from the network (tstamp will be updated). |
|---|
| 680 | | - */ |
|---|
| 681 | | - if (skb->tc_redirected && skb->tc_from_ingress) |
|---|
| 682 | | - skb->tstamp = 0; |
|---|
| 683 | | -#endif |
|---|
| 684 | 709 | |
|---|
| 685 | 710 | if (q->slot.slot_next) { |
|---|
| 686 | 711 | q->slot.packets_left--; |
|---|
| .. | .. |
|---|
| 748 | 773 | * signed 16 bit values. |
|---|
| 749 | 774 | */ |
|---|
| 750 | 775 | |
|---|
| 751 | | -static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, |
|---|
| 752 | | - const struct nlattr *attr) |
|---|
| 776 | +static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) |
|---|
| 753 | 777 | { |
|---|
| 754 | 778 | size_t n = nla_len(attr)/sizeof(__s16); |
|---|
| 755 | 779 | const __s16 *data = nla_data(attr); |
|---|
| 756 | | - spinlock_t *root_lock; |
|---|
| 757 | 780 | struct disttable *d; |
|---|
| 758 | 781 | int i; |
|---|
| 759 | 782 | |
|---|
| .. | .. |
|---|
| 768 | 791 | for (i = 0; i < n; i++) |
|---|
| 769 | 792 | d->table[i] = data[i]; |
|---|
| 770 | 793 | |
|---|
| 771 | | - root_lock = qdisc_root_sleeping_lock(sch); |
|---|
| 772 | | - |
|---|
| 773 | | - spin_lock_bh(root_lock); |
|---|
| 774 | | - swap(*tbl, d); |
|---|
| 775 | | - spin_unlock_bh(root_lock); |
|---|
| 776 | | - |
|---|
| 777 | | - dist_free(d); |
|---|
| 794 | + *tbl = d; |
|---|
| 778 | 795 | return 0; |
|---|
| 779 | 796 | } |
|---|
| 780 | 797 | |
|---|
| .. | .. |
|---|
| 917 | 934 | } |
|---|
| 918 | 935 | |
|---|
| 919 | 936 | if (nested_len >= nla_attr_size(0)) |
|---|
| 920 | | - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), |
|---|
| 921 | | - nested_len, policy, NULL); |
|---|
| 937 | + return nla_parse_deprecated(tb, maxtype, |
|---|
| 938 | + nla_data(nla) + NLA_ALIGN(len), |
|---|
| 939 | + nested_len, policy, NULL); |
|---|
| 922 | 940 | |
|---|
| 923 | 941 | memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); |
|---|
| 924 | 942 | return 0; |
|---|
| .. | .. |
|---|
| 930 | 948 | { |
|---|
| 931 | 949 | struct netem_sched_data *q = qdisc_priv(sch); |
|---|
| 932 | 950 | struct nlattr *tb[TCA_NETEM_MAX + 1]; |
|---|
| 951 | + struct disttable *delay_dist = NULL; |
|---|
| 952 | + struct disttable *slot_dist = NULL; |
|---|
| 933 | 953 | struct tc_netem_qopt *qopt; |
|---|
| 934 | 954 | struct clgstate old_clg; |
|---|
| 935 | 955 | int old_loss_model = CLG_RANDOM; |
|---|
| .. | .. |
|---|
| 943 | 963 | if (ret < 0) |
|---|
| 944 | 964 | return ret; |
|---|
| 945 | 965 | |
|---|
| 966 | + if (tb[TCA_NETEM_DELAY_DIST]) { |
|---|
| 967 | + ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); |
|---|
| 968 | + if (ret) |
|---|
| 969 | + goto table_free; |
|---|
| 970 | + } |
|---|
| 971 | + |
|---|
| 972 | + if (tb[TCA_NETEM_SLOT_DIST]) { |
|---|
| 973 | + ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); |
|---|
| 974 | + if (ret) |
|---|
| 975 | + goto table_free; |
|---|
| 976 | + } |
|---|
| 977 | + |
|---|
| 978 | + sch_tree_lock(sch); |
|---|
| 946 | 979 | /* backup q->clg and q->loss_model */ |
|---|
| 947 | 980 | old_clg = q->clg; |
|---|
| 948 | 981 | old_loss_model = q->loss_model; |
|---|
| .. | .. |
|---|
| 951 | 984 | ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); |
|---|
| 952 | 985 | if (ret) { |
|---|
| 953 | 986 | q->loss_model = old_loss_model; |
|---|
| 954 | | - return ret; |
|---|
| 987 | + q->clg = old_clg; |
|---|
| 988 | + goto unlock; |
|---|
| 955 | 989 | } |
|---|
| 956 | 990 | } else { |
|---|
| 957 | 991 | q->loss_model = CLG_RANDOM; |
|---|
| 958 | 992 | } |
|---|
| 959 | 993 | |
|---|
| 960 | | - if (tb[TCA_NETEM_DELAY_DIST]) { |
|---|
| 961 | | - ret = get_dist_table(sch, &q->delay_dist, |
|---|
| 962 | | - tb[TCA_NETEM_DELAY_DIST]); |
|---|
| 963 | | - if (ret) |
|---|
| 964 | | - goto get_table_failure; |
|---|
| 965 | | - } |
|---|
| 966 | | - |
|---|
| 967 | | - if (tb[TCA_NETEM_SLOT_DIST]) { |
|---|
| 968 | | - ret = get_dist_table(sch, &q->slot_dist, |
|---|
| 969 | | - tb[TCA_NETEM_SLOT_DIST]); |
|---|
| 970 | | - if (ret) |
|---|
| 971 | | - goto get_table_failure; |
|---|
| 972 | | - } |
|---|
| 973 | | - |
|---|
| 994 | + if (delay_dist) |
|---|
| 995 | + swap(q->delay_dist, delay_dist); |
|---|
| 996 | + if (slot_dist) |
|---|
| 997 | + swap(q->slot_dist, slot_dist); |
|---|
| 974 | 998 | sch->limit = qopt->limit; |
|---|
| 975 | 999 | |
|---|
| 976 | 1000 | q->latency = PSCHED_TICKS2NS(qopt->latency); |
|---|
| .. | .. |
|---|
| 1018 | 1042 | /* capping jitter to the range acceptable by tabledist() */ |
|---|
| 1019 | 1043 | q->jitter = min_t(s64, abs(q->jitter), INT_MAX); |
|---|
| 1020 | 1044 | |
|---|
| 1021 | | - return ret; |
|---|
| 1045 | +unlock: |
|---|
| 1046 | + sch_tree_unlock(sch); |
|---|
| 1022 | 1047 | |
|---|
| 1023 | | -get_table_failure: |
|---|
| 1024 | | - /* recover clg and loss_model, in case of |
|---|
| 1025 | | - * q->clg and q->loss_model were modified |
|---|
| 1026 | | - * in get_loss_clg() |
|---|
| 1027 | | - */ |
|---|
| 1028 | | - q->clg = old_clg; |
|---|
| 1029 | | - q->loss_model = old_loss_model; |
|---|
| 1048 | +table_free: |
|---|
| 1049 | + dist_free(delay_dist); |
|---|
| 1050 | + dist_free(slot_dist); |
|---|
| 1030 | 1051 | return ret; |
|---|
| 1031 | 1052 | } |
|---|
| 1032 | 1053 | |
|---|
| .. | .. |
|---|
| 1064 | 1085 | { |
|---|
| 1065 | 1086 | struct nlattr *nest; |
|---|
| 1066 | 1087 | |
|---|
| 1067 | | - nest = nla_nest_start(skb, TCA_NETEM_LOSS); |
|---|
| 1088 | + nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); |
|---|
| 1068 | 1089 | if (nest == NULL) |
|---|
| 1069 | 1090 | goto nla_put_failure; |
|---|
| 1070 | 1091 | |
|---|
| .. | .. |
|---|
| 1120 | 1141 | struct tc_netem_rate rate; |
|---|
| 1121 | 1142 | struct tc_netem_slot slot; |
|---|
| 1122 | 1143 | |
|---|
| 1123 | | - qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), |
|---|
| 1144 | + qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), |
|---|
| 1124 | 1145 | UINT_MAX); |
|---|
| 1125 | | - qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), |
|---|
| 1146 | + qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), |
|---|
| 1126 | 1147 | UINT_MAX); |
|---|
| 1127 | 1148 | qopt.limit = q->limit; |
|---|
| 1128 | 1149 | qopt.loss = q->loss; |
|---|