| .. | .. |
|---|
| 12 | 12 | #include <linux/blktrace_api.h> |
|---|
| 13 | 13 | #include <linux/blk-cgroup.h> |
|---|
| 14 | 14 | #include "blk.h" |
|---|
| 15 | +#include "blk-cgroup-rwstat.h" |
|---|
| 15 | 16 | |
|---|
| 16 | 17 | /* Max dispatch from a group in 1 round */ |
|---|
| 17 | | -static int throtl_grp_quantum = 8; |
|---|
| 18 | +#define THROTL_GRP_QUANTUM 8 |
|---|
| 18 | 19 | |
|---|
| 19 | 20 | /* Total max dispatch from all groups in one round */ |
|---|
| 20 | | -static int throtl_quantum = 32; |
|---|
| 21 | +#define THROTL_QUANTUM 32 |
|---|
| 21 | 22 | |
|---|
| 22 | 23 | /* Throttling is performed over a slice and after that slice is renewed */ |
|---|
| 23 | 24 | #define DFL_THROTL_SLICE_HD (HZ / 10) |
|---|
| .. | .. |
|---|
| 84 | 85 | * RB tree of active children throtl_grp's, which are sorted by |
|---|
| 85 | 86 | * their ->disptime. |
|---|
| 86 | 87 | */ |
|---|
| 87 | | - struct rb_root pending_tree; /* RB tree of active tgs */ |
|---|
| 88 | | - struct rb_node *first_pending; /* first node in the tree */ |
|---|
| 88 | + struct rb_root_cached pending_tree; /* RB tree of active tgs */ |
|---|
| 89 | 89 | unsigned int nr_pending; /* # queued in the tree */ |
|---|
| 90 | 90 | unsigned long first_pending_disptime; /* disptime of the first tg */ |
|---|
| 91 | 91 | struct timer_list pending_timer; /* fires on first_pending_disptime */ |
|---|
| .. | .. |
|---|
| 150 | 150 | /* user configured IOPS limits */ |
|---|
| 151 | 151 | unsigned int iops_conf[2][LIMIT_CNT]; |
|---|
| 152 | 152 | |
|---|
| 153 | | - /* Number of bytes disptached in current slice */ |
|---|
| 153 | + /* Number of bytes dispatched in current slice */ |
|---|
| 154 | 154 | uint64_t bytes_disp[2]; |
|---|
| 155 | 155 | /* Number of bio's dispatched in current slice */ |
|---|
| 156 | 156 | unsigned int io_disp[2]; |
|---|
| .. | .. |
|---|
| 177 | 177 | unsigned int bio_cnt; /* total bios */ |
|---|
| 178 | 178 | unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ |
|---|
| 179 | 179 | unsigned long bio_cnt_reset_time; |
|---|
| 180 | + |
|---|
| 181 | + atomic_t io_split_cnt[2]; |
|---|
| 182 | + atomic_t last_io_split_cnt[2]; |
|---|
| 183 | + |
|---|
| 184 | + struct blkg_rwstat stat_bytes; |
|---|
| 185 | + struct blkg_rwstat stat_ios; |
|---|
| 180 | 186 | }; |
|---|
| 181 | 187 | |
|---|
| 182 | 188 | /* We measure latency for request size from <= 4k to >= 1M */ |
|---|
| .. | .. |
|---|
| 420 | 426 | */ |
|---|
| 421 | 427 | static struct bio *throtl_peek_queued(struct list_head *queued) |
|---|
| 422 | 428 | { |
|---|
| 423 | | - struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node); |
|---|
| 429 | + struct throtl_qnode *qn; |
|---|
| 424 | 430 | struct bio *bio; |
|---|
| 425 | 431 | |
|---|
| 426 | 432 | if (list_empty(queued)) |
|---|
| 427 | 433 | return NULL; |
|---|
| 428 | 434 | |
|---|
| 435 | + qn = list_first_entry(queued, struct throtl_qnode, node); |
|---|
| 429 | 436 | bio = bio_list_peek(&qn->bios); |
|---|
| 430 | 437 | WARN_ON_ONCE(!bio); |
|---|
| 431 | 438 | return bio; |
|---|
| .. | .. |
|---|
| 448 | 455 | static struct bio *throtl_pop_queued(struct list_head *queued, |
|---|
| 449 | 456 | struct throtl_grp **tg_to_put) |
|---|
| 450 | 457 | { |
|---|
| 451 | | - struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node); |
|---|
| 458 | + struct throtl_qnode *qn; |
|---|
| 452 | 459 | struct bio *bio; |
|---|
| 453 | 460 | |
|---|
| 454 | 461 | if (list_empty(queued)) |
|---|
| 455 | 462 | return NULL; |
|---|
| 456 | 463 | |
|---|
| 464 | + qn = list_first_entry(queued, struct throtl_qnode, node); |
|---|
| 457 | 465 | bio = bio_list_pop(&qn->bios); |
|---|
| 458 | 466 | WARN_ON_ONCE(!bio); |
|---|
| 459 | 467 | |
|---|
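Note on the `throtl_peek_queued()`/`throtl_pop_queued()` hunks above: the patch moves `list_first_entry()` below the `list_empty()` check, presumably so an entry pointer is never computed from an empty list's head. A minimal sketch of the same guard, with `struct qnode` as a hypothetical stand-in for `throtl_qnode`:

```c
#include <linux/list.h>

struct qnode {			/* hypothetical stand-in for throtl_qnode */
	struct list_head node;
	int payload;
};

/*
 * On an empty list, head->next == head, so list_first_entry() would
 * return container_of(head, ...) - a pointer into whatever structure
 * embeds the list head, not a real node. Check emptiness first.
 */
static struct qnode *qnode_peek(struct list_head *queued)
{
	if (list_empty(queued))
		return NULL;

	/* equivalently: list_first_entry_or_null(queued, struct qnode, node) */
	return list_first_entry(queued, struct qnode, node);
}
```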
| .. | .. |
|---|
| 475 | 483 | { |
|---|
| 476 | 484 | INIT_LIST_HEAD(&sq->queued[0]); |
|---|
| 477 | 485 | INIT_LIST_HEAD(&sq->queued[1]); |
|---|
| 478 | | - sq->pending_tree = RB_ROOT; |
|---|
| 486 | + sq->pending_tree = RB_ROOT_CACHED; |
|---|
| 479 | 487 | timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); |
|---|
| 480 | 488 | } |
|---|
| 481 | 489 | |
|---|
| 482 | | -static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) |
|---|
| 490 | +static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, |
|---|
| 491 | + struct request_queue *q, |
|---|
| 492 | + struct blkcg *blkcg) |
|---|
| 483 | 493 | { |
|---|
| 484 | 494 | struct throtl_grp *tg; |
|---|
| 485 | 495 | int rw; |
|---|
| 486 | 496 | |
|---|
| 487 | | - tg = kzalloc_node(sizeof(*tg), gfp, node); |
|---|
| 497 | + tg = kzalloc_node(sizeof(*tg), gfp, q->node); |
|---|
| 488 | 498 | if (!tg) |
|---|
| 489 | 499 | return NULL; |
|---|
| 500 | + |
|---|
| 501 | + if (blkg_rwstat_init(&tg->stat_bytes, gfp)) |
|---|
| 502 | + goto err_free_tg; |
|---|
| 503 | + |
|---|
| 504 | + if (blkg_rwstat_init(&tg->stat_ios, gfp)) |
|---|
| 505 | + goto err_exit_stat_bytes; |
|---|
| 490 | 506 | |
|---|
| 491 | 507 | throtl_service_queue_init(&tg->service_queue); |
|---|
| 492 | 508 | |
|---|
| .. | .. |
|---|
| 512 | 528 | tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; |
|---|
| 513 | 529 | |
|---|
| 514 | 530 | return &tg->pd; |
|---|
| 531 | + |
|---|
| 532 | +err_exit_stat_bytes: |
|---|
| 533 | + blkg_rwstat_exit(&tg->stat_bytes); |
|---|
| 534 | +err_free_tg: |
|---|
| 535 | + kfree(tg); |
|---|
| 536 | + return NULL; |
|---|
| 515 | 537 | } |
|---|
| 516 | 538 | |
|---|
| 517 | 539 | static void throtl_pd_init(struct blkg_policy_data *pd) |
|---|
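The new per-group `stat_bytes`/`stat_ios` counters follow the usual pair-and-unwind pattern: every `blkg_rwstat_init()` performed in `->pd_alloc()` needs a matching `blkg_rwstat_exit()` on the error path and in `->pd_free()`, because the rwstat allocates percpu counters. A minimal sketch of that pairing, using a hypothetical `struct two_stats` container and assuming it is built inside block/ where `blk-cgroup-rwstat.h` is visible:

```c
#include <linux/slab.h>
#include "blk-cgroup-rwstat.h"

struct two_stats {			/* hypothetical container */
	struct blkg_rwstat bytes;
	struct blkg_rwstat ios;
};

static struct two_stats *two_stats_alloc(gfp_t gfp)
{
	struct two_stats *s = kzalloc(sizeof(*s), gfp);

	if (!s)
		return NULL;
	if (blkg_rwstat_init(&s->bytes, gfp))	/* returns -ENOMEM on failure */
		goto err_free;
	if (blkg_rwstat_init(&s->ios, gfp))
		goto err_exit_bytes;
	return s;

err_exit_bytes:
	blkg_rwstat_exit(&s->bytes);		/* unwind in reverse order */
err_free:
	kfree(s);
	return NULL;
}

static void two_stats_free(struct two_stats *s)
{
	blkg_rwstat_exit(&s->bytes);
	blkg_rwstat_exit(&s->ios);
	kfree(s);
}
```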
| .. | .. |
|---|
| 610 | 632 | struct throtl_grp *tg = pd_to_tg(pd); |
|---|
| 611 | 633 | |
|---|
| 612 | 634 | del_timer_sync(&tg->service_queue.pending_timer); |
|---|
| 635 | + blkg_rwstat_exit(&tg->stat_bytes); |
|---|
| 636 | + blkg_rwstat_exit(&tg->stat_ios); |
|---|
| 613 | 637 | kfree(tg); |
|---|
| 614 | 638 | } |
|---|
| 615 | 639 | |
|---|
| 616 | 640 | static struct throtl_grp * |
|---|
| 617 | 641 | throtl_rb_first(struct throtl_service_queue *parent_sq) |
|---|
| 618 | 642 | { |
|---|
| 619 | | - /* Service tree is empty */ |
|---|
| 620 | | - if (!parent_sq->nr_pending) |
|---|
| 643 | + struct rb_node *n; |
|---|
| 644 | + |
|---|
| 645 | + n = rb_first_cached(&parent_sq->pending_tree); |
|---|
| 646 | + WARN_ON_ONCE(!n); |
|---|
| 647 | + if (!n) |
|---|
| 621 | 648 | return NULL; |
|---|
| 622 | | - |
|---|
| 623 | | - if (!parent_sq->first_pending) |
|---|
| 624 | | - parent_sq->first_pending = rb_first(&parent_sq->pending_tree); |
|---|
| 625 | | - |
|---|
| 626 | | - if (parent_sq->first_pending) |
|---|
| 627 | | - return rb_entry_tg(parent_sq->first_pending); |
|---|
| 628 | | - |
|---|
| 629 | | - return NULL; |
|---|
| 630 | | -} |
|---|
| 631 | | - |
|---|
| 632 | | -static void rb_erase_init(struct rb_node *n, struct rb_root *root) |
|---|
| 633 | | -{ |
|---|
| 634 | | - rb_erase(n, root); |
|---|
| 635 | | - RB_CLEAR_NODE(n); |
|---|
| 649 | + return rb_entry_tg(n); |
|---|
| 636 | 650 | } |
|---|
| 637 | 651 | |
|---|
| 638 | 652 | static void throtl_rb_erase(struct rb_node *n, |
|---|
| 639 | 653 | struct throtl_service_queue *parent_sq) |
|---|
| 640 | 654 | { |
|---|
| 641 | | - if (parent_sq->first_pending == n) |
|---|
| 642 | | - parent_sq->first_pending = NULL; |
|---|
| 643 | | - rb_erase_init(n, &parent_sq->pending_tree); |
|---|
| 655 | + rb_erase_cached(n, &parent_sq->pending_tree); |
|---|
| 656 | + RB_CLEAR_NODE(n); |
|---|
| 644 | 657 | --parent_sq->nr_pending; |
|---|
| 645 | 658 | } |
|---|
| 646 | 659 | |
|---|
| .. | .. |
|---|
| 658 | 671 | static void tg_service_queue_add(struct throtl_grp *tg) |
|---|
| 659 | 672 | { |
|---|
| 660 | 673 | struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; |
|---|
| 661 | | - struct rb_node **node = &parent_sq->pending_tree.rb_node; |
|---|
| 674 | + struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node; |
|---|
| 662 | 675 | struct rb_node *parent = NULL; |
|---|
| 663 | 676 | struct throtl_grp *__tg; |
|---|
| 664 | 677 | unsigned long key = tg->disptime; |
|---|
| 665 | | - int left = 1; |
|---|
| 678 | + bool leftmost = true; |
|---|
| 666 | 679 | |
|---|
| 667 | 680 | while (*node != NULL) { |
|---|
| 668 | 681 | parent = *node; |
|---|
| .. | .. |
|---|
| 672 | 685 | node = &parent->rb_left; |
|---|
| 673 | 686 | else { |
|---|
| 674 | 687 | node = &parent->rb_right; |
|---|
| 675 | | - left = 0; |
|---|
| 688 | + leftmost = false; |
|---|
| 676 | 689 | } |
|---|
| 677 | 690 | } |
|---|
| 678 | 691 | |
|---|
| 679 | | - if (left) |
|---|
| 680 | | - parent_sq->first_pending = &tg->rb_node; |
|---|
| 681 | | - |
|---|
| 682 | 692 | rb_link_node(&tg->rb_node, parent, node); |
|---|
| 683 | | - rb_insert_color(&tg->rb_node, &parent_sq->pending_tree); |
|---|
| 684 | | -} |
|---|
| 685 | | - |
|---|
| 686 | | -static void __throtl_enqueue_tg(struct throtl_grp *tg) |
|---|
| 687 | | -{ |
|---|
| 688 | | - tg_service_queue_add(tg); |
|---|
| 689 | | - tg->flags |= THROTL_TG_PENDING; |
|---|
| 690 | | - tg->service_queue.parent_sq->nr_pending++; |
|---|
| 693 | + rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree, |
|---|
| 694 | + leftmost); |
|---|
| 691 | 695 | } |
|---|
| 692 | 696 | |
|---|
| 693 | 697 | static void throtl_enqueue_tg(struct throtl_grp *tg) |
|---|
| 694 | 698 | { |
|---|
| 695 | | - if (!(tg->flags & THROTL_TG_PENDING)) |
|---|
| 696 | | - __throtl_enqueue_tg(tg); |
|---|
| 697 | | -} |
|---|
| 698 | | - |
|---|
| 699 | | -static void __throtl_dequeue_tg(struct throtl_grp *tg) |
|---|
| 700 | | -{ |
|---|
| 701 | | - throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq); |
|---|
| 702 | | - tg->flags &= ~THROTL_TG_PENDING; |
|---|
| 699 | + if (!(tg->flags & THROTL_TG_PENDING)) { |
|---|
| 700 | + tg_service_queue_add(tg); |
|---|
| 701 | + tg->flags |= THROTL_TG_PENDING; |
|---|
| 702 | + tg->service_queue.parent_sq->nr_pending++; |
|---|
| 703 | + } |
|---|
| 703 | 704 | } |
|---|
| 704 | 705 | |
|---|
| 705 | 706 | static void throtl_dequeue_tg(struct throtl_grp *tg) |
|---|
| 706 | 707 | { |
|---|
| 707 | | - if (tg->flags & THROTL_TG_PENDING) |
|---|
| 708 | | - __throtl_dequeue_tg(tg); |
|---|
| 708 | + if (tg->flags & THROTL_TG_PENDING) { |
|---|
| 709 | + throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq); |
|---|
| 710 | + tg->flags &= ~THROTL_TG_PENDING; |
|---|
| 711 | + } |
|---|
| 709 | 712 | } |
|---|
| 710 | 713 | |
|---|
| 711 | 714 | /* Call with queue lock held */ |
|---|
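The hunks above replace the open-coded `first_pending` cache with `struct rb_root_cached`, which keeps the leftmost node inside the root so `rb_first_cached()` is O(1) and insert/erase keep the cache coherent automatically. A condensed sketch of the idiom on a hypothetical keyed `struct item`, mirroring what `tg_service_queue_add()`, `throtl_rb_first()` and `throtl_rb_erase()` now do:

```c
#include <linux/rbtree.h>

struct item {				/* hypothetical keyed node */
	unsigned long key;
	struct rb_node rb;
};

static void item_insert(struct item *it, struct rb_root_cached *root)
{
	struct rb_node **link = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*link) {
		struct item *cur;

		parent = *link;
		cur = rb_entry(parent, struct item, rb);
		if (it->key < cur->key) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;	/* went right at least once */
		}
	}
	rb_link_node(&it->rb, parent, link);
	/* rebalance and, if still leftmost, update the cached pointer */
	rb_insert_color_cached(&it->rb, root, leftmost);
}

static struct item *item_first(struct rb_root_cached *root)
{
	struct rb_node *n = rb_first_cached(root);	/* O(1) lookup */

	return n ? rb_entry(n, struct item, rb) : NULL;
}

static void item_erase(struct item *it, struct rb_root_cached *root)
{
	rb_erase_cached(&it->rb, root);
	RB_CLEAR_NODE(&it->rb);
}
```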
| .. | .. |
|---|
| 771 | 774 | tg->bytes_disp[rw] = 0; |
|---|
| 772 | 775 | tg->io_disp[rw] = 0; |
|---|
| 773 | 776 | |
|---|
| 777 | + atomic_set(&tg->io_split_cnt[rw], 0); |
|---|
| 778 | + |
|---|
| 774 | 779 | /* |
|---|
| 775 | 780 | * Previous slice has expired. We must have trimmed it after last |
|---|
| 776 | 781 | * bio dispatch. That means since start of last slice, we never used |
|---|
| .. | .. |
|---|
| 793 | 798 | tg->io_disp[rw] = 0; |
|---|
| 794 | 799 | tg->slice_start[rw] = jiffies; |
|---|
| 795 | 800 | tg->slice_end[rw] = jiffies + tg->td->throtl_slice; |
|---|
| 801 | + |
|---|
| 802 | + atomic_set(&tg->io_split_cnt[rw], 0); |
|---|
| 803 | + |
|---|
| 796 | 804 | throtl_log(&tg->service_queue, |
|---|
| 797 | 805 | "[%c] new slice start=%lu end=%lu jiffies=%lu", |
|---|
| 798 | 806 | rw == READ ? 'R' : 'W', tg->slice_start[rw], |
|---|
| .. | .. |
|---|
| 808 | 816 | static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw, |
|---|
| 809 | 817 | unsigned long jiffy_end) |
|---|
| 810 | 818 | { |
|---|
| 811 | | - tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice); |
|---|
| 819 | + throtl_set_slice_end(tg, rw, jiffy_end); |
|---|
| 812 | 820 | throtl_log(&tg->service_queue, |
|---|
| 813 | 821 | "[%c] extend slice start=%lu end=%lu jiffies=%lu", |
|---|
| 814 | 822 | rw == READ ? 'R' : 'W', tg->slice_start[rw], |
|---|
| .. | .. |
|---|
| 843 | 851 | /* |
|---|
| 844 | 852 | * A bio has been dispatched. Also adjust slice_end. It might happen |
|---|
| 845 | 853 | * that initially cgroup limit was very low resulting in high |
|---|
| 846 | | - * slice_end, but later limit was bumped up and bio was dispached |
|---|
| 854 | + * slice_end, but later limit was bumped up and bio was dispatched |
|---|
| 847 | 855 | * sooner, then we need to reduce slice_end. A high bogus slice_end |
|---|
| 848 | 856 | * is bad because it does not allow new slice to start. |
|---|
| 849 | 857 | */ |
|---|
| .. | .. |
|---|
| 885 | 893 | } |
|---|
| 886 | 894 | |
|---|
| 887 | 895 | static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio, |
|---|
| 888 | | - unsigned long *wait) |
|---|
| 896 | + u32 iops_limit, unsigned long *wait) |
|---|
| 889 | 897 | { |
|---|
| 890 | 898 | bool rw = bio_data_dir(bio); |
|---|
| 891 | 899 | unsigned int io_allowed; |
|---|
| 892 | 900 | unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; |
|---|
| 893 | 901 | u64 tmp; |
|---|
| 902 | + |
|---|
| 903 | + if (iops_limit == UINT_MAX) { |
|---|
| 904 | + if (wait) |
|---|
| 905 | + *wait = 0; |
|---|
| 906 | + return true; |
|---|
| 907 | + } |
|---|
| 894 | 908 | |
|---|
| 895 | 909 | jiffy_elapsed = jiffies - tg->slice_start[rw]; |
|---|
| 896 | 910 | |
|---|
| .. | .. |
|---|
| 904 | 918 | * have been trimmed. |
|---|
| 905 | 919 | */ |
|---|
| 906 | 920 | |
|---|
| 907 | | - tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd; |
|---|
| 921 | + tmp = (u64)iops_limit * jiffy_elapsed_rnd; |
|---|
| 908 | 922 | do_div(tmp, HZ); |
|---|
| 909 | 923 | |
|---|
| 910 | 924 | if (tmp > UINT_MAX) |
|---|
| .. | .. |
|---|
| 927 | 941 | } |
|---|
| 928 | 942 | |
|---|
| 929 | 943 | static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, |
|---|
| 930 | | - unsigned long *wait) |
|---|
| 944 | + u64 bps_limit, unsigned long *wait) |
|---|
| 931 | 945 | { |
|---|
| 932 | 946 | bool rw = bio_data_dir(bio); |
|---|
| 933 | | - u64 bytes_allowed, extra_bytes, tmp; |
|---|
| 947 | + u64 bytes_allowed, extra_bytes; |
|---|
| 934 | 948 | unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; |
|---|
| 935 | 949 | unsigned int bio_size = throtl_bio_data_size(bio); |
|---|
| 950 | + |
|---|
| 951 | + if (bps_limit == U64_MAX) { |
|---|
| 952 | + if (wait) |
|---|
| 953 | + *wait = 0; |
|---|
| 954 | + return true; |
|---|
| 955 | + } |
|---|
| 936 | 956 | |
|---|
| 937 | 957 | jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; |
|---|
| 938 | 958 | |
|---|
| .. | .. |
|---|
| 941 | 961 | jiffy_elapsed_rnd = tg->td->throtl_slice; |
|---|
| 942 | 962 | |
|---|
| 943 | 963 | jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice); |
|---|
| 944 | | - |
|---|
| 945 | | - tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd; |
|---|
| 946 | | - do_div(tmp, HZ); |
|---|
| 947 | | - bytes_allowed = tmp; |
|---|
| 964 | + bytes_allowed = mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed_rnd, |
|---|
| 965 | + (u64)HZ); |
|---|
| 948 | 966 | |
|---|
| 949 | 967 | if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) { |
|---|
| 950 | 968 | if (wait) |
|---|
| .. | .. |
|---|
| 954 | 972 | |
|---|
| 955 | 973 | /* Calc approx time to dispatch */ |
|---|
| 956 | 974 | extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; |
|---|
| 957 | | - jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw)); |
|---|
| 975 | + jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit); |
|---|
| 958 | 976 | |
|---|
| 959 | 977 | if (!jiffy_wait) |
|---|
| 960 | 978 | jiffy_wait = 1; |
|---|
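In the `tg_with_in_bps_limit()` hunk above, the old code multiplied a u64 byte rate by the elapsed-jiffies value and only then called `do_div()`, so a very large configured bps limit could wrap the 64-bit intermediate. `mul_u64_u64_div_u64()` performs the multiply at full width (128-bit where the architecture provides it) before dividing. A hedged sketch contrasting the two formulations, with illustrative variable names:

```c
#include <linux/jiffies.h>
#include <linux/math64.h>

static u64 bytes_allowed_old(u64 bps_limit, unsigned long elapsed_jiffies)
{
	u64 tmp = bps_limit * elapsed_jiffies;	/* may wrap for huge limits */

	do_div(tmp, HZ);
	return tmp;
}

static u64 bytes_allowed_new(u64 bps_limit, unsigned long elapsed_jiffies)
{
	/* full-width multiply, then divide: no intermediate overflow */
	return mul_u64_u64_div_u64(bps_limit, (u64)elapsed_jiffies, (u64)HZ);
}
```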
| .. | .. |
|---|
| 978 | 996 | { |
|---|
| 979 | 997 | bool rw = bio_data_dir(bio); |
|---|
| 980 | 998 | unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; |
|---|
| 999 | + u64 bps_limit = tg_bps_limit(tg, rw); |
|---|
| 1000 | + u32 iops_limit = tg_iops_limit(tg, rw); |
|---|
| 981 | 1001 | |
|---|
| 982 | 1002 | /* |
|---|
| 983 | 1003 | * Currently whole state machine of group depends on first bio |
|---|
| .. | .. |
|---|
| 989 | 1009 | bio != throtl_peek_queued(&tg->service_queue.queued[rw])); |
|---|
| 990 | 1010 | |
|---|
| 991 | 1011 | /* If tg->bps = -1, then BW is unlimited */ |
|---|
| 992 | | - if (tg_bps_limit(tg, rw) == U64_MAX && |
|---|
| 993 | | - tg_iops_limit(tg, rw) == UINT_MAX) { |
|---|
| 1012 | + if (bps_limit == U64_MAX && iops_limit == UINT_MAX) { |
|---|
| 994 | 1013 | if (wait) |
|---|
| 995 | 1014 | *wait = 0; |
|---|
| 996 | 1015 | return true; |
|---|
| .. | .. |
|---|
| 1012 | 1031 | jiffies + tg->td->throtl_slice); |
|---|
| 1013 | 1032 | } |
|---|
| 1014 | 1033 | |
|---|
| 1015 | | - if (tg_with_in_bps_limit(tg, bio, &bps_wait) && |
|---|
| 1016 | | - tg_with_in_iops_limit(tg, bio, &iops_wait)) { |
|---|
| 1034 | + if (iops_limit != UINT_MAX) |
|---|
| 1035 | + tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0); |
|---|
| 1036 | + |
|---|
| 1037 | + if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) && |
|---|
| 1038 | + tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) { |
|---|
| 1017 | 1039 | if (wait) |
|---|
| 1018 | 1040 | *wait = 0; |
|---|
| 1019 | 1041 | return true; |
|---|
| .. | .. |
|---|
| 1073 | 1095 | * If @tg doesn't currently have any bios queued in the same |
|---|
| 1074 | 1096 | * direction, queueing @bio can change when @tg should be |
|---|
| 1075 | 1097 | * dispatched. Mark that @tg was empty. This is automatically |
|---|
| 1076 | | - * cleaered on the next tg_update_disptime(). |
|---|
| 1098 | + * cleared on the next tg_update_disptime(). |
|---|
| 1077 | 1099 | */ |
|---|
| 1078 | 1100 | if (!sq->nr_queued[rw]) |
|---|
| 1079 | 1101 | tg->flags |= THROTL_TG_WAS_EMPTY; |
|---|
| .. | .. |
|---|
| 1166 | 1188 | { |
|---|
| 1167 | 1189 | struct throtl_service_queue *sq = &tg->service_queue; |
|---|
| 1168 | 1190 | unsigned int nr_reads = 0, nr_writes = 0; |
|---|
| 1169 | | - unsigned int max_nr_reads = throtl_grp_quantum*3/4; |
|---|
| 1170 | | - unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; |
|---|
| 1191 | + unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4; |
|---|
| 1192 | + unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads; |
|---|
| 1171 | 1193 | struct bio *bio; |
|---|
| 1172 | 1194 | |
|---|
| 1173 | 1195 | /* Try to dispatch 75% READS and 25% WRITES */ |
|---|
| .. | .. |
|---|
| 1200 | 1222 | unsigned int nr_disp = 0; |
|---|
| 1201 | 1223 | |
|---|
| 1202 | 1224 | while (1) { |
|---|
| 1203 | | - struct throtl_grp *tg = throtl_rb_first(parent_sq); |
|---|
| 1225 | + struct throtl_grp *tg; |
|---|
| 1204 | 1226 | struct throtl_service_queue *sq; |
|---|
| 1205 | 1227 | |
|---|
| 1228 | + if (!parent_sq->nr_pending) |
|---|
| 1229 | + break; |
|---|
| 1230 | + |
|---|
| 1231 | + tg = throtl_rb_first(parent_sq); |
|---|
| 1206 | 1232 | if (!tg) |
|---|
| 1207 | 1233 | break; |
|---|
| 1208 | 1234 | |
|---|
| .. | .. |
|---|
| 1217 | 1243 | if (sq->nr_queued[0] || sq->nr_queued[1]) |
|---|
| 1218 | 1244 | tg_update_disptime(tg); |
|---|
| 1219 | 1245 | |
|---|
| 1220 | | - if (nr_disp >= throtl_quantum) |
|---|
| 1246 | + if (nr_disp >= THROTL_QUANTUM) |
|---|
| 1221 | 1247 | break; |
|---|
| 1222 | 1248 | } |
|---|
| 1223 | 1249 | |
|---|
| .. | .. |
|---|
| 1228 | 1254 | struct throtl_grp *this_tg); |
|---|
| 1229 | 1255 | /** |
|---|
| 1230 | 1256 | * throtl_pending_timer_fn - timer function for service_queue->pending_timer |
|---|
| 1231 | | - * @arg: the throtl_service_queue being serviced |
|---|
| 1257 | + * @t: the pending_timer member of the throtl_service_queue being serviced |
|---|
| 1232 | 1258 | * |
|---|
| 1233 | 1259 | * This timer is armed when a child throtl_grp with active bio's become |
|---|
| 1234 | 1260 | * pending and queued on the service_queue's pending_tree and expires when |
|---|
| .. | .. |
|---|
| 1251 | 1277 | bool dispatched; |
|---|
| 1252 | 1278 | int ret; |
|---|
| 1253 | 1279 | |
|---|
| 1254 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1280 | + spin_lock_irq(&q->queue_lock); |
|---|
| 1255 | 1281 | if (throtl_can_upgrade(td, NULL)) |
|---|
| 1256 | 1282 | throtl_upgrade_state(td); |
|---|
| 1257 | 1283 | |
|---|
| .. | .. |
|---|
| 1274 | 1300 | break; |
|---|
| 1275 | 1301 | |
|---|
| 1276 | 1302 | /* this dispatch windows is still open, relax and repeat */ |
|---|
| 1277 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1303 | + spin_unlock_irq(&q->queue_lock); |
|---|
| 1278 | 1304 | cpu_relax(); |
|---|
| 1279 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1305 | + spin_lock_irq(&q->queue_lock); |
|---|
| 1280 | 1306 | } |
|---|
| 1281 | 1307 | |
|---|
| 1282 | 1308 | if (!dispatched) |
|---|
| .. | .. |
|---|
| 1294 | 1320 | } |
|---|
| 1295 | 1321 | } |
|---|
| 1296 | 1322 | } else { |
|---|
| 1297 | | - /* reached the top-level, queue issueing */ |
|---|
| 1323 | + /* reached the top-level, queue issuing */ |
|---|
| 1298 | 1324 | queue_work(kthrotld_workqueue, &td->dispatch_work); |
|---|
| 1299 | 1325 | } |
|---|
| 1300 | 1326 | out_unlock: |
|---|
| 1301 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1327 | + spin_unlock_irq(&q->queue_lock); |
|---|
| 1302 | 1328 | } |
|---|
| 1303 | 1329 | |
|---|
| 1304 | 1330 | /** |
|---|
| 1305 | 1331 | * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work |
|---|
| 1306 | 1332 | * @work: work item being executed |
|---|
| 1307 | 1333 | * |
|---|
| 1308 | | - * This function is queued for execution when bio's reach the bio_lists[] |
|---|
| 1309 | | - * of throtl_data->service_queue. Those bio's are ready and issued by this |
|---|
| 1334 | + * This function is queued for execution when bios reach the bio_lists[] |
|---|
| 1335 | + * of throtl_data->service_queue. Those bios are ready and issued by this |
|---|
| 1310 | 1336 | * function. |
|---|
| 1311 | 1337 | */ |
|---|
| 1312 | 1338 | static void blk_throtl_dispatch_work_fn(struct work_struct *work) |
|---|
| .. | .. |
|---|
| 1322 | 1348 | |
|---|
| 1323 | 1349 | bio_list_init(&bio_list_on_stack); |
|---|
| 1324 | 1350 | |
|---|
| 1325 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1351 | + spin_lock_irq(&q->queue_lock); |
|---|
| 1326 | 1352 | for (rw = READ; rw <= WRITE; rw++) |
|---|
| 1327 | 1353 | while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL))) |
|---|
| 1328 | 1354 | bio_list_add(&bio_list_on_stack, bio); |
|---|
| 1329 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1355 | + spin_unlock_irq(&q->queue_lock); |
|---|
| 1330 | 1356 | |
|---|
| 1331 | 1357 | if (!bio_list_empty(&bio_list_on_stack)) { |
|---|
| 1332 | 1358 | blk_start_plug(&plug); |
|---|
| 1333 | | - while((bio = bio_list_pop(&bio_list_on_stack))) |
|---|
| 1334 | | - generic_make_request(bio); |
|---|
| 1359 | + while ((bio = bio_list_pop(&bio_list_on_stack))) |
|---|
| 1360 | + submit_bio_noacct(bio); |
|---|
| 1335 | 1361 | blk_finish_plug(&plug); |
|---|
| 1336 | 1362 | } |
|---|
| 1337 | 1363 | } |
|---|
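`blk_throtl_dispatch_work_fn()` keeps its shape here: bios are moved off the top-level service queue while holding `queue_lock`, then issued outside the lock under a plug; only the issue call changes from `generic_make_request()` to `submit_bio_noacct()`. A stripped-down sketch of that drain-then-submit pattern, assuming a hypothetical per-queue pending `bio_list` protected by `q->queue_lock` (the real code pops from qnode lists instead):

```c
#include <linux/bio.h>
#include <linux/blkdev.h>

static void drain_and_submit(struct request_queue *q, struct bio_list *pending)
{
	struct bio_list on_stack;
	struct blk_plug plug;
	struct bio *bio;

	bio_list_init(&on_stack);

	/* detach everything while holding the lock, but submit nothing yet */
	spin_lock_irq(&q->queue_lock);
	while ((bio = bio_list_pop(pending)))
		bio_list_add(&on_stack, bio);
	spin_unlock_irq(&q->queue_lock);

	if (bio_list_empty(&on_stack))
		return;

	/* batch the submissions; none of them happen under queue_lock */
	blk_start_plug(&plug);
	while ((bio = bio_list_pop(&on_stack)))
		submit_bio_noacct(bio);
	blk_finish_plug(&plug);
}
```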
| .. | .. |
|---|
| 1419 | 1445 | * that a group's limit are dropped suddenly and we don't want to |
|---|
| 1420 | 1446 | * account recently dispatched IO with new low rate. |
|---|
| 1421 | 1447 | */ |
|---|
| 1422 | | - throtl_start_new_slice(tg, 0); |
|---|
| 1423 | | - throtl_start_new_slice(tg, 1); |
|---|
| 1448 | + throtl_start_new_slice(tg, READ); |
|---|
| 1449 | + throtl_start_new_slice(tg, WRITE); |
|---|
| 1424 | 1450 | |
|---|
| 1425 | 1451 | if (tg->flags & THROTL_TG_PENDING) { |
|---|
| 1426 | 1452 | tg_update_disptime(tg); |
|---|
| .. | .. |
|---|
| 1473 | 1499 | return tg_set_conf(of, buf, nbytes, off, false); |
|---|
| 1474 | 1500 | } |
|---|
| 1475 | 1501 | |
|---|
| 1502 | +static int tg_print_rwstat(struct seq_file *sf, void *v) |
|---|
| 1503 | +{ |
|---|
| 1504 | + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
|---|
| 1505 | + blkg_prfill_rwstat, &blkcg_policy_throtl, |
|---|
| 1506 | + seq_cft(sf)->private, true); |
|---|
| 1507 | + return 0; |
|---|
| 1508 | +} |
|---|
| 1509 | + |
|---|
| 1510 | +static u64 tg_prfill_rwstat_recursive(struct seq_file *sf, |
|---|
| 1511 | + struct blkg_policy_data *pd, int off) |
|---|
| 1512 | +{ |
|---|
| 1513 | + struct blkg_rwstat_sample sum; |
|---|
| 1514 | + |
|---|
| 1515 | + blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off, |
|---|
| 1516 | + &sum); |
|---|
| 1517 | + return __blkg_prfill_rwstat(sf, pd, &sum); |
|---|
| 1518 | +} |
|---|
| 1519 | + |
|---|
| 1520 | +static int tg_print_rwstat_recursive(struct seq_file *sf, void *v) |
|---|
| 1521 | +{ |
|---|
| 1522 | + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
|---|
| 1523 | + tg_prfill_rwstat_recursive, &blkcg_policy_throtl, |
|---|
| 1524 | + seq_cft(sf)->private, true); |
|---|
| 1525 | + return 0; |
|---|
| 1526 | +} |
|---|
| 1527 | + |
|---|
| 1476 | 1528 | static struct cftype throtl_legacy_files[] = { |
|---|
| 1477 | 1529 | { |
|---|
| 1478 | 1530 | .name = "throttle.read_bps_device", |
|---|
| .. | .. |
|---|
| 1500 | 1552 | }, |
|---|
| 1501 | 1553 | { |
|---|
| 1502 | 1554 | .name = "throttle.io_service_bytes", |
|---|
| 1503 | | - .private = (unsigned long)&blkcg_policy_throtl, |
|---|
| 1504 | | - .seq_show = blkg_print_stat_bytes, |
|---|
| 1555 | + .private = offsetof(struct throtl_grp, stat_bytes), |
|---|
| 1556 | + .seq_show = tg_print_rwstat, |
|---|
| 1505 | 1557 | }, |
|---|
| 1506 | 1558 | { |
|---|
| 1507 | 1559 | .name = "throttle.io_service_bytes_recursive", |
|---|
| 1508 | | - .private = (unsigned long)&blkcg_policy_throtl, |
|---|
| 1509 | | - .seq_show = blkg_print_stat_bytes_recursive, |
|---|
| 1560 | + .private = offsetof(struct throtl_grp, stat_bytes), |
|---|
| 1561 | + .seq_show = tg_print_rwstat_recursive, |
|---|
| 1510 | 1562 | }, |
|---|
| 1511 | 1563 | { |
|---|
| 1512 | 1564 | .name = "throttle.io_serviced", |
|---|
| 1513 | | - .private = (unsigned long)&blkcg_policy_throtl, |
|---|
| 1514 | | - .seq_show = blkg_print_stat_ios, |
|---|
| 1565 | + .private = offsetof(struct throtl_grp, stat_ios), |
|---|
| 1566 | + .seq_show = tg_print_rwstat, |
|---|
| 1515 | 1567 | }, |
|---|
| 1516 | 1568 | { |
|---|
| 1517 | 1569 | .name = "throttle.io_serviced_recursive", |
|---|
| 1518 | | - .private = (unsigned long)&blkcg_policy_throtl, |
|---|
| 1519 | | - .seq_show = blkg_print_stat_ios_recursive, |
|---|
| 1570 | + .private = offsetof(struct throtl_grp, stat_ios), |
|---|
| 1571 | + .seq_show = tg_print_rwstat_recursive, |
|---|
| 1520 | 1572 | }, |
|---|
| 1521 | 1573 | { } /* terminate */ |
|---|
| 1522 | 1574 | }; |
|---|
| .. | .. |
|---|
| 1639 | 1691 | goto out_finish; |
|---|
| 1640 | 1692 | |
|---|
| 1641 | 1693 | ret = -EINVAL; |
|---|
| 1642 | | - if (!strcmp(tok, "rbps")) |
|---|
| 1694 | + if (!strcmp(tok, "rbps") && val > 1) |
|---|
| 1643 | 1695 | v[0] = val; |
|---|
| 1644 | | - else if (!strcmp(tok, "wbps")) |
|---|
| 1696 | + else if (!strcmp(tok, "wbps") && val > 1) |
|---|
| 1645 | 1697 | v[1] = val; |
|---|
| 1646 | | - else if (!strcmp(tok, "riops")) |
|---|
| 1698 | + else if (!strcmp(tok, "riops") && val > 1) |
|---|
| 1647 | 1699 | v[2] = min_t(u64, val, UINT_MAX); |
|---|
| 1648 | | - else if (!strcmp(tok, "wiops")) |
|---|
| 1700 | + else if (!strcmp(tok, "wiops") && val > 1) |
|---|
| 1649 | 1701 | v[3] = min_t(u64, val, UINT_MAX); |
|---|
| 1650 | 1702 | else if (off == LIMIT_LOW && !strcmp(tok, "idle")) |
|---|
| 1651 | 1703 | idle_time = val; |
|---|
| .. | .. |
|---|
| 1922 | 1974 | queue_work(kthrotld_workqueue, &td->dispatch_work); |
|---|
| 1923 | 1975 | } |
|---|
| 1924 | 1976 | |
|---|
| 1925 | | -static void throtl_downgrade_state(struct throtl_data *td, int new) |
|---|
| 1977 | +static void throtl_downgrade_state(struct throtl_data *td) |
|---|
| 1926 | 1978 | { |
|---|
| 1927 | 1979 | td->scale /= 2; |
|---|
| 1928 | 1980 | |
|---|
| .. | .. |
|---|
| 1932 | 1984 | return; |
|---|
| 1933 | 1985 | } |
|---|
| 1934 | 1986 | |
|---|
| 1935 | | - td->limit_index = new; |
|---|
| 1987 | + td->limit_index = LIMIT_LOW; |
|---|
| 1936 | 1988 | td->low_downgrade_time = jiffies; |
|---|
| 1937 | 1989 | } |
|---|
| 1938 | 1990 | |
|---|
| .. | .. |
|---|
| 2003 | 2055 | } |
|---|
| 2004 | 2056 | |
|---|
| 2005 | 2057 | if (tg->iops[READ][LIMIT_LOW]) { |
|---|
| 2058 | + tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0); |
|---|
| 2006 | 2059 | iops = tg->last_io_disp[READ] * HZ / elapsed_time; |
|---|
| 2007 | 2060 | if (iops >= tg->iops[READ][LIMIT_LOW]) |
|---|
| 2008 | 2061 | tg->last_low_overflow_time[READ] = now; |
|---|
| 2009 | 2062 | } |
|---|
| 2010 | 2063 | |
|---|
| 2011 | 2064 | if (tg->iops[WRITE][LIMIT_LOW]) { |
|---|
| 2065 | + tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0); |
|---|
| 2012 | 2066 | iops = tg->last_io_disp[WRITE] * HZ / elapsed_time; |
|---|
| 2013 | 2067 | if (iops >= tg->iops[WRITE][LIMIT_LOW]) |
|---|
| 2014 | 2068 | tg->last_low_overflow_time[WRITE] = now; |
|---|
| .. | .. |
|---|
| 2019 | 2073 | * cgroups |
|---|
| 2020 | 2074 | */ |
|---|
| 2021 | 2075 | if (throtl_hierarchy_can_downgrade(tg)) |
|---|
| 2022 | | - throtl_downgrade_state(tg->td, LIMIT_LOW); |
|---|
| 2076 | + throtl_downgrade_state(tg->td); |
|---|
| 2023 | 2077 | |
|---|
| 2024 | 2078 | tg->last_bytes_disp[READ] = 0; |
|---|
| 2025 | 2079 | tg->last_bytes_disp[WRITE] = 0; |
|---|
| .. | .. |
|---|
| 2029 | 2083 | |
|---|
| 2030 | 2084 | static void blk_throtl_update_idletime(struct throtl_grp *tg) |
|---|
| 2031 | 2085 | { |
|---|
| 2032 | | - unsigned long now = ktime_get_ns() >> 10; |
|---|
| 2086 | + unsigned long now; |
|---|
| 2033 | 2087 | unsigned long last_finish_time = tg->last_finish_time; |
|---|
| 2034 | 2088 | |
|---|
| 2035 | | - if (now <= last_finish_time || last_finish_time == 0 || |
|---|
| 2089 | + if (last_finish_time == 0) |
|---|
| 2090 | + return; |
|---|
| 2091 | + |
|---|
| 2092 | + now = ktime_get_ns() >> 10; |
|---|
| 2093 | + if (now <= last_finish_time || |
|---|
| 2036 | 2094 | last_finish_time == tg->checked_last_finish_time) |
|---|
| 2037 | 2095 | return; |
|---|
| 2038 | 2096 | |
|---|
| .. | .. |
|---|
| 2048 | 2106 | unsigned long last_latency[2] = { 0 }; |
|---|
| 2049 | 2107 | unsigned long latency[2]; |
|---|
| 2050 | 2108 | |
|---|
| 2051 | | - if (!blk_queue_nonrot(td->queue)) |
|---|
| 2109 | + if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW]) |
|---|
| 2052 | 2110 | return; |
|---|
| 2053 | 2111 | if (time_before(jiffies, td->last_calculate_time + HZ)) |
|---|
| 2054 | 2112 | return; |
|---|
| .. | .. |
|---|
| 2123 | 2181 | } |
|---|
| 2124 | 2182 | #endif |
|---|
| 2125 | 2183 | |
|---|
| 2126 | | -static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) |
|---|
| 2184 | +void blk_throtl_charge_bio_split(struct bio *bio) |
|---|
| 2127 | 2185 | { |
|---|
| 2128 | | -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW |
|---|
| 2129 | | - /* fallback to root_blkg if we fail to get a blkg ref */ |
|---|
| 2130 | | - if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) |
|---|
| 2131 | | - bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); |
|---|
| 2132 | | - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); |
|---|
| 2133 | | -#endif |
|---|
| 2186 | + struct blkcg_gq *blkg = bio->bi_blkg; |
|---|
| 2187 | + struct throtl_grp *parent = blkg_to_tg(blkg); |
|---|
| 2188 | + struct throtl_service_queue *parent_sq; |
|---|
| 2189 | + bool rw = bio_data_dir(bio); |
|---|
| 2190 | + |
|---|
| 2191 | + do { |
|---|
| 2192 | + if (!parent->has_rules[rw]) |
|---|
| 2193 | + break; |
|---|
| 2194 | + |
|---|
| 2195 | + atomic_inc(&parent->io_split_cnt[rw]); |
|---|
| 2196 | + atomic_inc(&parent->last_io_split_cnt[rw]); |
|---|
| 2197 | + |
|---|
| 2198 | + parent_sq = parent->service_queue.parent_sq; |
|---|
| 2199 | + parent = sq_to_tg(parent_sq); |
|---|
| 2200 | + } while (parent); |
|---|
| 2134 | 2201 | } |
|---|
| 2135 | 2202 | |
|---|
| 2136 | | -bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, |
|---|
| 2137 | | - struct bio *bio) |
|---|
| 2203 | +bool blk_throtl_bio(struct bio *bio) |
|---|
| 2138 | 2204 | { |
|---|
| 2205 | + struct request_queue *q = bio->bi_disk->queue; |
|---|
| 2206 | + struct blkcg_gq *blkg = bio->bi_blkg; |
|---|
| 2139 | 2207 | struct throtl_qnode *qn = NULL; |
|---|
| 2140 | | - struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); |
|---|
| 2208 | + struct throtl_grp *tg = blkg_to_tg(blkg); |
|---|
| 2141 | 2209 | struct throtl_service_queue *sq; |
|---|
| 2142 | 2210 | bool rw = bio_data_dir(bio); |
|---|
| 2143 | 2211 | bool throttled = false; |
|---|
| 2144 | 2212 | struct throtl_data *td = tg->td; |
|---|
| 2145 | 2213 | |
|---|
| 2146 | | - WARN_ON_ONCE(!rcu_read_lock_held()); |
|---|
| 2214 | + rcu_read_lock(); |
|---|
| 2147 | 2215 | |
|---|
| 2148 | 2216 | /* see throtl_charge_bio() */ |
|---|
| 2149 | | - if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw]) |
|---|
| 2217 | + if (bio_flagged(bio, BIO_THROTTLED)) |
|---|
| 2150 | 2218 | goto out; |
|---|
| 2151 | 2219 | |
|---|
| 2152 | | - spin_lock_irq(q->queue_lock); |
|---|
| 2220 | + if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) { |
|---|
| 2221 | + blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf, |
|---|
| 2222 | + bio->bi_iter.bi_size); |
|---|
| 2223 | + blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1); |
|---|
| 2224 | + } |
|---|
| 2225 | + |
|---|
| 2226 | + if (!tg->has_rules[rw]) |
|---|
| 2227 | + goto out; |
|---|
| 2228 | + |
|---|
| 2229 | + spin_lock_irq(&q->queue_lock); |
|---|
| 2153 | 2230 | |
|---|
| 2154 | 2231 | throtl_update_latency_buckets(td); |
|---|
| 2155 | 2232 | |
|---|
| 2156 | | - if (unlikely(blk_queue_bypass(q))) |
|---|
| 2157 | | - goto out_unlock; |
|---|
| 2158 | | - |
|---|
| 2159 | | - blk_throtl_assoc_bio(tg, bio); |
|---|
| 2160 | 2233 | blk_throtl_update_idletime(tg); |
|---|
| 2161 | 2234 | |
|---|
| 2162 | 2235 | sq = &tg->service_queue; |
|---|
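The new `blk_throtl_charge_bio_split()` above charges bio splits against iops budgets without taking `queue_lock` on the split path: the splitter only does `atomic_inc()` on per-direction counters, walking up through parents until it reaches a level with no rules for that direction, and the throttling side later folds the counts in with `atomic_xchg(..., 0)` so each split is charged exactly once (into `io_disp[]` for the current slice, and into `last_io_disp[]` for low-limit downgrade checks). A minimal sketch of the producer/consumer halves of such a counter, reduced to one hypothetical direction:

```c
#include <linux/atomic.h>

static atomic_t split_cnt = ATOMIC_INIT(0);	/* hypothetical split counter */
static unsigned int io_disp;			/* only touched under queue_lock */

/* split path (lockless): record that one more IO now exists */
static void charge_split(void)
{
	atomic_inc(&split_cnt);
}

/* dispatch path (under queue_lock): fold pending splits in exactly once */
static void fold_splits(void)
{
	io_disp += atomic_xchg(&split_cnt, 0);
}
```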
| .. | .. |
|---|
| 2199 | 2272 | |
|---|
| 2200 | 2273 | /* |
|---|
| 2201 | 2274 | * @bio passed through this layer without being throttled. |
|---|
| 2202 | | - * Climb up the ladder. If we''re already at the top, it |
|---|
| 2275 | + * Climb up the ladder. If we're already at the top, it |
|---|
| 2203 | 2276 | * can be executed directly. |
|---|
| 2204 | 2277 | */ |
|---|
| 2205 | 2278 | qn = &tg->qnode_on_parent[rw]; |
|---|
| .. | .. |
|---|
| 2235 | 2308 | } |
|---|
| 2236 | 2309 | |
|---|
| 2237 | 2310 | out_unlock: |
|---|
| 2238 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 2311 | + spin_unlock_irq(&q->queue_lock); |
|---|
| 2239 | 2312 | out: |
|---|
| 2240 | 2313 | bio_set_flag(bio, BIO_THROTTLED); |
|---|
| 2241 | 2314 | |
|---|
| .. | .. |
|---|
| 2243 | 2316 | if (throttled || !td->track_bio_latency) |
|---|
| 2244 | 2317 | bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY; |
|---|
| 2245 | 2318 | #endif |
|---|
| 2319 | + rcu_read_unlock(); |
|---|
| 2246 | 2320 | return throttled; |
|---|
| 2247 | 2321 | } |
|---|
| 2248 | 2322 | |
|---|
| .. | .. |
|---|
| 2271 | 2345 | struct request_queue *q = rq->q; |
|---|
| 2272 | 2346 | struct throtl_data *td = q->td; |
|---|
| 2273 | 2347 | |
|---|
| 2274 | | - throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10); |
|---|
| 2348 | + throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq), |
|---|
| 2349 | + time_ns >> 10); |
|---|
| 2275 | 2350 | } |
|---|
| 2276 | 2351 | |
|---|
| 2277 | 2352 | void blk_throtl_bio_endio(struct bio *bio) |
|---|
| .. | .. |
|---|
| 2288 | 2363 | if (!blkg) |
|---|
| 2289 | 2364 | return; |
|---|
| 2290 | 2365 | tg = blkg_to_tg(blkg); |
|---|
| 2366 | + if (!tg->td->limit_valid[LIMIT_LOW]) |
|---|
| 2367 | + return; |
|---|
| 2291 | 2368 | |
|---|
| 2292 | 2369 | finish_time_ns = ktime_get_ns(); |
|---|
| 2293 | 2370 | tg->last_finish_time = finish_time_ns >> 10; |
|---|
| .. | .. |
|---|
| 2326 | 2403 | } |
|---|
| 2327 | 2404 | } |
|---|
| 2328 | 2405 | #endif |
|---|
| 2329 | | - |
|---|
| 2330 | | -/* |
|---|
| 2331 | | - * Dispatch all bios from all children tg's queued on @parent_sq. On |
|---|
| 2332 | | - * return, @parent_sq is guaranteed to not have any active children tg's |
|---|
| 2333 | | - * and all bios from previously active tg's are on @parent_sq->bio_lists[]. |
|---|
| 2334 | | - */ |
|---|
| 2335 | | -static void tg_drain_bios(struct throtl_service_queue *parent_sq) |
|---|
| 2336 | | -{ |
|---|
| 2337 | | - struct throtl_grp *tg; |
|---|
| 2338 | | - |
|---|
| 2339 | | - while ((tg = throtl_rb_first(parent_sq))) { |
|---|
| 2340 | | - struct throtl_service_queue *sq = &tg->service_queue; |
|---|
| 2341 | | - struct bio *bio; |
|---|
| 2342 | | - |
|---|
| 2343 | | - throtl_dequeue_tg(tg); |
|---|
| 2344 | | - |
|---|
| 2345 | | - while ((bio = throtl_peek_queued(&sq->queued[READ]))) |
|---|
| 2346 | | - tg_dispatch_one_bio(tg, bio_data_dir(bio)); |
|---|
| 2347 | | - while ((bio = throtl_peek_queued(&sq->queued[WRITE]))) |
|---|
| 2348 | | - tg_dispatch_one_bio(tg, bio_data_dir(bio)); |
|---|
| 2349 | | - } |
|---|
| 2350 | | -} |
|---|
| 2351 | | - |
|---|
| 2352 | | -/** |
|---|
| 2353 | | - * blk_throtl_drain - drain throttled bios |
|---|
| 2354 | | - * @q: request_queue to drain throttled bios for |
|---|
| 2355 | | - * |
|---|
| 2356 | | - * Dispatch all currently throttled bios on @q through ->make_request_fn(). |
|---|
| 2357 | | - */ |
|---|
| 2358 | | -void blk_throtl_drain(struct request_queue *q) |
|---|
| 2359 | | - __releases(q->queue_lock) __acquires(q->queue_lock) |
|---|
| 2360 | | -{ |
|---|
| 2361 | | - struct throtl_data *td = q->td; |
|---|
| 2362 | | - struct blkcg_gq *blkg; |
|---|
| 2363 | | - struct cgroup_subsys_state *pos_css; |
|---|
| 2364 | | - struct bio *bio; |
|---|
| 2365 | | - int rw; |
|---|
| 2366 | | - |
|---|
| 2367 | | - queue_lockdep_assert_held(q); |
|---|
| 2368 | | - rcu_read_lock(); |
|---|
| 2369 | | - |
|---|
| 2370 | | - /* |
|---|
| 2371 | | - * Drain each tg while doing post-order walk on the blkg tree, so |
|---|
| 2372 | | - * that all bios are propagated to td->service_queue. It'd be |
|---|
| 2373 | | - * better to walk service_queue tree directly but blkg walk is |
|---|
| 2374 | | - * easier. |
|---|
| 2375 | | - */ |
|---|
| 2376 | | - blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) |
|---|
| 2377 | | - tg_drain_bios(&blkg_to_tg(blkg)->service_queue); |
|---|
| 2378 | | - |
|---|
| 2379 | | - /* finally, transfer bios from top-level tg's into the td */ |
|---|
| 2380 | | - tg_drain_bios(&td->service_queue); |
|---|
| 2381 | | - |
|---|
| 2382 | | - rcu_read_unlock(); |
|---|
| 2383 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 2384 | | - |
|---|
| 2385 | | - /* all bios now should be in td->service_queue, issue them */ |
|---|
| 2386 | | - for (rw = READ; rw <= WRITE; rw++) |
|---|
| 2387 | | - while ((bio = throtl_pop_queued(&td->service_queue.queued[rw], |
|---|
| 2388 | | - NULL))) |
|---|
| 2389 | | - generic_make_request(bio); |
|---|
| 2390 | | - |
|---|
| 2391 | | - spin_lock_irq(q->queue_lock); |
|---|
| 2392 | | -} |
|---|
| 2393 | 2406 | |
|---|
| 2394 | 2407 | int blk_throtl_init(struct request_queue *q) |
|---|
| 2395 | 2408 | { |
|---|
| .. | .. |
|---|
| 2469 | 2482 | td->throtl_slice = DFL_THROTL_SLICE_HD; |
|---|
| 2470 | 2483 | #endif |
|---|
| 2471 | 2484 | |
|---|
| 2472 | | - td->track_bio_latency = !queue_is_rq_based(q); |
|---|
| 2485 | + td->track_bio_latency = !queue_is_mq(q); |
|---|
| 2473 | 2486 | if (!td->track_bio_latency) |
|---|
| 2474 | 2487 | blk_stat_enable_accounting(q); |
|---|
| 2475 | 2488 | } |
|---|