| .. | .. |
|---|
| 67 | 67 | * Send out a notify message. |
|---|
| 68 | 68 | */ |
|---|
| 69 | 69 | static void trace_note(struct blk_trace *bt, pid_t pid, int action, |
|---|
| 70 | | - const void *data, size_t len, |
|---|
| 71 | | - union kernfs_node_id *cgid) |
|---|
| 70 | + const void *data, size_t len, u64 cgid) |
|---|
| 72 | 71 | { |
|---|
| 73 | 72 | struct blk_io_trace *t; |
|---|
| 74 | 73 | struct ring_buffer_event *event = NULL; |
|---|
| 75 | | - struct ring_buffer *buffer = NULL; |
|---|
| 74 | + struct trace_buffer *buffer = NULL; |
|---|
| 76 | 75 | int pc = 0; |
|---|
| 77 | 76 | int cpu = smp_processor_id(); |
|---|
| 78 | 77 | bool blk_tracer = blk_tracer_enabled; |
|---|
| 79 | | - ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; |
|---|
| 78 | + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; |
|---|
| 80 | 79 | |
|---|
| 81 | 80 | if (blk_tracer) { |
|---|
| 82 | | - buffer = blk_tr->trace_buffer.buffer; |
|---|
| 81 | + buffer = blk_tr->array_buffer.buffer; |
|---|
| 83 | 82 | pc = preempt_count(); |
|---|
| 84 | 83 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
|---|
| 85 | 84 | sizeof(*t) + len + cgid_len, |
|---|
| .. | .. |
|---|
| 103 | 102 | t->pid = pid; |
|---|
| 104 | 103 | t->cpu = cpu; |
|---|
| 105 | 104 | t->pdu_len = len + cgid_len; |
|---|
| 106 | | - if (cgid) |
|---|
| 107 | | - memcpy((void *)t + sizeof(*t), cgid, cgid_len); |
|---|
| 105 | + if (cgid_len) |
|---|
| 106 | + memcpy((void *)t + sizeof(*t), &cgid, cgid_len); |
|---|
| 108 | 107 | memcpy((void *) t + sizeof(*t) + cgid_len, data, len); |
|---|
| 109 | 108 | |
|---|
| 110 | 109 | if (blk_tracer) |
|---|
| .. | .. |
|---|
| 125 | 124 | spin_lock_irqsave(&running_trace_lock, flags); |
|---|
| 126 | 125 | list_for_each_entry(bt, &running_trace_list, running_list) { |
|---|
| 127 | 126 | trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, |
|---|
| 128 | | - sizeof(tsk->comm), NULL); |
|---|
| 127 | + sizeof(tsk->comm), 0); |
|---|
| 129 | 128 | } |
|---|
| 130 | 129 | spin_unlock_irqrestore(&running_trace_lock, flags); |
|---|
| 131 | 130 | } |
|---|
| .. | .. |
|---|
| 142 | 141 | words[1] = now.tv_nsec; |
|---|
| 143 | 142 | |
|---|
| 144 | 143 | local_irq_save(flags); |
|---|
| 145 | | - trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL); |
|---|
| 144 | + trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0); |
|---|
| 146 | 145 | local_irq_restore(flags); |
|---|
| 147 | 146 | } |
|---|
| 148 | 147 | |
|---|
| .. | .. |
|---|
| 174 | 173 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) |
|---|
| 175 | 174 | blkcg = NULL; |
|---|
| 176 | 175 | #ifdef CONFIG_BLK_CGROUP |
|---|
| 177 | | - trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, |
|---|
| 178 | | - blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL); |
|---|
| 176 | + trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, |
|---|
| 177 | + blkcg ? cgroup_id(blkcg->css.cgroup) : 1); |
|---|
| 179 | 178 | #else |
|---|
| 180 | | - trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL); |
|---|
| 179 | + trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0); |
|---|
| 181 | 180 | #endif |
|---|
| 182 | 181 | local_irq_restore(flags); |
|---|
| 183 | 182 | } |
|---|
| .. | .. |
|---|
| 215 | 214 | */ |
|---|
| 216 | 215 | static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, |
|---|
| 217 | 216 | int op, int op_flags, u32 what, int error, int pdu_len, |
|---|
| 218 | | - void *pdu_data, union kernfs_node_id *cgid) |
|---|
| 217 | + void *pdu_data, u64 cgid) |
|---|
| 219 | 218 | { |
|---|
| 220 | 219 | struct task_struct *tsk = current; |
|---|
| 221 | 220 | struct ring_buffer_event *event = NULL; |
|---|
| 222 | | - struct ring_buffer *buffer = NULL; |
|---|
| 221 | + struct trace_buffer *buffer = NULL; |
|---|
| 223 | 222 | struct blk_io_trace *t; |
|---|
| 224 | 223 | unsigned long flags = 0; |
|---|
| 225 | 224 | unsigned long *sequence; |
|---|
| 226 | 225 | pid_t pid; |
|---|
| 227 | 226 | int cpu, pc = 0; |
|---|
| 228 | 227 | bool blk_tracer = blk_tracer_enabled; |
|---|
| 229 | | - ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; |
|---|
| 228 | + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; |
|---|
| 230 | 229 | |
|---|
| 231 | 230 | if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) |
|---|
| 232 | 231 | return; |
|---|
| .. | .. |
|---|
| 252 | 251 | if (blk_tracer) { |
|---|
| 253 | 252 | tracing_record_cmdline(current); |
|---|
| 254 | 253 | |
|---|
| 255 | | - buffer = blk_tr->trace_buffer.buffer; |
|---|
| 254 | + buffer = blk_tr->array_buffer.buffer; |
|---|
| 256 | 255 | pc = preempt_count(); |
|---|
| 257 | 256 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
|---|
| 258 | 257 | sizeof(*t) + pdu_len + cgid_len, |
|---|
| .. | .. |
|---|
| 297 | 296 | t->pdu_len = pdu_len + cgid_len; |
|---|
| 298 | 297 | |
|---|
| 299 | 298 | if (cgid_len) |
|---|
| 300 | | - memcpy((void *)t + sizeof(*t), cgid, cgid_len); |
|---|
| 299 | + memcpy((void *)t + sizeof(*t), &cgid, cgid_len); |
|---|
| 301 | 300 | if (pdu_len) |
|---|
| 302 | 301 | memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); |
|---|
| 303 | 302 | |
|---|
| .. | .. |
|---|
| 348 | 347 | { |
|---|
| 349 | 348 | struct blk_trace *bt; |
|---|
| 350 | 349 | |
|---|
| 351 | | - bt = xchg(&q->blk_trace, NULL); |
|---|
| 350 | + bt = rcu_replace_pointer(q->blk_trace, NULL, |
|---|
| 351 | + lockdep_is_held(&q->debugfs_mutex)); |
|---|
| 352 | 352 | if (!bt) |
|---|
| 353 | 353 | return -EINVAL; |
|---|
| 354 | 354 | |
|---|
| .. | .. |
|---|
| 362 | 362 | { |
|---|
| 363 | 363 | int ret; |
|---|
| 364 | 364 | |
|---|
| 365 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 365 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 366 | 366 | ret = __blk_trace_remove(q); |
|---|
| 367 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 367 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 368 | 368 | |
|---|
| 369 | 369 | return ret; |
|---|
| 370 | 370 | } |
|---|
| .. | .. |
|---|
| 483 | 483 | struct dentry *dir = NULL; |
|---|
| 484 | 484 | int ret; |
|---|
| 485 | 485 | |
|---|
| 486 | + lockdep_assert_held(&q->debugfs_mutex); |
|---|
| 487 | + |
|---|
| 486 | 488 | if (!buts->buf_size || !buts->buf_nr) |
|---|
| 487 | 489 | return -EINVAL; |
|---|
| 488 | | - |
|---|
| 489 | | - if (!blk_debugfs_root) |
|---|
| 490 | | - return -ENOENT; |
|---|
| 491 | 490 | |
|---|
| 492 | 491 | strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); |
|---|
| 493 | 492 | buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; |
|---|
| .. | .. |
|---|
| 502 | 501 | * bdev can be NULL, as with scsi-generic, this is a helpful as |
|---|
| 503 | 502 | * we can be. |
|---|
| 504 | 503 | */ |
|---|
| 505 | | - if (q->blk_trace) { |
|---|
| 504 | + if (rcu_dereference_protected(q->blk_trace, |
|---|
| 505 | + lockdep_is_held(&q->debugfs_mutex))) { |
|---|
| 506 | 506 | pr_warn("Concurrent blktraces are not allowed on %s\n", |
|---|
| 507 | 507 | buts->name); |
|---|
| 508 | 508 | return -EBUSY; |
|---|
| .. | .. |
|---|
| 521 | 521 | if (!bt->msg_data) |
|---|
| 522 | 522 | goto err; |
|---|
| 523 | 523 | |
|---|
| 524 | | -#ifdef CONFIG_BLK_DEBUG_FS |
|---|
| 525 | 524 | /* |
|---|
| 526 | | - * When tracing whole make_request drivers (multiqueue) block devices, |
|---|
| 527 | | - * reuse the existing debugfs directory created by the block layer on |
|---|
| 528 | | - * init. For request-based block devices, all partitions block devices, |
|---|
| 525 | + * When tracing the whole disk reuse the existing debugfs directory |
|---|
| 526 | + * created by the block layer on init. For partitions block devices, |
|---|
| 529 | 527 | * and scsi-generic block devices we create a temporary new debugfs |
|---|
| 530 | 528 | * directory that will be removed once the trace ends. |
|---|
| 531 | 529 | */ |
|---|
| 532 | | - if (q->mq_ops && bdev && bdev == bdev->bd_contains) |
|---|
| 530 | + if (bdev && !bdev_is_partition(bdev)) |
|---|
| 533 | 531 | dir = q->debugfs_dir; |
|---|
| 534 | 532 | else |
|---|
| 535 | | -#endif |
|---|
| 536 | 533 | bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); |
|---|
| 537 | | - if (!dir) |
|---|
| 538 | | - goto err; |
|---|
| 539 | 534 | |
|---|
| 540 | 535 | /* |
|---|
| 541 | 536 | * As blktrace relies on debugfs for its interface the debugfs directory |
|---|
| .. | .. |
|---|
| 556 | 551 | ret = -EIO; |
|---|
| 557 | 552 | bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, |
|---|
| 558 | 553 | &blk_dropped_fops); |
|---|
| 559 | | - if (!bt->dropped_file) |
|---|
| 560 | | - goto err; |
|---|
| 561 | 554 | |
|---|
| 562 | 555 | bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); |
|---|
| 563 | | - if (!bt->msg_file) |
|---|
| 564 | | - goto err; |
|---|
| 565 | 556 | |
|---|
| 566 | 557 | bt->rchan = relay_open("trace", dir, buts->buf_size, |
|---|
| 567 | 558 | buts->buf_nr, &blk_relay_callbacks, bt); |
|---|
| .. | .. |
|---|
| 583 | 574 | bt->pid = buts->pid; |
|---|
| 584 | 575 | bt->trace_state = Blktrace_setup; |
|---|
| 585 | 576 | |
|---|
| 586 | | - ret = -EBUSY; |
|---|
| 587 | | - if (cmpxchg(&q->blk_trace, NULL, bt)) |
|---|
| 588 | | - goto err; |
|---|
| 589 | | - |
|---|
| 577 | + rcu_assign_pointer(q->blk_trace, bt); |
|---|
| 590 | 578 | get_probe_ref(); |
|---|
| 591 | 579 | |
|---|
| 592 | 580 | ret = 0; |
|---|
| .. | .. |
|---|
| 623 | 611 | { |
|---|
| 624 | 612 | int ret; |
|---|
| 625 | 613 | |
|---|
| 626 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 614 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 627 | 615 | ret = __blk_trace_setup(q, name, dev, bdev, arg); |
|---|
| 628 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 616 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 629 | 617 | |
|---|
| 630 | 618 | return ret; |
|---|
| 631 | 619 | } |
|---|
| .. | .. |
|---|
| 671 | 659 | struct blk_trace *bt; |
|---|
| 672 | 660 | |
|---|
| 673 | 661 | bt = rcu_dereference_protected(q->blk_trace, |
|---|
| 674 | | - lockdep_is_held(&q->blk_trace_mutex)); |
|---|
| 662 | + lockdep_is_held(&q->debugfs_mutex)); |
|---|
| 675 | 663 | if (bt == NULL) |
|---|
| 676 | 664 | return -EINVAL; |
|---|
| 677 | 665 | |
|---|
| .. | .. |
|---|
| 711 | 699 | { |
|---|
| 712 | 700 | int ret; |
|---|
| 713 | 701 | |
|---|
| 714 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 702 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 715 | 703 | ret = __blk_trace_startstop(q, start); |
|---|
| 716 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 704 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 717 | 705 | |
|---|
| 718 | 706 | return ret; |
|---|
| 719 | 707 | } |
|---|
| .. | .. |
|---|
| 742 | 730 | if (!q) |
|---|
| 743 | 731 | return -ENXIO; |
|---|
| 744 | 732 | |
|---|
| 745 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 733 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 746 | 734 | |
|---|
| 747 | 735 | switch (cmd) { |
|---|
| 748 | 736 | case BLKTRACESETUP: |
|---|
| .. | .. |
|---|
| 757 | 745 | #endif |
|---|
| 758 | 746 | case BLKTRACESTART: |
|---|
| 759 | 747 | start = 1; |
|---|
| 748 | + fallthrough; |
|---|
| 760 | 749 | case BLKTRACESTOP: |
|---|
| 761 | 750 | ret = __blk_trace_startstop(q, start); |
|---|
| 762 | 751 | break; |
|---|
| .. | .. |
|---|
| 768 | 757 | break; |
|---|
| 769 | 758 | } |
|---|
| 770 | 759 | |
|---|
| 771 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 760 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 772 | 761 | return ret; |
|---|
| 773 | 762 | } |
|---|
| 774 | 763 | |
|---|
| .. | .. |
|---|
| 779 | 768 | **/ |
|---|
| 780 | 769 | void blk_trace_shutdown(struct request_queue *q) |
|---|
| 781 | 770 | { |
|---|
| 782 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 771 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 783 | 772 | if (rcu_dereference_protected(q->blk_trace, |
|---|
| 784 | | - lockdep_is_held(&q->blk_trace_mutex))) { |
|---|
| 773 | + lockdep_is_held(&q->debugfs_mutex))) { |
|---|
| 785 | 774 | __blk_trace_startstop(q, 0); |
|---|
| 786 | 775 | __blk_trace_remove(q); |
|---|
| 787 | 776 | } |
|---|
| 788 | 777 | |
|---|
| 789 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 778 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 790 | 779 | } |
|---|
| 791 | 780 | |
|---|
| 792 | 781 | #ifdef CONFIG_BLK_CGROUP |
|---|
| 793 | | -static union kernfs_node_id * |
|---|
| 794 | | -blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
|---|
| 782 | +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
|---|
| 795 | 783 | { |
|---|
| 796 | 784 | struct blk_trace *bt; |
|---|
| 797 | 785 | |
|---|
| 798 | 786 | /* We don't use the 'bt' value here except as an optimization... */ |
|---|
| 799 | 787 | bt = rcu_dereference_protected(q->blk_trace, 1); |
|---|
| 800 | 788 | if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) |
|---|
| 801 | | - return NULL; |
|---|
| 789 | + return 0; |
|---|
| 802 | 790 | |
|---|
| 803 | | - if (!bio->bi_css) |
|---|
| 804 | | - return NULL; |
|---|
| 805 | | - return cgroup_get_kernfs_id(bio->bi_css->cgroup); |
|---|
| 791 | + if (!bio->bi_blkg) |
|---|
| 792 | + return 0; |
|---|
| 793 | + return cgroup_id(bio_blkcg(bio)->css.cgroup); |
|---|
| 806 | 794 | } |
|---|
| 807 | 795 | #else |
|---|
| 808 | | -static union kernfs_node_id * |
|---|
| 809 | | -blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
|---|
| 796 | +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
|---|
| 810 | 797 | { |
|---|
| 811 | | - return NULL; |
|---|
| 798 | + return 0; |
|---|
| 812 | 799 | } |
|---|
| 813 | 800 | #endif |
|---|
| 814 | 801 | |
|---|
| 815 | | -static union kernfs_node_id * |
|---|
| 802 | +static u64 |
|---|
| 816 | 803 | blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) |
|---|
| 817 | 804 | { |
|---|
| 818 | 805 | if (!rq->bio) |
|---|
| 819 | | - return NULL; |
|---|
| 806 | + return 0; |
|---|
| 820 | 807 | /* Use the first bio */ |
|---|
| 821 | 808 | return blk_trace_bio_get_cgid(q, rq->bio); |
|---|
| 822 | 809 | } |
|---|
| .. | .. |
|---|
| 838 | 825 | * |
|---|
| 839 | 826 | **/ |
|---|
| 840 | 827 | static void blk_add_trace_rq(struct request *rq, int error, |
|---|
| 841 | | - unsigned int nr_bytes, u32 what, |
|---|
| 842 | | - union kernfs_node_id *cgid) |
|---|
| 828 | + unsigned int nr_bytes, u32 what, u64 cgid) |
|---|
| 843 | 829 | { |
|---|
| 844 | 830 | struct blk_trace *bt; |
|---|
| 845 | 831 | |
|---|
| .. | .. |
|---|
| 871 | 857 | struct request_queue *q, struct request *rq) |
|---|
| 872 | 858 | { |
|---|
| 873 | 859 | blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, |
|---|
| 860 | + blk_trace_request_get_cgid(q, rq)); |
|---|
| 861 | +} |
|---|
| 862 | + |
|---|
| 863 | +static void blk_add_trace_rq_merge(void *ignore, |
|---|
| 864 | + struct request_queue *q, struct request *rq) |
|---|
| 865 | +{ |
|---|
| 866 | + blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, |
|---|
| 874 | 867 | blk_trace_request_get_cgid(q, rq)); |
|---|
| 875 | 868 | } |
|---|
| 876 | 869 | |
|---|
| .. | .. |
|---|
| 925 | 918 | } |
|---|
| 926 | 919 | |
|---|
| 927 | 920 | static void blk_add_trace_bio_complete(void *ignore, |
|---|
| 928 | | - struct request_queue *q, struct bio *bio, |
|---|
| 929 | | - int error) |
|---|
| 921 | + struct request_queue *q, struct bio *bio) |
|---|
| 930 | 922 | { |
|---|
| 931 | | - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
|---|
| 923 | + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, |
|---|
| 924 | + blk_status_to_errno(bio->bi_status)); |
|---|
| 932 | 925 | } |
|---|
| 933 | 926 | |
|---|
| 934 | 927 | static void blk_add_trace_bio_backmerge(void *ignore, |
|---|
| .. | .. |
|---|
| 966 | 959 | bt = rcu_dereference(q->blk_trace); |
|---|
| 967 | 960 | if (bt) |
|---|
| 968 | 961 | __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, |
|---|
| 969 | | - NULL, NULL); |
|---|
| 962 | + NULL, 0); |
|---|
| 970 | 963 | rcu_read_unlock(); |
|---|
| 971 | 964 | } |
|---|
| 972 | 965 | } |
|---|
| .. | .. |
|---|
| 985 | 978 | bt = rcu_dereference(q->blk_trace); |
|---|
| 986 | 979 | if (bt) |
|---|
| 987 | 980 | __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, |
|---|
| 988 | | - 0, 0, NULL, NULL); |
|---|
| 981 | + 0, 0, NULL, 0); |
|---|
| 989 | 982 | rcu_read_unlock(); |
|---|
| 990 | 983 | } |
|---|
| 991 | 984 | } |
|---|
| .. | .. |
|---|
| 997 | 990 | rcu_read_lock(); |
|---|
| 998 | 991 | bt = rcu_dereference(q->blk_trace); |
|---|
| 999 | 992 | if (bt) |
|---|
| 1000 | | - __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL); |
|---|
| 993 | + __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); |
|---|
| 1001 | 994 | rcu_read_unlock(); |
|---|
| 1002 | 995 | } |
|---|
| 1003 | 996 | |
|---|
| .. | .. |
|---|
| 1017 | 1010 | else |
|---|
| 1018 | 1011 | what = BLK_TA_UNPLUG_TIMER; |
|---|
| 1019 | 1012 | |
|---|
| 1020 | | - __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL); |
|---|
| 1013 | + __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); |
|---|
| 1021 | 1014 | } |
|---|
| 1022 | 1015 | rcu_read_unlock(); |
|---|
| 1023 | 1016 | } |
|---|
| .. | .. |
|---|
| 1158 | 1151 | WARN_ON(ret); |
|---|
| 1159 | 1152 | ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
|---|
| 1160 | 1153 | WARN_ON(ret); |
|---|
| 1154 | + ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); |
|---|
| 1155 | + WARN_ON(ret); |
|---|
| 1161 | 1156 | ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
|---|
| 1162 | 1157 | WARN_ON(ret); |
|---|
| 1163 | 1158 | ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
|---|
| .. | .. |
|---|
| 1204 | 1199 | unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); |
|---|
| 1205 | 1200 | unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
|---|
| 1206 | 1201 | unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
|---|
| 1202 | + unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); |
|---|
| 1207 | 1203 | unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
|---|
| 1208 | 1204 | unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); |
|---|
| 1209 | 1205 | |
|---|
| .. | .. |
|---|
| 1256 | 1252 | |
|---|
| 1257 | 1253 | static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg) |
|---|
| 1258 | 1254 | { |
|---|
| 1259 | | - return (void *)(te_blk_io_trace(ent) + 1) + |
|---|
| 1260 | | - (has_cg ? sizeof(union kernfs_node_id) : 0); |
|---|
| 1255 | + return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0); |
|---|
| 1261 | 1256 | } |
|---|
| 1262 | 1257 | |
|---|
| 1263 | | -static inline const void *cgid_start(const struct trace_entry *ent) |
|---|
| 1258 | +static inline u64 t_cgid(const struct trace_entry *ent) |
|---|
| 1264 | 1259 | { |
|---|
| 1265 | | - return (void *)(te_blk_io_trace(ent) + 1); |
|---|
| 1260 | + return *(u64 *)(te_blk_io_trace(ent) + 1); |
|---|
| 1266 | 1261 | } |
|---|
| 1267 | 1262 | |
|---|
| 1268 | 1263 | static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg) |
|---|
| 1269 | 1264 | { |
|---|
| 1270 | | - return te_blk_io_trace(ent)->pdu_len - |
|---|
| 1271 | | - (has_cg ? sizeof(union kernfs_node_id) : 0); |
|---|
| 1265 | + return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0); |
|---|
| 1272 | 1266 | } |
|---|
| 1273 | 1267 | |
|---|
| 1274 | 1268 | static inline u32 t_action(const struct trace_entry *ent) |
|---|
| .. | .. |
|---|
| 1330 | 1324 | |
|---|
| 1331 | 1325 | fill_rwbs(rwbs, t); |
|---|
| 1332 | 1326 | if (has_cg) { |
|---|
| 1333 | | - const union kernfs_node_id *id = cgid_start(iter->ent); |
|---|
| 1327 | + u64 id = t_cgid(iter->ent); |
|---|
| 1334 | 1328 | |
|---|
| 1335 | 1329 | if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) { |
|---|
| 1336 | 1330 | char blkcg_name_buf[NAME_MAX + 1] = "<...>"; |
|---|
| .. | .. |
|---|
| 1340 | 1334 | trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ", |
|---|
| 1341 | 1335 | MAJOR(t->device), MINOR(t->device), |
|---|
| 1342 | 1336 | blkcg_name_buf, act, rwbs); |
|---|
| 1343 | | - } else |
|---|
| 1337 | + } else { |
|---|
| 1338 | + /* |
|---|
| 1339 | + * The cgid portion used to be "INO,GEN". Userland |
|---|
| 1340 | + * builds a FILEID_INO32_GEN fid out of them and |
|---|
| 1341 | + * opens the cgroup using open_by_handle_at(2). |
|---|
| 1342 | + * While 32bit ino setups are still the same, 64bit |
|---|
| 1343 | + * ones now use the 64bit ino as the whole ID and |
|---|
| 1344 | + * no longer use generation. |
|---|
| 1345 | + * |
|---|
| 1346 | + * Regardless of the content, always output |
|---|
| 1347 | + * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can |
|---|
| 1348 | + * be mapped back to @id on both 64 and 32bit ino |
|---|
| 1349 | + * setups. See __kernfs_fh_to_dentry(). |
|---|
| 1350 | + */ |
|---|
| 1344 | 1351 | trace_seq_printf(&iter->seq, |
|---|
| 1345 | | - "%3d,%-3d %x,%-x %2s %3s ", |
|---|
| 1352 | + "%3d,%-3d %llx,%-llx %2s %3s ", |
|---|
| 1346 | 1353 | MAJOR(t->device), MINOR(t->device), |
|---|
| 1347 | | - id->ino, id->generation, act, rwbs); |
|---|
| 1354 | + id & U32_MAX, id >> 32, act, rwbs); |
|---|
| 1355 | + } |
|---|
| 1348 | 1356 | } else |
|---|
| 1349 | 1357 | trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", |
|---|
| 1350 | 1358 | MAJOR(t->device), MINOR(t->device), act, rwbs); |
|---|
| .. | .. |
|---|
| 1594 | 1602 | |
|---|
| 1595 | 1603 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) |
|---|
| 1596 | 1604 | { |
|---|
| 1597 | | - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
|---|
| 1605 | + if ((iter->ent->type != TRACE_BLK) || |
|---|
| 1606 | + !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
|---|
| 1598 | 1607 | return TRACE_TYPE_UNHANDLED; |
|---|
| 1599 | 1608 | |
|---|
| 1600 | 1609 | return print_one_line(iter, true); |
|---|
| .. | .. |
|---|
| 1657 | 1666 | { |
|---|
| 1658 | 1667 | struct blk_trace *bt; |
|---|
| 1659 | 1668 | |
|---|
| 1660 | | - bt = xchg(&q->blk_trace, NULL); |
|---|
| 1669 | + bt = rcu_replace_pointer(q->blk_trace, NULL, |
|---|
| 1670 | + lockdep_is_held(&q->debugfs_mutex)); |
|---|
| 1661 | 1671 | if (bt == NULL) |
|---|
| 1662 | 1672 | return -EINVAL; |
|---|
| 1663 | 1673 | |
|---|
| .. | .. |
|---|
| 1697 | 1707 | |
|---|
| 1698 | 1708 | blk_trace_setup_lba(bt, bdev); |
|---|
| 1699 | 1709 | |
|---|
| 1700 | | - ret = -EBUSY; |
|---|
| 1701 | | - if (cmpxchg(&q->blk_trace, NULL, bt)) |
|---|
| 1702 | | - goto free_bt; |
|---|
| 1703 | | - |
|---|
| 1710 | + rcu_assign_pointer(q->blk_trace, bt); |
|---|
| 1704 | 1711 | get_probe_ref(); |
|---|
| 1705 | 1712 | return 0; |
|---|
| 1706 | 1713 | |
|---|
| .. | .. |
|---|
| 1829 | 1836 | struct device_attribute *attr, |
|---|
| 1830 | 1837 | char *buf) |
|---|
| 1831 | 1838 | { |
|---|
| 1832 | | - struct hd_struct *p = dev_to_part(dev); |
|---|
| 1839 | + struct block_device *bdev = bdget_part(dev_to_part(dev)); |
|---|
| 1833 | 1840 | struct request_queue *q; |
|---|
| 1834 | | - struct block_device *bdev; |
|---|
| 1835 | 1841 | struct blk_trace *bt; |
|---|
| 1836 | 1842 | ssize_t ret = -ENXIO; |
|---|
| 1837 | 1843 | |
|---|
| 1838 | | - bdev = bdget(part_devt(p)); |
|---|
| 1839 | 1844 | if (bdev == NULL) |
|---|
| 1840 | 1845 | goto out; |
|---|
| 1841 | 1846 | |
|---|
| .. | .. |
|---|
| 1843 | 1848 | if (q == NULL) |
|---|
| 1844 | 1849 | goto out_bdput; |
|---|
| 1845 | 1850 | |
|---|
| 1846 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 1851 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 1847 | 1852 | |
|---|
| 1848 | 1853 | bt = rcu_dereference_protected(q->blk_trace, |
|---|
| 1849 | | - lockdep_is_held(&q->blk_trace_mutex)); |
|---|
| 1854 | + lockdep_is_held(&q->debugfs_mutex)); |
|---|
| 1850 | 1855 | if (attr == &dev_attr_enable) { |
|---|
| 1851 | 1856 | ret = sprintf(buf, "%u\n", !!bt); |
|---|
| 1852 | 1857 | goto out_unlock_bdev; |
|---|
| .. | .. |
|---|
| 1864 | 1869 | ret = sprintf(buf, "%llu\n", bt->end_lba); |
|---|
| 1865 | 1870 | |
|---|
| 1866 | 1871 | out_unlock_bdev: |
|---|
| 1867 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 1872 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 1868 | 1873 | out_bdput: |
|---|
| 1869 | 1874 | bdput(bdev); |
|---|
| 1870 | 1875 | out: |
|---|
| .. | .. |
|---|
| 1877 | 1882 | { |
|---|
| 1878 | 1883 | struct block_device *bdev; |
|---|
| 1879 | 1884 | struct request_queue *q; |
|---|
| 1880 | | - struct hd_struct *p; |
|---|
| 1881 | 1885 | struct blk_trace *bt; |
|---|
| 1882 | 1886 | u64 value; |
|---|
| 1883 | 1887 | ssize_t ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 1897 | 1901 | goto out; |
|---|
| 1898 | 1902 | |
|---|
| 1899 | 1903 | ret = -ENXIO; |
|---|
| 1900 | | - |
|---|
| 1901 | | - p = dev_to_part(dev); |
|---|
| 1902 | | - bdev = bdget(part_devt(p)); |
|---|
| 1904 | + bdev = bdget_part(dev_to_part(dev)); |
|---|
| 1903 | 1905 | if (bdev == NULL) |
|---|
| 1904 | 1906 | goto out; |
|---|
| 1905 | 1907 | |
|---|
| .. | .. |
|---|
| 1907 | 1909 | if (q == NULL) |
|---|
| 1908 | 1910 | goto out_bdput; |
|---|
| 1909 | 1911 | |
|---|
| 1910 | | - mutex_lock(&q->blk_trace_mutex); |
|---|
| 1912 | + mutex_lock(&q->debugfs_mutex); |
|---|
| 1911 | 1913 | |
|---|
| 1912 | 1914 | bt = rcu_dereference_protected(q->blk_trace, |
|---|
| 1913 | | - lockdep_is_held(&q->blk_trace_mutex)); |
|---|
| 1915 | + lockdep_is_held(&q->debugfs_mutex)); |
|---|
| 1914 | 1916 | if (attr == &dev_attr_enable) { |
|---|
| 1915 | 1917 | if (!!value == !!bt) { |
|---|
| 1916 | 1918 | ret = 0; |
|---|
| .. | .. |
|---|
| 1927 | 1929 | if (bt == NULL) { |
|---|
| 1928 | 1930 | ret = blk_trace_setup_queue(q, bdev); |
|---|
| 1929 | 1931 | bt = rcu_dereference_protected(q->blk_trace, |
|---|
| 1930 | | - lockdep_is_held(&q->blk_trace_mutex)); |
|---|
| 1932 | + lockdep_is_held(&q->debugfs_mutex)); |
|---|
| 1931 | 1933 | } |
|---|
| 1932 | 1934 | |
|---|
| 1933 | 1935 | if (ret == 0) { |
|---|
| .. | .. |
|---|
| 1942 | 1944 | } |
|---|
| 1943 | 1945 | |
|---|
| 1944 | 1946 | out_unlock_bdev: |
|---|
| 1945 | | - mutex_unlock(&q->blk_trace_mutex); |
|---|
| 1947 | + mutex_unlock(&q->debugfs_mutex); |
|---|
| 1946 | 1948 | out_bdput: |
|---|
| 1947 | 1949 | bdput(bdev); |
|---|
| 1948 | 1950 | out: |
|---|