.. | .. |
---|
67 | 67 | * Send out a notify message. |
---|
68 | 68 | */ |
---|
69 | 69 | static void trace_note(struct blk_trace *bt, pid_t pid, int action, |
---|
70 | | - const void *data, size_t len, |
---|
71 | | - union kernfs_node_id *cgid) |
---|
| 70 | + const void *data, size_t len, u64 cgid) |
---|
72 | 71 | { |
---|
73 | 72 | struct blk_io_trace *t; |
---|
74 | 73 | struct ring_buffer_event *event = NULL; |
---|
75 | | - struct ring_buffer *buffer = NULL; |
---|
| 74 | + struct trace_buffer *buffer = NULL; |
---|
76 | 75 | int pc = 0; |
---|
77 | 76 | int cpu = smp_processor_id(); |
---|
78 | 77 | bool blk_tracer = blk_tracer_enabled; |
---|
79 | | - ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; |
---|
| 78 | + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; |
---|
80 | 79 | |
---|
81 | 80 | if (blk_tracer) { |
---|
82 | | - buffer = blk_tr->trace_buffer.buffer; |
---|
| 81 | + buffer = blk_tr->array_buffer.buffer; |
---|
83 | 82 | pc = preempt_count(); |
---|
84 | 83 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
---|
85 | 84 | sizeof(*t) + len + cgid_len, |
---|
.. | .. |
---|
103 | 102 | t->pid = pid; |
---|
104 | 103 | t->cpu = cpu; |
---|
105 | 104 | t->pdu_len = len + cgid_len; |
---|
106 | | - if (cgid) |
---|
107 | | - memcpy((void *)t + sizeof(*t), cgid, cgid_len); |
---|
| 105 | + if (cgid_len) |
---|
| 106 | + memcpy((void *)t + sizeof(*t), &cgid, cgid_len); |
---|
108 | 107 | memcpy((void *) t + sizeof(*t) + cgid_len, data, len); |
---|
109 | 108 | |
---|
110 | 109 | if (blk_tracer) |
---|
.. | .. |
---|
125 | 124 | spin_lock_irqsave(&running_trace_lock, flags); |
---|
126 | 125 | list_for_each_entry(bt, &running_trace_list, running_list) { |
---|
127 | 126 | trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, |
---|
128 | | - sizeof(tsk->comm), NULL); |
---|
| 127 | + sizeof(tsk->comm), 0); |
---|
129 | 128 | } |
---|
130 | 129 | spin_unlock_irqrestore(&running_trace_lock, flags); |
---|
131 | 130 | } |
---|
.. | .. |
---|
142 | 141 | words[1] = now.tv_nsec; |
---|
143 | 142 | |
---|
144 | 143 | local_irq_save(flags); |
---|
145 | | - trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL); |
---|
| 144 | + trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0); |
---|
146 | 145 | local_irq_restore(flags); |
---|
147 | 146 | } |
---|
148 | 147 | |
---|
.. | .. |
---|
174 | 173 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) |
---|
175 | 174 | blkcg = NULL; |
---|
176 | 175 | #ifdef CONFIG_BLK_CGROUP |
---|
177 | | - trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, |
---|
178 | | - blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL); |
---|
| 176 | + trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, |
---|
| 177 | + blkcg ? cgroup_id(blkcg->css.cgroup) : 1); |
---|
179 | 178 | #else |
---|
180 | | - trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL); |
---|
| 179 | + trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0); |
---|
181 | 180 | #endif |
---|
182 | 181 | local_irq_restore(flags); |
---|
183 | 182 | } |
---|
.. | .. |
---|
215 | 214 | */ |
---|
216 | 215 | static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, |
---|
217 | 216 | int op, int op_flags, u32 what, int error, int pdu_len, |
---|
218 | | - void *pdu_data, union kernfs_node_id *cgid) |
---|
| 217 | + void *pdu_data, u64 cgid) |
---|
219 | 218 | { |
---|
220 | 219 | struct task_struct *tsk = current; |
---|
221 | 220 | struct ring_buffer_event *event = NULL; |
---|
222 | | - struct ring_buffer *buffer = NULL; |
---|
| 221 | + struct trace_buffer *buffer = NULL; |
---|
223 | 222 | struct blk_io_trace *t; |
---|
224 | 223 | unsigned long flags = 0; |
---|
225 | 224 | unsigned long *sequence; |
---|
226 | 225 | pid_t pid; |
---|
227 | 226 | int cpu, pc = 0; |
---|
228 | 227 | bool blk_tracer = blk_tracer_enabled; |
---|
229 | | - ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; |
---|
| 228 | + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; |
---|
230 | 229 | |
---|
231 | 230 | if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) |
---|
232 | 231 | return; |
---|
.. | .. |
---|
252 | 251 | if (blk_tracer) { |
---|
253 | 252 | tracing_record_cmdline(current); |
---|
254 | 253 | |
---|
255 | | - buffer = blk_tr->trace_buffer.buffer; |
---|
| 254 | + buffer = blk_tr->array_buffer.buffer; |
---|
256 | 255 | pc = preempt_count(); |
---|
257 | 256 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
---|
258 | 257 | sizeof(*t) + pdu_len + cgid_len, |
---|
.. | .. |
---|
297 | 296 | t->pdu_len = pdu_len + cgid_len; |
---|
298 | 297 | |
---|
299 | 298 | if (cgid_len) |
---|
300 | | - memcpy((void *)t + sizeof(*t), cgid, cgid_len); |
---|
| 299 | + memcpy((void *)t + sizeof(*t), &cgid, cgid_len); |
---|
301 | 300 | if (pdu_len) |
---|
302 | 301 | memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); |
---|
303 | 302 | |
---|
.. | .. |
---|
348 | 347 | { |
---|
349 | 348 | struct blk_trace *bt; |
---|
350 | 349 | |
---|
351 | | - bt = xchg(&q->blk_trace, NULL); |
---|
| 350 | + bt = rcu_replace_pointer(q->blk_trace, NULL, |
---|
| 351 | + lockdep_is_held(&q->debugfs_mutex)); |
---|
352 | 352 | if (!bt) |
---|
353 | 353 | return -EINVAL; |
---|
354 | 354 | |
---|
.. | .. |
---|
362 | 362 | { |
---|
363 | 363 | int ret; |
---|
364 | 364 | |
---|
365 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 365 | + mutex_lock(&q->debugfs_mutex); |
---|
366 | 366 | ret = __blk_trace_remove(q); |
---|
367 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 367 | + mutex_unlock(&q->debugfs_mutex); |
---|
368 | 368 | |
---|
369 | 369 | return ret; |
---|
370 | 370 | } |
---|
.. | .. |
---|
483 | 483 | struct dentry *dir = NULL; |
---|
484 | 484 | int ret; |
---|
485 | 485 | |
---|
| 486 | + lockdep_assert_held(&q->debugfs_mutex); |
---|
| 487 | + |
---|
486 | 488 | if (!buts->buf_size || !buts->buf_nr) |
---|
487 | 489 | return -EINVAL; |
---|
488 | | - |
---|
489 | | - if (!blk_debugfs_root) |
---|
490 | | - return -ENOENT; |
---|
491 | 490 | |
---|
492 | 491 | strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); |
---|
493 | 492 | buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; |
---|
.. | .. |
---|
502 | 501 | * bdev can be NULL, as with scsi-generic, this is as helpful as |
---|
503 | 502 | * we can be. |
---|
504 | 503 | */ |
---|
505 | | - if (q->blk_trace) { |
---|
| 504 | + if (rcu_dereference_protected(q->blk_trace, |
---|
| 505 | + lockdep_is_held(&q->debugfs_mutex))) { |
---|
506 | 506 | pr_warn("Concurrent blktraces are not allowed on %s\n", |
---|
507 | 507 | buts->name); |
---|
508 | 508 | return -EBUSY; |
---|
.. | .. |
---|
521 | 521 | if (!bt->msg_data) |
---|
522 | 522 | goto err; |
---|
523 | 523 | |
---|
524 | | -#ifdef CONFIG_BLK_DEBUG_FS |
---|
525 | 524 | /* |
---|
526 | | - * When tracing whole make_request drivers (multiqueue) block devices, |
---|
527 | | - * reuse the existing debugfs directory created by the block layer on |
---|
528 | | - * init. For request-based block devices, all partitions block devices, |
---|
| 525 | + * When tracing the whole disk reuse the existing debugfs directory |
---|
| 526 | + * created by the block layer on init. For partitions block devices, |
---|
529 | 527 | * and scsi-generic block devices we create a temporary new debugfs |
---|
530 | 528 | * directory that will be removed once the trace ends. |
---|
531 | 529 | */ |
---|
532 | | - if (q->mq_ops && bdev && bdev == bdev->bd_contains) |
---|
| 530 | + if (bdev && !bdev_is_partition(bdev)) |
---|
533 | 531 | dir = q->debugfs_dir; |
---|
534 | 532 | else |
---|
535 | | -#endif |
---|
536 | 533 | bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); |
---|
537 | | - if (!dir) |
---|
538 | | - goto err; |
---|
539 | 534 | |
---|
540 | 535 | /* |
---|
541 | 536 | * As blktrace relies on debugfs for its interface the debugfs directory |
---|
.. | .. |
---|
556 | 551 | ret = -EIO; |
---|
557 | 552 | bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, |
---|
558 | 553 | &blk_dropped_fops); |
---|
559 | | - if (!bt->dropped_file) |
---|
560 | | - goto err; |
---|
561 | 554 | |
---|
562 | 555 | bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); |
---|
563 | | - if (!bt->msg_file) |
---|
564 | | - goto err; |
---|
565 | 556 | |
---|
566 | 557 | bt->rchan = relay_open("trace", dir, buts->buf_size, |
---|
567 | 558 | buts->buf_nr, &blk_relay_callbacks, bt); |
---|
.. | .. |
---|
583 | 574 | bt->pid = buts->pid; |
---|
584 | 575 | bt->trace_state = Blktrace_setup; |
---|
585 | 576 | |
---|
586 | | - ret = -EBUSY; |
---|
587 | | - if (cmpxchg(&q->blk_trace, NULL, bt)) |
---|
588 | | - goto err; |
---|
589 | | - |
---|
| 577 | + rcu_assign_pointer(q->blk_trace, bt); |
---|
590 | 578 | get_probe_ref(); |
---|
591 | 579 | |
---|
592 | 580 | ret = 0; |
---|
.. | .. |
---|
623 | 611 | { |
---|
624 | 612 | int ret; |
---|
625 | 613 | |
---|
626 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 614 | + mutex_lock(&q->debugfs_mutex); |
---|
627 | 615 | ret = __blk_trace_setup(q, name, dev, bdev, arg); |
---|
628 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 616 | + mutex_unlock(&q->debugfs_mutex); |
---|
629 | 617 | |
---|
630 | 618 | return ret; |
---|
631 | 619 | } |
---|
.. | .. |
---|
671 | 659 | struct blk_trace *bt; |
---|
672 | 660 | |
---|
673 | 661 | bt = rcu_dereference_protected(q->blk_trace, |
---|
674 | | - lockdep_is_held(&q->blk_trace_mutex)); |
---|
| 662 | + lockdep_is_held(&q->debugfs_mutex)); |
---|
675 | 663 | if (bt == NULL) |
---|
676 | 664 | return -EINVAL; |
---|
677 | 665 | |
---|
.. | .. |
---|
711 | 699 | { |
---|
712 | 700 | int ret; |
---|
713 | 701 | |
---|
714 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 702 | + mutex_lock(&q->debugfs_mutex); |
---|
715 | 703 | ret = __blk_trace_startstop(q, start); |
---|
716 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 704 | + mutex_unlock(&q->debugfs_mutex); |
---|
717 | 705 | |
---|
718 | 706 | return ret; |
---|
719 | 707 | } |
---|
.. | .. |
---|
742 | 730 | if (!q) |
---|
743 | 731 | return -ENXIO; |
---|
744 | 732 | |
---|
745 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 733 | + mutex_lock(&q->debugfs_mutex); |
---|
746 | 734 | |
---|
747 | 735 | switch (cmd) { |
---|
748 | 736 | case BLKTRACESETUP: |
---|
.. | .. |
---|
757 | 745 | #endif |
---|
758 | 746 | case BLKTRACESTART: |
---|
759 | 747 | start = 1; |
---|
| 748 | + fallthrough; |
---|
760 | 749 | case BLKTRACESTOP: |
---|
761 | 750 | ret = __blk_trace_startstop(q, start); |
---|
762 | 751 | break; |
---|
.. | .. |
---|
768 | 757 | break; |
---|
769 | 758 | } |
---|
770 | 759 | |
---|
771 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 760 | + mutex_unlock(&q->debugfs_mutex); |
---|
772 | 761 | return ret; |
---|
773 | 762 | } |
---|
774 | 763 | |
---|
.. | .. |
---|
779 | 768 | **/ |
---|
780 | 769 | void blk_trace_shutdown(struct request_queue *q) |
---|
781 | 770 | { |
---|
782 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 771 | + mutex_lock(&q->debugfs_mutex); |
---|
783 | 772 | if (rcu_dereference_protected(q->blk_trace, |
---|
784 | | - lockdep_is_held(&q->blk_trace_mutex))) { |
---|
| 773 | + lockdep_is_held(&q->debugfs_mutex))) { |
---|
785 | 774 | __blk_trace_startstop(q, 0); |
---|
786 | 775 | __blk_trace_remove(q); |
---|
787 | 776 | } |
---|
788 | 777 | |
---|
789 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 778 | + mutex_unlock(&q->debugfs_mutex); |
---|
790 | 779 | } |
---|
791 | 780 | |
---|
792 | 781 | #ifdef CONFIG_BLK_CGROUP |
---|
793 | | -static union kernfs_node_id * |
---|
794 | | -blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
---|
| 782 | +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
---|
795 | 783 | { |
---|
796 | 784 | struct blk_trace *bt; |
---|
797 | 785 | |
---|
798 | 786 | /* We don't use the 'bt' value here except as an optimization... */ |
---|
799 | 787 | bt = rcu_dereference_protected(q->blk_trace, 1); |
---|
800 | 788 | if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) |
---|
801 | | - return NULL; |
---|
| 789 | + return 0; |
---|
802 | 790 | |
---|
803 | | - if (!bio->bi_css) |
---|
804 | | - return NULL; |
---|
805 | | - return cgroup_get_kernfs_id(bio->bi_css->cgroup); |
---|
| 791 | + if (!bio->bi_blkg) |
---|
| 792 | + return 0; |
---|
| 793 | + return cgroup_id(bio_blkcg(bio)->css.cgroup); |
---|
806 | 794 | } |
---|
807 | 795 | #else |
---|
808 | | -static union kernfs_node_id * |
---|
809 | | -blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
---|
| 796 | +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) |
---|
810 | 797 | { |
---|
811 | | - return NULL; |
---|
| 798 | + return 0; |
---|
812 | 799 | } |
---|
813 | 800 | #endif |
---|
814 | 801 | |
---|
815 | | -static union kernfs_node_id * |
---|
| 802 | +static u64 |
---|
816 | 803 | blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) |
---|
817 | 804 | { |
---|
818 | 805 | if (!rq->bio) |
---|
819 | | - return NULL; |
---|
| 806 | + return 0; |
---|
820 | 807 | /* Use the first bio */ |
---|
821 | 808 | return blk_trace_bio_get_cgid(q, rq->bio); |
---|
822 | 809 | } |
---|
.. | .. |
---|
838 | 825 | * |
---|
839 | 826 | **/ |
---|
840 | 827 | static void blk_add_trace_rq(struct request *rq, int error, |
---|
841 | | - unsigned int nr_bytes, u32 what, |
---|
842 | | - union kernfs_node_id *cgid) |
---|
| 828 | + unsigned int nr_bytes, u32 what, u64 cgid) |
---|
843 | 829 | { |
---|
844 | 830 | struct blk_trace *bt; |
---|
845 | 831 | |
---|
.. | .. |
---|
871 | 857 | struct request_queue *q, struct request *rq) |
---|
872 | 858 | { |
---|
873 | 859 | blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, |
---|
| 860 | + blk_trace_request_get_cgid(q, rq)); |
---|
| 861 | +} |
---|
| 862 | + |
---|
| 863 | +static void blk_add_trace_rq_merge(void *ignore, |
---|
| 864 | + struct request_queue *q, struct request *rq) |
---|
| 865 | +{ |
---|
| 866 | + blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, |
---|
874 | 867 | blk_trace_request_get_cgid(q, rq)); |
---|
875 | 868 | } |
---|
876 | 869 | |
---|
.. | .. |
---|
925 | 918 | } |
---|
926 | 919 | |
---|
927 | 920 | static void blk_add_trace_bio_complete(void *ignore, |
---|
928 | | - struct request_queue *q, struct bio *bio, |
---|
929 | | - int error) |
---|
| 921 | + struct request_queue *q, struct bio *bio) |
---|
930 | 922 | { |
---|
931 | | - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
---|
| 923 | + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, |
---|
| 924 | + blk_status_to_errno(bio->bi_status)); |
---|
932 | 925 | } |
---|
933 | 926 | |
---|
934 | 927 | static void blk_add_trace_bio_backmerge(void *ignore, |
---|
.. | .. |
---|
966 | 959 | bt = rcu_dereference(q->blk_trace); |
---|
967 | 960 | if (bt) |
---|
968 | 961 | __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, |
---|
969 | | - NULL, NULL); |
---|
| 962 | + NULL, 0); |
---|
970 | 963 | rcu_read_unlock(); |
---|
971 | 964 | } |
---|
972 | 965 | } |
---|
.. | .. |
---|
985 | 978 | bt = rcu_dereference(q->blk_trace); |
---|
986 | 979 | if (bt) |
---|
987 | 980 | __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, |
---|
988 | | - 0, 0, NULL, NULL); |
---|
| 981 | + 0, 0, NULL, 0); |
---|
989 | 982 | rcu_read_unlock(); |
---|
990 | 983 | } |
---|
991 | 984 | } |
---|
.. | .. |
---|
997 | 990 | rcu_read_lock(); |
---|
998 | 991 | bt = rcu_dereference(q->blk_trace); |
---|
999 | 992 | if (bt) |
---|
1000 | | - __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL); |
---|
| 993 | + __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); |
---|
1001 | 994 | rcu_read_unlock(); |
---|
1002 | 995 | } |
---|
1003 | 996 | |
---|
.. | .. |
---|
1017 | 1010 | else |
---|
1018 | 1011 | what = BLK_TA_UNPLUG_TIMER; |
---|
1019 | 1012 | |
---|
1020 | | - __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL); |
---|
| 1013 | + __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); |
---|
1021 | 1014 | } |
---|
1022 | 1015 | rcu_read_unlock(); |
---|
1023 | 1016 | } |
---|
.. | .. |
---|
1158 | 1151 | WARN_ON(ret); |
---|
1159 | 1152 | ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
---|
1160 | 1153 | WARN_ON(ret); |
---|
| 1154 | + ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); |
---|
| 1155 | + WARN_ON(ret); |
---|
1161 | 1156 | ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
---|
1162 | 1157 | WARN_ON(ret); |
---|
1163 | 1158 | ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
---|
.. | .. |
---|
1204 | 1199 | unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); |
---|
1205 | 1200 | unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
---|
1206 | 1201 | unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
---|
| 1202 | + unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); |
---|
1207 | 1203 | unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
---|
1208 | 1204 | unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); |
---|
1209 | 1205 | |
---|
.. | .. |
---|
1256 | 1252 | |
---|
1257 | 1253 | static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg) |
---|
1258 | 1254 | { |
---|
1259 | | - return (void *)(te_blk_io_trace(ent) + 1) + |
---|
1260 | | - (has_cg ? sizeof(union kernfs_node_id) : 0); |
---|
| 1255 | + return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0); |
---|
1261 | 1256 | } |
---|
1262 | 1257 | |
---|
1263 | | -static inline const void *cgid_start(const struct trace_entry *ent) |
---|
| 1258 | +static inline u64 t_cgid(const struct trace_entry *ent) |
---|
1264 | 1259 | { |
---|
1265 | | - return (void *)(te_blk_io_trace(ent) + 1); |
---|
| 1260 | + return *(u64 *)(te_blk_io_trace(ent) + 1); |
---|
1266 | 1261 | } |
---|
1267 | 1262 | |
---|
1268 | 1263 | static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg) |
---|
1269 | 1264 | { |
---|
1270 | | - return te_blk_io_trace(ent)->pdu_len - |
---|
1271 | | - (has_cg ? sizeof(union kernfs_node_id) : 0); |
---|
| 1265 | + return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0); |
---|
1272 | 1266 | } |
---|
1273 | 1267 | |
---|
1274 | 1268 | static inline u32 t_action(const struct trace_entry *ent) |
---|
.. | .. |
---|
1330 | 1324 | |
---|
1331 | 1325 | fill_rwbs(rwbs, t); |
---|
1332 | 1326 | if (has_cg) { |
---|
1333 | | - const union kernfs_node_id *id = cgid_start(iter->ent); |
---|
| 1327 | + u64 id = t_cgid(iter->ent); |
---|
1334 | 1328 | |
---|
1335 | 1329 | if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) { |
---|
1336 | 1330 | char blkcg_name_buf[NAME_MAX + 1] = "<...>"; |
---|
.. | .. |
---|
1340 | 1334 | trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ", |
---|
1341 | 1335 | MAJOR(t->device), MINOR(t->device), |
---|
1342 | 1336 | blkcg_name_buf, act, rwbs); |
---|
1343 | | - } else |
---|
| 1337 | + } else { |
---|
| 1338 | + /* |
---|
| 1339 | + * The cgid portion used to be "INO,GEN". Userland |
---|
| 1340 | + * builds a FILEID_INO32_GEN fid out of them and |
---|
| 1341 | + * opens the cgroup using open_by_handle_at(2). |
---|
| 1342 | + * While 32bit ino setups are still the same, 64bit |
---|
| 1343 | + * ones now use the 64bit ino as the whole ID and |
---|
| 1344 | + * no longer use generation. |
---|
| 1345 | + * |
---|
| 1346 | + * Regardless of the content, always output |
---|
| 1347 | + * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can |
---|
| 1348 | + * be mapped back to @id on both 64 and 32bit ino |
---|
| 1349 | + * setups. See __kernfs_fh_to_dentry(). |
---|
| 1350 | + */ |
---|
1344 | 1351 | trace_seq_printf(&iter->seq, |
---|
1345 | | - "%3d,%-3d %x,%-x %2s %3s ", |
---|
| 1352 | + "%3d,%-3d %llx,%-llx %2s %3s ", |
---|
1346 | 1353 | MAJOR(t->device), MINOR(t->device), |
---|
1347 | | - id->ino, id->generation, act, rwbs); |
---|
| 1354 | + id & U32_MAX, id >> 32, act, rwbs); |
---|
| 1355 | + } |
---|
1348 | 1356 | } else |
---|
1349 | 1357 | trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", |
---|
1350 | 1358 | MAJOR(t->device), MINOR(t->device), act, rwbs); |
---|
.. | .. |
---|
1594 | 1602 | |
---|
1595 | 1603 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) |
---|
1596 | 1604 | { |
---|
1597 | | - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
---|
| 1605 | + if ((iter->ent->type != TRACE_BLK) || |
---|
| 1606 | + !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
---|
1598 | 1607 | return TRACE_TYPE_UNHANDLED; |
---|
1599 | 1608 | |
---|
1600 | 1609 | return print_one_line(iter, true); |
---|
.. | .. |
---|
1657 | 1666 | { |
---|
1658 | 1667 | struct blk_trace *bt; |
---|
1659 | 1668 | |
---|
1660 | | - bt = xchg(&q->blk_trace, NULL); |
---|
| 1669 | + bt = rcu_replace_pointer(q->blk_trace, NULL, |
---|
| 1670 | + lockdep_is_held(&q->debugfs_mutex)); |
---|
1661 | 1671 | if (bt == NULL) |
---|
1662 | 1672 | return -EINVAL; |
---|
1663 | 1673 | |
---|
.. | .. |
---|
1697 | 1707 | |
---|
1698 | 1708 | blk_trace_setup_lba(bt, bdev); |
---|
1699 | 1709 | |
---|
1700 | | - ret = -EBUSY; |
---|
1701 | | - if (cmpxchg(&q->blk_trace, NULL, bt)) |
---|
1702 | | - goto free_bt; |
---|
1703 | | - |
---|
| 1710 | + rcu_assign_pointer(q->blk_trace, bt); |
---|
1704 | 1711 | get_probe_ref(); |
---|
1705 | 1712 | return 0; |
---|
1706 | 1713 | |
---|
.. | .. |
---|
1829 | 1836 | struct device_attribute *attr, |
---|
1830 | 1837 | char *buf) |
---|
1831 | 1838 | { |
---|
1832 | | - struct hd_struct *p = dev_to_part(dev); |
---|
| 1839 | + struct block_device *bdev = bdget_part(dev_to_part(dev)); |
---|
1833 | 1840 | struct request_queue *q; |
---|
1834 | | - struct block_device *bdev; |
---|
1835 | 1841 | struct blk_trace *bt; |
---|
1836 | 1842 | ssize_t ret = -ENXIO; |
---|
1837 | 1843 | |
---|
1838 | | - bdev = bdget(part_devt(p)); |
---|
1839 | 1844 | if (bdev == NULL) |
---|
1840 | 1845 | goto out; |
---|
1841 | 1846 | |
---|
.. | .. |
---|
1843 | 1848 | if (q == NULL) |
---|
1844 | 1849 | goto out_bdput; |
---|
1845 | 1850 | |
---|
1846 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 1851 | + mutex_lock(&q->debugfs_mutex); |
---|
1847 | 1852 | |
---|
1848 | 1853 | bt = rcu_dereference_protected(q->blk_trace, |
---|
1849 | | - lockdep_is_held(&q->blk_trace_mutex)); |
---|
| 1854 | + lockdep_is_held(&q->debugfs_mutex)); |
---|
1850 | 1855 | if (attr == &dev_attr_enable) { |
---|
1851 | 1856 | ret = sprintf(buf, "%u\n", !!bt); |
---|
1852 | 1857 | goto out_unlock_bdev; |
---|
.. | .. |
---|
1864 | 1869 | ret = sprintf(buf, "%llu\n", bt->end_lba); |
---|
1865 | 1870 | |
---|
1866 | 1871 | out_unlock_bdev: |
---|
1867 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 1872 | + mutex_unlock(&q->debugfs_mutex); |
---|
1868 | 1873 | out_bdput: |
---|
1869 | 1874 | bdput(bdev); |
---|
1870 | 1875 | out: |
---|
.. | .. |
---|
1877 | 1882 | { |
---|
1878 | 1883 | struct block_device *bdev; |
---|
1879 | 1884 | struct request_queue *q; |
---|
1880 | | - struct hd_struct *p; |
---|
1881 | 1885 | struct blk_trace *bt; |
---|
1882 | 1886 | u64 value; |
---|
1883 | 1887 | ssize_t ret = -EINVAL; |
---|
.. | .. |
---|
1897 | 1901 | goto out; |
---|
1898 | 1902 | |
---|
1899 | 1903 | ret = -ENXIO; |
---|
1900 | | - |
---|
1901 | | - p = dev_to_part(dev); |
---|
1902 | | - bdev = bdget(part_devt(p)); |
---|
| 1904 | + bdev = bdget_part(dev_to_part(dev)); |
---|
1903 | 1905 | if (bdev == NULL) |
---|
1904 | 1906 | goto out; |
---|
1905 | 1907 | |
---|
.. | .. |
---|
1907 | 1909 | if (q == NULL) |
---|
1908 | 1910 | goto out_bdput; |
---|
1909 | 1911 | |
---|
1910 | | - mutex_lock(&q->blk_trace_mutex); |
---|
| 1912 | + mutex_lock(&q->debugfs_mutex); |
---|
1911 | 1913 | |
---|
1912 | 1914 | bt = rcu_dereference_protected(q->blk_trace, |
---|
1913 | | - lockdep_is_held(&q->blk_trace_mutex)); |
---|
| 1915 | + lockdep_is_held(&q->debugfs_mutex)); |
---|
1914 | 1916 | if (attr == &dev_attr_enable) { |
---|
1915 | 1917 | if (!!value == !!bt) { |
---|
1916 | 1918 | ret = 0; |
---|
.. | .. |
---|
1927 | 1929 | if (bt == NULL) { |
---|
1928 | 1930 | ret = blk_trace_setup_queue(q, bdev); |
---|
1929 | 1931 | bt = rcu_dereference_protected(q->blk_trace, |
---|
1930 | | - lockdep_is_held(&q->blk_trace_mutex)); |
---|
| 1932 | + lockdep_is_held(&q->debugfs_mutex)); |
---|
1931 | 1933 | } |
---|
1932 | 1934 | |
---|
1933 | 1935 | if (ret == 0) { |
---|
.. | .. |
---|
1942 | 1944 | } |
---|
1943 | 1945 | |
---|
1944 | 1946 | out_unlock_bdev: |
---|
1945 | | - mutex_unlock(&q->blk_trace_mutex); |
---|
| 1947 | + mutex_unlock(&q->debugfs_mutex); |
---|
1946 | 1948 | out_bdput: |
---|
1947 | 1949 | bdput(bdev); |
---|
1948 | 1950 | out: |
---|