...
  * Send out a notify message.
  */
 static void trace_note(struct blk_trace *bt, pid_t pid, int action,
-		       const void *data, size_t len,
-		       union kernfs_node_id *cgid)
+		       const void *data, size_t len, u64 cgid)
 {
 	struct blk_io_trace *t;
 	struct ring_buffer_event *event = NULL;
-	struct ring_buffer *buffer = NULL;
-	int pc = 0;
+	struct trace_buffer *buffer = NULL;
+	unsigned int trace_ctx = 0;
 	int cpu = smp_processor_id();
 	bool blk_tracer = blk_tracer_enabled;
-	ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
+	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
 
 	if (blk_tracer) {
-		buffer = blk_tr->trace_buffer.buffer;
-		pc = preempt_count();
+		buffer = blk_tr->array_buffer.buffer;
+		trace_ctx = tracing_gen_ctx_flags(0);
 		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
 						  sizeof(*t) + len + cgid_len,
-						  0, pc);
+						  trace_ctx);
 		if (!event)
 			return;
 		t = ring_buffer_event_data(event);
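The irq-flags/preempt-count pair threaded through the old API (the `0, pc` arguments) is folded into a single packed word here. A minimal sketch of the new reserve/commit pattern, assuming the ~v5.12 tracing API in which tracing_gen_ctx_flags() encodes irq and preemption state into one unsigned int:

	unsigned int trace_ctx;

	/* blktrace always passed 0 for irqflags; keep that behaviour */
	trace_ctx = tracing_gen_ctx_flags(0);
	event = trace_buffer_lock_reserve(buffer, TRACE_BLK, size, trace_ctx);
	if (event)
		trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);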
...
 		t->pid = pid;
 		t->cpu = cpu;
 		t->pdu_len = len + cgid_len;
-		if (cgid)
-			memcpy((void *)t + sizeof(*t), cgid, cgid_len);
+		if (cgid_len)
+			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
 		memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
 
 		if (blk_tracer)
-			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
+			trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
 	}
 }
 
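Note that sizeof(cgid) is 8, the same size as the union it replaces, so cgid_len, pdu_len, and the record layout seen by userspace blkparse are unchanged. For reference, the replaced type from include/linux/kernfs.h (pre-v5.5):

	union kernfs_node_id {
		struct {
			u32	ino;
			u32	generation;
		};
		u64		id;
	};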
...
 	spin_lock_irqsave(&running_trace_lock, flags);
 	list_for_each_entry(bt, &running_trace_list, running_list) {
 		trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
-			   sizeof(tsk->comm), NULL);
+			   sizeof(tsk->comm), 0);
 	}
 	spin_unlock_irqrestore(&running_trace_lock, flags);
 }
...
 	words[1] = now.tv_nsec;
 
 	local_irq_save(flags);
-	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL);
+	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0);
 	local_irq_restore(flags);
 }
 
...
 	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 		blkcg = NULL;
 #ifdef CONFIG_BLK_CGROUP
-	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
-		   blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL);
+	trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
+		   blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
 #else
-	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL);
+	trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
 #endif
 	local_irq_restore(flags);
 }
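Two things change here: the message is now attributed to current->pid rather than pid 0, and cgroup_get_kernfs_id() gives way to cgroup_id(), which returns the cgroup's kernfs node ID as a plain u64. The `: 1` fallback points at the root cgroup, whose kernfs ID is always 1. Roughly, per include/linux/cgroup.h since v5.5:

	static inline u64 cgroup_id(struct cgroup *cgrp)
	{
		return cgrp->kn->id;
	}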
...
  */
 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 			    int op, int op_flags, u32 what, int error, int pdu_len,
-			    void *pdu_data, union kernfs_node_id *cgid)
+			    void *pdu_data, u64 cgid)
 {
 	struct task_struct *tsk = current;
 	struct ring_buffer_event *event = NULL;
-	struct ring_buffer *buffer = NULL;
+	struct trace_buffer *buffer = NULL;
 	struct blk_io_trace *t;
 	unsigned long flags = 0;
 	unsigned long *sequence;
+	unsigned int trace_ctx = 0;
 	pid_t pid;
-	int cpu, pc = 0;
+	int cpu;
 	bool blk_tracer = blk_tracer_enabled;
-	ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
+	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
 
 	if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
 		return;
...
 	if (blk_tracer) {
 		tracing_record_cmdline(current);
 
-		buffer = blk_tr->trace_buffer.buffer;
-		pc = preempt_count();
+		buffer = blk_tr->array_buffer.buffer;
+		trace_ctx = tracing_gen_ctx_flags(0);
 		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
 						  sizeof(*t) + pdu_len + cgid_len,
-						  0, pc);
+						  trace_ctx);
 		if (!event)
 			return;
 		t = ring_buffer_event_data(event);
...
 		t->pdu_len = pdu_len + cgid_len;
 
 		if (cgid_len)
-			memcpy((void *)t + sizeof(*t), cgid, cgid_len);
+			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
 		if (pdu_len)
 			memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
 
 		if (blk_tracer) {
-			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
+			trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
 			return;
 		}
 	}
...
 {
 	struct blk_trace *bt;
 
-	bt = xchg(&q->blk_trace, NULL);
+	bt = rcu_replace_pointer(q->blk_trace, NULL,
+				 lockdep_is_held(&q->debugfs_mutex));
 	if (!bt)
 		return -EINVAL;
 
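rcu_replace_pointer() is the lockdep-aware substitute for the bare xchg(): it both swaps the pointer and asserts that the caller really holds q->debugfs_mutex. Its definition in include/linux/rcupdate.h expands to roughly:

	#define rcu_replace_pointer(rcu_ptr, ptr, c)				\
	({									\
		typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));	\
		rcu_assign_pointer((rcu_ptr), (ptr));				\
		__tmp;								\
	})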
...
 {
 	int ret;
 
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 	ret = __blk_trace_remove(q);
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 
 	return ret;
 }
...
 	struct dentry *dir = NULL;
 	int ret;
 
+	lockdep_assert_held(&q->debugfs_mutex);
+
 	if (!buts->buf_size || !buts->buf_nr)
 		return -EINVAL;
-
-	if (!blk_debugfs_root)
-		return -ENOENT;
 
 	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
 	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
...
 	 * bdev can be NULL, as with scsi-generic; this is as helpful as
 	 * we can be.
 	 */
-	if (q->blk_trace) {
+	if (rcu_dereference_protected(q->blk_trace,
+				      lockdep_is_held(&q->debugfs_mutex))) {
 		pr_warn("Concurrent blktraces are not allowed on %s\n",
 			buts->name);
 		return -EBUSY;
...
 	if (!bt->msg_data)
 		goto err;
 
-#ifdef CONFIG_BLK_DEBUG_FS
 	/*
-	 * When tracing whole make_request drivers (multiqueue) block devices,
-	 * reuse the existing debugfs directory created by the block layer on
-	 * init. For request-based block devices, all partitions block devices,
+	 * When tracing the whole disk, reuse the existing debugfs directory
+	 * created by the block layer on init. For partition block devices,
 	 * and scsi-generic block devices we create a temporary new debugfs
 	 * directory that will be removed once the trace ends.
 	 */
-	if (q->mq_ops && bdev && bdev == bdev->bd_contains)
+	if (bdev && !bdev_is_partition(bdev))
 		dir = q->debugfs_dir;
 	else
-#endif
 		bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
-	if (!dir)
-		goto err;
 
 	/*
 	 * As blktrace relies on debugfs for its interface the debugfs directory
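bdev_is_partition() replaces the open-coded `bdev == bdev->bd_contains` test, and the q->mq_ops guard disappears because every remaining block driver is multiqueue by this point. The helper is roughly (include/linux/blkdev.h, ~v5.10):

	static inline bool bdev_is_partition(struct block_device *bdev)
	{
		return bdev->bd_partno;
	}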
...
 	ret = -EIO;
 	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
 					       &blk_dropped_fops);
-	if (!bt->dropped_file)
-		goto err;
 
 	bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
-	if (!bt->msg_file)
-		goto err;
 
 	bt->rchan = relay_open("trace", dir, buts->buf_size,
 			       buts->buf_nr, &blk_relay_callbacks, bt);
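The dropped NULL checks follow the debugfs convention that creation failures are not meant to be handled by callers: modern debugfs_create_file() returns an ERR_PTR rather than NULL on failure, and the code is expected to carry on regardless. The `ret = -EIO` above survives only for the relay_open() failure path below.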
...
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;
 
-	ret = -EBUSY;
-	if (cmpxchg(&q->blk_trace, NULL, bt))
-		goto err;
-
+	rcu_assign_pointer(q->blk_trace, bt);
 	get_probe_ref();
 
 	ret = 0;
...
 {
 	int ret;
 
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 	ret = __blk_trace_setup(q, name, dev, bdev, arg);
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 
 	return ret;
 }
...
 	struct blk_trace *bt;
 
 	bt = rcu_dereference_protected(q->blk_trace,
-				       lockdep_is_held(&q->blk_trace_mutex));
+				       lockdep_is_held(&q->debugfs_mutex));
 	if (bt == NULL)
 		return -EINVAL;
 
...
 {
 	int ret;
 
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 	ret = __blk_trace_startstop(q, start);
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 
 	return ret;
 }
...
 	if (!q)
 		return -ENXIO;
 
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 
 	switch (cmd) {
 	case BLKTRACESETUP:
...
 #endif
 	case BLKTRACESTART:
 		start = 1;
+		fallthrough;
 	case BLKTRACESTOP:
 		ret = __blk_trace_startstop(q, start);
 		break;
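The bare fallthrough from BLKTRACESTART into BLKTRACESTOP is now spelled out with the fallthrough pseudo-keyword, which lets the kernel build cleanly with -Wimplicit-fallthrough. It is defined in include/linux/compiler_attributes.h roughly as:

	#if __has_attribute(__fallthrough__)
	# define fallthrough	__attribute__((__fallthrough__))
	#else
	# define fallthrough	do {} while (0)	/* fallthrough */
	#endif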
...
 		break;
 	}
 
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 	return ret;
 }
 
...
  **/
 void blk_trace_shutdown(struct request_queue *q)
 {
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 	if (rcu_dereference_protected(q->blk_trace,
-				      lockdep_is_held(&q->blk_trace_mutex))) {
+				      lockdep_is_held(&q->debugfs_mutex))) {
 		__blk_trace_startstop(q, 0);
 		__blk_trace_remove(q);
 	}
 
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 }
 
 #ifdef CONFIG_BLK_CGROUP
-static union kernfs_node_id *
-blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 {
 	struct blk_trace *bt;
 
 	/* We don't use the 'bt' value here except as an optimization... */
 	bt = rcu_dereference_protected(q->blk_trace, 1);
 	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
-		return NULL;
+		return 0;
 
-	if (!bio->bi_css)
-		return NULL;
-	return cgroup_get_kernfs_id(bio->bi_css->cgroup);
+	if (!bio->bi_blkg)
+		return 0;
+	return cgroup_id(bio_blkcg(bio)->css.cgroup);
 }
 #else
-static union kernfs_node_id *
-blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 {
-	return NULL;
+	return 0;
 }
 #endif
 
-static union kernfs_node_id *
+static u64
 blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
 {
 	if (!rq->bio)
-		return NULL;
+		return 0;
 	/* Use the first bio */
 	return blk_trace_bio_get_cgid(q, rq->bio);
 }
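bio->bi_css is gone: a bio now carries a reference to its blkcg_gq in bi_blkg, and bio_blkcg() derives the blkcg from that. A sketch of the helper as it looked in this era (include/linux/blk-cgroup.h):

	static inline struct blkcg *bio_blkcg(struct bio *bio)
	{
		if (bio && bio->bi_blkg)
			return bio->bi_blkg->blkcg;
		return NULL;
	}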
...
  *
  **/
 static void blk_add_trace_rq(struct request *rq, int error,
-			     unsigned int nr_bytes, u32 what,
-			     union kernfs_node_id *cgid)
+			     unsigned int nr_bytes, u32 what, u64 cgid)
 {
 	struct blk_trace *bt;
 
...
 				   struct request_queue *q, struct request *rq)
 {
 	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
+			 blk_trace_request_get_cgid(q, rq));
+}
+
+static void blk_add_trace_rq_merge(void *ignore,
+				   struct request_queue *q, struct request *rq)
+{
+	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
 			 blk_trace_request_get_cgid(q, rq));
 }
 
...
 }
 
 static void blk_add_trace_bio_complete(void *ignore,
-				       struct request_queue *q, struct bio *bio,
-				       int error)
+				       struct request_queue *q, struct bio *bio)
 {
-	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
+	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
+			  blk_status_to_errno(bio->bi_status));
 }
 
 static void blk_add_trace_bio_backmerge(void *ignore,
...
 		bt = rcu_dereference(q->blk_trace);
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
-					NULL, NULL);
+					NULL, 0);
 		rcu_read_unlock();
 	}
 }
...
 		bt = rcu_dereference(q->blk_trace);
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
-					0, 0, NULL, NULL);
+					0, 0, NULL, 0);
 		rcu_read_unlock();
 	}
 }
...
 	rcu_read_lock();
 	bt = rcu_dereference(q->blk_trace);
 	if (bt)
-		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
+		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
 	rcu_read_unlock();
 }
 
...
 		else
 			what = BLK_TA_UNPLUG_TIMER;
 
-		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL);
+		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
 	}
 	rcu_read_unlock();
 }
...
 	WARN_ON(ret);
 	ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
 	WARN_ON(ret);
+	ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
+	WARN_ON(ret);
 	ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
 	WARN_ON(ret);
 	ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
...
 	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
 	unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
 	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
+	unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
 	unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
 	unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
 
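The new probe pairs with a block_rq_merge tracepoint; note that unregistration above runs in the reverse order of registration, matching the existing pattern. The declaration in include/trace/events/block.h should be a one-line reuse of the existing block_rq event class, along these lines:

	DEFINE_EVENT(block_rq, block_rq_merge,
		TP_PROTO(struct request_queue *q, struct request *rq),
		TP_ARGS(q, rq)
	);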
...
 
 static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
 {
-	return (void *)(te_blk_io_trace(ent) + 1) +
-		(has_cg ? sizeof(union kernfs_node_id) : 0);
+	return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0);
 }
 
-static inline const void *cgid_start(const struct trace_entry *ent)
+static inline u64 t_cgid(const struct trace_entry *ent)
 {
-	return (void *)(te_blk_io_trace(ent) + 1);
+	return *(u64 *)(te_blk_io_trace(ent) + 1);
 }
 
 static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
 {
-	return te_blk_io_trace(ent)->pdu_len -
-		(has_cg ? sizeof(union kernfs_node_id) : 0);
+	return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0);
 }
 
 static inline u32 t_action(const struct trace_entry *ent)
...
 
 	fill_rwbs(rwbs, t);
 	if (has_cg) {
-		const union kernfs_node_id *id = cgid_start(iter->ent);
+		u64 id = t_cgid(iter->ent);
 
 		if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
 			char blkcg_name_buf[NAME_MAX + 1] = "<...>";
...
 			trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ",
 					 MAJOR(t->device), MINOR(t->device),
 					 blkcg_name_buf, act, rwbs);
-		} else
+		} else {
+			/*
+			 * The cgid portion used to be "INO,GEN".  Userland
+			 * builds a FILEID_INO32_GEN fid out of them and
+			 * opens the cgroup using open_by_handle_at(2).
+			 * While 32bit ino setups are still the same, 64bit
+			 * ones now use the 64bit ino as the whole ID and
+			 * no longer use generation.
+			 *
+			 * Regardless of the content, always output
+			 * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
+			 * be mapped back to @id on both 64 and 32bit ino
+			 * setups.  See __kernfs_fh_to_dentry().
+			 */
 			trace_seq_printf(&iter->seq,
-					 "%3d,%-3d %x,%-x %2s %3s ",
+					 "%3d,%-3d %llx,%-llx %2s %3s ",
 					 MAJOR(t->device), MINOR(t->device),
-					 id->ino, id->generation, act, rwbs);
+					 id & U32_MAX, id >> 32, act, rwbs);
+		}
 	} else
 		trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
 				 MAJOR(t->device), MINOR(t->device), act, rwbs);
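To make the round trip concrete: userland reads the printed pair, stuffs it into a FILEID_INO32_GEN fid, and the kernel reassembles the u64 on open. A sketch under the LOW32,HIGH32 convention described in the comment:

	u32 low  = id & U32_MAX;		/* ino, or its low 32 bits */
	u32 high = id >> 32;			/* generation, or the high ino bits */
	u64 again = ((u64)high << 32) | low;	/* == id; see __kernfs_fh_to_dentry() */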
...
 {
 	struct blk_trace *bt;
 
-	bt = xchg(&q->blk_trace, NULL);
+	bt = rcu_replace_pointer(q->blk_trace, NULL,
+				 lockdep_is_held(&q->debugfs_mutex));
 	if (bt == NULL)
 		return -EINVAL;
 
...
 
 	blk_trace_setup_lba(bt, bdev);
 
-	ret = -EBUSY;
-	if (cmpxchg(&q->blk_trace, NULL, bt))
-		goto free_bt;
-
+	rcu_assign_pointer(q->blk_trace, bt);
 	get_probe_ref();
 	return 0;
 
...
 					 struct device_attribute *attr,
 					 char *buf)
 {
-	struct hd_struct *p = dev_to_part(dev);
+	struct block_device *bdev = bdget_part(dev_to_part(dev));
 	struct request_queue *q;
-	struct block_device *bdev;
 	struct blk_trace *bt;
 	ssize_t ret = -ENXIO;
 
-	bdev = bdget(part_devt(p));
 	if (bdev == NULL)
 		goto out;
 
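bdget_part() collapses the two-step lookup that was open-coded here; per fs/block_dev.c of this era it is simply:

	struct block_device *bdget_part(struct hd_struct *part)
	{
		return bdget(part_devt(part));
	}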
...
 	if (q == NULL)
 		goto out_bdput;
 
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 
 	bt = rcu_dereference_protected(q->blk_trace,
-				       lockdep_is_held(&q->blk_trace_mutex));
+				       lockdep_is_held(&q->debugfs_mutex));
 	if (attr == &dev_attr_enable) {
 		ret = sprintf(buf, "%u\n", !!bt);
 		goto out_unlock_bdev;
...
 		ret = sprintf(buf, "%llu\n", bt->end_lba);
 
 out_unlock_bdev:
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 out_bdput:
 	bdput(bdev);
 out:
...
 {
 	struct block_device *bdev;
 	struct request_queue *q;
-	struct hd_struct *p;
 	struct blk_trace *bt;
 	u64 value;
 	ssize_t ret = -EINVAL;
...
 		goto out;
 
 	ret = -ENXIO;
-
-	p = dev_to_part(dev);
-	bdev = bdget(part_devt(p));
+	bdev = bdget_part(dev_to_part(dev));
 	if (bdev == NULL)
 		goto out;
 
...
 	if (q == NULL)
 		goto out_bdput;
 
-	mutex_lock(&q->blk_trace_mutex);
+	mutex_lock(&q->debugfs_mutex);
 
 	bt = rcu_dereference_protected(q->blk_trace,
-				       lockdep_is_held(&q->blk_trace_mutex));
+				       lockdep_is_held(&q->debugfs_mutex));
 	if (attr == &dev_attr_enable) {
 		if (!!value == !!bt) {
 			ret = 0;
...
 	if (bt == NULL) {
 		ret = blk_trace_setup_queue(q, bdev);
 		bt = rcu_dereference_protected(q->blk_trace,
-					       lockdep_is_held(&q->blk_trace_mutex));
+					       lockdep_is_held(&q->debugfs_mutex));
 	}
 
 	if (ret == 0) {
...
 	}
 
 out_unlock_bdev:
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 out_bdput:
 	bdput(bdev);
 out:
---|