  ..   ..
  67   67   * Send out a notify message.
  68   68   */
  69   69  static void trace_note(struct blk_trace *bt, pid_t pid, int action,
  70      -		       const void *data, size_t len,
  71      -		       union kernfs_node_id *cgid)
       70 +		       const void *data, size_t len, u64 cgid)
  72   71  {
  73   72  	struct blk_io_trace *t;
  74   73  	struct ring_buffer_event *event = NULL;
  75      -	struct ring_buffer *buffer = NULL;
  76      -	int pc = 0;
       74 +	struct trace_buffer *buffer = NULL;
       75 +	unsigned int trace_ctx = 0;
  77   76  	int cpu = smp_processor_id();
  78   77  	bool blk_tracer = blk_tracer_enabled;
  79      -	ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
       78 +	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
  80   79  
  81   80  	if (blk_tracer) {
  82      -		buffer = blk_tr->trace_buffer.buffer;
  83      -		pc = preempt_count();
       81 +		buffer = blk_tr->array_buffer.buffer;
       82 +		trace_ctx = tracing_gen_ctx_flags(0);
  84   83  		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
  85   84  						  sizeof(*t) + len + cgid_len,
  86      -						  0, pc);
       85 +						  trace_ctx);
  87   86  		if (!event)
  88   87  			return;
  89   88  		t = ring_buffer_event_data(event);
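A note on the record layout trace_note() now emits: the cgroup identifier is no longer a pointer to a union kernfs_node_id but a plain u64 copied inline, sitting between the struct blk_io_trace header and the notify payload, with pdu_len covering both. A minimal consumer-side sketch, assuming `t` points at a record already known to carry a cgid (illustrative only, not part of the patch):

	/* layout after this change: [struct blk_io_trace][u64 cgid][payload] */
	u64 cgid;
	const void *payload;
	size_t payload_len;

	memcpy(&cgid, (void *)(t + 1), sizeof(cgid));	/* first 8 bytes of the pdu area */
	payload = (void *)(t + 1) + sizeof(cgid);	/* notify text or pdu data */
	payload_len = t->pdu_len - sizeof(cgid);	/* pdu_len includes the cgid */
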
  ..   ..
 103  102  		t->pid = pid;
 104  103  		t->cpu = cpu;
 105  104  		t->pdu_len = len + cgid_len;
 106      -		if (cgid)
 107      -			memcpy((void *)t + sizeof(*t), cgid, cgid_len);
      105 +		if (cgid_len)
      106 +			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
 108  107  		memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
 109  108  
 110  109  		if (blk_tracer)
 111      -			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
      110 +			trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
 112  111  	}
 113  112  }
 114  113  
  ..   ..
 125  124  	spin_lock_irqsave(&running_trace_lock, flags);
 126  125  	list_for_each_entry(bt, &running_trace_list, running_list) {
 127  126  		trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
 128      -			   sizeof(tsk->comm), NULL);
      127 +			   sizeof(tsk->comm), 0);
 129  128  	}
 130  129  	spin_unlock_irqrestore(&running_trace_lock, flags);
 131  130  }
  ..   ..
 142  141  	words[1] = now.tv_nsec;
 143  142  
 144  143  	local_irq_save(flags);
 145      -	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL);
      144 +	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0);
 146  145  	local_irq_restore(flags);
 147  146  }
 148  147  
  ..   ..
 174  173  	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 175  174  		blkcg = NULL;
 176  175  #ifdef CONFIG_BLK_CGROUP
 177      -	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
 178      -		blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL);
      176 +	trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
      177 +		   blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
 179  178  #else
 180      -	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL);
      179 +	trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
 181  180  #endif
 182  181  	local_irq_restore(flags);
 183  182  }
  ..   ..
 215  214   */
 216  215  static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 217  216  		     int op, int op_flags, u32 what, int error, int pdu_len,
 218      -		     void *pdu_data, union kernfs_node_id *cgid)
      217 +		     void *pdu_data, u64 cgid)
 219  218  {
 220  219  	struct task_struct *tsk = current;
 221  220  	struct ring_buffer_event *event = NULL;
 222      -	struct ring_buffer *buffer = NULL;
      221 +	struct trace_buffer *buffer = NULL;
 223  222  	struct blk_io_trace *t;
 224  223  	unsigned long flags = 0;
 225  224  	unsigned long *sequence;
      225 +	unsigned int trace_ctx = 0;
 226  226  	pid_t pid;
 227      -	int cpu, pc = 0;
      227 +	int cpu;
 228  228  	bool blk_tracer = blk_tracer_enabled;
 229      -	ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
      229 +	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
 230  230  
 231  231  	if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
 232  232  		return;
  ..   ..
 252  252  	if (blk_tracer) {
 253  253  		tracing_record_cmdline(current);
 254  254  
 255      -		buffer = blk_tr->trace_buffer.buffer;
 256      -		pc = preempt_count();
      255 +		buffer = blk_tr->array_buffer.buffer;
      256 +		trace_ctx = tracing_gen_ctx_flags(0);
 257  257  		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
 258  258  						  sizeof(*t) + pdu_len + cgid_len,
 259      -						  0, pc);
      259 +						  trace_ctx);
 260  260  		if (!event)
 261  261  			return;
 262  262  		t = ring_buffer_event_data(event);
  ..   ..
 297  297  		t->pdu_len = pdu_len + cgid_len;
 298  298  
 299  299  		if (cgid_len)
 300      -			memcpy((void *)t + sizeof(*t), cgid, cgid_len);
      300 +			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
 301  301  		if (pdu_len)
 302  302  			memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
 303  303  
 304  304  		if (blk_tracer) {
 305      -			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
      305 +			trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
 306  306  			return;
 307  307  		}
 308  308  	}
  ..   ..
 348  348  {
 349  349  	struct blk_trace *bt;
 350  350  
 351      -	bt = xchg(&q->blk_trace, NULL);
      351 +	bt = rcu_replace_pointer(q->blk_trace, NULL,
      352 +				 lockdep_is_held(&q->debugfs_mutex));
 352  353  	if (!bt)
 353  354  		return -EINVAL;
 354  355  
  ..   ..
 362  363  {
 363  364  	int ret;
 364  365  
 365      -	mutex_lock(&q->blk_trace_mutex);
      366 +	mutex_lock(&q->debugfs_mutex);
 366  367  	ret = __blk_trace_remove(q);
 367      -	mutex_unlock(&q->blk_trace_mutex);
      368 +	mutex_unlock(&q->debugfs_mutex);
 368  369  
 369  370  	return ret;
 370  371  }
  ..   ..
 483  484  	struct dentry *dir = NULL;
 484  485  	int ret;
 485  486  
      487 +	lockdep_assert_held(&q->debugfs_mutex);
      488 +
 486  489  	if (!buts->buf_size || !buts->buf_nr)
 487  490  		return -EINVAL;
 488      -
 489      -	if (!blk_debugfs_root)
 490      -		return -ENOENT;
 491  491  
 492  492  	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
 493  493  	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
  ..   ..
 502  502  	 * bdev can be NULL, as with scsi-generic, this is a helpful as
 503  503  	 * we can be.
 504  504  	 */
 505      -	if (q->blk_trace) {
      505 +	if (rcu_dereference_protected(q->blk_trace,
      506 +				      lockdep_is_held(&q->debugfs_mutex))) {
 506  507  		pr_warn("Concurrent blktraces are not allowed on %s\n",
 507  508  			buts->name);
 508  509  		return -EBUSY;
  ..   ..
 521  522  	if (!bt->msg_data)
 522  523  		goto err;
 523  524  
 524      -#ifdef CONFIG_BLK_DEBUG_FS
 525  525  	/*
 526      -	 * When tracing whole make_request drivers (multiqueue) block devices,
 527      -	 * reuse the existing debugfs directory created by the block layer on
 528      -	 * init. For request-based block devices, all partitions block devices,
      526 +	 * When tracing the whole disk reuse the existing debugfs directory
      527 +	 * created by the block layer on init. For partitions block devices,
 529  528  	 * and scsi-generic block devices we create a temporary new debugfs
 530  529  	 * directory that will be removed once the trace ends.
 531  530  	 */
 532      -	if (q->mq_ops && bdev && bdev == bdev->bd_contains)
      531 +	if (bdev && !bdev_is_partition(bdev))
 533  532  		dir = q->debugfs_dir;
 534  533  	else
 535      -#endif
 536  534  		bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
 537      -	if (!dir)
 538      -		goto err;
 539  535  
 540  536  	/*
 541  537  	 * As blktrace relies on debugfs for its interface the debugfs directory
  ..   ..
 556  552  	ret = -EIO;
 557  553  	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
 558  554  					       &blk_dropped_fops);
 559      -	if (!bt->dropped_file)
 560      -		goto err;
 561  555  
 562  556  	bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
 563      -	if (!bt->msg_file)
 564      -		goto err;
 565  557  
 566  558  	bt->rchan = relay_open("trace", dir, buts->buf_size,
 567  559  				buts->buf_nr, &blk_relay_callbacks, bt);
  ..   ..
 583  575  	bt->pid = buts->pid;
 584  576  	bt->trace_state = Blktrace_setup;
 585  577  
 586      -	ret = -EBUSY;
 587      -	if (cmpxchg(&q->blk_trace, NULL, bt))
 588      -		goto err;
 589      -
      578 +	rcu_assign_pointer(q->blk_trace, bt);
 590  579  	get_probe_ref();
 591  580  
 592  581  	ret = 0;
  ..   ..
 623  612  {
 624  613  	int ret;
 625  614  
 626      -	mutex_lock(&q->blk_trace_mutex);
      615 +	mutex_lock(&q->debugfs_mutex);
 627  616  	ret = __blk_trace_setup(q, name, dev, bdev, arg);
 628      -	mutex_unlock(&q->blk_trace_mutex);
      617 +	mutex_unlock(&q->debugfs_mutex);
 629  618  
 630  619  	return ret;
 631  620  }
  ..   ..
 671  660  	struct blk_trace *bt;
 672  661  
 673  662  	bt = rcu_dereference_protected(q->blk_trace,
 674      -				       lockdep_is_held(&q->blk_trace_mutex));
      663 +				       lockdep_is_held(&q->debugfs_mutex));
 675  664  	if (bt == NULL)
 676  665  		return -EINVAL;
 677  666  
  ..   ..
 711  700  {
 712  701  	int ret;
 713  702  
 714      -	mutex_lock(&q->blk_trace_mutex);
      703 +	mutex_lock(&q->debugfs_mutex);
 715  704  	ret = __blk_trace_startstop(q, start);
 716      -	mutex_unlock(&q->blk_trace_mutex);
      705 +	mutex_unlock(&q->debugfs_mutex);
 717  706  
 718  707  	return ret;
 719  708  }
  ..   ..
 742  731  	if (!q)
 743  732  		return -ENXIO;
 744  733  
 745      -	mutex_lock(&q->blk_trace_mutex);
      734 +	mutex_lock(&q->debugfs_mutex);
 746  735  
 747  736  	switch (cmd) {
 748  737  	case BLKTRACESETUP:
  ..   ..
 757  746  #endif
 758  747  	case BLKTRACESTART:
 759  748  		start = 1;
      749 +		fallthrough;
 760  750  	case BLKTRACESTOP:
 761  751  		ret = __blk_trace_startstop(q, start);
 762  752  		break;
  ..   ..
 768  758  		break;
 769  759  	}
 770  760  
 771      -	mutex_unlock(&q->blk_trace_mutex);
      761 +	mutex_unlock(&q->debugfs_mutex);
 772  762  	return ret;
 773  763  }
 774  764  
  ..   ..
 779  769   **/
 780  770  void blk_trace_shutdown(struct request_queue *q)
 781  771  {
 782      -	mutex_lock(&q->blk_trace_mutex);
      772 +	mutex_lock(&q->debugfs_mutex);
 783  773  	if (rcu_dereference_protected(q->blk_trace,
 784      -				      lockdep_is_held(&q->blk_trace_mutex))) {
      774 +				      lockdep_is_held(&q->debugfs_mutex))) {
 785  775  		__blk_trace_startstop(q, 0);
 786  776  		__blk_trace_remove(q);
 787  777  	}
 788  778  
 789      -	mutex_unlock(&q->blk_trace_mutex);
      779 +	mutex_unlock(&q->debugfs_mutex);
 790  780  }
 791  781  
 792  782  #ifdef CONFIG_BLK_CGROUP
 793      -static union kernfs_node_id *
 794      -blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
      783 +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 795  784  {
 796  785  	struct blk_trace *bt;
 797  786  
 798  787  	/* We don't use the 'bt' value here except as an optimization... */
 799  788  	bt = rcu_dereference_protected(q->blk_trace, 1);
 800  789  	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 801      -		return NULL;
      790 +		return 0;
 802  791  
 803      -	if (!bio->bi_css)
 804      -		return NULL;
 805      -	return cgroup_get_kernfs_id(bio->bi_css->cgroup);
      792 +	if (!bio->bi_blkg)
      793 +		return 0;
      794 +	return cgroup_id(bio_blkcg(bio)->css.cgroup);
 806  795  }
 807  796  #else
 808      -static union kernfs_node_id *
 809      -blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
      797 +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 810  798  {
 811      -	return NULL;
      799 +	return 0;
 812  800  }
 813  801  #endif
 814  802  
 815      -static union kernfs_node_id *
      803 +static u64
 816  804  blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
 817  805  {
 818  806  	if (!rq->bio)
 819      -		return NULL;
      807 +		return 0;
 820  808  	/* Use the first bio */
 821  809  	return blk_trace_bio_get_cgid(q, rq->bio);
 822  810  }
  ..   ..
 838  826   *
 839  827   **/
 840  828  static void blk_add_trace_rq(struct request *rq, int error,
 841      -			     unsigned int nr_bytes, u32 what,
 842      -			     union kernfs_node_id *cgid)
      829 +			     unsigned int nr_bytes, u32 what, u64 cgid)
 843  830  {
 844  831  	struct blk_trace *bt;
 845  832  
  ..   ..
 871  858  				   struct request_queue *q, struct request *rq)
 872  859  {
 873  860  	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
      861 +			 blk_trace_request_get_cgid(q, rq));
      862 +}
      863 +
      864 +static void blk_add_trace_rq_merge(void *ignore,
      865 +				   struct request_queue *q, struct request *rq)
      866 +{
      867 +	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
 874  868  			 blk_trace_request_get_cgid(q, rq));
 875  869  }
 876  870  
  ..   ..
 925  919  }
 926  920  
 927  921  static void blk_add_trace_bio_complete(void *ignore,
 928      -				       struct request_queue *q, struct bio *bio,
 929      -				       int error)
      922 +				       struct request_queue *q, struct bio *bio)
 930  923  {
 931      -	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
      924 +	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
      925 +			  blk_status_to_errno(bio->bi_status));
 932  926  }
 933  927  
 934  928  static void blk_add_trace_bio_backmerge(void *ignore,
  ..   ..
 966  960  		bt = rcu_dereference(q->blk_trace);
 967  961  		if (bt)
 968  962  			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
 969      -					NULL, NULL);
      963 +					NULL, 0);
 970  964  		rcu_read_unlock();
 971  965  	}
 972  966  }
  ..   ..
 985  979  		bt = rcu_dereference(q->blk_trace);
 986  980  		if (bt)
 987  981  			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
 988      -					0, 0, NULL, NULL);
      982 +					0, 0, NULL, 0);
 989  983  		rcu_read_unlock();
 990  984  	}
 991  985  }
  ..   ..
 997  991  	rcu_read_lock();
 998  992  	bt = rcu_dereference(q->blk_trace);
 999  993  	if (bt)
1000      -		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
      994 +		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
1001  995  	rcu_read_unlock();
1002  996  }
1003  997  
  ..   ..
1017 1011  		else
1018 1012  			what = BLK_TA_UNPLUG_TIMER;
1019 1013  
1020      -		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL);
     1014 +		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
1021 1015  	}
1022 1016  	rcu_read_unlock();
1023 1017  }
  ..   ..
1158 1152  	WARN_ON(ret);
1159 1153  	ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
1160 1154  	WARN_ON(ret);
     1155 +	ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
     1156 +	WARN_ON(ret);
1161 1157  	ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
1162 1158  	WARN_ON(ret);
1163 1159  	ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
  ..   ..
1204 1200  	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
1205 1201  	unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
1206 1202  	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
     1203 +	unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
1207 1204  	unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
1208 1205  	unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
1209 1206  
  ..   ..
1256 1253  
1257 1254  static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
1258 1255  {
1259      -	return (void *)(te_blk_io_trace(ent) + 1) +
1260      -		(has_cg ? sizeof(union kernfs_node_id) : 0);
     1256 +	return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0);
1261 1257  }
1262 1258  
1263      -static inline const void *cgid_start(const struct trace_entry *ent)
     1259 +static inline u64 t_cgid(const struct trace_entry *ent)
1264 1260  {
1265      -	return (void *)(te_blk_io_trace(ent) + 1);
     1261 +	return *(u64 *)(te_blk_io_trace(ent) + 1);
1266 1262  }
1267 1263  
1268 1264  static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
1269 1265  {
1270      -	return te_blk_io_trace(ent)->pdu_len -
1271      -			(has_cg ? sizeof(union kernfs_node_id) : 0);
     1266 +	return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0);
1272 1267  }
1273 1268  
1274 1269  static inline u32 t_action(const struct trace_entry *ent)
  ..   ..
1330 1325  
1331 1326  	fill_rwbs(rwbs, t);
1332 1327  	if (has_cg) {
1333      -		const union kernfs_node_id *id = cgid_start(iter->ent);
     1328 +		u64 id = t_cgid(iter->ent);
1334 1329  
1335 1330  		if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
1336 1331  			char blkcg_name_buf[NAME_MAX + 1] = "<...>";
  ..   ..
1340 1335  			trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ",
1341 1336  				 MAJOR(t->device), MINOR(t->device),
1342 1337  				 blkcg_name_buf, act, rwbs);
1343      -		} else
     1338 +		} else {
     1339 +			/*
     1340 +			 * The cgid portion used to be "INO,GEN".  Userland
     1341 +			 * builds a FILEID_INO32_GEN fid out of them and
     1342 +			 * opens the cgroup using open_by_handle_at(2).
     1343 +			 * While 32bit ino setups are still the same, 64bit
     1344 +			 * ones now use the 64bit ino as the whole ID and
     1345 +			 * no longer use generation.
     1346 +			 *
     1347 +			 * Regardless of the content, always output
     1348 +			 * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
     1349 +			 * be mapped back to @id on both 64 and 32bit ino
     1350 +			 * setups.  See __kernfs_fh_to_dentry().
     1351 +			 */
1344 1352  			trace_seq_printf(&iter->seq,
1345      -				 "%3d,%-3d %x,%-x %2s %3s ",
     1353 +				 "%3d,%-3d %llx,%-llx %2s %3s ",
1346 1354  				 MAJOR(t->device), MINOR(t->device),
1347      -				 id->ino, id->generation, act, rwbs);
     1355 +				 id & U32_MAX, id >> 32, act, rwbs);
     1356 +		}
1348 1357  	} else
1349 1358  		trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
1350 1359  				 MAJOR(t->device), MINOR(t->device), act, rwbs);
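The comment above explains the user-visible contract behind the "%llx,%-llx" change: the 64-bit cgroup ID is printed as two 32-bit halves so existing userland can keep building a FILEID_INO32_GEN file handle from them and open the cgroup with open_by_handle_at(2). A hypothetical userspace sketch, assuming `cgrp_fd` is an fd on the cgroup2 mount and open_cgroup_by_id() is an illustrative name (the caller needs CAP_DAC_READ_SEARCH):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdint.h>
	#include <string.h>

	static int open_cgroup_by_id(int cgrp_fd, uint32_t low32, uint32_t high32)
	{
		union {
			struct file_handle fh;
			char buf[sizeof(struct file_handle) + 2 * sizeof(uint32_t)];
		} h;
		uint32_t pair[2] = { low32, high32 };	/* ino, gen on 32-bit ino setups */

		h.fh.handle_bytes = sizeof(pair);
		h.fh.handle_type = 1;			/* FILEID_INO32_GEN */
		memcpy(h.fh.f_handle, pair, sizeof(pair));

		return open_by_handle_at(cgrp_fd, &h.fh, O_RDONLY | O_DIRECTORY);
	}

On kernels with 64-bit inode numbers the second half is simply the high 32 bits of the cgroup ID; per the comment, __kernfs_fh_to_dentry() maps the two halves back to the full ID.
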
  ..   ..
1657 1666  {
1658 1667  	struct blk_trace *bt;
1659 1668  
1660      -	bt = xchg(&q->blk_trace, NULL);
     1669 +	bt = rcu_replace_pointer(q->blk_trace, NULL,
     1670 +				 lockdep_is_held(&q->debugfs_mutex));
1661 1671  	if (bt == NULL)
1662 1672  		return -EINVAL;
1663 1673  
  ..   ..
1697 1707  
1698 1708  	blk_trace_setup_lba(bt, bdev);
1699 1709  
1700      -	ret = -EBUSY;
1701      -	if (cmpxchg(&q->blk_trace, NULL, bt))
1702      -		goto free_bt;
1703      -
     1710 +	rcu_assign_pointer(q->blk_trace, bt);
1704 1711  	get_probe_ref();
1705 1712  	return 0;
1706 1713  
  ..   ..
1829 1836  					 struct device_attribute *attr,
1830 1837  					 char *buf)
1831 1838  {
1832      -	struct hd_struct *p = dev_to_part(dev);
     1839 +	struct block_device *bdev = bdget_part(dev_to_part(dev));
1833 1840  	struct request_queue *q;
1834      -	struct block_device *bdev;
1835 1841  	struct blk_trace *bt;
1836 1842  	ssize_t ret = -ENXIO;
1837 1843  
1838      -	bdev = bdget(part_devt(p));
1839 1844  	if (bdev == NULL)
1840 1845  		goto out;
1841 1846  
  ..   ..
1843 1848  	if (q == NULL)
1844 1849  		goto out_bdput;
1845 1850  
1846      -	mutex_lock(&q->blk_trace_mutex);
     1851 +	mutex_lock(&q->debugfs_mutex);
1847 1852  
1848 1853  	bt = rcu_dereference_protected(q->blk_trace,
1849      -				       lockdep_is_held(&q->blk_trace_mutex));
     1854 +				       lockdep_is_held(&q->debugfs_mutex));
1850 1855  	if (attr == &dev_attr_enable) {
1851 1856  		ret = sprintf(buf, "%u\n", !!bt);
1852 1857  		goto out_unlock_bdev;
  ..   ..
1864 1869  		ret = sprintf(buf, "%llu\n", bt->end_lba);
1865 1870  
1866 1871  out_unlock_bdev:
1867      -	mutex_unlock(&q->blk_trace_mutex);
     1872 +	mutex_unlock(&q->debugfs_mutex);
1868 1873  out_bdput:
1869 1874  	bdput(bdev);
1870 1875  out:
  ..   ..
1877 1882  {
1878 1883  	struct block_device *bdev;
1879 1884  	struct request_queue *q;
1880      -	struct hd_struct *p;
1881 1885  	struct blk_trace *bt;
1882 1886  	u64 value;
1883 1887  	ssize_t ret = -EINVAL;
  ..   ..
1897 1901  		goto out;
1898 1902  
1899 1903  	ret = -ENXIO;
1900      -
1901      -	p = dev_to_part(dev);
1902      -	bdev = bdget(part_devt(p));
     1904 +	bdev = bdget_part(dev_to_part(dev));
1903 1905  	if (bdev == NULL)
1904 1906  		goto out;
1905 1907  
  ..   ..
1907 1909  	if (q == NULL)
1908 1910  		goto out_bdput;
1909 1911  
1910      -	mutex_lock(&q->blk_trace_mutex);
     1912 +	mutex_lock(&q->debugfs_mutex);
1911 1913  
1912 1914  	bt = rcu_dereference_protected(q->blk_trace,
1913      -				       lockdep_is_held(&q->blk_trace_mutex));
     1915 +				       lockdep_is_held(&q->debugfs_mutex));
1914 1916  	if (attr == &dev_attr_enable) {
1915 1917  		if (!!value == !!bt) {
1916 1918  			ret = 0;
  ..   ..
1927 1929  	if (bt == NULL) {
1928 1930  		ret = blk_trace_setup_queue(q, bdev);
1929 1931  		bt = rcu_dereference_protected(q->blk_trace,
1930      -				lockdep_is_held(&q->blk_trace_mutex));
     1932 +				lockdep_is_held(&q->debugfs_mutex));
1931 1933  	}
1932 1934  
1933 1935  	if (ret == 0) {
  ..   ..
1942 1944  	}
1943 1945  
1944 1946  out_unlock_bdev:
1945      -	mutex_unlock(&q->blk_trace_mutex);
     1947 +	mutex_unlock(&q->debugfs_mutex);
1946 1948  out_bdput:
1947 1949  	bdput(bdev);
1948 1950  out:
|---|