2024-05-10 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb
kernel/drivers/md/dm-rq.c
@@ -12,6 +12,22 @@
 
 #define DM_MSG_PREFIX "core-rq"
 
+/*
+ * One of these is allocated per request.
+ */
+struct dm_rq_target_io {
+	struct mapped_device *md;
+	struct dm_target *ti;
+	struct request *orig, *clone;
+	struct kthread_work work;
+	blk_status_t error;
+	union map_info info;
+	struct dm_stats_aux stats_aux;
+	unsigned long duration_jiffies;
+	unsigned n_sectors;
+	unsigned completed;
+};
+
 #define DM_MQ_NR_HW_QUEUES 1
 #define DM_MQ_QUEUE_DEPTH 2048
 static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
@@ -22,19 +38,6 @@
  */
 #define RESERVED_REQUEST_BASED_IOS 256
 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
-
-static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
-
-bool dm_use_blk_mq_default(void)
-{
-	return use_blk_mq;
-}
-
-bool dm_use_blk_mq(struct mapped_device *md)
-{
-	return md->use_blk_mq;
-}
-EXPORT_SYMBOL_GPL(dm_use_blk_mq);
 
 unsigned dm_get_reserved_rq_based_ios(void)
 {
@@ -56,54 +59,18 @@
 
 int dm_request_based(struct mapped_device *md)
 {
-	return queue_is_rq_based(md->queue);
+	return queue_is_mq(md->queue);
 }
 
-static void dm_old_start_queue(struct request_queue *q)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (blk_queue_stopped(q))
-		blk_start_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void dm_mq_start_queue(struct request_queue *q)
+void dm_start_queue(struct request_queue *q)
 {
 	blk_mq_unquiesce_queue(q);
 	blk_mq_kick_requeue_list(q);
 }
 
-void dm_start_queue(struct request_queue *q)
-{
-	if (!q->mq_ops)
-		dm_old_start_queue(q);
-	else
-		dm_mq_start_queue(q);
-}
-
-static void dm_old_stop_queue(struct request_queue *q)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (!blk_queue_stopped(q))
-		blk_stop_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void dm_mq_stop_queue(struct request_queue *q)
-{
-	blk_mq_quiesce_queue(q);
-}
-
 void dm_stop_queue(struct request_queue *q)
 {
-	if (!q->mq_ops)
-		dm_old_stop_queue(q);
-	else
-		dm_mq_stop_queue(q);
+	blk_mq_quiesce_queue(q);
 }
 
 /*
@@ -145,7 +112,7 @@
 
 	/*
 	 * Update the original request.
-	 * Do not use blk_end_request() here, because it may complete
+	 * Do not use blk_mq_end_request() here, because it may complete
 	 * the original request before the clone, and break the ordering.
 	 */
 	if (is_last)
@@ -174,29 +141,8 @@
  * the md may be freed in dm_put() at the end of this function.
  * Or do dm_get() before calling this function and dm_put() later.
  */
-static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
+static void rq_completed(struct mapped_device *md)
 {
-	struct request_queue *q = md->queue;
-	unsigned long flags;
-
-	atomic_dec(&md->pending[rw]);
-
-	/* nudge anyone waiting on suspend queue */
-	if (!md_in_flight(md))
-		wake_up(&md->wait);
-
-	/*
-	 * Run this off this callpath, as drivers could invoke end_io while
-	 * inside their request_fn (and holding the queue lock). Calling
-	 * back into ->request_fn() could deadlock attempting to grab the
-	 * queue lock again.
-	 */
-	if (!q->mq_ops && run_queue) {
-		spin_lock_irqsave(q->queue_lock, flags);
-		blk_run_queue_async(q);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	}
-
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
 	 */
@@ -210,7 +156,6 @@
  */
 static void dm_end_request(struct request *clone, blk_status_t error)
 {
-	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
@@ -219,25 +164,8 @@
 	tio->ti->type->release_clone_rq(clone, NULL);
 
 	rq_end_stats(md, rq);
-	if (!rq->q->mq_ops)
-		blk_end_request_all(rq, error);
-	else
-		blk_mq_end_request(rq, error);
-	rq_completed(md, rw, true);
-}
-
-/*
- * Requeue the original request of a clone.
- */
-static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms)
-{
-	struct request_queue *q = rq->q;
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_requeue_request(q, rq);
-	blk_delay_queue(q, delay_ms);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	blk_mq_end_request(rq, error);
+	rq_completed(md);
 }
 
 static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
@@ -247,7 +175,7 @@
 
 void dm_mq_kick_requeue_list(struct mapped_device *md)
 {
-	__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
+	__dm_mq_kick_requeue_list(md->queue, 0);
 }
 EXPORT_SYMBOL(dm_mq_kick_requeue_list);
 
@@ -261,7 +189,6 @@
 {
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
-	int rw = rq_data_dir(rq);
 	unsigned long delay_ms = delay_requeue ? 100 : 0;
 
 	rq_end_stats(md, rq);
@@ -270,12 +197,8 @@
 		tio->ti->type->release_clone_rq(tio->clone, NULL);
 	}
 
-	if (!rq->q->mq_ops)
-		dm_old_requeue_request(rq, delay_ms);
-	else
-		dm_mq_delay_requeue_request(rq, delay_ms);
-
-	rq_completed(md, rw, false);
+	dm_mq_delay_requeue_request(rq, delay_ms);
+	rq_completed(md);
 }
 
 static void dm_done(struct request *clone, blk_status_t error, bool mapped)
@@ -333,18 +256,13 @@
 	bool mapped = true;
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 	struct request *clone = tio->clone;
-	int rw;
 
 	if (!clone) {
 		struct mapped_device *md = tio->md;
 
 		rq_end_stats(md, rq);
-		rw = rq_data_dir(rq);
-		if (!rq->q->mq_ops)
-			blk_end_request_all(rq, tio->error);
-		else
-			blk_mq_end_request(rq, tio->error);
-		rq_completed(md, rw, false);
+		blk_mq_end_request(rq, tio->error);
+		rq_completed(md);
 		return;
 	}
 
@@ -363,9 +281,7 @@
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
-	if (!rq->q->mq_ops)
-		blk_complete_request(rq);
-	else
+	if (likely(!blk_should_fake_timeout(rq->q)))
 		blk_mq_complete_request(rq);
 }
 
@@ -373,7 +289,7 @@
  * Complete the not-mapped clone and the original request with the error status
  * through softirq context.
  * Target's rq_end_io() function isn't called.
- * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
+ * This may be used when the target's clone_and_map_rq() function fails.
  */
 static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
 {
@@ -381,21 +297,10 @@
 	dm_complete_request(rq, error);
 }
 
-/*
- * Called with the clone's queue lock held (in the case of .request_fn)
- */
 static void end_clone_request(struct request *clone, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	/*
-	 * Actual request completion is done in a softirq context which doesn't
-	 * hold the clone's queue lock. Otherwise, deadlock could occur because:
-	 * - another request may be submitted by the upper level driver
-	 *   of the stacking during the completion
-	 * - the submission which requires queue lock may be done
-	 *   against this clone's queue
-	 */
 	dm_complete_request(tio->orig, error);
 }
 
@@ -446,8 +351,6 @@
 	return 0;
 }
 
-static void map_tio_request(struct kthread_work *work);
-
 static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 		     struct mapped_device *md)
 {
@@ -464,8 +367,6 @@
 	 */
 	if (!md->init_tio_pdu)
 		memset(&tio->info, 0, sizeof(tio->info));
-	if (md->kworker_task)
-		kthread_init_work(&tio->work, map_tio_request);
 }
 
 /*
@@ -484,7 +385,6 @@
 	blk_status_t ret;
 
 	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
-check_again:
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
@@ -505,11 +405,7 @@
 			blk_mq_cleanup_rq(clone);
 			tio->ti->type->release_clone_rq(clone, &tio->info);
 			tio->clone = NULL;
-			if (!rq->q->mq_ops)
-				r = DM_MAPIO_DELAY_REQUEUE;
-			else
-				r = DM_MAPIO_REQUEUE;
-			goto check_again;
+			return DM_MAPIO_REQUEUE;
 		}
 		break;
 	case DM_MAPIO_REQUEUE:
@@ -531,19 +427,21 @@
 	return r;
 }
 
+/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
+{
+	return sprintf(buf, "%u\n", 0);
+}
+
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
+						     const char *buf, size_t count)
+{
+	return count;
+}
+
 static void dm_start_request(struct mapped_device *md, struct request *orig)
 {
-	if (!orig->q->mq_ops)
-		blk_start_request(orig);
-	else
-		blk_mq_start_request(orig);
-	atomic_inc(&md->pending[rq_data_dir(orig)]);
-
-	if (md->seq_rq_merge_deadline_usecs) {
-		md->last_rq_pos = rq_end_sector(orig);
-		md->last_rq_rw = rq_data_dir(orig);
-		md->last_rq_start_time = ktime_get();
-	}
+	blk_mq_start_request(orig);
 
 	if (unlikely(dm_stats_used(&md->stats))) {
 		struct dm_rq_target_io *tio = tio_from_request(orig);
@@ -564,8 +462,10 @@
 	dm_get(md);
 }
 
-static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
+static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
+		unsigned int hctx_idx, unsigned int numa_node)
 {
+	struct mapped_device *md = set->driver_data;
 	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
 
 	/*
@@ -582,163 +482,6 @@
 	return 0;
 }
 
-static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
-{
-	return __dm_rq_init_rq(q->rq_alloc_data, rq);
-}
-
-static void map_tio_request(struct kthread_work *work)
-{
-	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
-
-	if (map_request(tio) == DM_MAPIO_REQUEUE)
-		dm_requeue_original_request(tio, false);
-}
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
-{
-	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
-}
-
-#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
-						     const char *buf, size_t count)
-{
-	unsigned deadline;
-
-	if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
-		return count;
-
-	if (kstrtouint(buf, 10, &deadline))
-		return -EINVAL;
-
-	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
-		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
-
-	md->seq_rq_merge_deadline_usecs = deadline;
-
-	return count;
-}
-
-static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
-{
-	ktime_t kt_deadline;
-
-	if (!md->seq_rq_merge_deadline_usecs)
-		return false;
-
-	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
-	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
-
-	return !ktime_after(ktime_get(), kt_deadline);
-}
-
-/*
- * q->request_fn for old request-based dm.
- * Called with the queue lock held.
- */
-static void dm_old_request_fn(struct request_queue *q)
-{
-	struct mapped_device *md = q->queuedata;
-	struct dm_target *ti = md->immutable_target;
-	struct request *rq;
-	struct dm_rq_target_io *tio;
-	sector_t pos = 0;
-
-	if (unlikely(!ti)) {
-		int srcu_idx;
-		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-
-		if (unlikely(!map)) {
-			dm_put_live_table(md, srcu_idx);
-			return;
-		}
-		ti = dm_table_find_target(map, pos);
-		dm_put_live_table(md, srcu_idx);
-	}
-
-	/*
-	 * For suspend, check blk_queue_stopped() and increment
-	 * ->pending within a single queue_lock not to increment the
-	 * number of in-flight I/Os after the queue is stopped in
-	 * dm_suspend().
-	 */
-	while (!blk_queue_stopped(q)) {
-		rq = blk_peek_request(q);
-		if (!rq)
-			return;
-
-		/* always use block 0 to find the target for flushes for now */
-		pos = 0;
-		if (req_op(rq) != REQ_OP_FLUSH)
-			pos = blk_rq_pos(rq);
-
-		if ((dm_old_request_peeked_before_merge_deadline(md) &&
-		     md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
-		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
-		    (ti->type->busy && ti->type->busy(ti))) {
-			blk_delay_queue(q, 10);
-			return;
-		}
-
-		dm_start_request(md, rq);
-
-		tio = tio_from_request(rq);
-		init_tio(tio, rq, md);
-		/* Establish tio->ti before queuing work (map_tio_request) */
-		tio->ti = ti;
-		kthread_queue_work(&md->kworker, &tio->work);
-		BUG_ON(!irqs_disabled());
-	}
-}
-
-/*
- * Fully initialize a .request_fn request-based queue.
- */
-int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
-{
-	struct dm_target *immutable_tgt;
-
-	/* Fully initialize the queue */
-	md->queue->cmd_size = sizeof(struct dm_rq_target_io);
-	md->queue->rq_alloc_data = md;
-	md->queue->request_fn = dm_old_request_fn;
-	md->queue->init_rq_fn = dm_rq_init_rq;
-
-	immutable_tgt = dm_table_get_immutable_target(t);
-	if (immutable_tgt && immutable_tgt->per_io_data_size) {
-		/* any target-specific per-io data is immediately after the tio */
-		md->queue->cmd_size += immutable_tgt->per_io_data_size;
-		md->init_tio_pdu = true;
-	}
-	if (blk_init_allocated_queue(md->queue) < 0)
-		return -EINVAL;
-
-	/* disable dm_old_request_fn's merge heuristic by default */
-	md->seq_rq_merge_deadline_usecs = 0;
-
-	blk_queue_softirq_done(md->queue, dm_softirq_done);
-
-	/* Initialize the request-based DM worker thread */
-	kthread_init_worker(&md->kworker);
-	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
-				       "kdmwork-%s", dm_device_name(md));
-	if (IS_ERR(md->kworker_task)) {
-		int error = PTR_ERR(md->kworker_task);
-		md->kworker_task = NULL;
-		return error;
-	}
-
-	return 0;
-}
-
-static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
-		 unsigned int hctx_idx, unsigned int numa_node)
-{
-	return __dm_rq_init_rq(set->driver_data, rq);
-}
-
 static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *bd)
 {
@@ -749,8 +492,13 @@
 
 	if (unlikely(!ti)) {
 		int srcu_idx;
-		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+		struct dm_table *map;
 
+		map = dm_get_live_table(md, &srcu_idx);
+		if (unlikely(!map)) {
+			dm_put_live_table(md, srcu_idx);
+			return BLK_STS_RESOURCE;
+		}
 		ti = dm_table_find_target(map, 0);
 		dm_put_live_table(md, srcu_idx);
 	}
@@ -772,7 +520,7 @@
 	if (map_request(tio) == DM_MAPIO_REQUEUE) {
 		/* Undo dm_start_request() before requeuing */
 		rq_end_stats(md, rq);
-		rq_completed(md, rq_data_dir(rq), false);
+		rq_completed(md);
 		return BLK_STS_RESOURCE;
 	}
 
@@ -791,11 +539,6 @@
 	struct dm_target *immutable_tgt;
 	int err;
 
-	if (!dm_table_all_blk_mq_devices(t)) {
-		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
-		return -EINVAL;
-	}
-
 	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
 	if (!md->tag_set)
 		return -ENOMEM;
@@ -803,7 +546,7 @@
 	md->tag_set->ops = &dm_mq_ops;
 	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
 	md->tag_set->numa_node = md->numa_node_id;
-	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
 	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
 	md->tag_set->driver_data = md;
 
@@ -819,7 +562,7 @@
 	if (err)
 		goto out_kfree_tag_set;
 
-	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
+	q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
 	if (IS_ERR(q)) {
 		err = PTR_ERR(q);
 		goto out_tag_set;
@@ -848,6 +591,8 @@
 module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
 
+/* Unused, but preserved for userspace compatibility */
+static bool use_blk_mq = true;
 module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
 