2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/md/dm-mpath.c
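Summary of the changes below: dm-mpath gains an optional timeout for the queue_if_no_path state — a new queue_if_no_path_timeout_secs module parameter arms a per-device nopath_timer, and when it fires the target stops queueing and fails the held I/O. Alongside that, the obsolete DM_TYPE_MQ_REQUEST_BASED queue mode is removed (table arguments "rq" and "mq" both select DM_TYPE_REQUEST_BASED now that request-based DM is blk-mq only), updates of the MPATHF_* flags are made consistently under m->lock with a new mpath_double_check_test_bit() helper preserving a lockless fast path for readers, log messages gain the device name, the path-selector end_io hook gains an I/O start-time argument, and the target version is bumped to 1.14.0.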
@@ -20,6 +20,7 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <scsi/scsi_dh.h>
@@ -29,6 +30,9 @@
 #define DM_MSG_PREFIX "multipath"
 #define DM_PG_INIT_DELAY_MSECS 2000
 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
+#define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0
+
+static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT;
 
 /* Path properties */
 struct pgpath {
@@ -91,6 +95,8 @@
 
        struct work_struct process_queued_bios;
        struct bio_list queued_bios;
+
+       struct timer_list nopath_timer;         /* Timeout for queue_if_no_path */
 };
 
 /*
@@ -108,6 +114,7 @@
 static void activate_or_offline_path(struct pgpath *pgpath);
 static void activate_path_work(struct work_struct *work);
 static void process_queued_bios(struct work_struct *work);
+static void queue_if_no_path_timeout_work(struct timer_list *t);
 
 /*-----------------------------------------------
  * Multipath state flags.
@@ -120,6 +127,20 @@
 #define MPATHF_PG_INIT_DISABLED 4      /* pg_init is not currently allowed */
 #define MPATHF_PG_INIT_REQUIRED 5      /* pg_init needs calling? */
 #define MPATHF_PG_INIT_DELAY_RETRY 6   /* Delay pg_init retry? */
+
+static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
+{
+       bool r = test_bit(MPATHF_bit, &m->flags);
+
+       if (r) {
+               unsigned long flags;
+               spin_lock_irqsave(&m->lock, flags);
+               r = test_bit(MPATHF_bit, &m->flags);
+               spin_unlock_irqrestore(&m->lock, flags);
+       }
+
+       return r;
+}
 
 /*-----------------------------------------------
  * Allocation routines
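mpath_double_check_test_bit() packages a lockless fast path: the first test_bit() runs without m->lock, and only a positive result is re-checked under the lock that writers now hold when changing these flags. A minimal, self-contained sketch of the same pattern (hypothetical names, not part of the patch):

#include <linux/bitops.h>
#include <linux/spinlock.h>

/* Hypothetical example object, standing in for struct multipath. */
struct example {
        spinlock_t lock;                /* held by writers of @flags */
        unsigned long flags;
};

static bool example_double_check_test_bit(struct example *e, int bit)
{
        bool r = test_bit(bit, &e->flags);      /* lockless fast path */

        if (r) {
                unsigned long irqflags;

                /* Positive hit: confirm under the writer lock. */
                spin_lock_irqsave(&e->lock, irqflags);
                r = test_bit(bit, &e->flags);
                spin_unlock_irqrestore(&e->lock, irqflags);
        }
        return r;
}

A negative result is returned without locking; the callers converted below (multipath_clone_and_map(), __map_bio(), multipath_prepare_ioctl()) can tolerate a stale hint because their slow paths re-validate state under m->lock.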
@@ -195,6 +216,8 @@
 
                m->ti = ti;
                ti->private = m;
+
+               timer_setup(&m->nopath_timer, queue_if_no_path_timeout_work, 0);
        }
 
        return m;
@@ -203,14 +226,7 @@
 static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
 {
        if (m->queue_mode == DM_TYPE_NONE) {
-               /*
-                * Default to request-based.
-                */
-               if (dm_use_blk_mq(dm_table_get_md(ti->table)))
-                       m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
-               else
-                       m->queue_mode = DM_TYPE_REQUEST_BASED;
-
+               m->queue_mode = DM_TYPE_REQUEST_BASED;
        } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
                INIT_WORK(&m->process_queued_bios, process_queued_bios);
                /*
@@ -333,6 +349,8 @@
 
 static void __switch_pg(struct multipath *m, struct priority_group *pg)
 {
+       lockdep_assert_held(&m->lock);
+
        m->current_pg = pg;
 
        /* Must we initialise the PG first, and queue I/O till it's ready? */
@@ -380,7 +398,9 @@
        unsigned bypassed = 1;
 
        if (!atomic_read(&m->nr_valid_paths)) {
+               spin_lock_irqsave(&m->lock, flags);
                clear_bit(MPATHF_QUEUE_IO, &m->flags);
+               spin_unlock_irqrestore(&m->lock, flags);
                goto failed;
        }
 
@@ -420,8 +440,11 @@
                        continue;
                pgpath = choose_path_in_pg(m, pg, nr_bytes);
                if (!IS_ERR_OR_NULL(pgpath)) {
-                       if (!bypassed)
+                       if (!bypassed) {
+                               spin_lock_irqsave(&m->lock, flags);
                                set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
+                               spin_unlock_irqrestore(&m->lock, flags);
+                       }
                        return pgpath;
                }
        }
@@ -437,51 +460,38 @@
 }
 
 /*
- * dm_report_EIO() is a macro instead of a function to make pr_debug()
+ * dm_report_EIO() is a macro instead of a function to make pr_debug_ratelimited()
  * report the function name and line number of the function from which
  * it has been invoked.
  */
 #define dm_report_EIO(m) \
 do { \
-       struct mapped_device *md = dm_table_get_md((m)->ti->table); \
- \
-       pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
-                dm_device_name(md), \
-                test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
-                test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
-                dm_noflush_suspending((m)->ti)); \
+       DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \
+                     dm_table_device_name((m)->ti->table), \
+                     test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
+                     test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
+                     dm_noflush_suspending((m)->ti)); \
 } while (0)
 
 /*
  * Check whether bios must be queued in the device-mapper core rather
  * than here in the target.
- *
- * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold
- * the same value then we are not between multipath_presuspend()
- * and multipath_resume() calls and we have no need to check
- * for the DMF_NOFLUSH_SUSPENDING flag.
  */
-static bool __must_push_back(struct multipath *m, unsigned long flags)
+static bool __must_push_back(struct multipath *m)
 {
-       return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) !=
-                test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) &&
-               dm_noflush_suspending(m->ti));
+       return dm_noflush_suspending(m->ti);
 }
 
-/*
- * Following functions use READ_ONCE to get atomic access to
- * all m->flags to avoid taking spinlock
- */
 static bool must_push_back_rq(struct multipath *m)
 {
-       unsigned long flags = READ_ONCE(m->flags);
-       return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags);
-}
+       unsigned long flags;
+       bool ret;
 
-static bool must_push_back_bio(struct multipath *m)
-{
-       unsigned long flags = READ_ONCE(m->flags);
-       return __must_push_back(m, flags);
+       spin_lock_irqsave(&m->lock, flags);
+       ret = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m));
+       spin_unlock_irqrestore(&m->lock, flags);
+
+       return ret;
 }
 
 /*
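Two semantic shifts in the hunk above: __must_push_back() no longer compares MPATHF_QUEUE_IF_NO_PATH against its saved copy — it now simply reports whether a noflush suspend is in progress — and must_push_back_rq() reads the flag under m->lock instead of through a READ_ONCE(m->flags) snapshot. must_push_back_bio() is deleted outright; its bio-based callers switch to __must_push_back() in later hunks.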
@@ -501,7 +511,7 @@
 
        /* Do we need to select a new pgpath? */
        pgpath = READ_ONCE(m->current_pgpath);
-       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
+       if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
                pgpath = choose_pgpath(m, nr_bytes);
 
        if (!pgpath) {
@@ -509,8 +519,8 @@
                        return DM_MAPIO_DELAY_REQUEUE;
                dm_report_EIO(m);       /* Failed */
                return DM_MAPIO_KILL;
-       } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
-                  test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
+       } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
+                  mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
                pg_init_all_paths(m);
                return DM_MAPIO_DELAY_REQUEUE;
        }
@@ -537,10 +547,7 @@
                 * get the queue busy feedback (via BLK_STS_RESOURCE),
                 * otherwise I/O merging can suffer.
                 */
-               if (q->mq_ops)
-                       return DM_MAPIO_REQUEUE;
-               else
-                       return DM_MAPIO_DELAY_REQUEUE;
+               return DM_MAPIO_REQUEUE;
        }
        clone->bio = clone->biotail = NULL;
        clone->rq_disk = bdev->bd_disk;
@@ -568,7 +575,8 @@
                if (pgpath && pgpath->pg->ps.type->end_io)
                        pgpath->pg->ps.type->end_io(&pgpath->pg->ps,
                                                    &pgpath->path,
-                                                   mpio->nr_bytes);
+                                                   mpio->nr_bytes,
+                                                   clone->io_start_time_ns);
        }
 
        blk_put_request(clone);
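The fourth end_io argument hands the path selector the clone's I/O start time so completion handlers can derive per-path latency. The request-based paths (here and in multipath_end_io() below) pass clone->io_start_time_ns; the bio-based completion uses dm_start_time_ns_from_clone() instead.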
@@ -578,33 +586,45 @@
  * Map cloned bios (bio-based multipath)
  */
 
+static void __multipath_queue_bio(struct multipath *m, struct bio *bio)
+{
+       /* Queue for the daemon to resubmit */
+       bio_list_add(&m->queued_bios, bio);
+       if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
+               queue_work(kmultipathd, &m->process_queued_bios);
+}
+
+static void multipath_queue_bio(struct multipath *m, struct bio *bio)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&m->lock, flags);
+       __multipath_queue_bio(m, bio);
+       spin_unlock_irqrestore(&m->lock, flags);
+}
+
 static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
 {
        struct pgpath *pgpath;
        unsigned long flags;
-       bool queue_io;
 
        /* Do we need to select a new pgpath? */
        pgpath = READ_ONCE(m->current_pgpath);
-       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
+       if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
                pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
 
-       /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */
-       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
-
-       if ((pgpath && queue_io) ||
-           (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
-               /* Queue for the daemon to resubmit */
+       if (!pgpath) {
                spin_lock_irqsave(&m->lock, flags);
-               bio_list_add(&m->queued_bios, bio);
+               if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                       __multipath_queue_bio(m, bio);
+                       pgpath = ERR_PTR(-EAGAIN);
+               }
                spin_unlock_irqrestore(&m->lock, flags);
 
-               /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
-               if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
-                       pg_init_all_paths(m);
-               else if (!queue_io)
-                       queue_work(kmultipathd, &m->process_queued_bios);
-
+       } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
+                  mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
+               multipath_queue_bio(m, bio);
+               pg_init_all_paths(m);
                return ERR_PTR(-EAGAIN);
        }
 
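The bio queueing is split along the usual kernel naming convention: __multipath_queue_bio() expects the caller to hold m->lock, and multipath_queue_bio() is the self-locking wrapper. That lets __map_bio() test MPATHF_QUEUE_IF_NO_PATH, queue the bio, and set the -EAGAIN result inside a single critical section when no path is available.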
@@ -620,7 +640,7 @@
                return DM_MAPIO_SUBMITTED;
 
        if (!pgpath) {
-               if (must_push_back_bio(m))
+               if (__must_push_back(m))
                        return DM_MAPIO_REQUEUE;
                dm_report_EIO(m);
                return DM_MAPIO_KILL;
@@ -650,7 +670,7 @@
 
 static void process_queued_io_list(struct multipath *m)
 {
-       if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+       if (m->queue_mode == DM_TYPE_REQUEST_BASED)
                dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
        else if (m->queue_mode == DM_TYPE_BIO_BASED)
                queue_work(kmultipathd, &m->process_queued_bios);
@@ -695,7 +715,7 @@
                        bio_endio(bio);
                        break;
                case DM_MAPIO_REMAPPED:
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                        break;
                case DM_MAPIO_SUBMITTED:
                        break;
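generic_make_request() became submit_bio_noacct() in Linux 5.9; the "noacct" variant is meant for stacking drivers resubmitting bios, since I/O accounting was already done for the original bio. This hunk is the mechanical rename on the remap path.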
@@ -710,15 +730,38 @@
  * If we run out of usable paths, should we queue I/O or error it?
  */
 static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
-                           bool save_old_value)
+                           bool save_old_value, const char *caller)
 {
        unsigned long flags;
+       bool queue_if_no_path_bit, saved_queue_if_no_path_bit;
+       const char *dm_dev_name = dm_table_device_name(m->ti->table);
+
+       DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d",
+               dm_dev_name, __func__, caller, queue_if_no_path, save_old_value);
 
        spin_lock_irqsave(&m->lock, flags);
-       assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags,
-                  (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) ||
-                  (!save_old_value && queue_if_no_path));
+
+       queue_if_no_path_bit = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
+       saved_queue_if_no_path_bit = test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
+
+       if (save_old_value) {
+               if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) {
+                       DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!",
+                             dm_dev_name);
+               } else
+                       assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit);
+       } else if (!queue_if_no_path && saved_queue_if_no_path_bit) {
+               /* due to "fail_if_no_path" message, need to honor it. */
+               clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
+       }
        assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
+
+       DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d",
+               dm_dev_name, __func__,
+               test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
+               test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags),
+               dm_noflush_suspending(m->ti));
+
        spin_unlock_irqrestore(&m->lock, flags);
 
        if (!queue_if_no_path) {
@@ -727,6 +770,43 @@
        }
 
        return 0;
+}
+
+/*
+ * If the queue_if_no_path timeout fires, turn off queue_if_no_path and
+ * process any queued I/O.
+ */
+static void queue_if_no_path_timeout_work(struct timer_list *t)
+{
+       struct multipath *m = from_timer(m, t, nopath_timer);
+
+       DMWARN("queue_if_no_path timeout on %s, failing queued IO",
+              dm_table_device_name(m->ti->table));
+       queue_if_no_path(m, false, false, __func__);
+}
+
+/*
+ * Enable the queue_if_no_path timeout if necessary.
+ * Called with m->lock held.
+ */
+static void enable_nopath_timeout(struct multipath *m)
+{
+       unsigned long queue_if_no_path_timeout =
+               READ_ONCE(queue_if_no_path_timeout_secs) * HZ;
+
+       lockdep_assert_held(&m->lock);
+
+       if (queue_if_no_path_timeout > 0 &&
+           atomic_read(&m->nr_valid_paths) == 0 &&
+           test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+               mod_timer(&m->nopath_timer,
+                         jiffies + queue_if_no_path_timeout);
+       }
+}
+
+static void disable_nopath_timeout(struct multipath *m)
+{
+       del_timer_sync(&m->nopath_timer);
 }
 
 /*
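The timer plumbing follows the standard timer_list lifecycle: timer_setup() in alloc_multipath() binds nopath_timer to queue_if_no_path_timeout_work(), enable_nopath_timeout() (re)arms it with mod_timer() whenever all paths are gone while queue_if_no_path is set, and disable_nopath_timeout() uses del_timer_sync() so any concurrently running handler has finished before teardown. A self-contained sketch of the same lifecycle, with hypothetical names:

#include <linux/jiffies.h>
#include <linux/timer.h>

struct watched {
        struct timer_list watchdog;     /* mirrors nopath_timer in struct multipath */
};

static void watchdog_fire(struct timer_list *t)
{
        /* from_timer() recovers the container from the embedded timer. */
        struct watched *w = from_timer(w, t, watchdog);

        /*
         * Timer callbacks run in softirq context and must not sleep,
         * which is why queue_if_no_path() sticks to spinlocks on this path.
         */
        (void)w;
}

static void watched_start(struct watched *w)
{
        timer_setup(&w->watchdog, watchdog_fire, 0);
        mod_timer(&w->watchdog, jiffies + 30 * HZ);     /* one-shot, ~30s out */
}

static void watched_stop(struct watched *w)
{
        del_timer_sync(&w->watchdog);   /* waits for a running callback */
}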
@@ -793,7 +873,7 @@
        struct request_queue *q = bdev_get_queue(bdev);
        int r;
 
-       if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
+       if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) {
 retain:
                if (*attached_handler_name) {
                        /*
@@ -1042,7 +1122,7 @@
                argc--;
 
                if (!strcasecmp(arg_name, "queue_if_no_path")) {
-                       r = queue_if_no_path(m, true, false);
+                       r = queue_if_no_path(m, true, false, __func__);
                        continue;
                }
 
@@ -1071,10 +1151,9 @@
 
        if (!strcasecmp(queue_mode_name, "bio"))
                m->queue_mode = DM_TYPE_BIO_BASED;
-       else if (!strcasecmp(queue_mode_name, "rq"))
+       else if (!strcasecmp(queue_mode_name, "rq") ||
+                !strcasecmp(queue_mode_name, "mq"))
                m->queue_mode = DM_TYPE_REQUEST_BASED;
-       else if (!strcasecmp(queue_mode_name, "mq"))
-               m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
        else {
                ti->error = "Unknown 'queue_mode' requested";
                r = -EINVAL;
@@ -1103,6 +1182,7 @@
        struct dm_arg_set as;
        unsigned pg_count = 0;
        unsigned next_pg_num;
+       unsigned long flags;
 
        as.argc = argc;
        as.argv = argv;
@@ -1167,6 +1247,10 @@
                goto bad;
        }
 
+       spin_lock_irqsave(&m->lock, flags);
+       enable_nopath_timeout(m);
+       spin_unlock_irqrestore(&m->lock, flags);
+
        ti->num_flush_bios = 1;
        ti->num_discard_bios = 1;
        ti->num_write_same_bios = 1;
@@ -1201,17 +1285,27 @@
 static void flush_multipath_work(struct multipath *m)
 {
        if (m->hw_handler_name) {
-               set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
-               smp_mb__after_atomic();
+               unsigned long flags;
 
-               flush_workqueue(kmpath_handlerd);
-               multipath_wait_for_pg_init_completion(m);
+               if (!atomic_read(&m->pg_init_in_progress))
+                       goto skip;
 
-               clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
-               smp_mb__after_atomic();
+               spin_lock_irqsave(&m->lock, flags);
+               if (atomic_read(&m->pg_init_in_progress) &&
+                   !test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) {
+                       spin_unlock_irqrestore(&m->lock, flags);
+
+                       flush_workqueue(kmpath_handlerd);
+                       multipath_wait_for_pg_init_completion(m);
+
+                       spin_lock_irqsave(&m->lock, flags);
+                       clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
+               }
+               spin_unlock_irqrestore(&m->lock, flags);
        }
-
-       flush_workqueue(kmultipathd);
+skip:
+       if (m->queue_mode == DM_TYPE_BIO_BASED)
+               flush_work(&m->process_queued_bios);
        flush_work(&m->trigger_event);
 }
 
@@ -1219,6 +1313,7 @@
 {
        struct multipath *m = ti->private;
 
+       disable_nopath_timeout(m);
        flush_multipath_work(m);
        free_multipath(m);
 }
@@ -1236,7 +1331,9 @@
        if (!pgpath->is_active)
                goto out;
 
-       DMWARN("Failing path %s.", pgpath->path.dev->name);
+       DMWARN("%s: Failing path %s.",
+              dm_table_device_name(m->ti->table),
+              pgpath->path.dev->name);
 
        pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
        pgpath->is_active = false;
@@ -1251,6 +1348,8 @@
                       pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
 
        schedule_work(&m->trigger_event);
+
+       enable_nopath_timeout(m);
 
 out:
        spin_unlock_irqrestore(&m->lock, flags);
@@ -1273,7 +1372,9 @@
        if (pgpath->is_active)
                goto out;
 
-       DMWARN("Reinstating path %s.", pgpath->path.dev->name);
+       DMWARN("%s: Reinstating path %s.",
+              dm_table_device_name(m->ti->table),
+              pgpath->path.dev->name);
 
        r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
        if (r)
@@ -1301,6 +1402,9 @@
                dm_table_run_md_queue_async(m->ti->table);
                process_queued_io_list(m);
        }
+
+       if (pgpath->is_active)
+               disable_nopath_timeout(m);
 
        return r;
 }
@@ -1455,8 +1559,8 @@
                break;
        case SCSI_DH_RETRY:
                /* Wait before retrying. */
-               delay_retry = 1;
-               /* fall through */
+               delay_retry = true;
+               fallthrough;
        case SCSI_DH_IMM_RETRY:
        case SCSI_DH_RES_TEMP_UNAVAIL:
                if (pg_init_limit_reached(m, pgpath))
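fallthrough; is the pseudo-keyword from <linux/compiler_attributes.h> (expanding to __attribute__((fallthrough)) on compilers that support it) that replaced the old /* fall through */ comment convention, letting -Wimplicit-fallthrough check switch cases mechanically; delay_retry is also assigned true rather than 1 while the line is touched.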
@@ -1556,7 +1660,7 @@
        if (pgpath)
                fail_path(pgpath);
 
-       if (atomic_read(&m->nr_valid_paths) == 0 &&
+       if (!atomic_read(&m->nr_valid_paths) &&
            !must_push_back_rq(m)) {
                if (error == BLK_STS_IOERR)
                        dm_report_EIO(m);
@@ -1569,7 +1673,8 @@
                struct path_selector *ps = &pgpath->pg->ps;
 
                if (ps->type->end_io)
-                       ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
+                       ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
+                                        clone->io_start_time_ns);
        }
 
        return r;
@@ -1590,46 +1695,49 @@
        if (pgpath)
                fail_path(pgpath);
 
-       if (atomic_read(&m->nr_valid_paths) == 0 &&
-           !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-               if (must_push_back_bio(m)) {
-                       r = DM_ENDIO_REQUEUE;
-               } else {
-                       dm_report_EIO(m);
-                       *error = BLK_STS_IOERR;
+       if (!atomic_read(&m->nr_valid_paths)) {
+               spin_lock_irqsave(&m->lock, flags);
+               if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                       if (__must_push_back(m)) {
+                               r = DM_ENDIO_REQUEUE;
+                       } else {
+                               dm_report_EIO(m);
+                               *error = BLK_STS_IOERR;
+                       }
+                       spin_unlock_irqrestore(&m->lock, flags);
+                       goto done;
                }
-               goto done;
+               spin_unlock_irqrestore(&m->lock, flags);
        }
 
-       spin_lock_irqsave(&m->lock, flags);
-       bio_list_add(&m->queued_bios, clone);
-       spin_unlock_irqrestore(&m->lock, flags);
-       if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
-               queue_work(kmultipathd, &m->process_queued_bios);
-
+       multipath_queue_bio(m, clone);
        r = DM_ENDIO_INCOMPLETE;
 done:
        if (pgpath) {
                struct path_selector *ps = &pgpath->pg->ps;
 
                if (ps->type->end_io)
-                       ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
+                       ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
+                                        dm_start_time_ns_from_clone(clone));
        }
 
        return r;
 }
 
 /*
- * Suspend can't complete until all the I/O is processed so if
- * the last path fails we must error any remaining I/O.
- * Note that if the freeze_bdev fails while suspending, the
- * queue_if_no_path state is lost - userspace should reset it.
+ * Suspend with flush can't complete until all the I/O is processed
+ * so if the last path fails we must error any remaining I/O.
+ * - Note that if the freeze_bdev fails while suspending, the
+ *   queue_if_no_path state is lost - userspace should reset it.
+ * Otherwise, during noflush suspend, queue_if_no_path will not change.
  */
 static void multipath_presuspend(struct dm_target *ti)
 {
        struct multipath *m = ti->private;
 
-       queue_if_no_path(m, false, true);
+       /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
+       if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
+               queue_if_no_path(m, false, true, __func__);
 }
 
 static void multipath_postsuspend(struct dm_target *ti)
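The reworked comment and presuspend hook encode the new suspend semantics: a flush suspend must still drain or fail all I/O, so queue_if_no_path is disabled with the previous value saved, while a noflush suspend of a request-based table now leaves queue_if_no_path alone. Bio-based multipath keeps the unconditional disable, which the FIXME marks as a known gap.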
@@ -1650,8 +1758,16 @@
        unsigned long flags;
 
        spin_lock_irqsave(&m->lock, flags);
-       assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags,
-                  test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
+       if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
+               set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
+               clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
+       }
+
+       DMDEBUG("%s: %s finished; QIFNP = %d; SQIFNP = %d",
+               dm_table_device_name(m->ti->table), __func__,
+               test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
+               test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
+
        spin_unlock_irqrestore(&m->lock, flags);
 }
 
@@ -1707,9 +1823,6 @@
        switch(m->queue_mode) {
        case DM_TYPE_BIO_BASED:
                DMEMIT("queue_mode bio ");
-               break;
-       case DM_TYPE_MQ_REQUEST_BASED:
-               DMEMIT("queue_mode mq ");
                break;
        default:
                WARN_ON_ONCE(true);
@@ -1803,6 +1916,7 @@
        struct dm_dev *dev;
        struct multipath *m = ti->private;
        action_fn action;
+       unsigned long flags;
 
        mutex_lock(&m->work_mutex);
 
@@ -1813,10 +1927,14 @@
 
        if (argc == 1) {
                if (!strcasecmp(argv[0], "queue_if_no_path")) {
-                       r = queue_if_no_path(m, true, false);
+                       r = queue_if_no_path(m, true, false, __func__);
+                       spin_lock_irqsave(&m->lock, flags);
+                       enable_nopath_timeout(m);
+                       spin_unlock_irqrestore(&m->lock, flags);
                        goto out;
                } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
-                       r = queue_if_no_path(m, false, false);
+                       r = queue_if_no_path(m, false, false, __func__);
+                       disable_nopath_timeout(m);
                        goto out;
                }
        }
@@ -1864,16 +1982,17 @@
                                    struct block_device **bdev)
 {
        struct multipath *m = ti->private;
-       struct pgpath *current_pgpath;
+       struct pgpath *pgpath;
+       unsigned long flags;
        int r;
 
-       current_pgpath = READ_ONCE(m->current_pgpath);
-       if (!current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
-               current_pgpath = choose_pgpath(m, 0);
+       pgpath = READ_ONCE(m->current_pgpath);
+       if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
+               pgpath = choose_pgpath(m, 0);
 
-       if (current_pgpath) {
-               if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
-                       *bdev = current_pgpath->path.dev->bdev;
+       if (pgpath) {
+               if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) {
+                       *bdev = pgpath->path.dev->bdev;
                        r = 0;
                } else {
                        /* pg_init has not started or completed */
@@ -1881,10 +2000,11 @@
                }
        } else {
                /* No path is available */
+               r = -EIO;
+               spin_lock_irqsave(&m->lock, flags);
                if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
                        r = -ENOTCONN;
-               else
-                       r = -EIO;
+               spin_unlock_irqrestore(&m->lock, flags);
        }
 
        if (r == -ENOTCONN) {
@@ -1892,8 +2012,10 @@
                        /* Path status changed, redo selection */
                        (void) choose_pgpath(m, 0);
                }
+               spin_lock_irqsave(&m->lock, flags);
                if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
-                       pg_init_all_paths(m);
+                       (void) __pg_init_all_paths(m);
+               spin_unlock_irqrestore(&m->lock, flags);
                dm_table_run_md_queue_async(m->ti->table);
                process_queued_io_list(m);
        }
@@ -1953,8 +2075,15 @@
                return true;
 
        /* no paths available, for blk-mq: rely on IO mapping to delay requeue */
-       if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-               return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+       if (!atomic_read(&m->nr_valid_paths)) {
+               unsigned long flags;
+               spin_lock_irqsave(&m->lock, flags);
+               if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                       spin_unlock_irqrestore(&m->lock, flags);
+                       return (m->queue_mode != DM_TYPE_REQUEST_BASED);
+               }
+               spin_unlock_irqrestore(&m->lock, flags);
+       }
 
        /* Guess which priority_group will be used at next mapping time */
        pg = READ_ONCE(m->current_pg);
@@ -2005,7 +2134,7 @@
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
        .name = "multipath",
-       .version = {1, 13, 0},
+       .version = {1, 14, 0},
        .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
                    DM_TARGET_PASSES_INTEGRITY,
        .module = THIS_MODULE,
@@ -2079,6 +2208,10 @@
 module_init(dm_multipath_init);
 module_exit(dm_multipath_exit);
 
+module_param_named(queue_if_no_path_timeout_secs,
+                  queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds");
+
 MODULE_DESCRIPTION(DM_NAME " multipath target");
 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
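With module_param_named() and S_IRUGO | S_IWUSR the timeout is tunable at load time and at runtime. Assuming the usual module name built from this file (dm_multipath), that means a load-time option queue_if_no_path_timeout_secs=<N> and a writable parameter file under /sys/module/dm_multipath/parameters/; the default of 0 leaves the timer disarmed. Because enable_nopath_timeout() re-reads the value with READ_ONCE() each time it arms the timer, a runtime change takes effect the next time all paths are lost.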