hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/md/raid0.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 raid0.c : Multiple Devices driver for Linux
34 Copyright (C) 1994-96 Marc ZYNGIER
....@@ -7,14 +8,6 @@
78
89 RAID-0 management functions.
910
10
- This program is free software; you can redistribute it and/or modify
11
- it under the terms of the GNU General Public License as published by
12
- the Free Software Foundation; either version 2, or (at your option)
13
- any later version.
14
-
15
- You should have received a copy of the GNU General Public License
16
- (for example /usr/src/linux/COPYING); if not, write to the Free
17
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
1811 */
1912
2013 #include <linux/blkdev.h>
....@@ -36,21 +29,6 @@
3629 (1L << MD_HAS_PPL) | \
3730 (1L << MD_HAS_MULTIPLE_PPLS))
3831
39
-static int raid0_congested(struct mddev *mddev, int bits)
40
-{
41
- struct r0conf *conf = mddev->private;
42
- struct md_rdev **devlist = conf->devlist;
43
- int raid_disks = conf->strip_zone[0].nb_dev;
44
- int i, ret = 0;
45
-
46
- for (i = 0; i < raid_disks && !ret ; i++) {
47
- struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
48
-
49
- ret |= bdi_congested(q->backing_dev_info, bits);
50
- }
51
- return ret;
52
-}
53
-
5432 /*
5533 * inform the user of the raid configuration
5634 */
....@@ -70,7 +48,7 @@
7048 int len = 0;
7149
7250 for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
73
- len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
51
+ len += scnprintf(line+len, 200-len, "%s%s", k?"/":"",
7452 bdevname(conf->devlist[j*raid_disks
7553 + k]->bdev, b));
7654 pr_debug("md: zone%d=[%s]\n", j, line);
....@@ -150,21 +128,6 @@
150128 pr_debug("md/raid0:%s: FINAL %d zones\n",
151129 mdname(mddev), conf->nr_strip_zones);
152130
153
- if (conf->nr_strip_zones == 1) {
154
- conf->layout = RAID0_ORIG_LAYOUT;
155
- } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
156
- mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
157
- conf->layout = mddev->layout;
158
- } else if (default_layout == RAID0_ORIG_LAYOUT ||
159
- default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
160
- conf->layout = default_layout;
161
- } else {
162
- pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
163
- mdname(mddev));
164
- pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
165
- err = -ENOTSUPP;
166
- goto abort;
167
- }
168131 /*
169132 * now since we have the hard sector sizes, we can make sure
170133 * chunk size is a multiple of that sector size
....@@ -293,6 +256,34 @@
293256 pr_debug("md/raid0:%s: current zone start: %llu\n",
294257 mdname(mddev),
295258 (unsigned long long)smallest->sectors);
259
+ }
260
+
261
+ if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
262
+ conf->layout = RAID0_ORIG_LAYOUT;
263
+ } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
264
+ mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
265
+ conf->layout = mddev->layout;
266
+ } else if (default_layout == RAID0_ORIG_LAYOUT ||
267
+ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
268
+ conf->layout = default_layout;
269
+ } else {
270
+ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
271
+ mdname(mddev));
272
+ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
273
+ err = -EOPNOTSUPP;
274
+ goto abort;
275
+ }
276
+
277
+ if (conf->layout == RAID0_ORIG_LAYOUT) {
278
+ for (i = 1; i < conf->nr_strip_zones; i++) {
279
+ sector_t first_sector = conf->strip_zone[i-1].zone_end;
280
+
281
+ sector_div(first_sector, mddev->chunk_sectors);
282
+ zone = conf->strip_zone + i;
283
+ /* disk_shift is first disk index used in the zone */
284
+ zone->disk_shift = sector_div(first_sector,
285
+ zone->nb_dev);
286
+ }
296287 }
297288
298289 pr_debug("md/raid0:%s: done.\n", mdname(mddev));
....@@ -432,22 +423,6 @@
432423 mdname(mddev),
433424 (unsigned long long)mddev->array_sectors);
434425
435
- if (mddev->queue) {
436
- /* calculate the max read-ahead size.
437
- * For read-ahead of large files to be effective, we need to
438
- * readahead at least twice a whole stripe. i.e. number of devices
439
- * multiplied by chunk size times 2.
440
- * If an individual device has an ra_pages greater than the
441
- * chunk size, then we will not drive that device as hard as it
442
- * wants. We consider this a configuration error: a larger
443
- * chunksize should be used in that case.
444
- */
445
- int stripe = mddev->raid_disks *
446
- (mddev->chunk_sectors << 9) / PAGE_SIZE;
447
- if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
448
- mddev->queue->backing_dev_info->ra_pages = 2* stripe;
449
- }
450
-
451426 dump_zones(mddev);
452427
453428 ret = md_integrity_register(mddev);
....@@ -465,20 +440,17 @@
465440 }
466441
467442 /*
468
- * Is io distribute over 1 or more chunks ?
469
-*/
470
-static inline int is_io_in_chunk_boundary(struct mddev *mddev,
471
- unsigned int chunk_sects, struct bio *bio)
443
+ * Convert disk_index to the disk order in which it is read/written.
444
+ * For example, if we have 4 disks, they are numbered 0,1,2,3. If we
445
+ * write the disks starting at disk 3, then the read/write order would
446
+ * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift()
447
+ * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map
448
+ * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in
449
+ * that 'output' space to understand the read/write disk ordering.
450
+ */
451
+static int map_disk_shift(int disk_index, int num_disks, int disk_shift)
472452 {
473
- if (likely(is_power_of_2(chunk_sects))) {
474
- return chunk_sects >=
475
- ((bio->bi_iter.bi_sector & (chunk_sects-1))
476
- + bio_sectors(bio));
477
- } else{
478
- sector_t sector = bio->bi_iter.bi_sector;
479
- return chunk_sects >= (sector_div(sector, chunk_sects)
480
- + bio_sectors(bio));
481
- }
453
+ return ((disk_index + num_disks - disk_shift) % num_disks);
482454 }
483455
484456 static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
....@@ -494,7 +466,9 @@
494466 sector_t end_disk_offset;
495467 unsigned int end_disk_index;
496468 unsigned int disk;
469
+ sector_t orig_start, orig_end;
497470
471
+ orig_start = start;
498472 zone = find_zone(conf, &start);
499473
500474 if (bio_end_sector(bio) > zone->zone_end) {
....@@ -502,12 +476,13 @@
502476 zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
503477 &mddev->bio_set);
504478 bio_chain(split, bio);
505
- generic_make_request(bio);
479
+ submit_bio_noacct(bio);
506480 bio = split;
507481 end = zone->zone_end;
508482 } else
509483 end = bio_end_sector(bio);
510484
485
+ orig_end = end;
511486 if (zone != conf->strip_zone)
512487 end = end - zone[-1].zone_end;
513488
....@@ -519,13 +494,26 @@
519494 last_stripe_index = end;
520495 sector_div(last_stripe_index, stripe_size);
521496
522
- start_disk_index = (int)(start - first_stripe_index * stripe_size) /
523
- mddev->chunk_sectors;
497
+ /* In the first zone the original and alternate layouts are the same */
498
+ if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) {
499
+ sector_div(orig_start, mddev->chunk_sectors);
500
+ start_disk_index = sector_div(orig_start, zone->nb_dev);
501
+ start_disk_index = map_disk_shift(start_disk_index,
502
+ zone->nb_dev,
503
+ zone->disk_shift);
504
+ sector_div(orig_end, mddev->chunk_sectors);
505
+ end_disk_index = sector_div(orig_end, zone->nb_dev);
506
+ end_disk_index = map_disk_shift(end_disk_index,
507
+ zone->nb_dev, zone->disk_shift);
508
+ } else {
509
+ start_disk_index = (int)(start - first_stripe_index * stripe_size) /
510
+ mddev->chunk_sectors;
511
+ end_disk_index = (int)(end - last_stripe_index * stripe_size) /
512
+ mddev->chunk_sectors;
513
+ }
524514 start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
525515 mddev->chunk_sectors) +
526516 first_stripe_index * mddev->chunk_sectors;
527
- end_disk_index = (int)(end - last_stripe_index * stripe_size) /
528
- mddev->chunk_sectors;
529517 end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
530518 mddev->chunk_sectors) +
531519 last_stripe_index * mddev->chunk_sectors;
....@@ -534,18 +522,22 @@
534522 sector_t dev_start, dev_end;
535523 struct bio *discard_bio = NULL;
536524 struct md_rdev *rdev;
525
+ int compare_disk;
537526
538
- if (disk < start_disk_index)
527
+ compare_disk = map_disk_shift(disk, zone->nb_dev,
528
+ zone->disk_shift);
529
+
530
+ if (compare_disk < start_disk_index)
539531 dev_start = (first_stripe_index + 1) *
540532 mddev->chunk_sectors;
541
- else if (disk > start_disk_index)
533
+ else if (compare_disk > start_disk_index)
542534 dev_start = first_stripe_index * mddev->chunk_sectors;
543535 else
544536 dev_start = start_disk_offset;
545537
546
- if (disk < end_disk_index)
538
+ if (compare_disk < end_disk_index)
547539 dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
548
- else if (disk > end_disk_index)
540
+ else if (compare_disk > end_disk_index)
549541 dev_end = last_stripe_index * mddev->chunk_sectors;
550542 else
551543 dev_end = end_disk_offset;
....@@ -561,13 +553,12 @@
561553 !discard_bio)
562554 continue;
563555 bio_chain(discard_bio, bio);
564
- bio_clone_blkcg_association(discard_bio, bio);
556
+ bio_clone_blkg_association(discard_bio, bio);
565557 if (mddev->gendisk)
566558 trace_block_bio_remap(bdev_get_queue(rdev->bdev),
567559 discard_bio, disk_devt(mddev->gendisk),
568560 bio->bi_iter.bi_sector);
569
- bio_clear_flag(bio, BIO_QUEUE_ENTERED);
570
- generic_make_request(discard_bio);
561
+ submit_bio_noacct(discard_bio);
571562 }
572563 bio_endio(bio);
573564 }
....@@ -608,7 +599,7 @@
608599 struct bio *split = bio_split(bio, sectors, GFP_NOIO,
609600 &mddev->bio_set);
610601 bio_chain(split, bio);
611
- generic_make_request(bio);
602
+ submit_bio_noacct(bio);
612603 bio = split;
613604 }
614605
....@@ -627,6 +618,11 @@
627618 return true;
628619 }
629620
621
+ if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
622
+ bio_io_error(bio);
623
+ return true;
624
+ }
625
+
630626 bio_set_dev(bio, tmp_dev->bdev);
631627 bio->bi_iter.bi_sector = sector + zone->dev_start +
632628 tmp_dev->data_offset;
....@@ -636,8 +632,7 @@
636632 disk_devt(mddev->gendisk), bio_sector);
637633 mddev_check_writesame(mddev, bio);
638634 mddev_check_write_zeroes(mddev, bio);
639
- bio_clear_flag(bio, BIO_QUEUE_ENTERED);
640
- generic_make_request(bio);
635
+ submit_bio_noacct(bio);
641636 return true;
642637 }
643638
....@@ -822,7 +817,6 @@
822817 .size = raid0_size,
823818 .takeover = raid0_takeover,
824819 .quiesce = raid0_quiesce,
825
- .congested = raid0_congested,
826820 };
827821
828822 static int __init raid0_init (void)