.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | raid0.c : Multiple Devices driver for Linux |
---|
3 | 4 | Copyright (C) 1994-96 Marc ZYNGIER |
---|
.. | .. |
---|
7 | 8 | |
---|
8 | 9 | RAID-0 management functions. |
---|
9 | 10 | |
---|
10 | | - This program is free software; you can redistribute it and/or modify |
---|
11 | | - it under the terms of the GNU General Public License as published by |
---|
12 | | - the Free Software Foundation; either version 2, or (at your option) |
---|
13 | | - any later version. |
---|
14 | | - |
---|
15 | | - You should have received a copy of the GNU General Public License |
---|
16 | | - (for example /usr/src/linux/COPYING); if not, write to the Free |
---|
17 | | - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
18 | 11 | */ |
---|
19 | 12 | |
---|
20 | 13 | #include <linux/blkdev.h> |
---|
.. | .. |
---|
36 | 29 | (1L << MD_HAS_PPL) | \ |
---|
37 | 30 | (1L << MD_HAS_MULTIPLE_PPLS)) |
---|
38 | 31 | |
---|
39 | | -static int raid0_congested(struct mddev *mddev, int bits) |
---|
40 | | -{ |
---|
41 | | - struct r0conf *conf = mddev->private; |
---|
42 | | - struct md_rdev **devlist = conf->devlist; |
---|
43 | | - int raid_disks = conf->strip_zone[0].nb_dev; |
---|
44 | | - int i, ret = 0; |
---|
45 | | - |
---|
46 | | - for (i = 0; i < raid_disks && !ret ; i++) { |
---|
47 | | - struct request_queue *q = bdev_get_queue(devlist[i]->bdev); |
---|
48 | | - |
---|
49 | | - ret |= bdi_congested(q->backing_dev_info, bits); |
---|
50 | | - } |
---|
51 | | - return ret; |
---|
52 | | -} |
---|
53 | | - |
---|
54 | 32 | /* |
---|
55 | 33 | * inform the user of the raid configuration |
---|
56 | 34 | */ |
---|
.. | .. |
---|
70 | 48 | int len = 0; |
---|
71 | 49 | |
---|
72 | 50 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) |
---|
73 | | - len += snprintf(line+len, 200-len, "%s%s", k?"/":"", |
---|
| 51 | + len += scnprintf(line+len, 200-len, "%s%s", k?"/":"", |
---|
74 | 52 | bdevname(conf->devlist[j*raid_disks |
---|
75 | 53 | + k]->bdev, b)); |
---|
76 | 54 | pr_debug("md: zone%d=[%s]\n", j, line); |
---|
.. | .. |
---|
150 | 128 | pr_debug("md/raid0:%s: FINAL %d zones\n", |
---|
151 | 129 | mdname(mddev), conf->nr_strip_zones); |
---|
152 | 130 | |
---|
153 | | - if (conf->nr_strip_zones == 1) { |
---|
154 | | - conf->layout = RAID0_ORIG_LAYOUT; |
---|
155 | | - } else if (mddev->layout == RAID0_ORIG_LAYOUT || |
---|
156 | | - mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
---|
157 | | - conf->layout = mddev->layout; |
---|
158 | | - } else if (default_layout == RAID0_ORIG_LAYOUT || |
---|
159 | | - default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
---|
160 | | - conf->layout = default_layout; |
---|
161 | | - } else { |
---|
162 | | - pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", |
---|
163 | | - mdname(mddev)); |
---|
164 | | - pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); |
---|
165 | | - err = -ENOTSUPP; |
---|
166 | | - goto abort; |
---|
167 | | - } |
---|
168 | 131 | /* |
---|
169 | 132 | * now since we have the hard sector sizes, we can make sure |
---|
170 | 133 | * chunk size is a multiple of that sector size |
---|
.. | .. |
---|
293 | 256 | pr_debug("md/raid0:%s: current zone start: %llu\n", |
---|
294 | 257 | mdname(mddev), |
---|
295 | 258 | (unsigned long long)smallest->sectors); |
---|
| 259 | + } |
---|
| 260 | + |
---|
| 261 | + if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) { |
---|
| 262 | + conf->layout = RAID0_ORIG_LAYOUT; |
---|
| 263 | + } else if (mddev->layout == RAID0_ORIG_LAYOUT || |
---|
| 264 | + mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
---|
| 265 | + conf->layout = mddev->layout; |
---|
| 266 | + } else if (default_layout == RAID0_ORIG_LAYOUT || |
---|
| 267 | + default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
---|
| 268 | + conf->layout = default_layout; |
---|
| 269 | + } else { |
---|
| 270 | + pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", |
---|
| 271 | + mdname(mddev)); |
---|
| 272 | + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); |
---|
| 273 | + err = -EOPNOTSUPP; |
---|
| 274 | + goto abort; |
---|
| 275 | + } |
---|
| 276 | + |
---|
| 277 | + if (conf->layout == RAID0_ORIG_LAYOUT) { |
---|
| 278 | + for (i = 1; i < conf->nr_strip_zones; i++) { |
---|
| 279 | + sector_t first_sector = conf->strip_zone[i-1].zone_end; |
---|
| 280 | + |
---|
| 281 | + sector_div(first_sector, mddev->chunk_sectors); |
---|
| 282 | + zone = conf->strip_zone + i; |
---|
| 283 | + /* disk_shift is first disk index used in the zone */ |
---|
| 284 | + zone->disk_shift = sector_div(first_sector, |
---|
| 285 | + zone->nb_dev); |
---|
| 286 | + } |
---|
296 | 287 | } |
---|
297 | 288 | |
---|
298 | 289 | pr_debug("md/raid0:%s: done.\n", mdname(mddev)); |
---|
.. | .. |
---|
432 | 423 | mdname(mddev), |
---|
433 | 424 | (unsigned long long)mddev->array_sectors); |
---|
434 | 425 | |
---|
435 | | - if (mddev->queue) { |
---|
436 | | - /* calculate the max read-ahead size. |
---|
437 | | - * For read-ahead of large files to be effective, we need to |
---|
438 | | - * readahead at least twice a whole stripe. i.e. number of devices |
---|
439 | | - * multiplied by chunk size times 2. |
---|
440 | | - * If an individual device has an ra_pages greater than the |
---|
441 | | - * chunk size, then we will not drive that device as hard as it |
---|
442 | | - * wants. We consider this a configuration error: a larger |
---|
443 | | - * chunksize should be used in that case. |
---|
444 | | - */ |
---|
445 | | - int stripe = mddev->raid_disks * |
---|
446 | | - (mddev->chunk_sectors << 9) / PAGE_SIZE; |
---|
447 | | - if (mddev->queue->backing_dev_info->ra_pages < 2* stripe) |
---|
448 | | - mddev->queue->backing_dev_info->ra_pages = 2* stripe; |
---|
449 | | - } |
---|
450 | | - |
---|
451 | 426 | dump_zones(mddev); |
---|
452 | 427 | |
---|
453 | 428 | ret = md_integrity_register(mddev); |
---|
.. | .. |
---|
465 | 440 | } |
---|
466 | 441 | |
---|
467 | 442 | /* |
---|
468 | | - * Is io distribute over 1 or more chunks ? |
---|
469 | | -*/ |
---|
470 | | -static inline int is_io_in_chunk_boundary(struct mddev *mddev, |
---|
471 | | - unsigned int chunk_sects, struct bio *bio) |
---|
| 443 | + * Convert disk_index to the disk order in which it is read/written. |
---|
| 444 | + * For example, if we have 4 disks, they are numbered 0,1,2,3. If we |
---|
| 445 | + * write the disks starting at disk 3, then the read/write order would |
---|
| 446 | + * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() |
---|
| 447 | + * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map |
---|
| 448 | + * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in |
---|
| 449 | + * that 'output' space to understand the read/write disk ordering. |
---|
| 450 | + */ |
---|
| 451 | +static int map_disk_shift(int disk_index, int num_disks, int disk_shift) |
---|
472 | 452 | { |
---|
473 | | - if (likely(is_power_of_2(chunk_sects))) { |
---|
474 | | - return chunk_sects >= |
---|
475 | | - ((bio->bi_iter.bi_sector & (chunk_sects-1)) |
---|
476 | | - + bio_sectors(bio)); |
---|
477 | | - } else{ |
---|
478 | | - sector_t sector = bio->bi_iter.bi_sector; |
---|
479 | | - return chunk_sects >= (sector_div(sector, chunk_sects) |
---|
480 | | - + bio_sectors(bio)); |
---|
481 | | - } |
---|
| 453 | + return ((disk_index + num_disks - disk_shift) % num_disks); |
---|
482 | 454 | } |
---|
483 | 455 | |
---|
484 | 456 | static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) |
---|
.. | .. |
---|
494 | 466 | sector_t end_disk_offset; |
---|
495 | 467 | unsigned int end_disk_index; |
---|
496 | 468 | unsigned int disk; |
---|
| 469 | + sector_t orig_start, orig_end; |
---|
497 | 470 | |
---|
| 471 | + orig_start = start; |
---|
498 | 472 | zone = find_zone(conf, &start); |
---|
499 | 473 | |
---|
500 | 474 | if (bio_end_sector(bio) > zone->zone_end) { |
---|
.. | .. |
---|
502 | 476 | zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO, |
---|
503 | 477 | &mddev->bio_set); |
---|
504 | 478 | bio_chain(split, bio); |
---|
505 | | - generic_make_request(bio); |
---|
| 479 | + submit_bio_noacct(bio); |
---|
506 | 480 | bio = split; |
---|
507 | 481 | end = zone->zone_end; |
---|
508 | 482 | } else |
---|
509 | 483 | end = bio_end_sector(bio); |
---|
510 | 484 | |
---|
| 485 | + orig_end = end; |
---|
511 | 486 | if (zone != conf->strip_zone) |
---|
512 | 487 | end = end - zone[-1].zone_end; |
---|
513 | 488 | |
---|
.. | .. |
---|
519 | 494 | last_stripe_index = end; |
---|
520 | 495 | sector_div(last_stripe_index, stripe_size); |
---|
521 | 496 | |
---|
522 | | - start_disk_index = (int)(start - first_stripe_index * stripe_size) / |
---|
523 | | - mddev->chunk_sectors; |
---|
| 497 | + /* In the first zone the original and alternate layouts are the same */ |
---|
| 498 | + if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { |
---|
| 499 | + sector_div(orig_start, mddev->chunk_sectors); |
---|
| 500 | + start_disk_index = sector_div(orig_start, zone->nb_dev); |
---|
| 501 | + start_disk_index = map_disk_shift(start_disk_index, |
---|
| 502 | + zone->nb_dev, |
---|
| 503 | + zone->disk_shift); |
---|
| 504 | + sector_div(orig_end, mddev->chunk_sectors); |
---|
| 505 | + end_disk_index = sector_div(orig_end, zone->nb_dev); |
---|
| 506 | + end_disk_index = map_disk_shift(end_disk_index, |
---|
| 507 | + zone->nb_dev, zone->disk_shift); |
---|
| 508 | + } else { |
---|
| 509 | + start_disk_index = (int)(start - first_stripe_index * stripe_size) / |
---|
| 510 | + mddev->chunk_sectors; |
---|
| 511 | + end_disk_index = (int)(end - last_stripe_index * stripe_size) / |
---|
| 512 | + mddev->chunk_sectors; |
---|
| 513 | + } |
---|
524 | 514 | start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % |
---|
525 | 515 | mddev->chunk_sectors) + |
---|
526 | 516 | first_stripe_index * mddev->chunk_sectors; |
---|
527 | | - end_disk_index = (int)(end - last_stripe_index * stripe_size) / |
---|
528 | | - mddev->chunk_sectors; |
---|
529 | 517 | end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % |
---|
530 | 518 | mddev->chunk_sectors) + |
---|
531 | 519 | last_stripe_index * mddev->chunk_sectors; |
---|
.. | .. |
---|
534 | 522 | sector_t dev_start, dev_end; |
---|
535 | 523 | struct bio *discard_bio = NULL; |
---|
536 | 524 | struct md_rdev *rdev; |
---|
| 525 | + int compare_disk; |
---|
537 | 526 | |
---|
538 | | - if (disk < start_disk_index) |
---|
| 527 | + compare_disk = map_disk_shift(disk, zone->nb_dev, |
---|
| 528 | + zone->disk_shift); |
---|
| 529 | + |
---|
| 530 | + if (compare_disk < start_disk_index) |
---|
539 | 531 | dev_start = (first_stripe_index + 1) * |
---|
540 | 532 | mddev->chunk_sectors; |
---|
541 | | - else if (disk > start_disk_index) |
---|
| 533 | + else if (compare_disk > start_disk_index) |
---|
542 | 534 | dev_start = first_stripe_index * mddev->chunk_sectors; |
---|
543 | 535 | else |
---|
544 | 536 | dev_start = start_disk_offset; |
---|
545 | 537 | |
---|
546 | | - if (disk < end_disk_index) |
---|
| 538 | + if (compare_disk < end_disk_index) |
---|
547 | 539 | dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; |
---|
548 | | - else if (disk > end_disk_index) |
---|
| 540 | + else if (compare_disk > end_disk_index) |
---|
549 | 541 | dev_end = last_stripe_index * mddev->chunk_sectors; |
---|
550 | 542 | else |
---|
551 | 543 | dev_end = end_disk_offset; |
---|
.. | .. |
---|
561 | 553 | !discard_bio) |
---|
562 | 554 | continue; |
---|
563 | 555 | bio_chain(discard_bio, bio); |
---|
564 | | - bio_clone_blkcg_association(discard_bio, bio); |
---|
| 556 | + bio_clone_blkg_association(discard_bio, bio); |
---|
565 | 557 | if (mddev->gendisk) |
---|
566 | 558 | trace_block_bio_remap(bdev_get_queue(rdev->bdev), |
---|
567 | 559 | discard_bio, disk_devt(mddev->gendisk), |
---|
568 | 560 | bio->bi_iter.bi_sector); |
---|
569 | | - bio_clear_flag(bio, BIO_QUEUE_ENTERED); |
---|
570 | | - generic_make_request(discard_bio); |
---|
| 561 | + submit_bio_noacct(discard_bio); |
---|
571 | 562 | } |
---|
572 | 563 | bio_endio(bio); |
---|
573 | 564 | } |
---|
.. | .. |
---|
608 | 599 | struct bio *split = bio_split(bio, sectors, GFP_NOIO, |
---|
609 | 600 | &mddev->bio_set); |
---|
610 | 601 | bio_chain(split, bio); |
---|
611 | | - generic_make_request(bio); |
---|
| 602 | + submit_bio_noacct(bio); |
---|
612 | 603 | bio = split; |
---|
613 | 604 | } |
---|
614 | 605 | |
---|
.. | .. |
---|
627 | 618 | return true; |
---|
628 | 619 | } |
---|
629 | 620 | |
---|
| 621 | + if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) { |
---|
| 622 | + bio_io_error(bio); |
---|
| 623 | + return true; |
---|
| 624 | + } |
---|
| 625 | + |
---|
630 | 626 | bio_set_dev(bio, tmp_dev->bdev); |
---|
631 | 627 | bio->bi_iter.bi_sector = sector + zone->dev_start + |
---|
632 | 628 | tmp_dev->data_offset; |
---|
.. | .. |
---|
636 | 632 | disk_devt(mddev->gendisk), bio_sector); |
---|
637 | 633 | mddev_check_writesame(mddev, bio); |
---|
638 | 634 | mddev_check_write_zeroes(mddev, bio); |
---|
639 | | - bio_clear_flag(bio, BIO_QUEUE_ENTERED); |
---|
640 | | - generic_make_request(bio); |
---|
| 635 | + submit_bio_noacct(bio); |
---|
641 | 636 | return true; |
---|
642 | 637 | } |
---|
643 | 638 | |
---|
.. | .. |
---|
822 | 817 | .size = raid0_size, |
---|
823 | 818 | .takeover = raid0_takeover, |
---|
824 | 819 | .quiesce = raid0_quiesce, |
---|
825 | | - .congested = raid0_congested, |
---|
826 | 820 | }; |
---|
827 | 821 | |
---|
828 | 822 | static int __init raid0_init (void) |
---|