| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | raid0.c : Multiple Devices driver for Linux |
|---|
| 3 | 4 | Copyright (C) 1994-96 Marc ZYNGIER |
|---|
| .. | .. |
|---|
| 7 | 8 | |
|---|
| 8 | 9 | RAID-0 management functions. |
|---|
| 9 | 10 | |
|---|
| 10 | | - This program is free software; you can redistribute it and/or modify |
|---|
| 11 | | - it under the terms of the GNU General Public License as published by |
|---|
| 12 | | - the Free Software Foundation; either version 2, or (at your option) |
|---|
| 13 | | - any later version. |
|---|
| 14 | | - |
|---|
| 15 | | - You should have received a copy of the GNU General Public License |
|---|
| 16 | | - (for example /usr/src/linux/COPYING); if not, write to the Free |
|---|
| 17 | | - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 18 | 11 | */ |
|---|
| 19 | 12 | |
|---|
| 20 | 13 | #include <linux/blkdev.h> |
|---|
| .. | .. |
|---|
| 36 | 29 | (1L << MD_HAS_PPL) | \ |
|---|
| 37 | 30 | (1L << MD_HAS_MULTIPLE_PPLS)) |
|---|
| 38 | 31 | |
|---|
| 39 | | -static int raid0_congested(struct mddev *mddev, int bits) |
|---|
| 40 | | -{ |
|---|
| 41 | | - struct r0conf *conf = mddev->private; |
|---|
| 42 | | - struct md_rdev **devlist = conf->devlist; |
|---|
| 43 | | - int raid_disks = conf->strip_zone[0].nb_dev; |
|---|
| 44 | | - int i, ret = 0; |
|---|
| 45 | | - |
|---|
| 46 | | - for (i = 0; i < raid_disks && !ret ; i++) { |
|---|
| 47 | | - struct request_queue *q = bdev_get_queue(devlist[i]->bdev); |
|---|
| 48 | | - |
|---|
| 49 | | - ret |= bdi_congested(q->backing_dev_info, bits); |
|---|
| 50 | | - } |
|---|
| 51 | | - return ret; |
|---|
| 52 | | -} |
|---|
| 53 | | - |
|---|
| 54 | 32 | /* |
|---|
| 55 | 33 | * inform the user of the raid configuration |
|---|
| 56 | 34 | */ |
|---|
| .. | .. |
|---|
| 70 | 48 | int len = 0; |
|---|
| 71 | 49 | |
|---|
| 72 | 50 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) |
|---|
| 73 | | - len += snprintf(line+len, 200-len, "%s%s", k?"/":"", |
|---|
| 51 | + len += scnprintf(line+len, 200-len, "%s%s", k?"/":"", |
|---|
| 74 | 52 | bdevname(conf->devlist[j*raid_disks |
|---|
| 75 | 53 | + k]->bdev, b)); |
|---|
| 76 | 54 | pr_debug("md: zone%d=[%s]\n", j, line); |
|---|
| .. | .. |
|---|
| 150 | 128 | pr_debug("md/raid0:%s: FINAL %d zones\n", |
|---|
| 151 | 129 | mdname(mddev), conf->nr_strip_zones); |
|---|
| 152 | 130 | |
|---|
| 153 | | - if (conf->nr_strip_zones == 1) { |
|---|
| 154 | | - conf->layout = RAID0_ORIG_LAYOUT; |
|---|
| 155 | | - } else if (mddev->layout == RAID0_ORIG_LAYOUT || |
|---|
| 156 | | - mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
|---|
| 157 | | - conf->layout = mddev->layout; |
|---|
| 158 | | - } else if (default_layout == RAID0_ORIG_LAYOUT || |
|---|
| 159 | | - default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
|---|
| 160 | | - conf->layout = default_layout; |
|---|
| 161 | | - } else { |
|---|
| 162 | | - pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", |
|---|
| 163 | | - mdname(mddev)); |
|---|
| 164 | | - pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); |
|---|
| 165 | | - err = -ENOTSUPP; |
|---|
| 166 | | - goto abort; |
|---|
| 167 | | - } |
|---|
| 168 | 131 | /* |
|---|
| 169 | 132 | * now since we have the hard sector sizes, we can make sure |
|---|
| 170 | 133 | * chunk size is a multiple of that sector size |
|---|
| .. | .. |
|---|
| 293 | 256 | pr_debug("md/raid0:%s: current zone start: %llu\n", |
|---|
| 294 | 257 | mdname(mddev), |
|---|
| 295 | 258 | (unsigned long long)smallest->sectors); |
|---|
| 259 | + } |
|---|
| 260 | + |
|---|
| 261 | + if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) { |
|---|
| 262 | + conf->layout = RAID0_ORIG_LAYOUT; |
|---|
| 263 | + } else if (mddev->layout == RAID0_ORIG_LAYOUT || |
|---|
| 264 | + mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
|---|
| 265 | + conf->layout = mddev->layout; |
|---|
| 266 | + } else if (default_layout == RAID0_ORIG_LAYOUT || |
|---|
| 267 | + default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { |
|---|
| 268 | + conf->layout = default_layout; |
|---|
| 269 | + } else { |
|---|
| 270 | + pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", |
|---|
| 271 | + mdname(mddev)); |
|---|
| 272 | + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); |
|---|
| 273 | + err = -EOPNOTSUPP; |
|---|
| 274 | + goto abort; |
|---|
| 275 | + } |
|---|
| 276 | + |
|---|
| 277 | + if (conf->layout == RAID0_ORIG_LAYOUT) { |
|---|
| 278 | + for (i = 1; i < conf->nr_strip_zones; i++) { |
|---|
| 279 | + sector_t first_sector = conf->strip_zone[i-1].zone_end; |
|---|
| 280 | + |
|---|
| 281 | + sector_div(first_sector, mddev->chunk_sectors); |
|---|
| 282 | + zone = conf->strip_zone + i; |
|---|
| 283 | + /* disk_shift is first disk index used in the zone */ |
|---|
| 284 | + zone->disk_shift = sector_div(first_sector, |
|---|
| 285 | + zone->nb_dev); |
|---|
| 286 | + } |
|---|
| 296 | 287 | } |
|---|
| 297 | 288 | |
|---|
| 298 | 289 | pr_debug("md/raid0:%s: done.\n", mdname(mddev)); |
|---|
| .. | .. |
|---|
| 432 | 423 | mdname(mddev), |
|---|
| 433 | 424 | (unsigned long long)mddev->array_sectors); |
|---|
| 434 | 425 | |
|---|
| 435 | | - if (mddev->queue) { |
|---|
| 436 | | - /* calculate the max read-ahead size. |
|---|
| 437 | | - * For read-ahead of large files to be effective, we need to |
|---|
| 438 | | - * readahead at least twice a whole stripe. i.e. number of devices |
|---|
| 439 | | - * multiplied by chunk size times 2. |
|---|
| 440 | | - * If an individual device has an ra_pages greater than the |
|---|
| 441 | | - * chunk size, then we will not drive that device as hard as it |
|---|
| 442 | | - * wants. We consider this a configuration error: a larger |
|---|
| 443 | | - * chunksize should be used in that case. |
|---|
| 444 | | - */ |
|---|
| 445 | | - int stripe = mddev->raid_disks * |
|---|
| 446 | | - (mddev->chunk_sectors << 9) / PAGE_SIZE; |
|---|
| 447 | | - if (mddev->queue->backing_dev_info->ra_pages < 2* stripe) |
|---|
| 448 | | - mddev->queue->backing_dev_info->ra_pages = 2* stripe; |
|---|
| 449 | | - } |
|---|
| 450 | | - |
|---|
| 451 | 426 | dump_zones(mddev); |
|---|
| 452 | 427 | |
|---|
| 453 | 428 | ret = md_integrity_register(mddev); |
|---|
| .. | .. |
|---|
| 465 | 440 | } |
|---|
| 466 | 441 | |
|---|
| 467 | 442 | /* |
|---|
| 468 | | - * Is io distribute over 1 or more chunks ? |
|---|
| 469 | | -*/ |
|---|
| 470 | | -static inline int is_io_in_chunk_boundary(struct mddev *mddev, |
|---|
| 471 | | - unsigned int chunk_sects, struct bio *bio) |
|---|
| 443 | + * Convert disk_index to the disk order in which it is read/written. |
|---|
| 444 | + * For example, if we have 4 disks, they are numbered 0,1,2,3. If we |
|---|
| 445 | + * write the disks starting at disk 3, then the read/write order would |
|---|
| 446 | + * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() |
|---|
| 447 | + * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map |
|---|
| 448 | + * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in |
|---|
| 449 | + * that 'output' space to understand the read/write disk ordering. |
|---|
| 450 | + */ |
|---|
| 451 | +static int map_disk_shift(int disk_index, int num_disks, int disk_shift) |
|---|
| 472 | 452 | { |
|---|
| 473 | | - if (likely(is_power_of_2(chunk_sects))) { |
|---|
| 474 | | - return chunk_sects >= |
|---|
| 475 | | - ((bio->bi_iter.bi_sector & (chunk_sects-1)) |
|---|
| 476 | | - + bio_sectors(bio)); |
|---|
| 477 | | - } else{ |
|---|
| 478 | | - sector_t sector = bio->bi_iter.bi_sector; |
|---|
| 479 | | - return chunk_sects >= (sector_div(sector, chunk_sects) |
|---|
| 480 | | - + bio_sectors(bio)); |
|---|
| 481 | | - } |
|---|
| 453 | + return ((disk_index + num_disks - disk_shift) % num_disks); |
|---|
| 482 | 454 | } |
|---|
| 483 | 455 | |
|---|
| 484 | 456 | static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) |
|---|
| .. | .. |
|---|
| 494 | 466 | sector_t end_disk_offset; |
|---|
| 495 | 467 | unsigned int end_disk_index; |
|---|
| 496 | 468 | unsigned int disk; |
|---|
| 469 | + sector_t orig_start, orig_end; |
|---|
| 497 | 470 | |
|---|
| 471 | + orig_start = start; |
|---|
| 498 | 472 | zone = find_zone(conf, &start); |
|---|
| 499 | 473 | |
|---|
| 500 | 474 | if (bio_end_sector(bio) > zone->zone_end) { |
|---|
| .. | .. |
|---|
| 502 | 476 | zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO, |
|---|
| 503 | 477 | &mddev->bio_set); |
|---|
| 504 | 478 | bio_chain(split, bio); |
|---|
| 505 | | - generic_make_request(bio); |
|---|
| 479 | + submit_bio_noacct(bio); |
|---|
| 506 | 480 | bio = split; |
|---|
| 507 | 481 | end = zone->zone_end; |
|---|
| 508 | 482 | } else |
|---|
| 509 | 483 | end = bio_end_sector(bio); |
|---|
| 510 | 484 | |
|---|
| 485 | + orig_end = end; |
|---|
| 511 | 486 | if (zone != conf->strip_zone) |
|---|
| 512 | 487 | end = end - zone[-1].zone_end; |
|---|
| 513 | 488 | |
|---|
| .. | .. |
|---|
| 519 | 494 | last_stripe_index = end; |
|---|
| 520 | 495 | sector_div(last_stripe_index, stripe_size); |
|---|
| 521 | 496 | |
|---|
| 522 | | - start_disk_index = (int)(start - first_stripe_index * stripe_size) / |
|---|
| 523 | | - mddev->chunk_sectors; |
|---|
| 497 | + /* In the first zone the original and alternate layouts are the same */ |
|---|
| 498 | + if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { |
|---|
| 499 | + sector_div(orig_start, mddev->chunk_sectors); |
|---|
| 500 | + start_disk_index = sector_div(orig_start, zone->nb_dev); |
|---|
| 501 | + start_disk_index = map_disk_shift(start_disk_index, |
|---|
| 502 | + zone->nb_dev, |
|---|
| 503 | + zone->disk_shift); |
|---|
| 504 | + sector_div(orig_end, mddev->chunk_sectors); |
|---|
| 505 | + end_disk_index = sector_div(orig_end, zone->nb_dev); |
|---|
| 506 | + end_disk_index = map_disk_shift(end_disk_index, |
|---|
| 507 | + zone->nb_dev, zone->disk_shift); |
|---|
| 508 | + } else { |
|---|
| 509 | + start_disk_index = (int)(start - first_stripe_index * stripe_size) / |
|---|
| 510 | + mddev->chunk_sectors; |
|---|
| 511 | + end_disk_index = (int)(end - last_stripe_index * stripe_size) / |
|---|
| 512 | + mddev->chunk_sectors; |
|---|
| 513 | + } |
|---|
| 524 | 514 | start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % |
|---|
| 525 | 515 | mddev->chunk_sectors) + |
|---|
| 526 | 516 | first_stripe_index * mddev->chunk_sectors; |
|---|
| 527 | | - end_disk_index = (int)(end - last_stripe_index * stripe_size) / |
|---|
| 528 | | - mddev->chunk_sectors; |
|---|
| 529 | 517 | end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % |
|---|
| 530 | 518 | mddev->chunk_sectors) + |
|---|
| 531 | 519 | last_stripe_index * mddev->chunk_sectors; |
|---|
| .. | .. |
|---|
| 534 | 522 | sector_t dev_start, dev_end; |
|---|
| 535 | 523 | struct bio *discard_bio = NULL; |
|---|
| 536 | 524 | struct md_rdev *rdev; |
|---|
| 525 | + int compare_disk; |
|---|
| 537 | 526 | |
|---|
| 538 | | - if (disk < start_disk_index) |
|---|
| 527 | + compare_disk = map_disk_shift(disk, zone->nb_dev, |
|---|
| 528 | + zone->disk_shift); |
|---|
| 529 | + |
|---|
| 530 | + if (compare_disk < start_disk_index) |
|---|
| 539 | 531 | dev_start = (first_stripe_index + 1) * |
|---|
| 540 | 532 | mddev->chunk_sectors; |
|---|
| 541 | | - else if (disk > start_disk_index) |
|---|
| 533 | + else if (compare_disk > start_disk_index) |
|---|
| 542 | 534 | dev_start = first_stripe_index * mddev->chunk_sectors; |
|---|
| 543 | 535 | else |
|---|
| 544 | 536 | dev_start = start_disk_offset; |
|---|
| 545 | 537 | |
|---|
| 546 | | - if (disk < end_disk_index) |
|---|
| 538 | + if (compare_disk < end_disk_index) |
|---|
| 547 | 539 | dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; |
|---|
| 548 | | - else if (disk > end_disk_index) |
|---|
| 540 | + else if (compare_disk > end_disk_index) |
|---|
| 549 | 541 | dev_end = last_stripe_index * mddev->chunk_sectors; |
|---|
| 550 | 542 | else |
|---|
| 551 | 543 | dev_end = end_disk_offset; |
|---|
| .. | .. |
|---|
| 561 | 553 | !discard_bio) |
|---|
| 562 | 554 | continue; |
|---|
| 563 | 555 | bio_chain(discard_bio, bio); |
|---|
| 564 | | - bio_clone_blkcg_association(discard_bio, bio); |
|---|
| 556 | + bio_clone_blkg_association(discard_bio, bio); |
|---|
| 565 | 557 | if (mddev->gendisk) |
|---|
| 566 | 558 | trace_block_bio_remap(bdev_get_queue(rdev->bdev), |
|---|
| 567 | 559 | discard_bio, disk_devt(mddev->gendisk), |
|---|
| 568 | 560 | bio->bi_iter.bi_sector); |
|---|
| 569 | | - bio_clear_flag(bio, BIO_QUEUE_ENTERED); |
|---|
| 570 | | - generic_make_request(discard_bio); |
|---|
| 561 | + submit_bio_noacct(discard_bio); |
|---|
| 571 | 562 | } |
|---|
| 572 | 563 | bio_endio(bio); |
|---|
| 573 | 564 | } |
|---|
| .. | .. |
|---|
| 608 | 599 | struct bio *split = bio_split(bio, sectors, GFP_NOIO, |
|---|
| 609 | 600 | &mddev->bio_set); |
|---|
| 610 | 601 | bio_chain(split, bio); |
|---|
| 611 | | - generic_make_request(bio); |
|---|
| 602 | + submit_bio_noacct(bio); |
|---|
| 612 | 603 | bio = split; |
|---|
| 613 | 604 | } |
|---|
| 614 | 605 | |
|---|
| .. | .. |
|---|
| 627 | 618 | return true; |
|---|
| 628 | 619 | } |
|---|
| 629 | 620 | |
|---|
| 621 | + if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) { |
|---|
| 622 | + bio_io_error(bio); |
|---|
| 623 | + return true; |
|---|
| 624 | + } |
|---|
| 625 | + |
|---|
| 630 | 626 | bio_set_dev(bio, tmp_dev->bdev); |
|---|
| 631 | 627 | bio->bi_iter.bi_sector = sector + zone->dev_start + |
|---|
| 632 | 628 | tmp_dev->data_offset; |
|---|
| .. | .. |
|---|
| 636 | 632 | disk_devt(mddev->gendisk), bio_sector); |
|---|
| 637 | 633 | mddev_check_writesame(mddev, bio); |
|---|
| 638 | 634 | mddev_check_write_zeroes(mddev, bio); |
|---|
| 639 | | - bio_clear_flag(bio, BIO_QUEUE_ENTERED); |
|---|
| 640 | | - generic_make_request(bio); |
|---|
| 635 | + submit_bio_noacct(bio); |
|---|
| 641 | 636 | return true; |
|---|
| 642 | 637 | } |
|---|
| 643 | 638 | |
|---|
| .. | .. |
|---|
| 822 | 817 | .size = raid0_size, |
|---|
| 823 | 818 | .takeover = raid0_takeover, |
|---|
| 824 | 819 | .quiesce = raid0_quiesce, |
|---|
| 825 | | - .congested = raid0_congested, |
|---|
| 826 | 820 | }; |
|---|
| 827 | 821 | |
|---|
| 828 | 822 | static int __init raid0_init (void) |
|---|