| Old | New | Code |
|---|---|---|
| .. | .. | |
| | 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
| 1 | 2 | /* |
| 2 | 3 | * raid1.c : Multiple Devices driver for Linux |
| 3 | 4 | * |
| .. | .. | |
| 20 | 21 | * |
| 21 | 22 | * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology: |
| 22 | 23 | * - persistent bitmap code |
| 23 | | - * |
| 24 | | - * This program is free software; you can redistribute it and/or modify |
| 25 | | - * it under the terms of the GNU General Public License as published by |
| 26 | | - * the Free Software Foundation; either version 2, or (at your option) |
| 27 | | - * any later version. |
| 28 | | - * |
| 29 | | - * You should have received a copy of the GNU General Public License |
| 30 | | - * (for example /usr/src/linux/COPYING); if not, write to the Free |
| 31 | | - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 32 | 24 | */ |
| 33 | 25 | |
| 34 | 26 | #include <linux/slab.h> |
| .. | .. | |
| 37 | 29 | #include <linux/module.h> |
| 38 | 30 | #include <linux/seq_file.h> |
| 39 | 31 | #include <linux/ratelimit.h> |
| | 32 | +#include <linux/interval_tree_generic.h> |
| 40 | 33 | |
| 41 | 34 | #include <trace/events/block.h> |
| 42 | 35 | |
| .. | .. | |
| 50 | 43 | (1L << MD_HAS_PPL) \| \ |
| 51 | 44 | (1L << MD_HAS_MULTIPLE_PPLS)) |
| 52 | 45 | |
| 53 | | -/* |
| 54 | | - * Number of guaranteed r1bios in case of extreme VM load: |
| 55 | | - */ |
| 56 | | -#define NR_RAID1_BIOS 256 |
| 57 | | - |
| 58 | | -/* when we get a read error on a read-only array, we redirect to another |
| 59 | | - * device without failing the first device, or trying to over-write to |
| 60 | | - * correct the read error. To keep track of bad blocks on a per-bio |
| 61 | | - * level, we store IO_BLOCKED in the appropriate 'bios' pointer |
| 62 | | - */ |
| 63 | | -#define IO_BLOCKED ((struct bio *)1) |
| 64 | | -/* When we successfully write to a known bad-block, we need to remove the |
| 65 | | - * bad-block marking which must be done from process context. So we record |
| 66 | | - * the success by setting devs[n].bio to IO_MADE_GOOD |
| 67 | | - */ |
| 68 | | -#define IO_MADE_GOOD ((struct bio *)2) |
| 69 | | - |
| 70 | | -#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) |
| 71 | | - |
| 72 | | -/* When there are this many requests queue to be written by |
| 73 | | - * the raid1 thread, we become 'congested' to provide back-pressure |
| 74 | | - * for writeback. |
| 75 | | - */ |
| 76 | | -static int max_queued_requests = 1024; |
| 77 | | - |
| 78 | 46 | static void allow_barrier(struct r1conf *conf, sector_t sector_nr); |
| 79 | 47 | static void lower_barrier(struct r1conf *conf, sector_t sector_nr); |
| 80 | 48 | |
| .. | .. | |
| 82 | 50 | do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) |
| 83 | 51 | |
| 84 | 52 | #include "raid1-10.c" |
| | 53 | + |
| | 54 | +#define START(node) ((node)->start) |
| | 55 | +#define LAST(node) ((node)->last) |
| | 56 | +INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last, |
| | 57 | + START, LAST, static inline, raid1_rb); |
| | 58 | + |
| | 59 | +static int check_and_add_serial(struct md_rdev *rdev, struct r1bio *r1_bio, |
| | 60 | + struct serial_info *si, int idx) |
| | 61 | +{ |
| | 62 | + unsigned long flags; |
| | 63 | + int ret = 0; |
| | 64 | + sector_t lo = r1_bio->sector; |
| | 65 | + sector_t hi = lo + r1_bio->sectors; |
| | 66 | + struct serial_in_rdev *serial = &rdev->serial[idx]; |
| | 67 | + |
| | 68 | + spin_lock_irqsave(&serial->serial_lock, flags); |
| | 69 | + /* collision happened */ |
| | 70 | + if (raid1_rb_iter_first(&serial->serial_rb, lo, hi)) |
| | 71 | + ret = -EBUSY; |
| | 72 | + else { |
| | 73 | + si->start = lo; |
| | 74 | + si->last = hi; |
| | 75 | + raid1_rb_insert(si, &serial->serial_rb); |
| | 76 | + } |
| | 77 | + spin_unlock_irqrestore(&serial->serial_lock, flags); |
| | 78 | + |
| | 79 | + return ret; |
| | 80 | +} |
| | 81 | + |
| | 82 | +static void wait_for_serialization(struct md_rdev *rdev, struct r1bio *r1_bio) |
| | 83 | +{ |
| | 84 | + struct mddev *mddev = rdev->mddev; |
| | 85 | + struct serial_info *si; |
| | 86 | + int idx = sector_to_idx(r1_bio->sector); |
| | 87 | + struct serial_in_rdev *serial = &rdev->serial[idx]; |
| | 88 | + |
| | 89 | + if (WARN_ON(!mddev->serial_info_pool)) |
| | 90 | + return; |
| | 91 | + si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO); |
| | 92 | + wait_event(serial->serial_io_wait, |
| | 93 | + check_and_add_serial(rdev, r1_bio, si, idx) == 0); |
| | 94 | +} |
| | 95 | + |
| | 96 | +static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi) |
| | 97 | +{ |
| | 98 | + struct serial_info *si; |
| | 99 | + unsigned long flags; |
| | 100 | + int found = 0; |
| | 101 | + struct mddev *mddev = rdev->mddev; |
| | 102 | + int idx = sector_to_idx(lo); |
| | 103 | + struct serial_in_rdev *serial = &rdev->serial[idx]; |
| | 104 | + |
| | 105 | + spin_lock_irqsave(&serial->serial_lock, flags); |
| | 106 | + for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi); |
| | 107 | + si; si = raid1_rb_iter_next(si, lo, hi)) { |
| | 108 | + if (si->start == lo && si->last == hi) { |
| | 109 | + raid1_rb_remove(si, &serial->serial_rb); |
| | 110 | + mempool_free(si, mddev->serial_info_pool); |
| | 111 | + found = 1; |
| | 112 | + break; |
| | 113 | + } |
| | 114 | + } |
| | 115 | + if (!found) |
| | 116 | + WARN(1, "The write IO is not recorded for serialization\n"); |
| | 117 | + spin_unlock_irqrestore(&serial->serial_lock, flags); |
| | 118 | + wake_up(&serial->serial_io_wait); |
| | 119 | +} |
| 85 | 120 | |
| 86 | 121 | /* |
| 87 | 122 | * for resync bio, r1bio pointer can be retrieved from the per-bio |
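The `INTERVAL_TREE_DEFINE()` macro added above comes from `<linux/interval_tree_generic.h>` and expands into a small augmented-rbtree API over the closed interval `[start, last]`, with each helper named after the given prefix. A sketch of the helpers the `raid1_rb` instantiation provides (signatures paraphrased from the generic header; bodies omitted):

```c
/* Sketch of what INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t,
 * _subtree_last, START, LAST, static inline, raid1_rb) generates. */
static inline void
raid1_rb_insert(struct serial_info *node, struct rb_root_cached *root);

static inline void
raid1_rb_remove(struct serial_info *node, struct rb_root_cached *root);

/* First entry whose [start, last] overlaps the closed range [lo, hi]. */
static inline struct serial_info *
raid1_rb_iter_first(struct rb_root_cached *root, sector_t lo, sector_t hi);

/* Next overlapping entry after 'node', in ascending start order. */
static inline struct serial_info *
raid1_rb_iter_next(struct serial_info *node, sector_t lo, sector_t hi);
```

`check_and_add_serial()` uses `raid1_rb_iter_first()` as a pure overlap test, and `remove_serial()` walks the overlapping entries to find the exact `[lo, hi]` record to free.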
| .. | .. | |
| 99 | 134 | |
| 100 | 135 | /* allocate a r1bio with room for raid_disks entries in the bios array */ |
| 101 | 136 | return kzalloc(size, gfp_flags); |
| 102 | | -} |
| 103 | | - |
| 104 | | -static void r1bio_pool_free(void *r1_bio, void *data) |
| 105 | | -{ |
| 106 | | - kfree(r1_bio); |
| 107 | 137 | } |
| 108 | 138 | |
| 109 | 139 | #define RESYNC_DEPTH 32 |
| .. | .. | |
| 181 | 211 | kfree(rps); |
| 182 | 212 | |
| 183 | 213 | out_free_r1bio: |
| 184 | | - r1bio_pool_free(r1_bio, data); |
| | 214 | + rbio_pool_free(r1_bio, data); |
| 185 | 215 | return NULL; |
| 186 | 216 | } |
| 187 | 217 | |
| .. | .. | |
| 201 | 231 | /* resync pages array stored in the 1st bio's .bi_private */ |
| 202 | 232 | kfree(rp); |
| 203 | 233 | |
| 204 | | - r1bio_pool_free(r1bio, data); |
| | 234 | + rbio_pool_free(r1bio, data); |
| 205 | 235 | } |
| 206 | 236 | |
| 207 | 237 | static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio) |
| .. | .. | |
| 266 | 296 | static void call_bio_endio(struct r1bio *r1_bio) |
| 267 | 297 | { |
| 268 | 298 | struct bio *bio = r1_bio->master_bio; |
| 269 | | - struct r1conf *conf = r1_bio->mddev->private; |
| 270 | 299 | |
| 271 | 300 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) |
| 272 | 301 | bio->bi_status = BLK_STS_IOERR; |
| 273 | 302 | |
| 274 | 303 | bio_endio(bio); |
| 275 | | - /* |
| 276 | | - * Wake up any possible resync thread that waits for the device |
| 277 | | - * to go idle. |
| 278 | | - */ |
| 279 | | - allow_barrier(conf, r1_bio->sector); |
| 280 | 304 | } |
| 281 | 305 | |
| 282 | 306 | static void raid_end_bio_io(struct r1bio *r1_bio) |
| 283 | 307 | { |
| 284 | 308 | struct bio *bio = r1_bio->master_bio; |
| | 309 | + struct r1conf *conf = r1_bio->mddev->private; |
| 285 | 310 | |
| 286 | 311 | /* if nobody has done the final endio yet, do it now */ |
| 287 | 312 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { |
| .. | .. | |
| 292 | 317 | |
| 293 | 318 | call_bio_endio(r1_bio); |
| 294 | 319 | } |
| | 320 | + /* |
| | 321 | + * Wake up any possible resync thread that waits for the device |
| | 322 | + * to go idle. All I/Os, even write-behind writes, are done. |
| | 323 | + */ |
| | 324 | + allow_barrier(conf, r1_bio->sector); |
| | 325 | + |
| 295 | 326 | free_r1bio(r1_bio); |
| 296 | 327 | } |
| 297 | 328 | |
| .. | .. | |
| 417 | 448 | int mirror = find_bio_disk(r1_bio, bio); |
| 418 | 449 | struct md_rdev *rdev = conf->mirrors[mirror].rdev; |
| 419 | 450 | bool discard_error; |
| | 451 | + sector_t lo = r1_bio->sector; |
| | 452 | + sector_t hi = r1_bio->sector + r1_bio->sectors; |
| 420 | 453 | |
| 421 | 454 | discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; |
| 422 | 455 | |
| .. | .. | |
| 439 | 472 | /* |
| 440 | 473 | * When the device is faulty, it is not necessary to |
| 441 | 474 | * handle write error. |
| 442 | | - * For failfast, this is the only remaining device, |
| 443 | | - * We need to retry the write without FailFast. |
| 444 | 475 | */ |
| 445 | 476 | if (!test_bit(Faulty, &rdev->flags)) |
| 446 | 477 | set_bit(R1BIO_WriteError, &r1_bio->state); |
| .. | .. | |
| 488 | 519 | } |
| 489 | 520 | |
| 490 | 521 | if (behind) { |
| | 522 | + if (test_bit(CollisionCheck, &rdev->flags)) |
| | 523 | + remove_serial(rdev, lo, hi); |
| 491 | 524 | if (test_bit(WriteMostly, &rdev->flags)) |
| 492 | 525 | atomic_dec(&r1_bio->behind_remaining); |
| 493 | 526 | |
| .. | .. | |
| 510 | 543 | call_bio_endio(r1_bio); |
| 511 | 544 | } |
| 512 | 545 | } |
| 513 | | -} |
| | 546 | + } else if (rdev->mddev->serialize_policy) |
| | 547 | + remove_serial(rdev, lo, hi); |
| 514 | 548 | if (r1_bio->bios[mirror] == NULL) |
| 515 | 549 | rdev_dec_pending(rdev, conf->mddev); |
| 516 | 550 | |
| .. | .. | |
| 752 | 786 | return best_disk; |
| 753 | 787 | } |
| 754 | 788 | |
| 755 | | -static int raid1_congested(struct mddev *mddev, int bits) |
| 756 | | -{ |
| 757 | | - struct r1conf *conf = mddev->private; |
| 758 | | - int i, ret = 0; |
| 759 | | - |
| 760 | | - if ((bits & (1 << WB_async_congested)) && |
| 761 | | - conf->pending_count >= max_queued_requests) |
| 762 | | - return 1; |
| 763 | | - |
| 764 | | - rcu_read_lock(); |
| 765 | | - for (i = 0; i < conf->raid_disks * 2; i++) { |
| 766 | | - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); |
| 767 | | - if (rdev && !test_bit(Faulty, &rdev->flags)) { |
| 768 | | - struct request_queue *q = bdev_get_queue(rdev->bdev); |
| 769 | | - |
| 770 | | - BUG_ON(!q); |
| 771 | | - |
| 772 | | - /* Note the '\|\| 1' - when read_balance prefers |
| 773 | | - * non-congested targets, it can be removed |
| 774 | | - */ |
| 775 | | - if ((bits & (1 << WB_async_congested)) \|\| 1) |
| 776 | | - ret \|= bdi_congested(q->backing_dev_info, bits); |
| 777 | | - else |
| 778 | | - ret &= bdi_congested(q->backing_dev_info, bits); |
| 779 | | - } |
| 780 | | - } |
| 781 | | - rcu_read_unlock(); |
| 782 | | - return ret; |
| 783 | | -} |
| 784 | | - |
| 785 | 789 | static void flush_bio_list(struct r1conf *conf, struct bio *bio) |
| 786 | 790 | { |
| 787 | 791 | /* flush any pending bitmap writes to disk before proceeding w/ I/O */ |
| .. | .. | |
| 800 | 804 | /* Just ignore it */ |
| 801 | 805 | bio_endio(bio); |
| 802 | 806 | else |
| 803 | | - generic_make_request(bio); |
| | 807 | + submit_bio_noacct(bio); |
| 804 | 808 | bio = next; |
| | 809 | + cond_resched(); |
| 805 | 810 | } |
| 806 | 811 | } |
| 807 | 812 | |
| .. | .. | |
| 857 | 862 | * background IO calls must call raise_barrier. Once that returns |
| 858 | 863 | * there is no normal IO happening. It must arrange to call |
| 859 | 864 | * lower_barrier when the particular background IO completes. |
| | 865 | + * |
| | 866 | + * If resync/recovery is interrupted, returns -EINTR; |
| | 867 | + * Otherwise, returns 0. |
| 860 | 868 | */ |
| 861 | | -static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr) |
| | 869 | +static int raise_barrier(struct r1conf *conf, sector_t sector_nr) |
| 862 | 870 | { |
| 863 | 871 | int idx = sector_to_idx(sector_nr); |
| 864 | 872 | |
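With `raise_barrier()` now returning an `int`, the resync path can bail out cleanly when the barrier wait is interrupted. A hypothetical call-site sketch (the actual caller, `raid1_sync_request()`, is outside this hunk, so the exact code there may differ):

```c
/* Hypothetical call-site sketch, assuming the caller treats a failed
 * raise_barrier() as "resync interrupted, no sectors done". */
if (raise_barrier(conf, sector_nr))
	return 0;		/* -EINTR: give up this resync window */
/* barrier raised: no normal I/O is in flight in this sector bucket */
```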
| .. | .. | |
| 1274 | 1282 | struct bio *split = bio_split(bio, max_sectors, |
| 1275 | 1283 | gfp, &conf->bio_split); |
| 1276 | 1284 | bio_chain(split, bio); |
| 1277 | | - generic_make_request(bio); |
| | 1285 | + submit_bio_noacct(bio); |
| 1278 | 1286 | bio = split; |
| 1279 | 1287 | r1_bio->master_bio = bio; |
| 1280 | 1288 | r1_bio->sectors = max_sectors; |
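For context, this hunk only renames the submission call; the surrounding split idiom is unchanged: `bio_split()` carves the first `max_sectors` off the front, the tail is chained and resubmitted to the block layer (which routes it back through md as a fresh request), and processing continues on the front piece. A commented sketch of the same calls (annotations are mine):

```c
/* Annotated sketch of the split-and-resubmit idiom used above. */
struct bio *split = bio_split(bio, max_sectors, gfp, &conf->bio_split);

bio_chain(split, bio);		/* 'bio' completes only after 'split' does */
submit_bio_noacct(bio);		/* resubmit the tail; md will see it again */
bio = split;			/* keep handling the front max_sectors here */
r1_bio->master_bio = bio;
r1_bio->sectors = max_sectors;
```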
| .. | .. | |
| 1300 | 1308 | trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, |
| 1301 | 1309 | disk_devt(mddev->gendisk), r1_bio->sector); |
| 1302 | 1310 | |
| 1303 | | - generic_make_request(read_bio); |
| | 1311 | + submit_bio_noacct(read_bio); |
| 1304 | 1312 | } |
| 1305 | 1313 | |
| 1306 | 1314 | static void raid1_write_request(struct mddev *mddev, struct bio *bio, |
| .. | .. | |
| 1445 | 1453 | struct bio *split = bio_split(bio, max_sectors, |
| 1446 | 1454 | GFP_NOIO, &conf->bio_split); |
| 1447 | 1455 | bio_chain(split, bio); |
| 1448 | | - generic_make_request(bio); |
| | 1456 | + submit_bio_noacct(bio); |
| 1449 | 1457 | bio = split; |
| 1450 | 1458 | r1_bio->master_bio = bio; |
| 1451 | 1459 | r1_bio->sectors = max_sectors; |
| .. | .. | |
| 1458 | 1466 | |
| 1459 | 1467 | for (i = 0; i < disks; i++) { |
| 1460 | 1468 | struct bio *mbio = NULL; |
| | 1469 | + struct md_rdev *rdev = conf->mirrors[i].rdev; |
| 1461 | 1470 | if (!r1_bio->bios[i]) |
| 1462 | 1471 | continue; |
| 1463 | | - |
| 1464 | 1472 | |
| 1465 | 1473 | if (first_clone) { |
| 1466 | 1474 | /* do behind I/O ? |
| .. | .. | |
| 1486 | 1494 | mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); |
| 1487 | 1495 | |
| 1488 | 1496 | if (r1_bio->behind_master_bio) { |
| 1489 | | - if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) |
| | 1497 | + if (test_bit(CollisionCheck, &rdev->flags)) |
| | 1498 | + wait_for_serialization(rdev, r1_bio); |
| | 1499 | + if (test_bit(WriteMostly, &rdev->flags)) |
| 1490 | 1500 | atomic_inc(&r1_bio->behind_remaining); |
| 1491 | | - } |
| | 1501 | + } else if (mddev->serialize_policy) |
| | 1502 | + wait_for_serialization(rdev, r1_bio); |
| 1492 | 1503 | |
| 1493 | 1504 | r1_bio->bios[i] = mbio; |
| 1494 | 1505 | |
| .. | .. | |
| 1588 | 1599 | |
| 1589 | 1600 | /* |
| 1590 | 1601 | * If it is not operational, then we have already marked it as dead |
| 1591 | | - * else if it is the last working disks, ignore the error, let the |
| 1592 | | - * next level up know. |
| | 1602 | + * else if it is the last working disks with "fail_last_dev == false", |
| | 1603 | + * ignore the error, let the next level up know. |
| 1593 | 1604 | * else mark the drive as failed |
| 1594 | 1605 | */ |
| 1595 | 1606 | spin_lock_irqsave(&conf->device_lock, flags); |
| 1596 | | - if (test_bit(In_sync, &rdev->flags) |
| | 1607 | + if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev |
| 1597 | 1608 | && (conf->raid_disks - mddev->degraded) == 1) { |
| 1598 | 1609 | /* |
| 1599 | 1610 | * Don't fail the drive, act as though we were just a |
| .. | .. | |
| 1606 | 1617 | return; |
| 1607 | 1618 | } |
| 1608 | 1619 | set_bit(Blocked, &rdev->flags); |
| 1609 | | - if (test_and_clear_bit(In_sync, &rdev->flags)) { |
| | 1620 | + if (test_and_clear_bit(In_sync, &rdev->flags)) |
| 1610 | 1621 | mddev->degraded++; |
| 1611 | | - set_bit(Faulty, &rdev->flags); |
| 1612 | | - } else |
| 1613 | | - set_bit(Faulty, &rdev->flags); |
| | 1622 | + set_bit(Faulty, &rdev->flags); |
| 1614 | 1623 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 1615 | 1624 | /* |
| 1616 | 1625 | * if recovery is running, make sure it aborts. |
| .. | .. | |
| 1742 | 1751 | first = last = rdev->saved_raid_disk; |
| 1743 | 1752 | |
| 1744 | 1753 | for (mirror = first; mirror <= last; mirror++) { |
| 1745 | | - p = conf->mirrors+mirror; |
| | 1754 | + p = conf->mirrors + mirror; |
| 1746 | 1755 | if (!p->rdev) { |
| 1747 | | - |
| 1748 | 1756 | if (mddev->gendisk) |
| 1749 | 1757 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 1750 | 1758 | rdev->data_offset << 9); |
| .. | .. | |
| 1880 | 1888 | } while (sectors_to_go > 0); |
| 1881 | 1889 | } |
| 1882 | 1890 | |
| | 1891 | +static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate) |
| | 1892 | +{ |
| | 1893 | + if (atomic_dec_and_test(&r1_bio->remaining)) { |
| | 1894 | + struct mddev *mddev = r1_bio->mddev; |
| | 1895 | + int s = r1_bio->sectors; |
| | 1896 | + |
| | 1897 | + if (test_bit(R1BIO_MadeGood, &r1_bio->state) \|\| |
| | 1898 | + test_bit(R1BIO_WriteError, &r1_bio->state)) |
| | 1899 | + reschedule_retry(r1_bio); |
| | 1900 | + else { |
| | 1901 | + put_buf(r1_bio); |
| | 1902 | + md_done_sync(mddev, s, uptodate); |
| | 1903 | + } |
| | 1904 | + } |
| | 1905 | +} |
| | 1906 | + |
| 1883 | 1907 | static void end_sync_write(struct bio *bio) |
| 1884 | 1908 | { |
| 1885 | 1909 | int uptodate = !bio->bi_status; |
| .. | .. | |
| 1906 | 1930 | ) |
| 1907 | 1931 | set_bit(R1BIO_MadeGood, &r1_bio->state); |
| 1908 | 1932 | |
| 1909 | | - if (atomic_dec_and_test(&r1_bio->remaining)) { |
| 1910 | | - int s = r1_bio->sectors; |
| 1911 | | - if (test_bit(R1BIO_MadeGood, &r1_bio->state) \|\| |
| 1912 | | - test_bit(R1BIO_WriteError, &r1_bio->state)) |
| 1913 | | - reschedule_retry(r1_bio); |
| 1914 | | - else { |
| 1915 | | - put_buf(r1_bio); |
| 1916 | | - md_done_sync(mddev, s, uptodate); |
| 1917 | | - } |
| 1918 | | - } |
| | 1933 | + put_sync_write_buf(r1_bio, uptodate); |
| 1919 | 1934 | } |
| 1920 | 1935 | |
| 1921 | 1936 | static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector, |
| .. | .. | |
| 2115 | 2130 | } |
| 2116 | 2131 | r1_bio->read_disk = primary; |
| 2117 | 2132 | for (i = 0; i < conf->raid_disks * 2; i++) { |
| 2118 | | - int j; |
| | 2133 | + int j = 0; |
| 2119 | 2134 | struct bio *pbio = r1_bio->bios[primary]; |
| 2120 | 2135 | struct bio *sbio = r1_bio->bios[i]; |
| 2121 | 2136 | blk_status_t status = sbio->bi_status; |
| .. | .. | |
| 2123 | 2138 | struct page **spages = get_resync_pages(sbio)->pages; |
| 2124 | 2139 | struct bio_vec *bi; |
| 2125 | 2140 | int page_len[RESYNC_PAGES] = { 0 }; |
| | 2141 | + struct bvec_iter_all iter_all; |
| 2126 | 2142 | |
| 2127 | 2143 | if (sbio->bi_end_io != end_sync_read) |
| 2128 | 2144 | continue; |
| 2129 | 2145 | /* Now we can 'fixup' the error value */ |
| 2130 | 2146 | sbio->bi_status = 0; |
| 2131 | 2147 | |
| 2132 | | - bio_for_each_segment_all(bi, sbio, j) |
| 2133 | | - page_len[j] = bi->bv_len; |
| | 2148 | + bio_for_each_segment_all(bi, sbio, iter_all) |
| | 2149 | + page_len[j++] = bi->bv_len; |
| 2134 | 2150 | |
| 2135 | 2151 | if (!status) { |
| 2136 | 2152 | for (j = vcnt; j-- ; ) { |
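This hunk tracks the `bio_for_each_segment_all()` API change: the macro now takes a `struct bvec_iter_all` instead of an integer index, so code that still needs an index keeps its own counter. The resulting idiom, consolidated from the lines above:

```c
struct bio_vec *bi;
struct bvec_iter_all iter_all;	/* replaces the old integer iterator arg */
int j = 0;

bio_for_each_segment_all(bi, sbio, iter_all)
	page_len[j++] = bi->bv_len;	/* caller maintains its own index */
```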
| .. | .. | |
| 2194 | 2210 | atomic_inc(&r1_bio->remaining); |
| 2195 | 2211 | md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio)); |
| 2196 | 2212 | |
| 2197 | | - generic_make_request(wbio); |
| | 2213 | + submit_bio_noacct(wbio); |
| 2198 | 2214 | } |
| 2199 | 2215 | |
| 2200 | | - if (atomic_dec_and_test(&r1_bio->remaining)) { |
| 2201 | | - /* if we're here, all write(s) have completed, so clean up */ |
| 2202 | | - int s = r1_bio->sectors; |
| 2203 | | - if (test_bit(R1BIO_MadeGood, &r1_bio->state) \|\| |
| 2204 | | - test_bit(R1BIO_WriteError, &r1_bio->state)) |
| 2205 | | - reschedule_retry(r1_bio); |
| 2206 | | - else { |
| 2207 | | - put_buf(r1_bio); |
| 2208 | | - md_done_sync(mddev, s, 1); |
| 2209 | | - } |
| 2210 | | - } |
| | 2216 | + put_sync_write_buf(r1_bio, 1); |
| 2211 | 2217 | } |
| 2212 | 2218 | |
| 2213 | 2219 | /* |
| .. | .. | |
| 2890 | 2896 | md_sync_acct_bio(bio, nr_sectors); |
| 2891 | 2897 | if (read_targets == 1) |
| 2892 | 2898 | bio->bi_opf &= ~MD_FAILFAST; |
| 2893 | | - generic_make_request(bio); |
| | 2899 | + submit_bio_noacct(bio); |
| 2894 | 2900 | } |
| 2895 | 2901 | } |
| 2896 | 2902 | } else { |
| .. | .. | |
| 2899 | 2905 | md_sync_acct_bio(bio, nr_sectors); |
| 2900 | 2906 | if (read_targets == 1) |
| 2901 | 2907 | bio->bi_opf &= ~MD_FAILFAST; |
| 2902 | | - generic_make_request(bio); |
| 2903 | | - |
| | 2908 | + submit_bio_noacct(bio); |
| 2904 | 2909 | } |
| 2905 | 2910 | return nr_sectors; |
| 2906 | 2911 | } |
| .. | .. | |
| 2959 | 2964 | if (!conf->poolinfo) |
| 2960 | 2965 | goto abort; |
| 2961 | 2966 | conf->poolinfo->raid_disks = mddev->raid_disks * 2; |
| 2962 | | - err = mempool_init(&conf->r1bio_pool, NR_RAID1_BIOS, r1bio_pool_alloc, |
| 2963 | | - r1bio_pool_free, conf->poolinfo); |
| | 2967 | + err = mempool_init(&conf->r1bio_pool, NR_RAID_BIOS, r1bio_pool_alloc, |
| | 2968 | + rbio_pool_free, conf->poolinfo); |
| 2964 | 2969 | if (err) |
| 2965 | 2970 | goto abort; |
| 2966 | 2971 | |
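`NR_RAID_BIOS` and `rbio_pool_free()` replace the file-local `NR_RAID1_BIOS`/`r1bio_pool_free()` deleted above; they live in `raid1-10.c`, which this file `#include`s and shares with raid10. As a reminder of the mempool contract being relied on, a sketch built from the calls in this file (`mempool_init()`/`mempool_alloc()`/`mempool_free()` are the standard `<linux/mempool.h>` API; this is a fragment, not new code):

```c
/* Reserve NR_RAID_BIOS r1bio allocations so writes keep making progress
 * under memory pressure; conf->poolinfo is passed to both callbacks. */
err = mempool_init(&conf->r1bio_pool, NR_RAID_BIOS,
		   r1bio_pool_alloc, rbio_pool_free, conf->poolinfo);
if (err)
	goto abort;

/* In the I/O path: with GFP_NOIO this waits for a returned element
 * rather than failing, which is why callers don't check for NULL. */
r1_bio = mempool_alloc(&conf->r1bio_pool, GFP_NOIO);
/* ... use r1_bio, then eventually: */
mempool_free(r1_bio, &conf->r1bio_pool);
```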
| .. | .. | |
| 3101 | 3106 | } |
| 3102 | 3107 | |
| 3103 | 3108 | mddev->degraded = 0; |
| 3104 | | - for (i=0; i < conf->raid_disks; i++) |
| | 3109 | + for (i = 0; i < conf->raid_disks; i++) |
| 3105 | 3110 | if (conf->mirrors[i].rdev == NULL \|\| |
| 3106 | 3111 | !test_bit(In_sync, &conf->mirrors[i].rdev->flags) \|\| |
| 3107 | 3112 | test_bit(Faulty, &conf->mirrors[i].rdev->flags)) |
| .. | .. | |
| 3143 | 3148 | mddev->queue); |
| 3144 | 3149 | } |
| 3145 | 3150 | |
| 3146 | | - ret = md_integrity_register(mddev); |
| | 3151 | + ret = md_integrity_register(mddev); |
| 3147 | 3152 | if (ret) { |
| 3148 | 3153 | md_unregister_thread(&mddev->thread); |
| 3149 | 3154 | goto abort; |
| .. | .. | |
| 3255 | 3260 | newpoolinfo->mddev = mddev; |
| 3256 | 3261 | newpoolinfo->raid_disks = raid_disks * 2; |
| 3257 | 3262 | |
| 3258 | | - ret = mempool_init(&newpool, NR_RAID1_BIOS, r1bio_pool_alloc, |
| 3259 | | - r1bio_pool_free, newpoolinfo); |
| | 3263 | + ret = mempool_init(&newpool, NR_RAID_BIOS, r1bio_pool_alloc, |
| | 3264 | + rbio_pool_free, newpoolinfo); |
| 3260 | 3265 | if (ret) { |
| 3261 | 3266 | kfree(newpoolinfo); |
| 3262 | 3267 | return ret; |
| .. | .. | |
| 3361 | 3366 | .check_reshape = raid1_reshape, |
| 3362 | 3367 | .quiesce = raid1_quiesce, |
| 3363 | 3368 | .takeover = raid1_takeover, |
| 3364 | | - .congested = raid1_congested, |
| 3365 | 3369 | }; |
| 3366 | 3370 | |
| 3367 | 3371 | static int __init raid_init(void) |
|---|