From 23fa18eaa71266feff7ba8d83022d9e1cc83c65a Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 07:42:03 +0000
Subject: [PATCH] disable pwm7

---
 kernel/drivers/md/raid1.c | 272 ++++++++++++++++++++++++++++--------------------
 1 files changed, 140 insertions(+), 132 deletions(-)

diff --git a/kernel/drivers/md/raid1.c b/kernel/drivers/md/raid1.c
index 876d3e1..3619db7 100644
--- a/kernel/drivers/md/raid1.c
+++ b/kernel/drivers/md/raid1.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * raid1.c : Multiple Devices driver for Linux
  *
@@ -20,15 +21,6 @@
  *
  * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology:
  * - persistent bitmap code
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */

 #include <linux/slab.h>
@@ -37,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/ratelimit.h>
+#include <linux/interval_tree_generic.h>

 #include <trace/events/block.h>

@@ -50,31 +43,6 @@
 	(1L << MD_HAS_PPL) |		\
 	(1L << MD_HAS_MULTIPLE_PPLS))

-/*
- * Number of guaranteed r1bios in case of extreme VM load:
- */
-#define NR_RAID1_BIOS 256
-
-/* when we get a read error on a read-only array, we redirect to another
- * device without failing the first device, or trying to over-write to
- * correct the read error. To keep track of bad blocks on a per-bio
- * level, we store IO_BLOCKED in the appropriate 'bios' pointer
- */
-#define IO_BLOCKED ((struct bio *)1)
-/* When we successfully write to a known bad-block, we need to remove the
- * bad-block marking which must be done from process context. So we record
- * the success by setting devs[n].bio to IO_MADE_GOOD
- */
-#define IO_MADE_GOOD ((struct bio *)2)
-
-#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
-
-/* When there are this many requests queue to be written by
- * the raid1 thread, we become 'congested' to provide back-pressure
- * for writeback.
- */
-static int max_queued_requests = 1024;
-
 static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
 static void lower_barrier(struct r1conf *conf, sector_t sector_nr);

@@ -82,6 +50,73 @@
 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)

 #include "raid1-10.c"
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last,
+		     START, LAST, static inline, raid1_rb);
+
+static int check_and_add_serial(struct md_rdev *rdev, struct r1bio *r1_bio,
+				struct serial_info *si, int idx)
+{
+	unsigned long flags;
+	int ret = 0;
+	sector_t lo = r1_bio->sector;
+	sector_t hi = lo + r1_bio->sectors;
+	struct serial_in_rdev *serial = &rdev->serial[idx];
+
+	spin_lock_irqsave(&serial->serial_lock, flags);
+	/* collision happened */
+	if (raid1_rb_iter_first(&serial->serial_rb, lo, hi))
+		ret = -EBUSY;
+	else {
+		si->start = lo;
+		si->last = hi;
+		raid1_rb_insert(si, &serial->serial_rb);
+	}
+	spin_unlock_irqrestore(&serial->serial_lock, flags);
+
+	return ret;
+}
+
+static void wait_for_serialization(struct md_rdev *rdev, struct r1bio *r1_bio)
+{
+	struct mddev *mddev = rdev->mddev;
+	struct serial_info *si;
+	int idx = sector_to_idx(r1_bio->sector);
+	struct serial_in_rdev *serial = &rdev->serial[idx];
+
+	if (WARN_ON(!mddev->serial_info_pool))
+		return;
+	si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
+	wait_event(serial->serial_io_wait,
+		   check_and_add_serial(rdev, r1_bio, si, idx) == 0);
+}
+
+static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
+{
+	struct serial_info *si;
+	unsigned long flags;
+	int found = 0;
+	struct mddev *mddev = rdev->mddev;
+	int idx = sector_to_idx(lo);
+	struct serial_in_rdev *serial = &rdev->serial[idx];
+
+	spin_lock_irqsave(&serial->serial_lock, flags);
+	for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
+	     si; si = raid1_rb_iter_next(si, lo, hi)) {
+		if (si->start == lo && si->last == hi) {
+			raid1_rb_remove(si, &serial->serial_rb);
+			mempool_free(si, mddev->serial_info_pool);
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		WARN(1, "The write IO is not recorded for serialization\n");
+	spin_unlock_irqrestore(&serial->serial_lock, flags);
+	wake_up(&serial->serial_io_wait);
+}

 /*
  * for resync bio, r1bio pointer can be retrieved from the per-bio
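The hunk above adds the machinery behind md's write serialization (the serialize_policy / CollisionCheck paths used later in this patch): every in-flight write range is recorded in a per-rdev interval tree, a new write whose range overlaps a recorded one gets -EBUSY from check_and_add_serial() and sleeps in wait_for_serialization() until remove_serial() drops the finished range and wakes the waitqueue. The same collision-check idea in a self-contained userspace C sketch, with an array instead of the interval tree and no locking; all names below are illustrative, not part of md:

#include <stdio.h>

#define NR_SLOTS 16

/* One slot per in-flight write; [lo, hi) is the recorded range. */
static struct { unsigned long lo, hi; int used; } inflight[NR_SLOTS];

/* Record [lo, hi); return -1 ("-EBUSY") if it overlaps a recorded range. */
static int check_and_add(unsigned long lo, unsigned long hi)
{
	int i, slot = -1;

	for (i = 0; i < NR_SLOTS; i++) {
		if (!inflight[i].used) {
			slot = i;
			continue;
		}
		if (lo < inflight[i].hi && inflight[i].lo < hi)
			return -1;	/* collision: caller must wait */
	}
	if (slot < 0)
		return -1;		/* table full: caller must wait too */
	inflight[slot].lo = lo;
	inflight[slot].hi = hi;
	inflight[slot].used = 1;
	return 0;
}

/* Forget [lo, hi) once the write completes (remove_serial() analogue). */
static void remove_range(unsigned long lo, unsigned long hi)
{
	int i;

	for (i = 0; i < NR_SLOTS; i++)
		if (inflight[i].used && inflight[i].lo == lo && inflight[i].hi == hi)
			inflight[i].used = 0;
}

int main(void)
{
	printf("%d\n", check_and_add(0, 8));	/*  0: recorded            */
	printf("%d\n", check_and_add(4, 12));	/* -1: overlaps [0, 8)     */
	remove_range(0, 8);
	printf("%d\n", check_and_add(4, 12));	/*  0: range is free again */
	return 0;
}

In the kernel version the table is the raid1_rb interval tree, the lock is serial->serial_lock, and "wait" means wait_event() on serial->serial_io_wait, so overlapping writers sleep instead of spinning.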
@@ -99,11 +134,6 @@
 	/* allocate a r1bio with room for raid_disks entries in the bios array */
 	return kzalloc(size, gfp_flags);
-}
-
-static void r1bio_pool_free(void *r1_bio, void *data)
-{
-	kfree(r1_bio);
 }

 #define RESYNC_DEPTH 32
@@ -181,7 +211,7 @@
 	kfree(rps);

 out_free_r1bio:
-	r1bio_pool_free(r1_bio, data);
+	rbio_pool_free(r1_bio, data);
 	return NULL;
 }

@@ -201,7 +231,7 @@
 	/* resync pages array stored in the 1st bio's .bi_private */
 	kfree(rp);

-	r1bio_pool_free(r1bio, data);
+	rbio_pool_free(r1bio, data);
 }

 static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
@@ -266,22 +296,17 @@
 static void call_bio_endio(struct r1bio *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
-	struct r1conf *conf = r1_bio->mddev->private;

 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
 		bio->bi_status = BLK_STS_IOERR;

 	bio_endio(bio);
-	/*
-	 * Wake up any possible resync thread that waits for the device
-	 * to go idle.
-	 */
-	allow_barrier(conf, r1_bio->sector);
 }

 static void raid_end_bio_io(struct r1bio *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
+	struct r1conf *conf = r1_bio->mddev->private;

 	/* if nobody has done the final endio yet, do it now */
 	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@@ -292,6 +317,12 @@
 		call_bio_endio(r1_bio);
 	}

+	/*
+	 * Wake up any possible resync thread that waits for the device
+	 * to go idle. All I/Os, even write-behind writes, are done.
+	 */
+	allow_barrier(conf, r1_bio->sector);
+
 	free_r1bio(r1_bio);
 }

@@ -417,6 +448,8 @@
 	int mirror = find_bio_disk(r1_bio, bio);
 	struct md_rdev *rdev = conf->mirrors[mirror].rdev;
 	bool discard_error;
+	sector_t lo = r1_bio->sector;
+	sector_t hi = r1_bio->sector + r1_bio->sectors;

 	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;

@@ -439,8 +472,6 @@
 		/*
 		 * When the device is faulty, it is not necessary to
 		 * handle write error.
-		 * For failfast, this is the only remaining device,
-		 * We need to retry the write without FailFast.
 		 */
 		if (!test_bit(Faulty, &rdev->flags))
 			set_bit(R1BIO_WriteError, &r1_bio->state);
@@ -488,6 +519,8 @@
 	}

 	if (behind) {
+		if (test_bit(CollisionCheck, &rdev->flags))
+			remove_serial(rdev, lo, hi);
 		if (test_bit(WriteMostly, &rdev->flags))
 			atomic_dec(&r1_bio->behind_remaining);

@@ -510,7 +543,8 @@
 				call_bio_endio(r1_bio);
 			}
 		}
-	}
+	} else if (rdev->mddev->serialize_policy)
+		remove_serial(rdev, lo, hi);
 	if (r1_bio->bios[mirror] == NULL)
 		rdev_dec_pending(rdev, conf->mddev);

@@ -752,36 +786,6 @@
 	return best_disk;
 }

-static int raid1_congested(struct mddev *mddev, int bits)
-{
-	struct r1conf *conf = mddev->private;
-	int i, ret = 0;
-
-	if ((bits & (1 << WB_async_congested)) &&
-	    conf->pending_count >= max_queued_requests)
-		return 1;
-
-	rcu_read_lock();
-	for (i = 0; i < conf->raid_disks * 2; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-			BUG_ON(!q);
-
-			/* Note the '|| 1' - when read_balance prefers
-			 * non-congested targets, it can be removed
-			 */
-			if ((bits & (1 << WB_async_congested)) || 1)
-				ret |= bdi_congested(q->backing_dev_info, bits);
-			else
-				ret &= bdi_congested(q->backing_dev_info, bits);
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 {
 	/* flush any pending bitmap writes to disk before proceeding w/ I/O */
@@ -800,8 +804,9 @@
 			/* Just ignore it */
 			bio_endio(bio);
 		else
-			generic_make_request(bio);
+			submit_bio_noacct(bio);
 		bio = next;
+		cond_resched();
 	}
 }

@@ -857,8 +862,11 @@
  * backgroup IO calls must call raise_barrier. Once that returns
  *    there is no normal IO happeing. It must arrange to call
  *    lower_barrier when the particular background IO completes.
+ *
+ * If resync/recovery is interrupted, returns -EINTR;
+ * Otherwise, returns 0.
  */
-static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr)
+static int raise_barrier(struct r1conf *conf, sector_t sector_nr)
 {
 	int idx = sector_to_idx(sector_nr);

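raise_barrier() now reports whether the barrier wait was interrupted instead of returning an unused sector_t. A hypothetical resync-side caller under the new contract (sketch only; the actual call sites are outside this hunk) might look like:

/* Assumed caller shape, illustrative and not part of this patch: treat an
 * interrupted barrier wait (-EINTR) as "no sectors synced" and stop early. */
static sector_t demo_sync_chunk(struct r1conf *conf, sector_t sector_nr,
				sector_t nr_sectors)
{
	if (raise_barrier(conf, sector_nr))
		return 0;

	/* ... build and submit the resync I/O for this chunk here ... */

	/* the completion path must eventually call lower_barrier() */
	return nr_sectors;
}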
@@ -1274,7 +1282,7 @@
 		struct bio *split = bio_split(bio, max_sectors,
 					      gfp, &conf->bio_split);
 		bio_chain(split, bio);
-		generic_make_request(bio);
+		submit_bio_noacct(bio);
 		bio = split;
 		r1_bio->master_bio = bio;
 		r1_bio->sectors = max_sectors;
@@ -1300,7 +1308,7 @@
 	        trace_block_bio_remap(read_bio->bi_disk->queue, read_bio,
 				disk_devt(mddev->gendisk), r1_bio->sector);

-	generic_make_request(read_bio);
+	submit_bio_noacct(read_bio);
 }

 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
@@ -1445,7 +1453,7 @@
 		struct bio *split = bio_split(bio, max_sectors,
 					      GFP_NOIO, &conf->bio_split);
 		bio_chain(split, bio);
-		generic_make_request(bio);
+		submit_bio_noacct(bio);
 		bio = split;
 		r1_bio->master_bio = bio;
 		r1_bio->sectors = max_sectors;
@@ -1458,9 +1466,9 @@

 	for (i = 0; i < disks; i++) {
 		struct bio *mbio = NULL;
+		struct md_rdev *rdev = conf->mirrors[i].rdev;
 		if (!r1_bio->bios[i])
 			continue;
-
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1486,9 +1494,12 @@
 			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);

 		if (r1_bio->behind_master_bio) {
-			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
+			if (test_bit(CollisionCheck, &rdev->flags))
+				wait_for_serialization(rdev, r1_bio);
+			if (test_bit(WriteMostly, &rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
-		}
+		} else if (mddev->serialize_policy)
+			wait_for_serialization(rdev, r1_bio);

 		r1_bio->bios[i] = mbio;
@@ -1588,12 +1599,12 @@

 	/*
 	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks, ignore the error, let the
-	 * next level up know.
+	 * else if it is the last working disks with "fail_last_dev == false",
+	 * ignore the error, let the next level up know.
 	 * else mark the drive as failed
 	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags)
+	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
 	    && (conf->raid_disks - mddev->degraded) == 1) {
 		/*
 		 * Don't fail the drive, act as though we were just a
@@ -1606,11 +1617,9 @@
 		return;
 	}
 	set_bit(Blocked, &rdev->flags);
-	if (test_and_clear_bit(In_sync, &rdev->flags)) {
+	if (test_and_clear_bit(In_sync, &rdev->flags))
 		mddev->degraded++;
-		set_bit(Faulty, &rdev->flags);
-	} else
-		set_bit(Faulty, &rdev->flags);
+	set_bit(Faulty, &rdev->flags);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	/*
 	 * if recovery is running, make sure it aborts.
@@ -1742,9 +1751,8 @@
 		first = last = rdev->saved_raid_disk;

 	for (mirror = first; mirror <= last; mirror++) {
-		p = conf->mirrors+mirror;
+		p = conf->mirrors + mirror;
 		if (!p->rdev) {
-
 			if (mddev->gendisk)
 				disk_stack_limits(mddev->gendisk, rdev->bdev,
 						  rdev->data_offset << 9);
@@ -1784,6 +1792,9 @@
 	int err = 0;
 	int number = rdev->raid_disk;
 	struct raid1_info *p = conf->mirrors + number;
+
+	if (unlikely(number >= conf->raid_disks))
+		goto abort;

 	if (rdev != p->rdev)
 		p = conf->mirrors + conf->raid_disks + number;
@@ -1880,6 +1891,22 @@
 	} while (sectors_to_go > 0);
 }

+static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate)
+{
+	if (atomic_dec_and_test(&r1_bio->remaining)) {
+		struct mddev *mddev = r1_bio->mddev;
+		int s = r1_bio->sectors;
+
+		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+		    test_bit(R1BIO_WriteError, &r1_bio->state))
+			reschedule_retry(r1_bio);
+		else {
+			put_buf(r1_bio);
+			md_done_sync(mddev, s, uptodate);
+		}
+	}
+}
+
 static void end_sync_write(struct bio *bio)
 {
 	int uptodate = !bio->bi_status;
@@ -1906,16 +1933,7 @@
 		)
 		set_bit(R1BIO_MadeGood, &r1_bio->state);

-	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		int s = r1_bio->sectors;
-		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
-		    test_bit(R1BIO_WriteError, &r1_bio->state))
-			reschedule_retry(r1_bio);
-		else {
-			put_buf(r1_bio);
-			md_done_sync(mddev, s, uptodate);
-		}
-	}
+	put_sync_write_buf(r1_bio, uptodate);
 }

 static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2115,7 +2133,7 @@
 	}
 	r1_bio->read_disk = primary;
 	for (i = 0; i < conf->raid_disks * 2; i++) {
-		int j;
+		int j = 0;
 		struct bio *pbio = r1_bio->bios[primary];
 		struct bio *sbio = r1_bio->bios[i];
 		blk_status_t status = sbio->bi_status;
@@ -2123,14 +2141,15 @@
 		struct page **spages = get_resync_pages(sbio)->pages;
 		struct bio_vec *bi;
 		int page_len[RESYNC_PAGES] = { 0 };
+		struct bvec_iter_all iter_all;

 		if (sbio->bi_end_io != end_sync_read)
 			continue;
 		/* Now we can 'fixup' the error value */
 		sbio->bi_status = 0;

-		bio_for_each_segment_all(bi, sbio, j)
-			page_len[j] = bi->bv_len;
+		bio_for_each_segment_all(bi, sbio, iter_all)
+			page_len[j++] = bi->bv_len;

 		if (!status) {
 			for (j = vcnt; j-- ; ) {
@@ -2194,20 +2213,10 @@
 		atomic_inc(&r1_bio->remaining);
 		md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));

-		generic_make_request(wbio);
+		submit_bio_noacct(wbio);
 	}

-	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		/* if we're here, all write(s) have completed, so clean up */
-		int s = r1_bio->sectors;
-		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
-		    test_bit(R1BIO_WriteError, &r1_bio->state))
-			reschedule_retry(r1_bio);
-		else {
-			put_buf(r1_bio);
-			md_done_sync(mddev, s, 1);
-		}
-	}
+	put_sync_write_buf(r1_bio, 1);
 }

 /*
@@ -2890,7 +2899,7 @@
 				md_sync_acct_bio(bio, nr_sectors);
 				if (read_targets == 1)
 					bio->bi_opf &= ~MD_FAILFAST;
-				generic_make_request(bio);
+				submit_bio_noacct(bio);
 			}
 		}
 	} else {
@@ -2899,8 +2908,7 @@
 		md_sync_acct_bio(bio, nr_sectors);
 		if (read_targets == 1)
 			bio->bi_opf &= ~MD_FAILFAST;
-		generic_make_request(bio);
-
+		submit_bio_noacct(bio);
 	}
 	return nr_sectors;
 }
@@ -2959,8 +2967,8 @@
 	if (!conf->poolinfo)
 		goto abort;
 	conf->poolinfo->raid_disks = mddev->raid_disks * 2;
-	err = mempool_init(&conf->r1bio_pool, NR_RAID1_BIOS, r1bio_pool_alloc,
-			   r1bio_pool_free, conf->poolinfo);
+	err = mempool_init(&conf->r1bio_pool, NR_RAID_BIOS, r1bio_pool_alloc,
+			   rbio_pool_free, conf->poolinfo);
 	if (err)
 		goto abort;

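Both mempool_init() calls in this file now use the shared NR_RAID_BIOS constant and the rbio_pool_free() helper, so the local NR_RAID1_BIOS/r1bio_pool_free pair goes away. For reference, the mempool callback contract they rely on, in a minimal sketch with illustrative names (not md code):

#include <linux/mempool.h>
#include <linux/slab.h>

/* mempool_alloc_t: hand out one element, honouring gfp_flags. */
static void *demo_alloc(gfp_t gfp_flags, void *pool_data)
{
	size_t size = *(size_t *)pool_data;

	return kzalloc(size, gfp_flags);
}

/* mempool_free_t: release one element previously handed out. */
static void demo_free(void *element, void *pool_data)
{
	kfree(element);
}

/* Keep at least 16 elements reserved so allocation can always progress. */
static int demo_pool_setup(mempool_t *pool, size_t *elem_size)
{
	return mempool_init(pool, 16, demo_alloc, demo_free, elem_size);
}

In the patch, r1bio_pool_alloc() plays the demo_alloc role, sizing each r1bio from the poolinfo passed as pool_data, while rbio_pool_free() is a plain kfree() of the element.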
@@ -3101,7 +3109,7 @@
 	}

 	mddev->degraded = 0;
-	for (i=0; i < conf->raid_disks; i++)
+	for (i = 0; i < conf->raid_disks; i++)
 		if (conf->mirrors[i].rdev == NULL ||
 		    !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
 		    test_bit(Faulty, &conf->mirrors[i].rdev->flags))
@@ -3110,6 +3118,7 @@
 	 * RAID1 needs at least one disk in active
 	 */
 	if (conf->raid_disks - mddev->degraded < 1) {
+		md_unregister_thread(&conf->thread);
 		ret = -EINVAL;
 		goto abort;
 	}
@@ -3143,7 +3152,7 @@
 					  mddev->queue);
 	}

-	ret = md_integrity_register(mddev);
+	ret = md_integrity_register(mddev);
 	if (ret) {
 		md_unregister_thread(&mddev->thread);
 		goto abort;
 	}
@@ -3255,8 +3264,8 @@
 	newpoolinfo->mddev = mddev;
 	newpoolinfo->raid_disks = raid_disks * 2;

-	ret = mempool_init(&newpool, NR_RAID1_BIOS, r1bio_pool_alloc,
-			   r1bio_pool_free, newpoolinfo);
+	ret = mempool_init(&newpool, NR_RAID_BIOS, r1bio_pool_alloc,
+			   rbio_pool_free, newpoolinfo);
 	if (ret) {
 		kfree(newpoolinfo);
 		return ret;
 	}
@@ -3361,7 +3370,6 @@
 	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
 	.takeover	= raid1_takeover,
-	.congested	= raid1_congested,
 };

 static int __init raid_init(void)
--
Gitblit v1.6.2