| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * raid10.c : Multiple Devices driver for Linux |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 6 | 7 | * RAID-10 support for md. |
|---|
| 7 | 8 | * |
|---|
| 8 | 9 | * Base on code in raid1.c. See raid1.c for further copyright information. |
|---|
| 9 | | - * |
|---|
| 10 | | - * |
|---|
| 11 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 12 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 13 | | - * the Free Software Foundation; either version 2, or (at your option) |
|---|
| 14 | | - * any later version. |
|---|
| 15 | | - * |
|---|
| 16 | | - * You should have received a copy of the GNU General Public License |
|---|
| 17 | | - * (for example /usr/src/linux/COPYING); if not, write to the Free |
|---|
| 18 | | - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 19 | 10 | */ |
|---|
| 20 | 11 | |
|---|
| 21 | 12 | #include <linux/slab.h> |
|---|
| .. | .. |
|---|
| 25 | 16 | #include <linux/seq_file.h> |
|---|
| 26 | 17 | #include <linux/ratelimit.h> |
|---|
| 27 | 18 | #include <linux/kthread.h> |
|---|
| 19 | +#include <linux/raid/md_p.h> |
|---|
| 28 | 20 | #include <trace/events/block.h> |
|---|
| 29 | 21 | #include "md.h" |
|---|
| 30 | 22 | #include "raid10.h" |
|---|
| .. | .. |
|---|
| 72 | 64 | * [B A] [D C] [B A] [E C D] |
|---|
| 73 | 65 | */ |
|---|
| 74 | 66 | |
|---|
| 75 | | -/* |
|---|
| 76 | | - * Number of guaranteed r10bios in case of extreme VM load: |
|---|
| 77 | | - */ |
|---|
| 78 | | -#define NR_RAID10_BIOS 256 |
|---|
| 79 | | - |
|---|
| 80 | | -/* when we get a read error on a read-only array, we redirect to another |
|---|
| 81 | | - * device without failing the first device, or trying to over-write to |
|---|
| 82 | | - * correct the read error. To keep track of bad blocks on a per-bio |
|---|
| 83 | | - * level, we store IO_BLOCKED in the appropriate 'bios' pointer |
|---|
| 84 | | - */ |
|---|
| 85 | | -#define IO_BLOCKED ((struct bio *)1) |
|---|
| 86 | | -/* When we successfully write to a known bad-block, we need to remove the |
|---|
| 87 | | - * bad-block marking which must be done from process context. So we record |
|---|
| 88 | | - * the success by setting devs[n].bio to IO_MADE_GOOD |
|---|
| 89 | | - */ |
|---|
| 90 | | -#define IO_MADE_GOOD ((struct bio *)2) |
|---|
| 91 | | - |
|---|
| 92 | | -#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) |
|---|
| 93 | | - |
|---|
| 94 | | -/* When there are this many requests queued to be written by |
|---|
| 95 | | - * the raid10 thread, we become 'congested' to provide back-pressure |
|---|
| 96 | | - * for writeback. |
|---|
| 97 | | - */ |
|---|
| 98 | | -static int max_queued_requests = 1024; |
|---|
| 99 | | - |
|---|
| 100 | 67 | static void allow_barrier(struct r10conf *conf); |
|---|
| 101 | 68 | static void lower_barrier(struct r10conf *conf); |
|---|
| 102 | 69 | static int _enough(struct r10conf *conf, int previous, int ignore); |
|---|
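
The block removed above held raid10's private copies of the emergency r10bio pool size, the IO_BLOCKED/IO_MADE_GOOD bio markers, and the max_queued_requests back-pressure limit. The later hunks that switch this file to NR_RAID_BIOS and rbio_pool_free() suggest these definitions are consolidated with the raid1 equivalents in shared md code rather than simply dropped. The marker macros use the sentinel-pointer trick: small integers cast to a pointer type so one devs[n].bio slot can carry either a real bio or a per-slot state. Below is a minimal userspace sketch of that trick; the types are stand-ins, not kernel code.

```c
/*
 * Userspace sketch of the "special pointer" markers removed above.
 * Values 0..2 in a bio slot are treated as state markers, not real
 * pointers; struct bio is only an opaque stand-in here.
 */
#include <stdio.h>

struct bio;                                   /* opaque stand-in */

#define IO_BLOCKED   ((struct bio *)1)        /* read redirected, slot unusable */
#define IO_MADE_GOOD ((struct bio *)2)        /* bad block cleared by a good write */
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

static const char *slot_state(struct bio *bio)
{
	if (!bio)
		return "unused";
	if (bio == IO_BLOCKED)
		return "blocked";
	if (bio == IO_MADE_GOOD)
		return "made-good";
	return "real bio";
}

int main(void)
{
	struct bio *slots[3] = { NULL, IO_BLOCKED, IO_MADE_GOOD };

	for (int i = 0; i < 3; i++)
		printf("slot %d: %s (special=%d)\n",
		       i, slot_state(slots[i]), BIO_SPECIAL(slots[i]));
	return 0;
}
```

Any value of 2 or below (including NULL) counts as a marker, so real bio pointers need no extra per-slot state field.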
| .. | .. |
|---|
| 129 | 96 | /* allocate a r10bio with room for raid_disks entries in the |
|---|
| 130 | 97 | * bios array */ |
|---|
| 131 | 98 | return kzalloc(size, gfp_flags); |
|---|
| 132 | | -} |
|---|
| 133 | | - |
|---|
| 134 | | -static void r10bio_pool_free(void *r10_bio, void *data) |
|---|
| 135 | | -{ |
|---|
| 136 | | - kfree(r10_bio); |
|---|
| 137 | 99 | } |
|---|
| 138 | 100 | |
|---|
| 139 | 101 | #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) |
|---|
| .. | .. |
|---|
| 241 | 203 | } |
|---|
| 242 | 204 | kfree(rps); |
|---|
| 243 | 205 | out_free_r10bio: |
|---|
| 244 | | - r10bio_pool_free(r10_bio, conf); |
|---|
| 206 | + rbio_pool_free(r10_bio, conf); |
|---|
| 245 | 207 | return NULL; |
|---|
| 246 | 208 | } |
|---|
| 247 | 209 | |
|---|
| .. | .. |
|---|
| 269 | 231 | /* resync pages array stored in the 1st bio's .bi_private */ |
|---|
| 270 | 232 | kfree(rp); |
|---|
| 271 | 233 | |
|---|
| 272 | | - r10bio_pool_free(r10bio, conf); |
|---|
| 234 | + rbio_pool_free(r10bio, conf); |
|---|
| 273 | 235 | } |
|---|
| 274 | 236 | |
|---|
| 275 | 237 | static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) |
|---|
| .. | .. |
|---|
| 503 | 465 | if (test_bit(FailFast, &rdev->flags) && |
|---|
| 504 | 466 | (bio->bi_opf & MD_FAILFAST)) { |
|---|
| 505 | 467 | md_error(rdev->mddev, rdev); |
|---|
| 506 | | - if (!test_bit(Faulty, &rdev->flags)) |
|---|
| 507 | | - /* This is the only remaining device, |
|---|
| 508 | | - * We need to retry the write without |
|---|
| 509 | | - * FailFast |
|---|
| 510 | | - */ |
|---|
| 511 | | - set_bit(R10BIO_WriteError, &r10_bio->state); |
|---|
| 512 | | - else { |
|---|
| 513 | | - r10_bio->devs[slot].bio = NULL; |
|---|
| 514 | | - to_put = bio; |
|---|
| 515 | | - dec_rdev = 1; |
|---|
| 516 | | - } |
|---|
| 517 | | - } else |
|---|
| 468 | + } |
|---|
| 469 | + |
|---|
| 470 | + /* |
|---|
| 471 | + * When the device is faulty, it is not necessary to |
|---|
| 472 | + * handle write error. |
|---|
| 473 | + */ |
|---|
| 474 | + if (!test_bit(Faulty, &rdev->flags)) |
|---|
| 518 | 475 | set_bit(R10BIO_WriteError, &r10_bio->state); |
|---|
| 476 | + else { |
|---|
| 477 | + /* Fail the request */ |
|---|
| 478 | + set_bit(R10BIO_Degraded, &r10_bio->state); |
|---|
| 479 | + r10_bio->devs[slot].bio = NULL; |
|---|
| 480 | + to_put = bio; |
|---|
| 481 | + dec_rdev = 1; |
|---|
| 482 | + } |
|---|
| 519 | 483 | } |
|---|
| 520 | 484 | } else { |
|---|
| 521 | 485 | /* |
|---|
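
The restructured error path above calls md_error() when a failfast write fails, then branches on the device's resulting Faulty state for every write error: a still-working device gets R10BIO_WriteError set so the write can be retried without MD_FAILFAST, while a Faulty device's slot is dropped and the request is failed as degraded. Since md_error() normally declines to fail the last working device, the failfast case reduces to the decision modelled below. This is a userspace illustration with made-up names, not the kernel logic itself.

```c
/*
 * Userspace model of the failfast write-error branch above.
 * Names (RETRY_WRITE, FAIL_REQUEST, md_error_model) are illustrative,
 * not kernel symbols.
 */
#include <stdbool.h>
#include <stdio.h>

enum outcome { RETRY_WRITE, FAIL_REQUEST };

/*
 * md_error() marks the device Faulty unless doing so would kill the
 * array, i.e. it is the last working device.
 */
static bool md_error_model(bool last_working_dev)
{
	return !last_working_dev;     /* device is now Faulty? */
}

static enum outcome failfast_write_error(bool last_working_dev)
{
	bool faulty = md_error_model(last_working_dev);

	if (!faulty)
		return RETRY_WRITE;   /* R10BIO_WriteError: retry without FAILFAST */
	return FAIL_REQUEST;          /* drop devs[slot].bio, degrade the r10bio */
}

int main(void)
{
	printf("last working device: %s\n",
	       failfast_write_error(true) == RETRY_WRITE ? "retry" : "fail");
	printf("other devices remain: %s\n",
	       failfast_write_error(false) == RETRY_WRITE ? "retry" : "fail");
	return 0;
}
```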
| .. | .. |
|---|
| 745 | 709 | int sectors = r10_bio->sectors; |
|---|
| 746 | 710 | int best_good_sectors; |
|---|
| 747 | 711 | sector_t new_distance, best_dist; |
|---|
| 748 | | - struct md_rdev *best_rdev, *rdev = NULL; |
|---|
| 712 | + struct md_rdev *best_dist_rdev, *best_pending_rdev, *rdev = NULL; |
|---|
| 749 | 713 | int do_balance; |
|---|
| 750 | | - int best_slot; |
|---|
| 714 | + int best_dist_slot, best_pending_slot; |
|---|
| 715 | + bool has_nonrot_disk = false; |
|---|
| 716 | + unsigned int min_pending; |
|---|
| 751 | 717 | struct geom *geo = &conf->geo; |
|---|
| 752 | 718 | |
|---|
| 753 | 719 | raid10_find_phys(conf, r10_bio); |
|---|
| 754 | 720 | rcu_read_lock(); |
|---|
| 755 | | - best_slot = -1; |
|---|
| 756 | | - best_rdev = NULL; |
|---|
| 721 | + best_dist_slot = -1; |
|---|
| 722 | + min_pending = UINT_MAX; |
|---|
| 723 | + best_dist_rdev = NULL; |
|---|
| 724 | + best_pending_rdev = NULL; |
|---|
| 757 | 725 | best_dist = MaxSector; |
|---|
| 758 | 726 | best_good_sectors = 0; |
|---|
| 759 | 727 | do_balance = 1; |
|---|
| .. | .. |
|---|
| 775 | 743 | sector_t first_bad; |
|---|
| 776 | 744 | int bad_sectors; |
|---|
| 777 | 745 | sector_t dev_sector; |
|---|
| 746 | + unsigned int pending; |
|---|
| 747 | + bool nonrot; |
|---|
| 778 | 748 | |
|---|
| 779 | 749 | if (r10_bio->devs[slot].bio == IO_BLOCKED) |
|---|
| 780 | 750 | continue; |
|---|
| .. | .. |
|---|
| 811 | 781 | first_bad - dev_sector; |
|---|
| 812 | 782 | if (good_sectors > best_good_sectors) { |
|---|
| 813 | 783 | best_good_sectors = good_sectors; |
|---|
| 814 | | - best_slot = slot; |
|---|
| 815 | | - best_rdev = rdev; |
|---|
| 784 | + best_dist_slot = slot; |
|---|
| 785 | + best_dist_rdev = rdev; |
|---|
| 816 | 786 | } |
|---|
| 817 | 787 | if (!do_balance) |
|---|
| 818 | 788 | /* Must read from here */ |
|---|
| .. | .. |
|---|
| 825 | 795 | if (!do_balance) |
|---|
| 826 | 796 | break; |
|---|
| 827 | 797 | |
|---|
| 828 | | - if (best_slot >= 0) |
|---|
| 798 | + nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); |
|---|
| 799 | + has_nonrot_disk |= nonrot; |
|---|
| 800 | + pending = atomic_read(&rdev->nr_pending); |
|---|
| 801 | + if (min_pending > pending && nonrot) { |
|---|
| 802 | + min_pending = pending; |
|---|
| 803 | + best_pending_slot = slot; |
|---|
| 804 | + best_pending_rdev = rdev; |
|---|
| 805 | + } |
|---|
| 806 | + |
|---|
| 807 | + if (best_dist_slot >= 0) |
|---|
| 829 | 808 | /* At least 2 disks to choose from so failfast is OK */ |
|---|
| 830 | 809 | set_bit(R10BIO_FailFast, &r10_bio->state); |
|---|
| 831 | 810 | /* This optimisation is debatable, and completely destroys |
|---|
| 832 | 811 | * sequential read speed for 'far copies' arrays. So only |
|---|
| 833 | 812 | * keep it for 'near' arrays, and review those later. |
|---|
| 834 | 813 | */ |
|---|
| 835 | | - if (geo->near_copies > 1 && !atomic_read(&rdev->nr_pending)) |
|---|
| 814 | + if (geo->near_copies > 1 && !pending) |
|---|
| 836 | 815 | new_distance = 0; |
|---|
| 837 | 816 | |
|---|
| 838 | 817 | /* for far > 1 always use the lowest address */ |
|---|
| .. | .. |
|---|
| 841 | 820 | else |
|---|
| 842 | 821 | new_distance = abs(r10_bio->devs[slot].addr - |
|---|
| 843 | 822 | conf->mirrors[disk].head_position); |
|---|
| 823 | + |
|---|
| 844 | 824 | if (new_distance < best_dist) { |
|---|
| 845 | 825 | best_dist = new_distance; |
|---|
| 846 | | - best_slot = slot; |
|---|
| 847 | | - best_rdev = rdev; |
|---|
| 826 | + best_dist_slot = slot; |
|---|
| 827 | + best_dist_rdev = rdev; |
|---|
| 848 | 828 | } |
|---|
| 849 | 829 | } |
|---|
| 850 | 830 | if (slot >= conf->copies) { |
|---|
| 851 | | - slot = best_slot; |
|---|
| 852 | | - rdev = best_rdev; |
|---|
| 831 | + if (has_nonrot_disk) { |
|---|
| 832 | + slot = best_pending_slot; |
|---|
| 833 | + rdev = best_pending_rdev; |
|---|
| 834 | + } else { |
|---|
| 835 | + slot = best_dist_slot; |
|---|
| 836 | + rdev = best_dist_rdev; |
|---|
| 837 | + } |
|---|
| 853 | 838 | } |
|---|
| 854 | 839 | |
|---|
| 855 | 840 | if (slot >= 0) { |
|---|
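
The read_balance() changes above track two candidates per pass: the classic closest-head-position slot (best_dist_slot) and, for non-rotational devices, the slot with the fewest in-flight requests (best_pending_slot / min_pending). If any non-rotational disk is present, the pending-based choice wins; otherwise the distance-based one is used. The sketch below is a self-contained userspace model of just that selection policy; struct mirror is a simplified stand-in for the per-slot state the kernel derives from rdev/bdev, and the bad-block and near_copies shortcuts are omitted.

```c
/*
 * Userspace sketch of the SSD-aware selection added to read_balance().
 * struct mirror is a simplified stand-in, not the kernel structure.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct mirror {
	bool nonrot;                /* blk_queue_nonrot(): SSD? */
	unsigned int nr_pending;    /* in-flight requests */
	long long head_position;    /* last serviced sector (rotational) */
};

static int read_balance_model(const struct mirror *m, int copies,
			      long long target_sector)
{
	int best_dist_slot = -1, best_pending_slot = -1;
	long long best_dist = LLONG_MAX;
	unsigned int min_pending = UINT_MAX;
	bool has_nonrot_disk = false;

	for (int slot = 0; slot < copies; slot++) {
		long long dist = llabs(target_sector - m[slot].head_position);

		has_nonrot_disk |= m[slot].nonrot;
		if (m[slot].nonrot && m[slot].nr_pending < min_pending) {
			min_pending = m[slot].nr_pending;
			best_pending_slot = slot;
		}
		if (dist < best_dist) {
			best_dist = dist;
			best_dist_slot = slot;
		}
	}
	/* Prefer the least-loaded SSD when any SSD exists. */
	return has_nonrot_disk ? best_pending_slot : best_dist_slot;
}

int main(void)
{
	struct mirror hdd_only[2] = {
		{ .nonrot = false, .nr_pending = 0, .head_position = 1000 },
		{ .nonrot = false, .nr_pending = 5, .head_position = 90 },
	};
	struct mirror mixed[2] = {
		{ .nonrot = false, .nr_pending = 0, .head_position = 100 },
		{ .nonrot = true,  .nr_pending = 3, .head_position = 0 },
	};

	printf("HDD-only array, read at 100 -> slot %d (closest head)\n",
	       read_balance_model(hdd_only, 2, 100));
	printf("mixed array,    read at 100 -> slot %d (SSD preferred)\n",
	       read_balance_model(mixed, 2, 100));
	return 0;
}
```

Head distance matters only for rotational media, so once a non-rotational mirror exists the queue depth becomes the better load signal, which is the same policy raid1's read balancing already follows.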
| .. | .. |
|---|
| 861 | 846 | *max_sectors = best_good_sectors; |
|---|
| 862 | 847 | |
|---|
| 863 | 848 | return rdev; |
|---|
| 864 | | -} |
|---|
| 865 | | - |
|---|
| 866 | | -static int raid10_congested(struct mddev *mddev, int bits) |
|---|
| 867 | | -{ |
|---|
| 868 | | - struct r10conf *conf = mddev->private; |
|---|
| 869 | | - int i, ret = 0; |
|---|
| 870 | | - |
|---|
| 871 | | - if ((bits & (1 << WB_async_congested)) && |
|---|
| 872 | | - conf->pending_count >= max_queued_requests) |
|---|
| 873 | | - return 1; |
|---|
| 874 | | - |
|---|
| 875 | | - rcu_read_lock(); |
|---|
| 876 | | - for (i = 0; |
|---|
| 877 | | - (i < conf->geo.raid_disks || i < conf->prev.raid_disks) |
|---|
| 878 | | - && ret == 0; |
|---|
| 879 | | - i++) { |
|---|
| 880 | | - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); |
|---|
| 881 | | - if (rdev && !test_bit(Faulty, &rdev->flags)) { |
|---|
| 882 | | - struct request_queue *q = bdev_get_queue(rdev->bdev); |
|---|
| 883 | | - |
|---|
| 884 | | - ret |= bdi_congested(q->backing_dev_info, bits); |
|---|
| 885 | | - } |
|---|
| 886 | | - } |
|---|
| 887 | | - rcu_read_unlock(); |
|---|
| 888 | | - return ret; |
|---|
| 889 | 849 | } |
|---|
| 890 | 850 | |
|---|
| 891 | 851 | static void flush_pending_writes(struct r10conf *conf) |
|---|
| .. | .. |
|---|
| 932 | 892 | /* Just ignore it */ |
|---|
| 933 | 893 | bio_endio(bio); |
|---|
| 934 | 894 | else |
|---|
| 935 | | - generic_make_request(bio); |
|---|
| 895 | + submit_bio_noacct(bio); |
|---|
| 936 | 896 | bio = next; |
|---|
| 937 | 897 | } |
|---|
| 938 | 898 | blk_finish_plug(&plug); |
|---|
| .. | .. |
|---|
| 995 | 955 | { |
|---|
| 996 | 956 | spin_lock_irq(&conf->resync_lock); |
|---|
| 997 | 957 | if (conf->barrier) { |
|---|
| 958 | + struct bio_list *bio_list = current->bio_list; |
|---|
| 998 | 959 | conf->nr_waiting++; |
|---|
| 999 | 960 | /* Wait for the barrier to drop. |
|---|
| 1000 | 961 | * However if there are already pending |
|---|
| .. | .. |
|---|
| 1009 | 970 | wait_event_lock_irq(conf->wait_barrier, |
|---|
| 1010 | 971 | !conf->barrier || |
|---|
| 1011 | 972 | (atomic_read(&conf->nr_pending) && |
|---|
| 1012 | | - current->bio_list && |
|---|
| 1013 | | - (!bio_list_empty(¤t->bio_list[0]) || |
|---|
| 1014 | | - !bio_list_empty(¤t->bio_list[1]))), |
|---|
| 973 | + bio_list && |
|---|
| 974 | + (!bio_list_empty(&bio_list[0]) || |
|---|
| 975 | + !bio_list_empty(&bio_list[1]))) || |
|---|
| 976 | + /* move on if recovery thread is |
|---|
| 977 | + * blocked by us |
|---|
| 978 | + */ |
|---|
| 979 | + (conf->mddev->thread->tsk == current && |
|---|
| 980 | + test_bit(MD_RECOVERY_RUNNING, |
|---|
| 981 | + &conf->mddev->recovery) && |
|---|
| 982 | + conf->nr_queued > 0), |
|---|
| 1015 | 983 | conf->resync_lock); |
|---|
| 1016 | 984 | conf->nr_waiting--; |
|---|
| 1017 | 985 | if (!conf->nr_waiting) |
|---|
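
The extra condition added to the wait above lets the md daemon thread itself slip past a raised barrier when it has queued retry work (conf->nr_queued > 0) during recovery; without it, the thread responsible for draining those requests could sleep here indefinitely. The shape of the predicate, separated from the md specifics, is sketched below in userspace with illustrative field names.

```c
/*
 * Userspace model of the barrier-wait predicate above. Field names are
 * illustrative stand-ins for the r10conf/mddev state the kernel checks.
 */
#include <stdbool.h>
#include <stdio.h>

struct state {
	bool barrier_raised;        /* conf->barrier */
	bool have_pending_io;       /* nr_pending and bios plugged on current */
	bool i_am_daemon_thread;    /* current == mddev->thread->tsk */
	bool recovery_running;      /* MD_RECOVERY_RUNNING */
	int  nr_queued;             /* requests waiting for the daemon */
};

/* Returns true when the waiter may proceed instead of sleeping. */
static bool may_proceed(const struct state *s)
{
	if (!s->barrier_raised)
		return true;
	if (s->have_pending_io)                 /* avoid plugging deadlock */
		return true;
	if (s->i_am_daemon_thread && s->recovery_running && s->nr_queued > 0)
		return true;                    /* new case: don't block raid10d */
	return false;
}

int main(void)
{
	struct state daemon_blocked = {
		.barrier_raised = true, .have_pending_io = false,
		.i_am_daemon_thread = true, .recovery_running = true,
		.nr_queued = 3,
	};

	printf("daemon with queued retries proceeds: %d\n",
	       may_proceed(&daemon_blocked));
	return 0;
}
```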
| .. | .. |
|---|
| 1117 | 1085 | /* Just ignore it */ |
|---|
| 1118 | 1086 | bio_endio(bio); |
|---|
| 1119 | 1087 | else |
|---|
| 1120 | | - generic_make_request(bio); |
|---|
| 1088 | + submit_bio_noacct(bio); |
|---|
| 1121 | 1089 | bio = next; |
|---|
| 1122 | 1090 | } |
|---|
| 1123 | 1091 | kfree(plug); |
|---|
| 1092 | +} |
|---|
| 1093 | + |
|---|
| 1094 | +/* |
|---|
| 1095 | + * 1. Register the new request and wait if the reconstruction thread has put |
|---|
| 1096 | + * up a bar for new requests. Continue immediately if no resync is active |
|---|
| 1097 | + * currently. |
|---|
| 1098 | + * 2. If IO spans the reshape position. Need to wait for reshape to pass. |
|---|
| 1099 | + */ |
|---|
| 1100 | +static void regular_request_wait(struct mddev *mddev, struct r10conf *conf, |
|---|
| 1101 | + struct bio *bio, sector_t sectors) |
|---|
| 1102 | +{ |
|---|
| 1103 | + wait_barrier(conf); |
|---|
| 1104 | + while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && |
|---|
| 1105 | + bio->bi_iter.bi_sector < conf->reshape_progress && |
|---|
| 1106 | + bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { |
|---|
| 1107 | + raid10_log(conf->mddev, "wait reshape"); |
|---|
| 1108 | + allow_barrier(conf); |
|---|
| 1109 | + wait_event(conf->wait_barrier, |
|---|
| 1110 | + conf->reshape_progress <= bio->bi_iter.bi_sector || |
|---|
| 1111 | + conf->reshape_progress >= bio->bi_iter.bi_sector + |
|---|
| 1112 | + sectors); |
|---|
| 1113 | + wait_barrier(conf); |
|---|
| 1114 | + } |
|---|
| 1124 | 1115 | } |
|---|
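
regular_request_wait() factors out the barrier/reshape wait that was previously duplicated in the read and write paths; both callers below are switched to it. Its reshape check is an interval-overlap test: the request [bi_sector, bi_sector + sectors) must not straddle conf->reshape_progress. A small userspace helper showing only that overlap test:

```c
/*
 * Userspace sketch of the reshape-overlap test used by
 * regular_request_wait(): wait only while the request interval
 * [start, start + len) straddles the reshape progress point.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool spans_reshape(uint64_t start, uint64_t len, uint64_t progress)
{
	return start < progress && start + len > progress;
}

int main(void)
{
	uint64_t progress = 1000;

	printf("req [900,1100)  spans: %d (must wait)\n",
	       spans_reshape(900, 200, progress));
	printf("req [1000,1100) spans: %d (entirely on one side)\n",
	       spans_reshape(1000, 100, progress));
	return 0;
}
```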
| 1125 | 1116 | |
|---|
| 1126 | 1117 | static void raid10_read_request(struct mddev *mddev, struct bio *bio, |
|---|
| .. | .. |
|---|
| 1131 | 1122 | const int op = bio_op(bio); |
|---|
| 1132 | 1123 | const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); |
|---|
| 1133 | 1124 | int max_sectors; |
|---|
| 1134 | | - sector_t sectors; |
|---|
| 1135 | 1125 | struct md_rdev *rdev; |
|---|
| 1136 | 1126 | char b[BDEVNAME_SIZE]; |
|---|
| 1137 | 1127 | int slot = r10_bio->read_slot; |
|---|
| .. | .. |
|---|
| 1165 | 1155 | } |
|---|
| 1166 | 1156 | rcu_read_unlock(); |
|---|
| 1167 | 1157 | } |
|---|
| 1168 | | - /* |
|---|
| 1169 | | - * Register the new request and wait if the reconstruction |
|---|
| 1170 | | - * thread has put up a bar for new requests. |
|---|
| 1171 | | - * Continue immediately if no resync is active currently. |
|---|
| 1172 | | - */ |
|---|
| 1173 | | - wait_barrier(conf); |
|---|
| 1174 | 1158 | |
|---|
| 1175 | | - sectors = r10_bio->sectors; |
|---|
| 1176 | | - while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && |
|---|
| 1177 | | - bio->bi_iter.bi_sector < conf->reshape_progress && |
|---|
| 1178 | | - bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { |
|---|
| 1179 | | - /* |
|---|
| 1180 | | - * IO spans the reshape position. Need to wait for reshape to |
|---|
| 1181 | | - * pass |
|---|
| 1182 | | - */ |
|---|
| 1183 | | - raid10_log(conf->mddev, "wait reshape"); |
|---|
| 1184 | | - allow_barrier(conf); |
|---|
| 1185 | | - wait_event(conf->wait_barrier, |
|---|
| 1186 | | - conf->reshape_progress <= bio->bi_iter.bi_sector || |
|---|
| 1187 | | - conf->reshape_progress >= bio->bi_iter.bi_sector + |
|---|
| 1188 | | - sectors); |
|---|
| 1189 | | - wait_barrier(conf); |
|---|
| 1190 | | - } |
|---|
| 1191 | | - |
|---|
| 1159 | + regular_request_wait(mddev, conf, bio, r10_bio->sectors); |
|---|
| 1192 | 1160 | rdev = read_balance(conf, r10_bio, &max_sectors); |
|---|
| 1193 | 1161 | if (!rdev) { |
|---|
| 1194 | 1162 | if (err_rdev) { |
|---|
| .. | .. |
|---|
| 1209 | 1177 | gfp, &conf->bio_split); |
|---|
| 1210 | 1178 | bio_chain(split, bio); |
|---|
| 1211 | 1179 | allow_barrier(conf); |
|---|
| 1212 | | - generic_make_request(bio); |
|---|
| 1180 | + submit_bio_noacct(bio); |
|---|
| 1213 | 1181 | wait_barrier(conf); |
|---|
| 1214 | 1182 | bio = split; |
|---|
| 1215 | 1183 | r10_bio->master_bio = bio; |
|---|
| .. | .. |
|---|
| 1236 | 1204 | trace_block_bio_remap(read_bio->bi_disk->queue, |
|---|
| 1237 | 1205 | read_bio, disk_devt(mddev->gendisk), |
|---|
| 1238 | 1206 | r10_bio->sector); |
|---|
| 1239 | | - generic_make_request(read_bio); |
|---|
| 1207 | + submit_bio_noacct(read_bio); |
|---|
| 1240 | 1208 | return; |
|---|
| 1241 | 1209 | } |
|---|
| 1242 | 1210 | |
|---|
| .. | .. |
|---|
| 1333 | 1301 | finish_wait(&conf->wait_barrier, &w); |
|---|
| 1334 | 1302 | } |
|---|
| 1335 | 1303 | |
|---|
| 1336 | | - /* |
|---|
| 1337 | | - * Register the new request and wait if the reconstruction |
|---|
| 1338 | | - * thread has put up a bar for new requests. |
|---|
| 1339 | | - * Continue immediately if no resync is active currently. |
|---|
| 1340 | | - */ |
|---|
| 1341 | | - wait_barrier(conf); |
|---|
| 1342 | | - |
|---|
| 1343 | 1304 | sectors = r10_bio->sectors; |
|---|
| 1344 | | - while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && |
|---|
| 1345 | | - bio->bi_iter.bi_sector < conf->reshape_progress && |
|---|
| 1346 | | - bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { |
|---|
| 1347 | | - /* |
|---|
| 1348 | | - * IO spans the reshape position. Need to wait for reshape to |
|---|
| 1349 | | - * pass |
|---|
| 1350 | | - */ |
|---|
| 1351 | | - raid10_log(conf->mddev, "wait reshape"); |
|---|
| 1352 | | - allow_barrier(conf); |
|---|
| 1353 | | - wait_event(conf->wait_barrier, |
|---|
| 1354 | | - conf->reshape_progress <= bio->bi_iter.bi_sector || |
|---|
| 1355 | | - conf->reshape_progress >= bio->bi_iter.bi_sector + |
|---|
| 1356 | | - sectors); |
|---|
| 1357 | | - wait_barrier(conf); |
|---|
| 1358 | | - } |
|---|
| 1359 | | - |
|---|
| 1305 | + regular_request_wait(mddev, conf, bio, sectors); |
|---|
| 1360 | 1306 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && |
|---|
| 1361 | 1307 | (mddev->reshape_backwards |
|---|
| 1362 | 1308 | ? (bio->bi_iter.bi_sector < conf->reshape_safe && |
|---|
| .. | .. |
|---|
| 1516 | 1462 | GFP_NOIO, &conf->bio_split); |
|---|
| 1517 | 1463 | bio_chain(split, bio); |
|---|
| 1518 | 1464 | allow_barrier(conf); |
|---|
| 1519 | | - generic_make_request(bio); |
|---|
| 1465 | + submit_bio_noacct(bio); |
|---|
| 1520 | 1466 | wait_barrier(conf); |
|---|
| 1521 | 1467 | bio = split; |
|---|
| 1522 | 1468 | r10_bio->master_bio = bio; |
|---|
| .. | .. |
|---|
| 1677 | 1623 | |
|---|
| 1678 | 1624 | /* |
|---|
| 1679 | 1625 | * If it is not operational, then we have already marked it as dead |
|---|
| 1680 | | - * else if it is the last working disks, ignore the error, let the |
|---|
| 1681 | | - * next level up know. |
|---|
| 1626 | + * else if it is the last working disks with "fail_last_dev == false", |
|---|
| 1627 | + * ignore the error, let the next level up know. |
|---|
| 1682 | 1628 | * else mark the drive as failed |
|---|
| 1683 | 1629 | */ |
|---|
| 1684 | 1630 | spin_lock_irqsave(&conf->device_lock, flags); |
|---|
| 1685 | | - if (test_bit(In_sync, &rdev->flags) |
|---|
| 1631 | + if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev |
|---|
| 1686 | 1632 | && !enough(conf, rdev->raid_disk)) { |
|---|
| 1687 | 1633 | /* |
|---|
| 1688 | 1634 | * Don't fail the drive, just return an IO error. |
|---|
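
raid10_error() used to refuse unconditionally to mark the last working In_sync device Faulty; the change above makes that refusal conditional on md's fail_last_dev setting, so an administrator can opt in to failing even the last device. The decision reduces to the boolean below; parameter names are illustrative, not kernel symbols.

```c
/*
 * Userspace model of the raid10_error() decision above.
 * "last_working" stands in for !enough(conf, rdev->raid_disk).
 */
#include <stdbool.h>
#include <stdio.h>

/* Return true if the device should only get an IO error, not be failed. */
static bool keep_device(bool in_sync, bool last_working, bool fail_last_dev)
{
	return in_sync && !fail_last_dev && last_working;
}

int main(void)
{
	printf("last dev, fail_last_dev=0 -> keep: %d\n",
	       keep_device(true, true, false));
	printf("last dev, fail_last_dev=1 -> keep: %d\n",
	       keep_device(true, true, true));
	return 0;
}
```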
| .. | .. |
|---|
| 1863 | 1809 | int err = 0; |
|---|
| 1864 | 1810 | int number = rdev->raid_disk; |
|---|
| 1865 | 1811 | struct md_rdev **rdevp; |
|---|
| 1866 | | - struct raid10_info *p = conf->mirrors + number; |
|---|
| 1812 | + struct raid10_info *p; |
|---|
| 1867 | 1813 | |
|---|
| 1868 | 1814 | print_conf(conf); |
|---|
| 1815 | + if (unlikely(number >= mddev->raid_disks)) |
|---|
| 1816 | + return 0; |
|---|
| 1817 | + p = conf->mirrors + number; |
|---|
| 1869 | 1818 | if (rdev == p->rdev) |
|---|
| 1870 | 1819 | rdevp = &p->rdev; |
|---|
| 1871 | 1820 | else if (rdev == p->replacement) |
|---|
| .. | .. |
|---|
| 2137 | 2086 | tbio->bi_opf |= MD_FAILFAST; |
|---|
| 2138 | 2087 | tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset; |
|---|
| 2139 | 2088 | bio_set_dev(tbio, conf->mirrors[d].rdev->bdev); |
|---|
| 2140 | | - generic_make_request(tbio); |
|---|
| 2089 | + submit_bio_noacct(tbio); |
|---|
| 2141 | 2090 | } |
|---|
| 2142 | 2091 | |
|---|
| 2143 | 2092 | /* Now write out to any replacement devices |
|---|
| .. | .. |
|---|
| 2156 | 2105 | atomic_inc(&r10_bio->remaining); |
|---|
| 2157 | 2106 | md_sync_acct(conf->mirrors[d].replacement->bdev, |
|---|
| 2158 | 2107 | bio_sectors(tbio)); |
|---|
| 2159 | | - generic_make_request(tbio); |
|---|
| 2108 | + submit_bio_noacct(tbio); |
|---|
| 2160 | 2109 | } |
|---|
| 2161 | 2110 | |
|---|
| 2162 | 2111 | done: |
|---|
| .. | .. |
|---|
| 2279 | 2228 | wbio = r10_bio->devs[1].bio; |
|---|
| 2280 | 2229 | wbio2 = r10_bio->devs[1].repl_bio; |
|---|
| 2281 | 2230 | /* Need to test wbio2->bi_end_io before we call |
|---|
| 2282 | | - * generic_make_request as if the former is NULL, |
|---|
| 2231 | + * submit_bio_noacct as if the former is NULL, |
|---|
| 2283 | 2232 | * the latter is free to free wbio2. |
|---|
| 2284 | 2233 | */ |
|---|
| 2285 | 2234 | if (wbio2 && !wbio2->bi_end_io) |
|---|
| .. | .. |
|---|
| 2287 | 2236 | if (wbio->bi_end_io) { |
|---|
| 2288 | 2237 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); |
|---|
| 2289 | 2238 | md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); |
|---|
| 2290 | | - generic_make_request(wbio); |
|---|
| 2239 | + submit_bio_noacct(wbio); |
|---|
| 2291 | 2240 | } |
|---|
| 2292 | 2241 | if (wbio2) { |
|---|
| 2293 | 2242 | atomic_inc(&conf->mirrors[d].replacement->nr_pending); |
|---|
| 2294 | 2243 | md_sync_acct(conf->mirrors[d].replacement->bdev, |
|---|
| 2295 | 2244 | bio_sectors(wbio2)); |
|---|
| 2296 | | - generic_make_request(wbio2); |
|---|
| 2245 | + submit_bio_noacct(wbio2); |
|---|
| 2297 | 2246 | } |
|---|
| 2298 | 2247 | } |
|---|
| 2299 | 2248 | |
|---|
| .. | .. |
|---|
| 2927 | 2876 | * a number of r10_bio structures, one for each out-of-sync device. |
|---|
| 2928 | 2877 | * As we setup these structures, we collect all bio's together into a list |
|---|
| 2929 | 2878 | * which we then process collectively to add pages, and then process again |
|---|
| 2930 | | - * to pass to generic_make_request. |
|---|
| 2879 | + * to pass to submit_bio_noacct. |
|---|
| 2931 | 2880 | * |
|---|
| 2932 | 2881 | * The r10_bio structures are linked using a borrowed master_bio pointer. |
|---|
| 2933 | 2882 | * This link is counted in ->remaining. When the r10_bio that points to NULL |
|---|
| .. | .. |
|---|
| 3084 | 3033 | sector_t sect; |
|---|
| 3085 | 3034 | int must_sync; |
|---|
| 3086 | 3035 | int any_working; |
|---|
| 3036 | + int need_recover = 0; |
|---|
| 3037 | + int need_replace = 0; |
|---|
| 3087 | 3038 | struct raid10_info *mirror = &conf->mirrors[i]; |
|---|
| 3088 | 3039 | struct md_rdev *mrdev, *mreplace; |
|---|
| 3089 | 3040 | |
|---|
| .. | .. |
|---|
| 3091 | 3042 | mrdev = rcu_dereference(mirror->rdev); |
|---|
| 3092 | 3043 | mreplace = rcu_dereference(mirror->replacement); |
|---|
| 3093 | 3044 | |
|---|
| 3094 | | - if ((mrdev == NULL || |
|---|
| 3095 | | - test_bit(Faulty, &mrdev->flags) || |
|---|
| 3096 | | - test_bit(In_sync, &mrdev->flags)) && |
|---|
| 3097 | | - (mreplace == NULL || |
|---|
| 3098 | | - test_bit(Faulty, &mreplace->flags))) { |
|---|
| 3045 | + if (mrdev != NULL && |
|---|
| 3046 | + !test_bit(Faulty, &mrdev->flags) && |
|---|
| 3047 | + !test_bit(In_sync, &mrdev->flags)) |
|---|
| 3048 | + need_recover = 1; |
|---|
| 3049 | + if (mreplace != NULL && |
|---|
| 3050 | + !test_bit(Faulty, &mreplace->flags)) |
|---|
| 3051 | + need_replace = 1; |
|---|
| 3052 | + |
|---|
| 3053 | + if (!need_recover && !need_replace) { |
|---|
| 3099 | 3054 | rcu_read_unlock(); |
|---|
| 3100 | 3055 | continue; |
|---|
| 3101 | 3056 | } |
|---|
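
The recovery loop's skip condition is inverted above: instead of one negated compound test, the code computes two positive flags, need_recover (a present, non-Faulty, not-In_sync rdev) and need_replace (a present, non-Faulty replacement), and skips the mirror when neither is set. The two forms are equivalent by De Morgan's laws, and the positive flags are then reused further down (for example, `if (need_recover)` replaces the In_sync test). A quick exhaustive equivalence check in userspace:

```c
/*
 * Userspace check that the old skip condition and the new
 * need_recover/need_replace flags agree for every input combination.
 * The booleans model "rdev present", Faulty and In_sync bits.
 */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	int mismatches = 0;

	for (int bits = 0; bits < 32; bits++) {
		bool m_present = bits & 1;    /* mrdev != NULL */
		bool m_faulty  = bits & 2;
		bool m_insync  = bits & 4;
		bool r_present = bits & 8;    /* mreplace != NULL */
		bool r_faulty  = bits & 16;

		bool old_skip = (!m_present || m_faulty || m_insync) &&
				(!r_present || r_faulty);

		bool need_recover = m_present && !m_faulty && !m_insync;
		bool need_replace = r_present && !r_faulty;
		bool new_skip = !need_recover && !need_replace;

		if (old_skip != new_skip)
			mismatches++;
	}
	printf("mismatches: %d\n", mismatches);   /* prints 0 */
	return 0;
}
```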
| .. | .. |
|---|
| 3218 | 3173 | r10_bio->devs[1].devnum = i; |
|---|
| 3219 | 3174 | r10_bio->devs[1].addr = to_addr; |
|---|
| 3220 | 3175 | |
|---|
| 3221 | | - if (!test_bit(In_sync, &mrdev->flags)) { |
|---|
| 3176 | + if (need_recover) { |
|---|
| 3222 | 3177 | bio = r10_bio->devs[1].bio; |
|---|
| 3223 | 3178 | bio->bi_next = biolist; |
|---|
| 3224 | 3179 | biolist = bio; |
|---|
| .. | .. |
|---|
| 3235 | 3190 | bio = r10_bio->devs[1].repl_bio; |
|---|
| 3236 | 3191 | if (bio) |
|---|
| 3237 | 3192 | bio->bi_end_io = NULL; |
|---|
| 3238 | | - /* Note: if mreplace != NULL, then bio |
|---|
| 3193 | + /* Note: if need_replace, then bio |
|---|
| 3239 | 3194 | * cannot be NULL as r10buf_pool_alloc will |
|---|
| 3240 | 3195 | * have allocated it. |
|---|
| 3241 | | - * So the second test here is pointless. |
|---|
| 3242 | | - * But it keeps semantic-checkers happy, and |
|---|
| 3243 | | - * this comment keeps human reviewers |
|---|
| 3244 | | - * happy. |
|---|
| 3245 | 3196 | */ |
|---|
| 3246 | | - if (mreplace == NULL || bio == NULL || |
|---|
| 3247 | | - test_bit(Faulty, &mreplace->flags)) |
|---|
| 3197 | + if (!need_replace) |
|---|
| 3248 | 3198 | break; |
|---|
| 3249 | 3199 | bio->bi_next = biolist; |
|---|
| 3250 | 3200 | biolist = bio; |
|---|
| .. | .. |
|---|
| 3533 | 3483 | if (bio->bi_end_io == end_sync_read) { |
|---|
| 3534 | 3484 | md_sync_acct_bio(bio, nr_sectors); |
|---|
| 3535 | 3485 | bio->bi_status = 0; |
|---|
| 3536 | | - generic_make_request(bio); |
|---|
| 3486 | + submit_bio_noacct(bio); |
|---|
| 3537 | 3487 | } |
|---|
| 3538 | 3488 | } |
|---|
| 3539 | 3489 | |
|---|
| .. | .. |
|---|
| 3704 | 3654 | |
|---|
| 3705 | 3655 | conf->geo = geo; |
|---|
| 3706 | 3656 | conf->copies = copies; |
|---|
| 3707 | | - err = mempool_init(&conf->r10bio_pool, NR_RAID10_BIOS, r10bio_pool_alloc, |
|---|
| 3708 | | - r10bio_pool_free, conf); |
|---|
| 3657 | + err = mempool_init(&conf->r10bio_pool, NR_RAID_BIOS, r10bio_pool_alloc, |
|---|
| 3658 | + rbio_pool_free, conf); |
|---|
| 3709 | 3659 | if (err) |
|---|
| 3710 | 3660 | goto out; |
|---|
| 3711 | 3661 | |
|---|
| .. | .. |
|---|
| 3757 | 3707 | return ERR_PTR(err); |
|---|
| 3758 | 3708 | } |
|---|
| 3759 | 3709 | |
|---|
| 3710 | +static void raid10_set_io_opt(struct r10conf *conf) |
|---|
| 3711 | +{ |
|---|
| 3712 | + int raid_disks = conf->geo.raid_disks; |
|---|
| 3713 | + |
|---|
| 3714 | + if (!(conf->geo.raid_disks % conf->geo.near_copies)) |
|---|
| 3715 | + raid_disks /= conf->geo.near_copies; |
|---|
| 3716 | + blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) * |
|---|
| 3717 | + raid_disks); |
|---|
| 3718 | +} |
|---|
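
raid10_set_io_opt() centralises the optimal-I/O-size hint that raid10_run() and end_reshape() previously computed inline: io_opt is the chunk size times the number of data-bearing stripes, i.e. raid_disks / near_copies when that divides evenly, otherwise raid_disks, while io_min stays at the chunk size. The manual read-ahead (ra_pages) tuning removed further down is presumably left to the block layer, which can derive it from these hints. A worked userspace example of the arithmetic, with illustrative values:

```c
/*
 * Userspace rendition of the io_opt arithmetic in raid10_set_io_opt().
 * chunk_sectors is in 512-byte sectors, as in the kernel.
 */
#include <stdio.h>

static unsigned long io_opt_bytes(unsigned int chunk_sectors,
				  int raid_disks, int near_copies)
{
	int disks = raid_disks;

	if (!(raid_disks % near_copies))
		disks /= near_copies;
	return (unsigned long)(chunk_sectors << 9) * disks;
}

int main(void)
{
	/* 4-disk near-2 layout, 512 KiB chunks: 2 data stripes -> 1 MiB */
	printf("io_opt = %lu bytes\n", io_opt_bytes(1024, 4, 2));
	return 0;
}
```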
| 3719 | + |
|---|
| 3760 | 3720 | static int raid10_run(struct mddev *mddev) |
|---|
| 3761 | 3721 | { |
|---|
| 3762 | 3722 | struct r10conf *conf; |
|---|
| 3763 | | - int i, disk_idx, chunk_size; |
|---|
| 3723 | + int i, disk_idx; |
|---|
| 3764 | 3724 | struct raid10_info *disk; |
|---|
| 3765 | 3725 | struct md_rdev *rdev; |
|---|
| 3766 | 3726 | sector_t size; |
|---|
| .. | .. |
|---|
| 3796 | 3756 | mddev->thread = conf->thread; |
|---|
| 3797 | 3757 | conf->thread = NULL; |
|---|
| 3798 | 3758 | |
|---|
| 3799 | | - chunk_size = mddev->chunk_sectors << 9; |
|---|
| 3800 | 3759 | if (mddev->queue) { |
|---|
| 3801 | 3760 | blk_queue_max_discard_sectors(mddev->queue, |
|---|
| 3802 | 3761 | mddev->chunk_sectors); |
|---|
| 3803 | 3762 | blk_queue_max_write_same_sectors(mddev->queue, 0); |
|---|
| 3804 | 3763 | blk_queue_max_write_zeroes_sectors(mddev->queue, 0); |
|---|
| 3805 | | - blk_queue_io_min(mddev->queue, chunk_size); |
|---|
| 3806 | | - if (conf->geo.raid_disks % conf->geo.near_copies) |
|---|
| 3807 | | - blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); |
|---|
| 3808 | | - else |
|---|
| 3809 | | - blk_queue_io_opt(mddev->queue, chunk_size * |
|---|
| 3810 | | - (conf->geo.raid_disks / conf->geo.near_copies)); |
|---|
| 3764 | + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); |
|---|
| 3765 | + raid10_set_io_opt(conf); |
|---|
| 3811 | 3766 | } |
|---|
| 3812 | 3767 | |
|---|
| 3813 | 3768 | rdev_for_each(rdev, mddev) { |
|---|
| .. | .. |
|---|
| 3921 | 3876 | md_set_array_sectors(mddev, size); |
|---|
| 3922 | 3877 | mddev->resync_max_sectors = size; |
|---|
| 3923 | 3878 | set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); |
|---|
| 3924 | | - |
|---|
| 3925 | | - if (mddev->queue) { |
|---|
| 3926 | | - int stripe = conf->geo.raid_disks * |
|---|
| 3927 | | - ((mddev->chunk_sectors << 9) / PAGE_SIZE); |
|---|
| 3928 | | - |
|---|
| 3929 | | - /* Calculate max read-ahead size. |
|---|
| 3930 | | - * We need to readahead at least twice a whole stripe.... |
|---|
| 3931 | | - * maybe... |
|---|
| 3932 | | - */ |
|---|
| 3933 | | - stripe /= conf->geo.near_copies; |
|---|
| 3934 | | - if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) |
|---|
| 3935 | | - mddev->queue->backing_dev_info->ra_pages = 2 * stripe; |
|---|
| 3936 | | - } |
|---|
| 3937 | 3879 | |
|---|
| 3938 | 3880 | if (md_integrity_register(mddev)) |
|---|
| 3939 | 3881 | goto out_free_conf; |
|---|
| .. | .. |
|---|
| 4293 | 4235 | spin_unlock_irq(&conf->device_lock); |
|---|
| 4294 | 4236 | |
|---|
| 4295 | 4237 | if (mddev->delta_disks && mddev->bitmap) { |
|---|
| 4296 | | - ret = md_bitmap_resize(mddev->bitmap, |
|---|
| 4297 | | - raid10_size(mddev, 0, conf->geo.raid_disks), |
|---|
| 4298 | | - 0, 0); |
|---|
| 4238 | + struct mdp_superblock_1 *sb = NULL; |
|---|
| 4239 | + sector_t oldsize, newsize; |
|---|
| 4240 | + |
|---|
| 4241 | + oldsize = raid10_size(mddev, 0, 0); |
|---|
| 4242 | + newsize = raid10_size(mddev, 0, conf->geo.raid_disks); |
|---|
| 4243 | + |
|---|
| 4244 | + if (!mddev_is_clustered(mddev)) { |
|---|
| 4245 | + ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); |
|---|
| 4246 | + if (ret) |
|---|
| 4247 | + goto abort; |
|---|
| 4248 | + else |
|---|
| 4249 | + goto out; |
|---|
| 4250 | + } |
|---|
| 4251 | + |
|---|
| 4252 | + rdev_for_each(rdev, mddev) { |
|---|
| 4253 | + if (rdev->raid_disk > -1 && |
|---|
| 4254 | + !test_bit(Faulty, &rdev->flags)) |
|---|
| 4255 | + sb = page_address(rdev->sb_page); |
|---|
| 4256 | + } |
|---|
| 4257 | + |
|---|
| 4258 | + /* |
|---|
| 4259 | + * some node is already performing reshape, and no need to |
|---|
| 4260 | + * call md_bitmap_resize again since it should be called when |
|---|
| 4261 | + * receiving BITMAP_RESIZE msg |
|---|
| 4262 | + */ |
|---|
| 4263 | + if ((sb && (le32_to_cpu(sb->feature_map) & |
|---|
| 4264 | + MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize)) |
|---|
| 4265 | + goto out; |
|---|
| 4266 | + |
|---|
| 4267 | + ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); |
|---|
| 4299 | 4268 | if (ret) |
|---|
| 4300 | 4269 | goto abort; |
|---|
| 4270 | + |
|---|
| 4271 | + ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize); |
|---|
| 4272 | + if (ret) { |
|---|
| 4273 | + md_bitmap_resize(mddev->bitmap, oldsize, 0, 0); |
|---|
| 4274 | + goto abort; |
|---|
| 4275 | + } |
|---|
| 4301 | 4276 | } |
|---|
| 4277 | +out: |
|---|
| 4302 | 4278 | if (mddev->delta_disks > 0) { |
|---|
| 4303 | 4279 | rdev_for_each(rdev, mddev) |
|---|
| 4304 | 4280 | if (rdev->raid_disk < 0 && |
|---|
| .. | .. |
|---|
| 4310 | 4286 | else |
|---|
| 4311 | 4287 | rdev->recovery_offset = 0; |
|---|
| 4312 | 4288 | |
|---|
| 4313 | | - if (sysfs_link_rdev(mddev, rdev)) |
|---|
| 4314 | | - /* Failure here is OK */; |
|---|
| 4289 | + /* Failure here is OK */ |
|---|
| 4290 | + sysfs_link_rdev(mddev, rdev); |
|---|
| 4315 | 4291 | } |
|---|
| 4316 | 4292 | } else if (rdev->raid_disk >= conf->prev.raid_disks |
|---|
| 4317 | 4293 | && !test_bit(Faulty, &rdev->flags)) { |
|---|
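
For clustered arrays, the reshape setup above resizes the local bitmap, then asks the cluster layer to resize the bitmaps on the other nodes via md_cluster_ops->resize_bitmaps(), and rolls the local resize back if that call fails. A node that finds MD_FEATURE_RESHAPE_ACTIVE in a member superblock, or sees an unchanged size, skips the resize entirely, since the initiating node drives it through BITMAP_RESIZE messages. Abstracted from md, this is the usual "undo step 1 if step 2 fails" pattern, sketched below with stand-in functions.

```c
/*
 * Userspace sketch of the resize-with-rollback pattern used above.
 * resize_local()/resize_remote() are illustrative stand-ins for
 * md_bitmap_resize() and md_cluster_ops->resize_bitmaps().
 */
#include <stdio.h>

static int resize_local(long size)  { printf("local  -> %ld\n", size); return 0; }
static int resize_remote(long size) { printf("remote -> %ld\n", size); return -1; }

static int resize_all(long oldsize, long newsize)
{
	int ret = resize_local(newsize);

	if (ret)
		return ret;
	ret = resize_remote(newsize);
	if (ret)
		resize_local(oldsize);   /* roll back step 1 */
	return ret;
}

int main(void)
{
	return resize_all(1000, 2000) ? 1 : 0;
}
```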
| .. | .. |
|---|
| 4457 | 4433 | sector_nr = conf->reshape_progress; |
|---|
| 4458 | 4434 | if (sector_nr) { |
|---|
| 4459 | 4435 | mddev->curr_resync_completed = sector_nr; |
|---|
| 4460 | | - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
|---|
| 4436 | + sysfs_notify_dirent_safe(mddev->sysfs_completed); |
|---|
| 4461 | 4437 | *skipped = 1; |
|---|
| 4462 | 4438 | return sector_nr; |
|---|
| 4463 | 4439 | } |
|---|
| .. | .. |
|---|
| 4486 | 4462 | last = conf->reshape_progress - 1; |
|---|
| 4487 | 4463 | sector_nr = last & ~(sector_t)(conf->geo.chunk_mask |
|---|
| 4488 | 4464 | & conf->prev.chunk_mask); |
|---|
| 4489 | | - if (sector_nr + RESYNC_BLOCK_SIZE/512 < last) |
|---|
| 4490 | | - sector_nr = last + 1 - RESYNC_BLOCK_SIZE/512; |
|---|
| 4465 | + if (sector_nr + RESYNC_SECTORS < last) |
|---|
| 4466 | + sector_nr = last + 1 - RESYNC_SECTORS; |
|---|
| 4491 | 4467 | } else { |
|---|
| 4492 | 4468 | /* 'next' is after the last device address that we |
|---|
| 4493 | 4469 | * might write to for this chunk in the new layout |
|---|
| .. | .. |
|---|
| 4509 | 4485 | last = sector_nr | (conf->geo.chunk_mask |
|---|
| 4510 | 4486 | & conf->prev.chunk_mask); |
|---|
| 4511 | 4487 | |
|---|
| 4512 | | - if (sector_nr + RESYNC_BLOCK_SIZE/512 <= last) |
|---|
| 4513 | | - last = sector_nr + RESYNC_BLOCK_SIZE/512 - 1; |
|---|
| 4488 | + if (sector_nr + RESYNC_SECTORS <= last) |
|---|
| 4489 | + last = sector_nr + RESYNC_SECTORS - 1; |
|---|
| 4514 | 4490 | } |
|---|
| 4515 | 4491 | |
|---|
| 4516 | 4492 | if (need_flush || |
|---|
| .. | .. |
|---|
| 4575 | 4551 | r10_bio->master_bio = read_bio; |
|---|
| 4576 | 4552 | r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; |
|---|
| 4577 | 4553 | |
|---|
| 4554 | + /* |
|---|
| 4555 | + * Broadcast RESYNC message to other nodes, so all nodes would not |
|---|
| 4556 | + * write to the region to avoid conflict. |
|---|
| 4557 | + */ |
|---|
| 4558 | + if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) { |
|---|
| 4559 | + struct mdp_superblock_1 *sb = NULL; |
|---|
| 4560 | + int sb_reshape_pos = 0; |
|---|
| 4561 | + |
|---|
| 4562 | + conf->cluster_sync_low = sector_nr; |
|---|
| 4563 | + conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS; |
|---|
| 4564 | + sb = page_address(rdev->sb_page); |
|---|
| 4565 | + if (sb) { |
|---|
| 4566 | + sb_reshape_pos = le64_to_cpu(sb->reshape_position); |
|---|
| 4567 | + /* |
|---|
| 4568 | + * Set cluster_sync_low again if next address for array |
|---|
| 4569 | + * reshape is less than cluster_sync_low. Since we can't |
|---|
| 4570 | + * update cluster_sync_low until it has finished reshape. |
|---|
| 4571 | + */ |
|---|
| 4572 | + if (sb_reshape_pos < conf->cluster_sync_low) |
|---|
| 4573 | + conf->cluster_sync_low = sb_reshape_pos; |
|---|
| 4574 | + } |
|---|
| 4575 | + |
|---|
| 4576 | + md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low, |
|---|
| 4577 | + conf->cluster_sync_high); |
|---|
| 4578 | + } |
|---|
| 4579 | + |
|---|
| 4578 | 4580 | /* Now find the locations in the new layout */ |
|---|
| 4579 | 4581 | __raid10_find_phys(&conf->geo, r10_bio); |
|---|
| 4580 | 4582 | |
|---|
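
Before issuing the read for a reshape chunk, the clustered path above advertises a resync window [cluster_sync_low, cluster_sync_high) to the other nodes so they avoid writing into the region being relocated. cluster_sync_low is additionally clamped down to the superblock's reshape_position when that lags behind, because the window must not run ahead of reshape work that has not finished. The window update itself is simple arithmetic, sketched here in userspace with an illustrative window size.

```c
/*
 * Userspace sketch of the cluster resync-window update above.
 * WINDOW stands in for CLUSTER_RESYNC_WINDOW_SECTORS (value illustrative).
 */
#include <stdint.h>
#include <stdio.h>

#define WINDOW (32 * 2048)   /* 32 MiB expressed in 512-byte sectors */

static void update_window(uint64_t sector_nr, uint64_t sb_reshape_pos,
			  uint64_t *low, uint64_t *high)
{
	*low = sector_nr;
	*high = sector_nr + WINDOW;
	if (sb_reshape_pos < *low)    /* don't run ahead of unfinished reshape */
		*low = sb_reshape_pos;
}

int main(void)
{
	uint64_t low, high;

	update_window(500000, 480000, &low, &high);
	printf("advertised window: [%llu, %llu)\n",
	       (unsigned long long)low, (unsigned long long)high);
	return 0;
}
```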
| .. | .. |
|---|
| 4631 | 4633 | md_sync_acct_bio(read_bio, r10_bio->sectors); |
|---|
| 4632 | 4634 | atomic_inc(&r10_bio->remaining); |
|---|
| 4633 | 4635 | read_bio->bi_next = NULL; |
|---|
| 4634 | | - generic_make_request(read_bio); |
|---|
| 4636 | + submit_bio_noacct(read_bio); |
|---|
| 4635 | 4637 | sectors_done += nr_sectors; |
|---|
| 4636 | 4638 | if (sector_nr <= last) |
|---|
| 4637 | 4639 | goto read_more; |
|---|
| .. | .. |
|---|
| 4694 | 4696 | md_sync_acct_bio(b, r10_bio->sectors); |
|---|
| 4695 | 4697 | atomic_inc(&r10_bio->remaining); |
|---|
| 4696 | 4698 | b->bi_next = NULL; |
|---|
| 4697 | | - generic_make_request(b); |
|---|
| 4699 | + submit_bio_noacct(b); |
|---|
| 4698 | 4700 | } |
|---|
| 4699 | 4701 | end_reshape_request(r10_bio); |
|---|
| 4700 | 4702 | } |
|---|
| .. | .. |
|---|
| 4712 | 4714 | conf->reshape_safe = MaxSector; |
|---|
| 4713 | 4715 | spin_unlock_irq(&conf->device_lock); |
|---|
| 4714 | 4716 | |
|---|
| 4715 | | - /* read-ahead size must cover two whole stripes, which is |
|---|
| 4716 | | - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices |
|---|
| 4717 | | - */ |
|---|
| 4718 | | - if (conf->mddev->queue) { |
|---|
| 4719 | | - int stripe = conf->geo.raid_disks * |
|---|
| 4720 | | - ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE); |
|---|
| 4721 | | - stripe /= conf->geo.near_copies; |
|---|
| 4722 | | - if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) |
|---|
| 4723 | | - conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; |
|---|
| 4724 | | - } |
|---|
| 4717 | + if (conf->mddev->queue) |
|---|
| 4718 | + raid10_set_io_opt(conf); |
|---|
| 4725 | 4719 | conf->fullsync = 0; |
|---|
| 4720 | +} |
|---|
| 4721 | + |
|---|
| 4722 | +static void raid10_update_reshape_pos(struct mddev *mddev) |
|---|
| 4723 | +{ |
|---|
| 4724 | + struct r10conf *conf = mddev->private; |
|---|
| 4725 | + sector_t lo, hi; |
|---|
| 4726 | + |
|---|
| 4727 | + md_cluster_ops->resync_info_get(mddev, &lo, &hi); |
|---|
| 4728 | + if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo)) |
|---|
| 4729 | + || mddev->reshape_position == MaxSector) |
|---|
| 4730 | + conf->reshape_progress = mddev->reshape_position; |
|---|
| 4731 | + else |
|---|
| 4732 | + WARN_ON_ONCE(1); |
|---|
| 4726 | 4733 | } |
|---|
| 4727 | 4734 | |
|---|
| 4728 | 4735 | static int handle_reshape_read_error(struct mddev *mddev, |
|---|
| .. | .. |
|---|
| 4736 | 4743 | int idx = 0; |
|---|
| 4737 | 4744 | struct page **pages; |
|---|
| 4738 | 4745 | |
|---|
| 4739 | | - r10b = kmalloc(sizeof(*r10b) + |
|---|
| 4740 | | - sizeof(struct r10dev) * conf->copies, GFP_NOIO); |
|---|
| 4746 | + r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO); |
|---|
| 4741 | 4747 | if (!r10b) { |
|---|
| 4742 | 4748 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
|---|
| 4743 | 4749 | return -ENOMEM; |
|---|
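
The handle_reshape_read_error() allocation above switches from the open-coded `sizeof(*r10b) + sizeof(struct r10dev) * conf->copies` to struct_size(), the kernel helper for sizing a structure with a trailing flexible array while guarding against multiplication and addition overflow. The same calculation in plain C, without the kernel helper's overflow saturation, looks like this; the structures are stand-ins, not the kernel's r10bio/r10dev.

```c
/*
 * Userspace illustration of sizing a struct with a flexible array
 * member, matching the open-coded expression struct_size() replaces
 * (minus its overflow checking). Types are illustrative stand-ins.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct r10dev_like {
	void *bio;
	long  addr;
	int   devnum;
};

struct r10bio_like {
	int sectors;
	int copies;
	struct r10dev_like devs[];   /* flexible array member */
};

int main(void)
{
	int copies = 2;
	size_t sz = sizeof(struct r10bio_like) +
		    sizeof(struct r10dev_like) * (size_t)copies;
	struct r10bio_like *r10b = malloc(sz);

	if (!r10b)
		return 1;
	r10b->copies = copies;
	printf("allocated %zu bytes for %d copies\n", sz, copies);
	free(r10b);
	return 0;
}
```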
| .. | .. |
|---|
| 4893 | 4899 | .check_reshape = raid10_check_reshape, |
|---|
| 4894 | 4900 | .start_reshape = raid10_start_reshape, |
|---|
| 4895 | 4901 | .finish_reshape = raid10_finish_reshape, |
|---|
| 4896 | | - .congested = raid10_congested, |
|---|
| 4902 | + .update_reshape_pos = raid10_update_reshape_pos, |
|---|
| 4897 | 4903 | }; |
|---|
| 4898 | 4904 | |
|---|
| 4899 | 4905 | static int __init raid_init(void) |
|---|