@@ -129,7 +129,9 @@
 				  CTR_FLAG_RAID10_COPIES | \
 				  CTR_FLAG_RAID10_FORMAT | \
 				  CTR_FLAG_DELTA_DISKS | \
-				  CTR_FLAG_DATA_OFFSET)
+				  CTR_FLAG_DATA_OFFSET | \
+				  CTR_FLAG_JOURNAL_DEV | \
+				  CTR_FLAG_JOURNAL_MODE)
 
 /* Valid options definitions per raid level... */
 
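
The option mask extended here is the set of one-argument table parameters the constructor accepts: the raid4/5/6 journal options journal_dev and journal_mode join it. That membership also matters for the parameter-counting rework in the STATUSTYPE_TABLE hunk further down, which leans on this mask instead of counting the journal pairs separately.
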
@@ -209,6 +211,7 @@
 #define RT_FLAG_RS_SUSPENDED		5
 #define RT_FLAG_RS_IN_SYNC		6
 #define RT_FLAG_RS_RESYNCING		7
+#define RT_FLAG_RS_GROW			8
 
 /* Array elements of 64 bit needed for rebuild/failed disk bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
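
RT_FLAG_RS_GROW is a new runtime flag. As the raid_ctr and preresume hunks below show, the constructor sets it when the requested device size exceeds what the component superblocks record, and the actual extension is deferred until preresume.
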
@@ -239,7 +242,9 @@
 
 	struct mddev md;
 	struct raid_type *raid_type;
-	struct dm_target_callbacks callbacks;
+
+	sector_t array_sectors;
+	sector_t dev_sectors;
 
 	/* Optional raid4/5/6 journal device */
 	struct journal_dev {
@@ -248,7 +253,7 @@
 		int mode;
 	} journal_dev;
 
-	struct raid_dev dev[0];
+	struct raid_dev dev[];
 };
 
 static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
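
Three changes to struct raid_set: the dm_target_callbacks member is dropped (the congestion callback it carried goes away further down), array_sectors/dev_sectors memorize the constructor-calculated sizes for the deferred grow, and the trailing dev[0] becomes dev[]. The latter is the kernel-wide conversion from GCC zero-length arrays to C99 flexible array members, which lets the compiler and fortified bounds checks see the array for what it is. A minimal userspace sketch of the allocation idiom; the *_like types and NDEVS are invented for illustration, not taken from dm-raid.c:

```c
/*
 * Illustration only, not dm-raid.c code: the allocation idiom behind
 * "struct raid_dev dev[]".
 */
#include <stdio.h>
#include <stdlib.h>

struct raid_dev_like {
	int raid_disk;
};

struct raid_set_like {
	int raid_disks;
	struct raid_dev_like dev[];	/* C99 flexible array member, must be last */
};

int main(void)
{
	enum { NDEVS = 4 };
	/* One allocation covers the header plus the trailing array. */
	struct raid_set_like *rs = malloc(sizeof(*rs) + NDEVS * sizeof(rs->dev[0]));

	if (!rs)
		return 1;

	rs->raid_disks = NDEVS;
	for (int i = 0; i < rs->raid_disks; i++)
		rs->dev[i].raid_disk = i;

	printf("last leg index: %d\n", rs->dev[rs->raid_disks - 1].raid_disk);
	free(rs);
	return 0;
}
```

In-kernel code typically computes that allocation size with struct_size() from <linux/overflow.h>, which additionally guards the multiplication against overflow.
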
@@ -616,7 +621,6 @@
 
 	} else if (algorithm == ALGORITHM_RAID10_FAR) {
 		f = copies;
-		r = !RAID10_OFFSET;
 		if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
 			r |= RAID10_USE_FAR_SETS;
 
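
The dropped assignment appears to be a no-op: RAID10_OFFSET is a nonzero flag bit, so !RAID10_OFFSET evaluates to 0, and r starts out zero-initialized in this function, which is why the following r |= RAID10_USE_FAR_SETS still composes correctly without it.
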
@@ -697,7 +701,7 @@
 	struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
 
 	set_capacity(gendisk, rs->md.array_sectors);
-	revalidate_disk(gendisk);
+	revalidate_disk_size(gendisk, true);
 }
 
 /*
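
This tracks a block-core API change: revalidate_disk() was split up, and revalidate_disk_size() (its 5.9/5.10-era capacity-only replacement) just propagates the new set_capacity() value to the block device, which is all this helper ever needed.
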
@@ -998,12 +1002,13 @@
 static int validate_raid_redundancy(struct raid_set *rs)
 {
 	unsigned int i, rebuild_cnt = 0;
-	unsigned int rebuilds_per_group = 0, copies;
+	unsigned int rebuilds_per_group = 0, copies, raid_disks;
 	unsigned int group_size, last_group_start;
 
-	for (i = 0; i < rs->md.raid_disks; i++)
-		if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
-		    !rs->dev[i].rdev.sb_page)
+	for (i = 0; i < rs->raid_disks; i++)
+		if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
+		    ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
+		      !rs->dev[i].rdev.sb_page)))
 			rebuild_cnt++;
 
 	switch (rs->md.level) {
@@ -1043,8 +1048,9 @@
 		 * A A B B C
 		 * C D D E E
 		 */
+		raid_disks = min(rs->raid_disks, rs->md.raid_disks);
 		if (__is_raid10_near(rs->md.new_layout)) {
-			for (i = 0; i < rs->md.raid_disks; i++) {
+			for (i = 0; i < raid_disks; i++) {
 				if (!(i % copies))
 					rebuilds_per_group = 0;
 				if ((!rs->dev[i].rdev.sb_page ||
@@ -1067,10 +1073,10 @@
 		 * results in the need to treat the last (potentially larger)
 		 * set differently.
 		 */
-		group_size = (rs->md.raid_disks / copies);
-		last_group_start = (rs->md.raid_disks / group_size) - 1;
+		group_size = (raid_disks / copies);
+		last_group_start = (raid_disks / group_size) - 1;
 		last_group_start *= group_size;
-		for (i = 0; i < rs->md.raid_disks; i++) {
+		for (i = 0; i < raid_disks; i++) {
 			if (!(i % copies) && !(i > last_group_start))
 				rebuilds_per_group = 0;
 			if ((!rs->dev[i].rdev.sb_page ||
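
These three hunks harden validate_raid_redundancy(): the rebuild count now walks rs->raid_disks (the target's view of the device count) and skips FirstUse devices, which are fresh legs of a growing set rather than rebuilds, and the raid10 group walk is clamped to min(rs->raid_disks, rs->md.raid_disks). When a reshape changes the device count those two values differ, and the old loops bound by rs->md.raid_disks could index past the populated slots of the dev[] array.
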
@@ -1585,7 +1591,7 @@
 {
 	int i;
 
-	for (i = 0; i < rs->md.raid_disks; i++) {
+	for (i = 0; i < rs->raid_disks; i++) {
 		struct md_rdev *rdev = &rs->dev[i].rdev;
 
 		if (!test_bit(Journal, &rdev->flags) &&
@@ -1615,13 +1621,12 @@
 }
 
 /* Calculate the sectors per device and per array used for @rs */
-static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
+static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
 {
 	int delta_disks;
 	unsigned int data_stripes;
+	sector_t array_sectors = sectors, dev_sectors = sectors;
 	struct mddev *mddev = &rs->md;
-	struct md_rdev *rdev;
-	sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len;
 
 	if (use_mddev) {
 		delta_disks = mddev->delta_disks;
@@ -1656,12 +1661,9 @@
 	/* Striped layouts */
 	array_sectors = (data_stripes + delta_disks) * dev_sectors;
 
-	rdev_for_each(rdev, mddev)
-		if (!test_bit(Journal, &rdev->flags))
-			rdev->sectors = dev_sectors;
-
 	mddev->array_sectors = array_sectors;
 	mddev->dev_sectors = dev_sectors;
+	rs_set_rdev_sectors(rs);
 
 	return _check_data_dev_sectors(rs);
 bad:
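
rs_set_dev_and_array_sectors() now receives the size to apply as an explicit sectors argument instead of always deriving it from rs->ti->len; this is what later lets the constructor size a set either to the table length or to the superblock-recorded size. The open-coded rdev sizing loop is replaced by a call to the rs_set_rdev_sectors(rs) helper.
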
@@ -1670,7 +1672,7 @@
 }
 
 /* Setup recovery on @rs */
-static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
+static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
 {
 	/* raid0 does not recover */
 	if (rs_is_raid0(rs))
@@ -1691,22 +1693,6 @@
 		? MaxSector : dev_sectors;
 }
 
-/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */
-static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
-{
-	if (!dev_sectors)
-		/* New raid set or 'sync' flag provided */
-		__rs_setup_recovery(rs, 0);
-	else if (dev_sectors == MaxSector)
-		/* Prevent recovery */
-		__rs_setup_recovery(rs, MaxSector);
-	else if (__rdev_sectors(rs) < dev_sectors)
-		/* Grown raid set */
-		__rs_setup_recovery(rs, __rdev_sectors(rs));
-	else
-		__rs_setup_recovery(rs, MaxSector);
-}
-
 static void do_table_event(struct work_struct *ws)
 {
 	struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
@@ -1718,13 +1704,6 @@
 		rs_set_capacity(rs);
 	}
 	dm_table_event(rs->ti->table);
-}
-
-static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
-{
-	struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-	return mddev_congested(&rs->md, bits);
 }
 
 /*
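
The two-level __rs_setup_recovery()/rs_setup_recovery() split collapses into a single function; the classification of dev_sectors (0, MaxSector, or a real size) now happens at the call sites in raid_ctr(). raid_is_congested() goes away together with the struct dm_target_callbacks member above: the block layer dropped per-device congestion callbacks (congested_fn), so there is nothing left for the target to report.
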
@@ -2366,8 +2345,6 @@
 
 	if (new_devs == rs->raid_disks || !rebuilds) {
 		/* Replace a broken device */
-		if (new_devs == 1 && !rs->delta_disks)
-			;
 		if (new_devs == rs->raid_disks) {
 			DMINFO("Superblocks created for new raid set");
 			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
@@ -2480,7 +2457,7 @@
 		return -EINVAL;
 	}
 
-	/* Enable bitmap creation for RAID levels != 0 */
+	/* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */
 	mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
 	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
 
@@ -2917,7 +2894,7 @@
 
 	/* Remove disk(s) */
 	} else if (rs->delta_disks < 0) {
-		r = rs_set_dev_and_array_sectors(rs, true);
+		r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
 		mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
 
 	/* Change layout and/or chunk size */
@@ -3006,11 +2983,6 @@
 		}
 	}
 
-	/*
-	 * RAID1 and RAID10 personalities require bio splitting,
-	 * RAID0/4/5/6 don't and process large discard bios properly.
-	 */
-	ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
 	ti->num_discard_bios = 1;
 }
 
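
Besides the dead empty statement and a call-site update for the new sectors argument, the notable change here is that ti->split_discard_bios no longer exists in the dm target interface. Instead of a flag that forced dm core to split every discard, the constraint is expressed through queue limits in raid_io_hints() near the end of this diff, where discard_granularity and max_discard_sectors are pinned to one chunk for the personalities that need split discards.
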
@@ -3033,7 +3005,7 @@
 	bool resize = false;
 	struct raid_type *rt;
 	unsigned int num_raid_params, num_raid_devs;
-	sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors;
+	sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
 	struct raid_set *rs = NULL;
 	const char *arg;
 	struct rs_layout rs_layout;
@@ -3043,7 +3015,6 @@
 		{ 1, 254, "Cannot understand number of raid devices parameters" }
 	};
 
-	/* Must have <raid_type> */
 	arg = dm_shift_arg(&as);
 	if (!arg) {
 		ti->error = "No arguments";
@@ -3092,11 +3063,13 @@
 	 *
 	 * Any existing superblock will overwrite the array and device sizes
 	 */
-	r = rs_set_dev_and_array_sectors(rs, false);
+	r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
 	if (r)
 		goto bad;
 
-	calculated_dev_sectors = rs->md.dev_sectors;
+	/* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */
+	rs->array_sectors = rs->md.array_sectors;
+	rs->dev_sectors = rs->md.dev_sectors;
 
 	/*
 	 * Backup any new raid set level, layout, ...
@@ -3109,6 +3082,8 @@
 	if (r)
 		goto bad;
 
+	/* All in-core metadata now as of current superblocks after calling analyse_superblocks() */
+	sb_array_sectors = rs->md.array_sectors;
 	rdev_sectors = __rdev_sectors(rs);
 	if (!rdev_sectors) {
 		ti->error = "Invalid rdev size";
@@ -3118,8 +3093,11 @@
 
 
 	reshape_sectors = _get_reshape_sectors(rs);
-	if (calculated_dev_sectors != rdev_sectors)
-		resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors);
+	if (rs->dev_sectors != rdev_sectors) {
+		resize = (rs->dev_sectors != rdev_sectors - reshape_sectors);
+		if (rs->dev_sectors > rdev_sectors - reshape_sectors)
+			set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+	}
 
 	INIT_WORK(&rs->md.event_work, do_table_event);
 	ti->private = rs;
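
The resize test now also classifies the direction of the size change: if the constructor-calculated per-device size exceeds what the component rdevs currently provide, minus any space reserved for an ongoing reshape, the set is growing and RT_FLAG_RS_GROW is recorded for preresume. A stand-alone restatement of that decision, with invented sample values rather than data from a real raid set:

```c
/*
 * Stand-alone restatement of the ctr size check; all values are
 * invented sample numbers, not taken from a real raid set.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

static void size_check(sector_t ctr_dev_sectors, sector_t rdev_sectors,
		       sector_t reshape_sectors)
{
	bool resize = false, grow = false;

	if (ctr_dev_sectors != rdev_sectors) {
		resize = (ctr_dev_sectors != rdev_sectors - reshape_sectors);
		if (ctr_dev_sectors > rdev_sectors - reshape_sectors)
			grow = true;	/* RT_FLAG_RS_GROW in the real code */
	}

	printf("ctr=%llu rdev=%llu reshape=%llu -> resize=%d grow=%d\n",
	       ctr_dev_sectors, rdev_sectors, reshape_sectors, resize, grow);
}

int main(void)
{
	size_check(2048, 1024, 0);	/* grown device: resize and grow */
	size_check(1024, 1024, 0);	/* unchanged: neither */
	size_check(1024, 1152, 128);	/* only reshape space reserved: neither */
	size_check(512, 1024, 0);	/* shrunk device: resize, no grow */
	return 0;
}
```

Note the third case: a device that merely carries reserved reshape space is not treated as a size change at all.
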
@@ -3146,13 +3124,8 @@
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
 		rs_set_new(rs);
 	} else if (rs_is_recovering(rs)) {
-		/* Rebuild particular devices */
-		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
-			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-			rs_setup_recovery(rs, MaxSector);
-		}
 		/* A recovering raid set may be resized */
-		; /* skip setup rs */
+		goto size_check;
 	} else if (rs_is_reshaping(rs)) {
 		/* Have to reject size change request during reshape */
 		if (resize) {
@@ -3196,6 +3169,9 @@
 		rs_setup_recovery(rs, MaxSector);
 		rs_set_new(rs);
 	} else if (rs_reshape_requested(rs)) {
+		/* Only request grow on raid set size extensions, not on reshapes. */
+		clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+
 		/*
 		 * No need to check for 'ongoing' takeover here, because takeover
 		 * is an instant operation as oposed to an ongoing reshape.
@@ -3226,13 +3202,31 @@
 		}
 		rs_set_cur(rs);
 	} else {
+size_check:
 		/* May not set recovery when a device rebuild is requested */
 		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
-			rs_setup_recovery(rs, MaxSector);
+			clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
 			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-		} else
-			rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ?
-					      0 : (resize ? calculated_dev_sectors : MaxSector));
+			rs_setup_recovery(rs, MaxSector);
+		} else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+			/*
+			 * Set raid set to current size, i.e. size as of
+			 * superblocks to grow to larger size in preresume.
+			 */
+			r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
+			if (r)
+				goto bad;
+
+			rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
+		} else {
+			/* This is no size change or it is shrinking, update size and record in superblocks */
+			r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
+			if (r)
+				goto bad;
+
+			if (sb_array_sectors > rs->array_sectors)
+				set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+		}
 		rs_set_cur(rs);
 	}
 
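
The new size_check: label funnels both the recovering case and the plain else branch through one sizing path. A rebuild request clears the grow flag and forces a full recovery window; a grow first re-applies the superblock size (sb_array_sectors) so the set comes up at its current size and is only extended in preresume, with recovery starting at the lower of recovery_cp and dev_sectors; otherwise the table-line size is applied directly, and shrinking relative to the superblocks schedules a superblock update.
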
@@ -3264,20 +3258,15 @@
 	r = md_start(&rs->md);
 	if (r) {
 		ti->error = "Failed to start raid array";
-		mddev_unlock(&rs->md);
-		goto bad_md_start;
+		goto bad_unlock;
 	}
-
-	rs->callbacks.congested_fn = raid_is_congested;
-	dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
 	/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
 	if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
 		r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
 		if (r) {
 			ti->error = "Failed to set raid4/5/6 journal mode";
-			mddev_unlock(&rs->md);
-			goto bad_journal_mode_set;
+			goto bad_unlock;
 		}
 	}
 
@@ -3288,14 +3277,14 @@
 	if (rs_is_raid456(rs)) {
 		r = rs_set_raid456_stripe_cache(rs);
 		if (r)
-			goto bad_stripe_cache;
+			goto bad_unlock;
 	}
 
 	/* Now do an early reshape check */
 	if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
 		r = rs_check_reshape(rs);
 		if (r)
-			goto bad_check_reshape;
+			goto bad_unlock;
 
 		/* Restore new, ctr requested layout to perform check */
 		rs_config_restore(rs, &rs_layout);
@@ -3304,7 +3293,7 @@
 		r = rs->md.pers->check_reshape(&rs->md);
 		if (r) {
 			ti->error = "Reshape check failed";
-			goto bad_check_reshape;
+			goto bad_unlock;
 		}
 	}
 }
@@ -3315,11 +3304,9 @@
 	mddev_unlock(&rs->md);
 	return 0;
 
-bad_md_start:
-bad_journal_mode_set:
-bad_stripe_cache:
-bad_check_reshape:
+bad_unlock:
 	md_stop(&rs->md);
+	mddev_unlock(&rs->md);
 bad:
 	raid_set_free(rs);
 
@@ -3330,8 +3317,9 @@
 {
 	struct raid_set *rs = ti->private;
 
-	list_del_init(&rs->callbacks.list);
+	mddev_lock_nointr(&rs->md);
 	md_stop(&rs->md);
+	mddev_unlock(&rs->md);
 	raid_set_free(rs);
 }
 
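
Error handling in raid_ctr() collapses the four identical labels into one bad_unlock: that calls md_stop() before mddev_unlock(), so the array is always torn down while the mddev lock is still held; previously two paths dropped the lock first. raid_dtr() gets the same treatment, wrapping md_stop() in mddev_lock_nointr()/mddev_unlock() now that there is no callback list entry left to unlink.
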
@@ -3433,10 +3421,9 @@
 
 /* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */
 static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
-				sector_t resync_max_sectors)
+				enum sync_state state, sector_t resync_max_sectors)
 {
 	sector_t r;
-	enum sync_state state;
 	struct mddev *mddev = &rs->md;
 
 	clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
@@ -3447,8 +3434,6 @@
 		set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 
 	} else {
-		state = decipher_sync_action(mddev, recovery);
-
 		if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
 			r = mddev->recovery_cp;
 		else
@@ -3466,18 +3451,14 @@
 			/*
 			 * In case we are recovering, the array is not in sync
 			 * and health chars should show the recovering legs.
+			 *
+			 * Already retrieved recovery offset from curr_resync_completed above.
 			 */
 			;
-		else if (state == st_resync)
+
+		else if (state == st_resync || state == st_reshape)
 			/*
-			 * If "resync" is occurring, the raid set
-			 * is or may be out of sync hence the health
-			 * characters shall be 'a'.
-			 */
-			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
-		else if (state == st_reshape)
-			/*
-			 * If "reshape" is occurring, the raid set
+			 * If "resync/reshape" is occurring, the raid set
 			 * is or may be out of sync hence the health
 			 * characters shall be 'a'.
 			 */
@@ -3491,22 +3472,22 @@
 			 */
 			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 
-		else {
-			struct md_rdev *rdev;
-
+		else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
 			/*
 			 * We are idle and recovery is needed, prevent 'A' chars race
 			 * caused by components still set to in-sync by constructor.
 			 */
-			if (test_bit(MD_RECOVERY_NEEDED, &recovery))
-				set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
 
+		else {
 			/*
-			 * The raid set may be doing an initial sync, or it may
-			 * be rebuilding individual components. If all the
-			 * devices are In_sync, then it is the raid set that is
-			 * being initialized.
+			 * We are idle and the raid set may be doing an initial
+			 * sync, or it may be rebuilding individual components.
+			 * If all the devices are In_sync, then it is the raid set
+			 * that is being initialized.
 			 */
+			struct md_rdev *rdev;
+
 			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 			rdev_for_each(rdev, mddev)
 				if (!test_bit(Journal, &rdev->flags) &&
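
rs_get_progress() no longer deciphers the sync state itself; raid_status() computes it once via decipher_sync_action() and passes it in, so the status line reports exactly the state that drove the progress math. Inside, the separate st_resync and st_reshape arms merge (both just mark the set as possibly out of sync), and the idle case splits into an explicit MD_RECOVERY_NEEDED arm ahead of the final all-devices-In_sync scan.
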
@@ -3531,14 +3512,13 @@
 {
 	struct raid_set *rs = ti->private;
 	struct mddev *mddev = &rs->md;
-	struct r5conf *conf = mddev->private;
+	struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
 	int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
 	unsigned long recovery;
 	unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
 	unsigned int sz = 0;
-	unsigned int rebuild_disks;
-	unsigned int write_mostly_params = 0;
+	unsigned int rebuild_writemostly_count = 0;
 	sector_t progress, resync_max_sectors, resync_mismatches;
-	const char *sync_action;
+	enum sync_state state;
 	struct raid_type *rt;
 
@@ -3553,14 +3533,14 @@
 
 	/* Access most recent mddev properties for status output */
 	smp_rmb();
-	recovery = rs->md.recovery;
 	/* Get sensible max sectors even if raid set not yet started */
 	resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
 			     mddev->resync_max_sectors : mddev->dev_sectors;
-	progress = rs_get_progress(rs, recovery, resync_max_sectors);
+	recovery = rs->md.recovery;
+	state = decipher_sync_action(mddev, recovery);
+	progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
 	resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
 			    atomic64_read(&mddev->resync_mismatches) : 0;
-	sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
 
 	/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
 	for (i = 0; i < rs->raid_disks; i++)
@@ -3585,10 +3565,10 @@
 	 * v1.5.0+:
 	 *
 	 * Sync action:
-	 *   See Documentation/device-mapper/dm-raid.txt for
+	 *   See Documentation/admin-guide/device-mapper/dm-raid.rst for
 	 *   information on each of these states.
 	 */
-	DMEMIT(" %s", sync_action);
+	DMEMIT(" %s", sync_str(state));
 
 	/*
 	 * v1.5.0+:
@@ -3621,18 +3601,20 @@
 	case STATUSTYPE_TABLE:
 		/* Report the table line string you would use to construct this raid set */
 
-		/* Calculate raid parameter count */
-		for (i = 0; i < rs->raid_disks; i++)
-			if (test_bit(WriteMostly, &rs->dev[i].rdev.flags))
-				write_mostly_params += 2;
-		rebuild_disks = memweight(rs->rebuild_disks, DISKS_ARRAY_ELEMS * sizeof(*rs->rebuild_disks));
-		raid_param_cnt += rebuild_disks * 2 +
-				  write_mostly_params +
+		/*
+		 * Count any rebuild or writemostly argument pairs and subtract the
+		 * hweight count being added below of any rebuild and writemostly ctr flags.
+		 */
+		for (i = 0; i < rs->raid_disks; i++) {
+			rebuild_writemostly_count += (test_bit(i, (void *) rs->rebuild_disks) ? 2 : 0) +
+						     (test_bit(WriteMostly, &rs->dev[i].rdev.flags) ? 2 : 0);
+		}
+		rebuild_writemostly_count -= (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) ? 2 : 0) +
+					     (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags) ? 2 : 0);
+		/* Calculate raid parameter count based on ^ rebuild/writemostly argument counts and ctr flags set. */
+		raid_param_cnt += rebuild_writemostly_count +
 				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
-				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 +
-				  (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) +
-				  (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0);
-
+				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
 		/* Emit table line */
 		/* This has to be in the documented order for userspace! */
 		DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
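
Two things happen in raid_status(). First, the r5conf pointer is only taken from mddev->private for raid4/5/6 sets, so another personality's private data is never misread as a stripe-cache configuration. Second, the parameter counting for the table line is reworked around one rebuild_writemostly_count: each device with a rebuild or writemostly bit contributes an argument pair (2), then one pair per set ctr flag is subtracted because the hweight32(... & CTR_FLAG_OPTIONS_ONE_ARG) * 2 term below already counts each flag exactly once. For example, three rebuild devices yield six emitted arguments: 3 x 2 from the loop, minus 2 for the flag, plus 2 from the hweight term. The explicit journal_dev/journal_mode pairs also fold into the hweight term, now that both flags belong to the one-argument mask (first hunk of this diff).
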
3638 | 3620 | DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); |
---|
.. | .. |
---|
3640 | 3622 | DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC)); |
---|
3641 | 3623 | if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) |
---|
3642 | 3624 | DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC)); |
---|
3643 | | - if (rebuild_disks) |
---|
| 3625 | + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) |
---|
3644 | 3626 | for (i = 0; i < rs->raid_disks; i++) |
---|
3645 | | - if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks)) |
---|
3646 | | - DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), |
---|
3647 | | - rs->dev[i].rdev.raid_disk); |
---|
| 3627 | + if (test_bit(i, (void *) rs->rebuild_disks)) |
---|
| 3628 | + DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), i); |
---|
3648 | 3629 | if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) |
---|
3649 | 3630 | DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP), |
---|
3650 | 3631 | mddev->bitmap_info.daemon_sleep); |
---|
.. | .. |
---|
3654 | 3635 | if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) |
---|
3655 | 3636 | DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE), |
---|
3656 | 3637 | mddev->sync_speed_max); |
---|
3657 | | - if (write_mostly_params) |
---|
| 3638 | + if (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags)) |
---|
3658 | 3639 | for (i = 0; i < rs->raid_disks; i++) |
---|
3659 | 3640 | if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) |
---|
3660 | 3641 | DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY), |
---|
.. | .. |
---|
3751 | 3732 | unsigned int i; |
---|
3752 | 3733 | int r = 0; |
---|
3753 | 3734 | |
---|
3754 | | - for (i = 0; !r && i < rs->md.raid_disks; i++) |
---|
3755 | | - if (rs->dev[i].data_dev) |
---|
3756 | | - r = fn(ti, |
---|
3757 | | - rs->dev[i].data_dev, |
---|
3758 | | - 0, /* No offset on data devs */ |
---|
3759 | | - rs->md.dev_sectors, |
---|
3760 | | - data); |
---|
| 3735 | + for (i = 0; !r && i < rs->raid_disks; i++) { |
---|
| 3736 | + if (rs->dev[i].data_dev) { |
---|
| 3737 | + r = fn(ti, rs->dev[i].data_dev, |
---|
| 3738 | + 0, /* No offset on data devs */ |
---|
| 3739 | + rs->md.dev_sectors, data); |
---|
| 3740 | + } |
---|
| 3741 | + } |
---|
3761 | 3742 | |
---|
3762 | 3743 | return r; |
---|
3763 | 3744 | } |
---|
.. | .. |
---|
3765 | 3746 | static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) |
---|
3766 | 3747 | { |
---|
3767 | 3748 | struct raid_set *rs = ti->private; |
---|
3768 | | - unsigned int chunk_size = to_bytes(rs->md.chunk_sectors); |
---|
| 3749 | + unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors); |
---|
3769 | 3750 | |
---|
3770 | | - blk_limits_io_min(limits, chunk_size); |
---|
3771 | | - blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs)); |
---|
| 3751 | + blk_limits_io_min(limits, chunk_size_bytes); |
---|
| 3752 | + blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); |
---|
| 3753 | + |
---|
| 3754 | + /* |
---|
| 3755 | + * RAID0 and RAID10 personalities require bio splitting, |
---|
| 3756 | + * RAID1/4/5/6 don't and process large discard bios properly. |
---|
| 3757 | + */ |
---|
| 3758 | + if (rs_is_raid0(rs) || rs_is_raid10(rs)) { |
---|
| 3759 | + limits->discard_granularity = chunk_size_bytes; |
---|
| 3760 | + limits->max_discard_sectors = rs->md.chunk_sectors; |
---|
| 3761 | + } |
---|
3772 | 3762 | } |
---|
3773 | 3763 | |
---|
3774 | 3764 | static void raid_postsuspend(struct dm_target *ti) |
---|
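
The status and iterate_devices loops switch to rs->raid_disks for the same reshape-safety reason as in validate_raid_redundancy(), and the rebuild/writemostly emit paths are keyed off the ctr flags that the counting above relied on. raid_io_hints() picks up the discard limits that replace ti->split_discard_bios: for raid0 and raid10, discard_granularity and max_discard_sectors are pinned to one chunk so the block layer splits discards on chunk boundaries, while the other personalities handle large discard bios themselves. The rename to chunk_size_bytes also makes the units explicit; max_discard_sectors deliberately stays in sectors, hence rs->md.chunk_sectors.
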
@@ -3802,7 +3792,7 @@
 
 	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
 
-	for (i = 0; i < mddev->raid_disks; i++) {
+	for (i = 0; i < rs->raid_disks; i++) {
 		r = &rs->dev[i].rdev;
 		/* HM FIXME: enhance journal device recovery processing */
 		if (test_bit(Journal, &r->flags))
@@ -3973,11 +3963,22 @@
 	if (r)
 		return r;
 
-	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
-	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
-	    mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
-		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors,
-				     to_bytes(rs->requested_bitmap_chunk_sectors), 0);
+	/* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
+	if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+		mddev->array_sectors = rs->array_sectors;
+		mddev->dev_sectors = rs->dev_sectors;
+		rs_set_rdev_sectors(rs);
+		rs_set_capacity(rs);
+	}
+
+	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
+	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
+	    (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
+	     (rs->requested_bitmap_chunk_sectors &&
+	      mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
+		int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
+
+		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
 		if (r)
 			DMERR("Failed to resize bitmap");
 	}
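
Preresume now performs the deferred grow: with RT_FLAG_RS_GROW set, the memorized rs->array_sectors/rs->dev_sectors are applied to the mddev, the rdevs are resized, and the capacity is set. The bitmap resize condition accordingly also fires on a grown device size, not only on a changed region size, and when no chunk size was requested it falls back to the current bitmap_info.chunksize via GNU C's conditional-with-omitted-operand: a ?: b is a ? a : b with a evaluated once.
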
@@ -3986,8 +3987,10 @@
 	/* Be prepared for mddev_resume() in raid_resume() */
 	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
-		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 		mddev->resync_min = mddev->recovery_cp;
+		if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
+			mddev->resync_max_sectors = mddev->dev_sectors;
 	}
 
 	/* Check for any reshape request unless new raid set */
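
For a partially synced set, MD_RECOVERY_REQUESTED replaces MD_RECOVERY_SYNC alongside the existing resync_min, and on a grow the resync end is raised to the new dev_sectors so the appended region gets synchronized. The version bump to 1.15.1 in the final hunk covers the grow support and the fixes in this diff.
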
@@ -4035,7 +4038,7 @@
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 14, 0},
+	.version = {1, 15, 1},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
---|