forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-31 f70575805708cabdedea7498aaa3f710fde4d920
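This commit carries no message of its own; judging from the hunks below, it appears to fold a series of upstream dm-raid.c fixes into the BSP kernel: support for growing a raid set (new RT_FLAG_RS_GROW runtime flag, cached rs->array_sectors/rs->dev_sectors, the size_check path in raid_ctr, and grow handling plus bitmap resize in raid_preresume), removal of the legacy congestion callback (dm_target_callbacks/raid_is_congested), a single unified constructor error path (bad_unlock) with md_stop() now called under the mddev lock (also in raid_dtr), per-chunk discard limits set in raid_io_hints in place of the removed ti->split_discard_bios, corrected rebuild/write_mostly argument counting in the status table line, the dev[] flexible-array conversion, and a target version bump to 1.15.1.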
kernel/drivers/md/dm-raid.c
@@ -129,7 +129,9 @@
                   CTR_FLAG_RAID10_COPIES | \
                   CTR_FLAG_RAID10_FORMAT | \
                   CTR_FLAG_DELTA_DISKS | \
-                  CTR_FLAG_DATA_OFFSET)
+                  CTR_FLAG_DATA_OFFSET | \
+                  CTR_FLAG_JOURNAL_DEV | \
+                  CTR_FLAG_JOURNAL_MODE)
 
 /* Valid options definitions per raid level... */
 
@@ -209,6 +211,7 @@
 #define RT_FLAG_RS_SUSPENDED 5
 #define RT_FLAG_RS_IN_SYNC 6
 #define RT_FLAG_RS_RESYNCING 7
+#define RT_FLAG_RS_GROW 8
 
 /* Array elements of 64 bit needed for rebuild/failed disk bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -239,7 +242,9 @@
 
        struct mddev md;
        struct raid_type *raid_type;
-       struct dm_target_callbacks callbacks;
+
+       sector_t array_sectors;
+       sector_t dev_sectors;
 
        /* Optional raid4/5/6 journal device */
        struct journal_dev {
@@ -248,7 +253,7 @@
                int mode;
        } journal_dev;
 
-       struct raid_dev dev[0];
+       struct raid_dev dev[];
 };
 
 static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
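A side note on the dev[0] -> dev[] change in the hunk above: the old GNU zero-length-array idiom is replaced by a C99 flexible array member, which lets the compiler and the kernel's bounds/overflow checkers know the trailing array is intentionally unbounded. A minimal sketch of the allocation pattern this enables (illustrative, not copied from this file; struct_size() is the kernel helper that computes header-plus-trailing-array sizes with overflow checking):

    /* Sketch: allocate a raid_set with raid_devs trailing raid_dev slots.
     * struct_size(rs, dev, raid_devs) evaluates to
     * sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), saturating on overflow.
     */
    struct raid_set *rs = kzalloc(struct_size(rs, dev, raid_devs), GFP_KERNEL);

    if (!rs)
            return ERR_PTR(-ENOMEM);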
@@ -616,7 +621,6 @@
 
        } else if (algorithm == ALGORITHM_RAID10_FAR) {
                f = copies;
-               r = !RAID10_OFFSET;
                if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
                        r |= RAID10_USE_FAR_SETS;
 
@@ -697,7 +701,7 @@
        struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
 
        set_capacity(gendisk, rs->md.array_sectors);
-       revalidate_disk(gendisk);
+       revalidate_disk_size(gendisk, true);
 }
 
 /*
@@ -998,12 +1002,13 @@
 static int validate_raid_redundancy(struct raid_set *rs)
 {
        unsigned int i, rebuild_cnt = 0;
-       unsigned int rebuilds_per_group = 0, copies;
+       unsigned int rebuilds_per_group = 0, copies, raid_disks;
        unsigned int group_size, last_group_start;
 
-       for (i = 0; i < rs->md.raid_disks; i++)
-               if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
-                   !rs->dev[i].rdev.sb_page)
+       for (i = 0; i < rs->raid_disks; i++)
+               if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
+                   ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
+                     !rs->dev[i].rdev.sb_page)))
                        rebuild_cnt++;
 
        switch (rs->md.level) {
@@ -1043,8 +1048,9 @@
         *      A    A    B    B    C
         *      C    D    D    E    E
         */
+       raid_disks = min(rs->raid_disks, rs->md.raid_disks);
        if (__is_raid10_near(rs->md.new_layout)) {
-               for (i = 0; i < rs->md.raid_disks; i++) {
+               for (i = 0; i < raid_disks; i++) {
                        if (!(i % copies))
                                rebuilds_per_group = 0;
                        if ((!rs->dev[i].rdev.sb_page ||
@@ -1067,10 +1073,10 @@
         * results in the need to treat the last (potentially larger)
         * set differently.
         */
-       group_size = (rs->md.raid_disks / copies);
-       last_group_start = (rs->md.raid_disks / group_size) - 1;
+       group_size = (raid_disks / copies);
+       last_group_start = (raid_disks / group_size) - 1;
        last_group_start *= group_size;
 
-       for (i = 0; i < rs->md.raid_disks; i++) {
+       for (i = 0; i < raid_disks; i++) {
                if (!(i % copies) && !(i > last_group_start))
                        rebuilds_per_group = 0;
                if ((!rs->dev[i].rdev.sb_page ||
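A worked example of the far/offset group arithmetic above, with assumed numbers (5 devices, 2 copies; not values taken from this diff):

    /* raid_disks = 5, copies = 2:
     *   group_size       = 5 / 2             = 2
     *   last_group_start = ((5 / 2) - 1) * 2 = 2
     * so the sets are {0,1} and the larger final set {2,3,4}.
     * rebuilds_per_group is reset at i == 0 and i == 2 only, and the
     * ++rebuilds_per_group >= copies test then tolerates at most
     * copies - 1 == 1 rebuilding device per set.
     */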
@@ -1585,7 +1591,7 @@
 {
        int i;
 
-       for (i = 0; i < rs->md.raid_disks; i++) {
+       for (i = 0; i < rs->raid_disks; i++) {
                struct md_rdev *rdev = &rs->dev[i].rdev;
 
                if (!test_bit(Journal, &rdev->flags) &&
@@ -1615,13 +1621,12 @@
 }
 
 /* Calculate the sectors per device and per array used for @rs */
-static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
+static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
 {
        int delta_disks;
        unsigned int data_stripes;
+       sector_t array_sectors = sectors, dev_sectors = sectors;
        struct mddev *mddev = &rs->md;
-       struct md_rdev *rdev;
-       sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len;
 
        if (use_mddev) {
                delta_disks = mddev->delta_disks;
@@ -1656,12 +1661,9 @@
        /* Striped layouts */
        array_sectors = (data_stripes + delta_disks) * dev_sectors;
 
-       rdev_for_each(rdev, mddev)
-               if (!test_bit(Journal, &rdev->flags))
-                       rdev->sectors = dev_sectors;
-
        mddev->array_sectors = array_sectors;
        mddev->dev_sectors = dev_sectors;
+       rs_set_rdev_sectors(rs);
 
        return _check_data_dev_sectors(rs);
 bad:
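The signature change above threads the size through explicitly instead of always deriving it from rs->ti->len, and the open-coded rdev loop moves into the rs_set_rdev_sectors() helper. The call sites later in this commit then select the size source, roughly:

    /* Size from the table line (shrink/no-change paths): */
    r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);

    /* Size as recorded in the superblocks (grow path, see size_check): */
    r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);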
@@ -1670,7 +1672,7 @@
 }
 
 /* Setup recovery on @rs */
-static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
+static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
 {
        /* raid0 does not recover */
        if (rs_is_raid0(rs))
@@ -1691,22 +1693,6 @@
                        ? MaxSector : dev_sectors;
 }
 
-/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */
-static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
-{
-       if (!dev_sectors)
-               /* New raid set or 'sync' flag provided */
-               __rs_setup_recovery(rs, 0);
-       else if (dev_sectors == MaxSector)
-               /* Prevent recovery */
-               __rs_setup_recovery(rs, MaxSector);
-       else if (__rdev_sectors(rs) < dev_sectors)
-               /* Grown raid set */
-               __rs_setup_recovery(rs, __rdev_sectors(rs));
-       else
-               __rs_setup_recovery(rs, MaxSector);
-}
-
 static void do_table_event(struct work_struct *ws)
 {
        struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
@@ -1718,13 +1704,6 @@
                rs_set_capacity(rs);
        }
        dm_table_event(rs->ti->table);
-}
-
-static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
-{
-       struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-       return mddev_congested(&rs->md, bits);
 }
 
 /*
@@ -2366,8 +2345,6 @@
 
        if (new_devs == rs->raid_disks || !rebuilds) {
                /* Replace a broken device */
-               if (new_devs == 1 && !rs->delta_disks)
-                       ;
                if (new_devs == rs->raid_disks) {
                        DMINFO("Superblocks created for new raid set");
                        set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
@@ -2480,7 +2457,7 @@
                return -EINVAL;
        }
 
-       /* Enable bitmap creation for RAID levels != 0 */
+       /* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */
        mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
        mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
 
@@ -2917,7 +2894,7 @@
 
        /* Remove disk(s) */
        } else if (rs->delta_disks < 0) {
-               r = rs_set_dev_and_array_sectors(rs, true);
+               r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
                mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
 
        /* Change layout and/or chunk size */
@@ -3006,11 +2983,6 @@
                }
        }
 
-       /*
-        * RAID1 and RAID10 personalities require bio splitting,
-        * RAID0/4/5/6 don't and process large discard bios properly.
-        */
-       ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
        ti->num_discard_bios = 1;
 }
 
@@ -3033,7 +3005,7 @@
        bool resize = false;
        struct raid_type *rt;
        unsigned int num_raid_params, num_raid_devs;
-       sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors;
+       sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
        struct raid_set *rs = NULL;
        const char *arg;
        struct rs_layout rs_layout;
@@ -3043,7 +3015,6 @@
                { 1, 254, "Cannot understand number of raid devices parameters" }
        };
 
-       /* Must have <raid_type> */
        arg = dm_shift_arg(&as);
        if (!arg) {
                ti->error = "No arguments";
@@ -3092,11 +3063,13 @@
         *
         * Any existing superblock will overwrite the array and device sizes
         */
-       r = rs_set_dev_and_array_sectors(rs, false);
+       r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
        if (r)
                goto bad;
 
-       calculated_dev_sectors = rs->md.dev_sectors;
+       /* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */
+       rs->array_sectors = rs->md.array_sectors;
+       rs->dev_sectors = rs->md.dev_sectors;
 
        /*
         * Backup any new raid set level, layout, ...
@@ -3109,6 +3082,8 @@
        if (r)
                goto bad;
 
+       /* All in-core metadata now as of current superblocks after calling analyse_superblocks() */
+       sb_array_sectors = rs->md.array_sectors;
        rdev_sectors = __rdev_sectors(rs);
        if (!rdev_sectors) {
                ti->error = "Invalid rdev size";
@@ -3118,8 +3093,11 @@
 
 
        reshape_sectors = _get_reshape_sectors(rs);
-       if (calculated_dev_sectors != rdev_sectors)
-               resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors);
+       if (rs->dev_sectors != rdev_sectors) {
+               resize = (rs->dev_sectors != rdev_sectors - reshape_sectors);
+               if (rs->dev_sectors > rdev_sectors - reshape_sectors)
+                       set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+       }
 
        INIT_WORK(&rs->md.event_work, do_table_event);
        ti->private = rs;
@@ -3146,13 +3124,8 @@
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
                rs_set_new(rs);
        } else if (rs_is_recovering(rs)) {
-               /* Rebuild particular devices */
-               if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
-                       set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-                       rs_setup_recovery(rs, MaxSector);
-               }
                /* A recovering raid set may be resized */
-               ; /* skip setup rs */
+               goto size_check;
        } else if (rs_is_reshaping(rs)) {
                /* Have to reject size change request during reshape */
                if (resize) {
@@ -3196,6 +3169,9 @@
                rs_setup_recovery(rs, MaxSector);
                rs_set_new(rs);
        } else if (rs_reshape_requested(rs)) {
+               /* Only request grow on raid set size extensions, not on reshapes. */
+               clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+
                /*
                 * No need to check for 'ongoing' takeover here, because takeover
                 * is an instant operation as opposed to an ongoing reshape.
@@ -3226,13 +3202,31 @@
                }
                rs_set_cur(rs);
        } else {
+size_check:
                /* May not set recovery when a device rebuild is requested */
                if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
-                       rs_setup_recovery(rs, MaxSector);
+                       clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
                        set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               } else
-                       rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ?
-                                             0 : (resize ? calculated_dev_sectors : MaxSector));
+                       rs_setup_recovery(rs, MaxSector);
+               } else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+                       /*
+                        * Set raid set to current size, i.e. size as of
+                        * superblocks to grow to larger size in preresume.
+                        */
+                       r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
+                       if (r)
+                               goto bad;
+
+                       rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
+               } else {
+                       /* This is no size change or it is shrinking, update size and record in superblocks */
+                       r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
+                       if (r)
+                               goto bad;
+
+                       if (sb_array_sectors > rs->array_sectors)
+                               set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+               }
                rs_set_cur(rs);
        }
 
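Taken together with the raid_preresume() hunk near the end of this commit, the grow support works in two phases, roughly:

    /* 1. raid_ctr: the table line asks for more sectors than the
     *    superblocks record (rs->dev_sectors > rdev_sectors - reshape_sectors),
     *    so RT_FLAG_RS_GROW is set and the set is sized back to
     *    sb_array_sectors, with recovery pointed at
     *    min(recovery_cp, dev_sectors).
     * 2. raid_preresume: apply the memorized larger rs->array_sectors /
     *    rs->dev_sectors, resize the write-intent bitmap, and let the
     *    requested sync cover the newly added tail.
     */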
@@ -3264,20 +3258,15 @@
        r = md_start(&rs->md);
        if (r) {
                ti->error = "Failed to start raid array";
-               mddev_unlock(&rs->md);
-               goto bad_md_start;
+               goto bad_unlock;
        }
-
-       rs->callbacks.congested_fn = raid_is_congested;
-       dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
        /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
        if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
                r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
                if (r) {
                        ti->error = "Failed to set raid4/5/6 journal mode";
-                       mddev_unlock(&rs->md);
-                       goto bad_journal_mode_set;
+                       goto bad_unlock;
                }
        }
 
@@ -3288,14 +3277,14 @@
        if (rs_is_raid456(rs)) {
                r = rs_set_raid456_stripe_cache(rs);
                if (r)
-                       goto bad_stripe_cache;
+                       goto bad_unlock;
        }
 
        /* Now do an early reshape check */
        if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
                r = rs_check_reshape(rs);
                if (r)
-                       goto bad_check_reshape;
+                       goto bad_unlock;
 
                /* Restore new, ctr requested layout to perform check */
                rs_config_restore(rs, &rs_layout);
@@ -3304,7 +3293,7 @@
                        r = rs->md.pers->check_reshape(&rs->md);
                        if (r) {
                                ti->error = "Reshape check failed";
-                               goto bad_check_reshape;
+                               goto bad_unlock;
                        }
                }
        }
@@ -3315,11 +3304,9 @@
        mddev_unlock(&rs->md);
        return 0;
 
-bad_md_start:
-bad_journal_mode_set:
-bad_stripe_cache:
-bad_check_reshape:
+bad_unlock:
        md_stop(&rs->md);
+       mddev_unlock(&rs->md);
 bad:
        raid_set_free(rs);
 
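The four per-failure labels collapse into a single bad_unlock label here. Note the ordering this encodes: md_stop() is now reached while the mddev lock is still held and the unlock happens afterwards, mirrored by the raid_dtr() hunk below, which wraps its own md_stop() call in mddev_lock_nointr()/mddev_unlock().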
@@ -3330,8 +3317,9 @@
 {
        struct raid_set *rs = ti->private;
 
-       list_del_init(&rs->callbacks.list);
+       mddev_lock_nointr(&rs->md);
        md_stop(&rs->md);
+       mddev_unlock(&rs->md);
        raid_set_free(rs);
 }
 
@@ -3433,10 +3421,9 @@
 
 /* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resyncing */
 static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
-                               sector_t resync_max_sectors)
+                               enum sync_state state, sector_t resync_max_sectors)
 {
        sector_t r;
-       enum sync_state state;
        struct mddev *mddev = &rs->md;
 
        clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
@@ -3447,8 +3434,6 @@
                set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 
        } else {
-               state = decipher_sync_action(mddev, recovery);
-
                if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
                        r = mddev->recovery_cp;
                else
@@ -3466,18 +3451,14 @@
                        /*
                         * In case we are recovering, the array is not in sync
                         * and health chars should show the recovering legs.
+                        *
+                        * Already retrieved recovery offset from curr_resync_completed above.
                         */
                        ;
-               else if (state == st_resync)
+
+               else if (state == st_resync || state == st_reshape)
                        /*
-                        * If "resync" is occurring, the raid set
-                        * is or may be out of sync hence the health
-                        * characters shall be 'a'.
-                        */
-                       set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
-               else if (state == st_reshape)
-                       /*
-                        * If "reshape" is occurring, the raid set
+                        * If "resync/reshape" is occurring, the raid set
                         * is or may be out of sync hence the health
                         * characters shall be 'a'.
                         */
@@ -3491,22 +3472,22 @@
                         */
                        set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 
-               else {
-                       struct md_rdev *rdev;
-
+               else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
                        /*
                         * We are idle and recovery is needed, prevent 'A' chars race
                         * caused by components still set to in-sync by constructor.
                         */
-                       if (test_bit(MD_RECOVERY_NEEDED, &recovery))
-                               set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+                       set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
 
+               else {
                        /*
-                        * The raid set may be doing an initial sync, or it may
-                        * be rebuilding individual components. If all the
-                        * devices are In_sync, then it is the raid set that is
-                        * being initialized.
+                        * We are idle and the raid set may be doing an initial
+                        * sync, or it may be rebuilding individual components.
+                        * If all the devices are In_sync, then it is the raid set
+                        * that is being initialized.
                         */
+                       struct md_rdev *rdev;
+
                        set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
                        rdev_for_each(rdev, mddev)
                                if (!test_bit(Journal, &rdev->flags) &&
@@ -3531,15 +3512,14 @@
 {
        struct raid_set *rs = ti->private;
        struct mddev *mddev = &rs->md;
-       struct r5conf *conf = mddev->private;
+       struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
        int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
        unsigned long recovery;
        unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
        unsigned int sz = 0;
-       unsigned int rebuild_disks;
-       unsigned int write_mostly_params = 0;
+       unsigned int rebuild_writemostly_count = 0;
        sector_t progress, resync_max_sectors, resync_mismatches;
-       const char *sync_action;
+       enum sync_state state;
        struct raid_type *rt;
 
        switch (type) {
@@ -3553,14 +3533,14 @@
 
        /* Access most recent mddev properties for status output */
        smp_rmb();
-       recovery = rs->md.recovery;
        /* Get sensible max sectors even if raid set not yet started */
        resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
                             mddev->resync_max_sectors : mddev->dev_sectors;
-       progress = rs_get_progress(rs, recovery, resync_max_sectors);
+       recovery = rs->md.recovery;
+       state = decipher_sync_action(mddev, recovery);
+       progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
        resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
                            atomic64_read(&mddev->resync_mismatches) : 0;
-       sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
 
        /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
        for (i = 0; i < rs->raid_disks; i++)
@@ -3585,10 +3565,10 @@
         * v1.5.0+:
         *
         * Sync action:
-        *   See Documentation/device-mapper/dm-raid.txt for
+        *   See Documentation/admin-guide/device-mapper/dm-raid.rst for
         *   information on each of these states.
         */
-       DMEMIT(" %s", sync_action);
+       DMEMIT(" %s", sync_str(state));
 
        /*
        * v1.5.0+:
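In the status path above, decipher_sync_action() is now evaluated once in raid_status() and the resulting enum sync_state is both passed down to rs_get_progress() and reused for the emitted string via sync_str(state), instead of being decoded independently in two places.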
@@ -3621,18 +3601,20 @@
        case STATUSTYPE_TABLE:
                /* Report the table line string you would use to construct this raid set */
 
-               /* Calculate raid parameter count */
-               for (i = 0; i < rs->raid_disks; i++)
-                       if (test_bit(WriteMostly, &rs->dev[i].rdev.flags))
-                               write_mostly_params += 2;
-               rebuild_disks = memweight(rs->rebuild_disks, DISKS_ARRAY_ELEMS * sizeof(*rs->rebuild_disks));
-               raid_param_cnt += rebuild_disks * 2 +
-                                 write_mostly_params +
+               /*
+                * Count any rebuild or writemostly argument pairs and subtract the
+                * hweight count being added below of any rebuild and writemostly ctr flags.
+                */
+               for (i = 0; i < rs->raid_disks; i++) {
+                       rebuild_writemostly_count += (test_bit(i, (void *) rs->rebuild_disks) ? 2 : 0) +
+                                                    (test_bit(WriteMostly, &rs->dev[i].rdev.flags) ? 2 : 0);
+               }
+               rebuild_writemostly_count -= (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) ? 2 : 0) +
+                                            (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags) ? 2 : 0);
+               /* Calculate raid parameter count based on ^ rebuild/writemostly argument counts and ctr flags set. */
+               raid_param_cnt += rebuild_writemostly_count +
                                  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
-                                 hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 +
-                                 (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) +
-                                 (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0);
-
+                                 hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
                /* Emit table line */
                /* This has to be in the documented order for userspace! */
                DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
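On the counting fix above: the hweight32(... & CTR_FLAG_OPTIONS_ONE_ARG) * 2 term charges two argument slots for each distinct one-arg flag, but rebuild and write_mostly may repeat once per device, hence the per-device loop and the subtraction. A worked count with assumed values (rebuild disks 1 and 3, write_mostly disk 2, no other one-arg options set):

    /* loop over 4 disks:  0 + 2 + 2 + 2                        = 6
     * subtract:  2 (rebuild flag) + 2 (write_mostly flag)      = 4  -> 2
     * hweight32 term adds 2 per distinct flag                  = 4  -> 6 total,
     * matching the six words "rebuild 1 rebuild 3 write_mostly 2"
     * emitted into the table line.
     */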
@@ -3640,11 +3622,10 @@
                        DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC));
                if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
                        DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC));
-               if (rebuild_disks)
+               if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags))
                        for (i = 0; i < rs->raid_disks; i++)
-                               if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks))
-                                       DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD),
-                                              rs->dev[i].rdev.raid_disk);
+                               if (test_bit(i, (void *) rs->rebuild_disks))
+                                       DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), i);
                if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags))
                        DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP),
                               mddev->bitmap_info.daemon_sleep);
@@ -3654,7 +3635,7 @@
                if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
                        DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE),
                               mddev->sync_speed_max);
-               if (write_mostly_params)
+               if (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags))
                        for (i = 0; i < rs->raid_disks; i++)
                                if (test_bit(WriteMostly, &rs->dev[i].rdev.flags))
                                        DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY),
@@ -3751,13 +3732,13 @@
        unsigned int i;
        int r = 0;
 
-       for (i = 0; !r && i < rs->md.raid_disks; i++)
-               if (rs->dev[i].data_dev)
-                       r = fn(ti,
-                              rs->dev[i].data_dev,
-                              0, /* No offset on data devs */
-                              rs->md.dev_sectors,
-                              data);
+       for (i = 0; !r && i < rs->raid_disks; i++) {
+               if (rs->dev[i].data_dev) {
+                       r = fn(ti, rs->dev[i].data_dev,
+                              0, /* No offset on data devs */
+                              rs->md.dev_sectors, data);
+               }
+       }
 
        return r;
 }
@@ -3765,10 +3746,19 @@
 static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
        struct raid_set *rs = ti->private;
-       unsigned int chunk_size = to_bytes(rs->md.chunk_sectors);
+       unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
 
-       blk_limits_io_min(limits, chunk_size);
-       blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+       blk_limits_io_min(limits, chunk_size_bytes);
+       blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
+
+       /*
+        * RAID0 and RAID10 personalities require bio splitting,
+        * RAID1/4/5/6 don't and process large discard bios properly.
+        */
+       if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
+               limits->discard_granularity = chunk_size_bytes;
+               limits->max_discard_sectors = rs->md.chunk_sectors;
+       }
 }
 
 static void raid_postsuspend(struct dm_target *ti)
37743764 static void raid_postsuspend(struct dm_target *ti)
....@@ -3802,7 +3792,7 @@
38023792
38033793 memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
38043794
3805
- for (i = 0; i < mddev->raid_disks; i++) {
3795
+ for (i = 0; i < rs->raid_disks; i++) {
38063796 r = &rs->dev[i].rdev;
38073797 /* HM FIXME: enhance journal device recovery processing */
38083798 if (test_bit(Journal, &r->flags))
....@@ -3973,11 +3963,22 @@
39733963 if (r)
39743964 return r;
39753965
3976
- /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
3977
- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
3978
- mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
3979
- r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors,
3980
- to_bytes(rs->requested_bitmap_chunk_sectors), 0);
3966
+ /* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
3967
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
3968
+ mddev->array_sectors = rs->array_sectors;
3969
+ mddev->dev_sectors = rs->dev_sectors;
3970
+ rs_set_rdev_sectors(rs);
3971
+ rs_set_capacity(rs);
3972
+ }
3973
+
3974
+ /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
3975
+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
3976
+ (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
3977
+ (rs->requested_bitmap_chunk_sectors &&
3978
+ mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
3979
+ int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
3980
+
3981
+ r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
39813982 if (r)
39823983 DMERR("Failed to resize bitmap");
39833984 }
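In the grow case the bitmap must be resized even when no region_size was given on the table line, so the condition gains the RT_FLAG_RS_GROW alternative, and the GNU a ?: b shorthand falls back to the bitmap's current chunksize when rs->requested_bitmap_chunk_sectors is zero. The MD_RECOVERY_REQUESTED/resync_max_sectors change below then confines the requested sync to the range from recovery_cp up to the grown device size.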
@@ -3986,8 +3987,10 @@
        /* Be prepared for mddev_resume() in raid_resume() */
        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
-               set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+               set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
                mddev->resync_min = mddev->recovery_cp;
+               if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
+                       mddev->resync_max_sectors = mddev->dev_sectors;
        }
 
        /* Check for any reshape request unless new raid set */
@@ -4035,7 +4038,7 @@
 
 static struct target_type raid_target = {
        .name = "raid",
-       .version = {1, 14, 0},
+       .version = {1, 15, 1},
        .module = THIS_MODULE,
        .ctr = raid_ctr,
        .dtr = raid_dtr,