hc
2024-01-31 f9004dbfff8a3fbbd7e2a88c8a4327c7f2f8e5b2
kernel/drivers/md/md-cluster.c
....@@ -1,11 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Copyright (C) 2015, SUSE
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation; either version 2, or (at your option)
7
- * any later version.
8
- *
94 */
105
116
....@@ -31,13 +26,6 @@
3126 void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
3227 struct mddev *mddev; /* pointing back to mddev. */
3328 int mode;
34
-};
35
-
36
-struct suspend_info {
37
- int slot;
38
- sector_t lo;
39
- sector_t hi;
40
- struct list_head list;
4129 };
4230
4331 struct resync_info {
....@@ -80,7 +68,13 @@
8068 struct dlm_lock_resource **other_bitmap_lockres;
8169 struct dlm_lock_resource *resync_lockres;
8270 struct list_head suspend_list;
71
+
8372 spinlock_t suspend_lock;
73
+ /* record the region which write should be suspended */
74
+ sector_t suspend_lo;
75
+ sector_t suspend_hi;
76
+ int suspend_from; /* the slot which broadcast suspend_lo/hi */
77
+
8478 struct md_thread *recovery_thread;
8579 unsigned long recovery_map;
8680 /* communication loc resources */
....@@ -105,6 +99,7 @@
10599 RE_ADD,
106100 BITMAP_NEEDS_SYNC,
107101 CHANGE_CAPACITY,
102
+ BITMAP_RESIZE,
108103 };
109104
110105 struct cluster_msg {
....@@ -270,25 +265,22 @@
270265 ri->hi = cpu_to_le64(hi);
271266 }
272267
273
-static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
268
+static int read_resync_info(struct mddev *mddev,
269
+ struct dlm_lock_resource *lockres)
274270 {
275271 struct resync_info ri;
276
- struct suspend_info *s = NULL;
277
- sector_t hi = 0;
272
+ struct md_cluster_info *cinfo = mddev->cluster_info;
273
+ int ret = 0;
278274
279275 dlm_lock_sync(lockres, DLM_LOCK_CR);
280276 memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
281
- hi = le64_to_cpu(ri.hi);
282
- if (hi > 0) {
283
- s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
284
- if (!s)
285
- goto out;
286
- s->hi = hi;
287
- s->lo = le64_to_cpu(ri.lo);
277
+ if (le64_to_cpu(ri.hi) > 0) {
278
+ cinfo->suspend_hi = le64_to_cpu(ri.hi);
279
+ cinfo->suspend_lo = le64_to_cpu(ri.lo);
280
+ ret = 1;
288281 }
289282 dlm_unlock_sync(lockres);
290
-out:
291
- return s;
283
+ return ret;
292284 }
293285
294286 static void recover_bitmaps(struct md_thread *thread)
....@@ -298,7 +290,6 @@
298290 struct dlm_lock_resource *bm_lockres;
299291 char str[64];
300292 int slot, ret;
301
- struct suspend_info *s, *tmp;
302293 sector_t lo, hi;
303294
304295 while (cinfo->recovery_map) {
....@@ -325,12 +316,16 @@
325316
326317 /* Clear suspend_area associated with the bitmap */
327318 spin_lock_irq(&cinfo->suspend_lock);
328
- list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
329
- if (slot == s->slot) {
330
- list_del(&s->list);
331
- kfree(s);
332
- }
319
+ cinfo->suspend_hi = 0;
320
+ cinfo->suspend_lo = 0;
321
+ cinfo->suspend_from = -1;
333322 spin_unlock_irq(&cinfo->suspend_lock);
323
+
324
+ /* Kick off a reshape if needed */
325
+ if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
326
+ test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
327
+ mddev->reshape_position != MaxSector)
328
+ md_wakeup_thread(mddev->sync_thread);
334329
335330 if (hi > 0) {
336331 if (lo < mddev->recovery_cp)
....@@ -434,34 +429,23 @@
434429 }
435430 }
436431
437
-static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
438
-{
439
- struct suspend_info *s, *tmp;
440
-
441
- list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
442
- if (slot == s->slot) {
443
- list_del(&s->list);
444
- kfree(s);
445
- break;
446
- }
447
-}
448
-
449432 static void remove_suspend_info(struct mddev *mddev, int slot)
450433 {
451434 struct md_cluster_info *cinfo = mddev->cluster_info;
452435 mddev->pers->quiesce(mddev, 1);
453436 spin_lock_irq(&cinfo->suspend_lock);
454
- __remove_suspend_info(cinfo, slot);
437
+ cinfo->suspend_hi = 0;
438
+ cinfo->suspend_lo = 0;
455439 spin_unlock_irq(&cinfo->suspend_lock);
456440 mddev->pers->quiesce(mddev, 0);
457441 }
458
-
459442
460443 static void process_suspend_info(struct mddev *mddev,
461444 int slot, sector_t lo, sector_t hi)
462445 {
463446 struct md_cluster_info *cinfo = mddev->cluster_info;
464
- struct suspend_info *s;
447
+ struct mdp_superblock_1 *sb = NULL;
448
+ struct md_rdev *rdev;
465449
466450 if (!hi) {
467451 /*
....@@ -475,6 +459,12 @@
475459 return;
476460 }
477461
462
+ rdev_for_each(rdev, mddev)
463
+ if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
464
+ sb = page_address(rdev->sb_page);
465
+ break;
466
+ }
467
+
478468 /*
479469 * The bitmaps are not same for different nodes
480470 * if RESYNCING is happening in one node, then
....@@ -487,26 +477,26 @@
487477 * sync_low/hi is used to record the region which
488478 * arrived in the previous RESYNCING message,
489479 *
490
- * Call bitmap_sync_with_cluster to clear
491
- * NEEDED_MASK and set RESYNC_MASK since
492
- * resync thread is running in another node,
493
- * so we don't need to do the resync again
494
- * with the same section */
495
- md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, cinfo->sync_hi, lo, hi);
480
+ * Call md_bitmap_sync_with_cluster to clear NEEDED_MASK
481
+ * and set RESYNC_MASK since resync thread is running
482
+ * in another node, so we don't need to do the resync
483
+ * again with the same section.
484
+ *
485
+ * Skip md_bitmap_sync_with_cluster in case reshape
486
+ * happening, because reshaping region is small and
487
+ * we don't want to trigger lots of WARN.
488
+ */
489
+ if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE))
490
+ md_bitmap_sync_with_cluster(mddev, cinfo->sync_low,
491
+ cinfo->sync_hi, lo, hi);
496492 cinfo->sync_low = lo;
497493 cinfo->sync_hi = hi;
498494
499
- s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
500
- if (!s)
501
- return;
502
- s->slot = slot;
503
- s->lo = lo;
504
- s->hi = hi;
505495 mddev->pers->quiesce(mddev, 1);
506496 spin_lock_irq(&cinfo->suspend_lock);
507
- /* Remove existing entry (if exists) before adding */
508
- __remove_suspend_info(cinfo, slot);
509
- list_add(&s->list, &cinfo->suspend_list);
497
+ cinfo->suspend_from = slot;
498
+ cinfo->suspend_lo = lo;
499
+ cinfo->suspend_hi = hi;
510500 spin_unlock_irq(&cinfo->suspend_lock);
511501 mddev->pers->quiesce(mddev, 0);
512502 }
....@@ -592,7 +582,7 @@
592582 break;
593583 case CHANGE_CAPACITY:
594584 set_capacity(mddev->gendisk, mddev->array_sectors);
595
- revalidate_disk(mddev->gendisk);
585
+ revalidate_disk_size(mddev->gendisk, true);
596586 break;
597587 case RESYNCING:
598588 set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
....@@ -611,6 +601,11 @@
611601 break;
612602 case BITMAP_NEEDS_SYNC:
613603 __recover_slot(mddev, le32_to_cpu(msg->slot));
604
+ break;
605
+ case BITMAP_RESIZE:
606
+ if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
607
+ ret = md_bitmap_resize(mddev->bitmap,
608
+ le64_to_cpu(msg->high), 0, 0);
614609 break;
615610 default:
616611 ret = -1;
....@@ -805,7 +800,6 @@
805800 struct md_cluster_info *cinfo = mddev->cluster_info;
806801 int i, ret = 0;
807802 struct dlm_lock_resource *bm_lockres;
808
- struct suspend_info *s;
809803 char str[64];
810804 sector_t lo, hi;
811805
....@@ -824,16 +818,13 @@
824818 bm_lockres->flags |= DLM_LKF_NOQUEUE;
825819 ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
826820 if (ret == -EAGAIN) {
827
- s = read_resync_info(mddev, bm_lockres);
828
- if (s) {
821
+ if (read_resync_info(mddev, bm_lockres)) {
829822 pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
830823 __func__, __LINE__,
831
- (unsigned long long) s->lo,
832
- (unsigned long long) s->hi, i);
833
- spin_lock_irq(&cinfo->suspend_lock);
834
- s->slot = i;
835
- list_add(&s->list, &cinfo->suspend_list);
836
- spin_unlock_irq(&cinfo->suspend_lock);
824
+ (unsigned long long) cinfo->suspend_lo,
825
+ (unsigned long long) cinfo->suspend_hi,
826
+ i);
827
+ cinfo->suspend_from = i;
837828 }
838829 ret = 0;
839830 lockres_free(bm_lockres);
....@@ -1006,10 +997,17 @@
1006997 if (!cinfo)
1007998 return 0;
1008999
1009
- /* BITMAP_NEEDS_SYNC message should be sent when node
1000
+ /*
1001
+ * BITMAP_NEEDS_SYNC message should be sent when node
10101002 * is leaving the cluster with dirty bitmap, also we
1011
- * can only deliver it when dlm connection is available */
1012
- if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector)
1003
+ * can only deliver it when dlm connection is available.
1004
+ *
1005
+ * Also, we should send BITMAP_NEEDS_SYNC message in
1006
+ * case reshaping is interrupted.
1007
+ */
1008
+ if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) ||
1009
+ (mddev->reshape_position != MaxSector &&
1010
+ test_bit(MD_CLOSING, &mddev->flags)))
10131011 resync_bitmap(mddev);
10141012
10151013 set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
....@@ -1105,6 +1103,82 @@
11051103 struct md_cluster_info *cinfo = mddev->cluster_info;
11061104 clear_bit(MD_CLUSTER_SEND_LOCKED_ALREADY, &cinfo->state);
11071105 unlock_comm(cinfo);
1106
+}
1107
+
1108
+static int update_bitmap_size(struct mddev *mddev, sector_t size)
1109
+{
1110
+ struct md_cluster_info *cinfo = mddev->cluster_info;
1111
+ struct cluster_msg cmsg = {0};
1112
+ int ret;
1113
+
1114
+ cmsg.type = cpu_to_le32(BITMAP_RESIZE);
1115
+ cmsg.high = cpu_to_le64(size);
1116
+ ret = sendmsg(cinfo, &cmsg, 0);
1117
+ if (ret)
1118
+ pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n",
1119
+ __func__, __LINE__, ret);
1120
+ return ret;
1121
+}
1122
+
1123
+static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
1124
+{
1125
+ struct bitmap_counts *counts;
1126
+ char str[64];
1127
+ struct dlm_lock_resource *bm_lockres;
1128
+ struct bitmap *bitmap = mddev->bitmap;
1129
+ unsigned long my_pages = bitmap->counts.pages;
1130
+ int i, rv;
1131
+
1132
+ /*
1133
+ * We need to ensure all the nodes can grow to a larger
1134
+ * bitmap size before make the reshaping.
1135
+ */
1136
+ rv = update_bitmap_size(mddev, newsize);
1137
+ if (rv)
1138
+ return rv;
1139
+
1140
+ for (i = 0; i < mddev->bitmap_info.nodes; i++) {
1141
+ if (i == md_cluster_ops->slot_number(mddev))
1142
+ continue;
1143
+
1144
+ bitmap = get_bitmap_from_slot(mddev, i);
1145
+ if (IS_ERR(bitmap)) {
1146
+ pr_err("can't get bitmap from slot %d\n", i);
1147
+ bitmap = NULL;
1148
+ goto out;
1149
+ }
1150
+ counts = &bitmap->counts;
1151
+
1152
+ /*
1153
+ * If we can hold the bitmap lock of one node then
1154
+ * the slot is not occupied, update the pages.
1155
+ */
1156
+ snprintf(str, 64, "bitmap%04d", i);
1157
+ bm_lockres = lockres_init(mddev, str, NULL, 1);
1158
+ if (!bm_lockres) {
1159
+ pr_err("Cannot initialize %s lock\n", str);
1160
+ goto out;
1161
+ }
1162
+ bm_lockres->flags |= DLM_LKF_NOQUEUE;
1163
+ rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
1164
+ if (!rv)
1165
+ counts->pages = my_pages;
1166
+ lockres_free(bm_lockres);
1167
+
1168
+ if (my_pages != counts->pages)
1169
+ /*
1170
+ * Let's revert the bitmap size if one node
1171
+ * can't resize bitmap
1172
+ */
1173
+ goto out;
1174
+ md_bitmap_free(bitmap);
1175
+ }
1176
+
1177
+ return 0;
1178
+out:
1179
+ md_bitmap_free(bitmap);
1180
+ update_bitmap_size(mddev, oldsize);
1181
+ return -1;
11081182 }
11091183
11101184 /*
....@@ -1231,12 +1305,12 @@
12311305 pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n",
12321306 __func__, __LINE__);
12331307 set_capacity(mddev->gendisk, mddev->array_sectors);
1234
- revalidate_disk(mddev->gendisk);
1308
+ revalidate_disk_size(mddev->gendisk, true);
12351309 } else {
12361310 /* revert to previous sectors */
12371311 ret = mddev->pers->resize(mddev, old_dev_sectors);
12381312 if (!ret)
1239
- revalidate_disk(mddev->gendisk);
1313
+ revalidate_disk_size(mddev->gendisk, true);
12401314 ret = __sendmsg(cinfo, &cmsg);
12411315 if (ret)
12421316 pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
....@@ -1249,6 +1323,16 @@
12491323 {
12501324 struct md_cluster_info *cinfo = mddev->cluster_info;
12511325 return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev);
1326
+}
1327
+
1328
+static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
1329
+{
1330
+ struct md_cluster_info *cinfo = mddev->cluster_info;
1331
+
1332
+ spin_lock_irq(&cinfo->suspend_lock);
1333
+ *lo = cinfo->suspend_lo;
1334
+ *hi = cinfo->suspend_hi;
1335
+ spin_unlock_irq(&cinfo->suspend_lock);
12521336 }
12531337
12541338 static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
....@@ -1303,21 +1387,14 @@
13031387 {
13041388 struct md_cluster_info *cinfo = mddev->cluster_info;
13051389 int ret = 0;
1306
- struct suspend_info *s;
13071390
13081391 if ((direction == READ) &&
13091392 test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
13101393 return 1;
13111394
13121395 spin_lock_irq(&cinfo->suspend_lock);
1313
- if (list_empty(&cinfo->suspend_list))
1314
- goto out;
1315
- list_for_each_entry(s, &cinfo->suspend_list, list)
1316
- if (hi > s->lo && lo < s->hi) {
1317
- ret = 1;
1318
- break;
1319
- }
1320
-out:
1396
+ if (hi > cinfo->suspend_lo && lo < cinfo->suspend_hi)
1397
+ ret = 1;
13211398 spin_unlock_irq(&cinfo->suspend_lock);
13221399 return ret;
13231400 }
....@@ -1492,6 +1569,7 @@
14921569 .resync_start = resync_start,
14931570 .resync_finish = resync_finish,
14941571 .resync_info_update = resync_info_update,
1572
+ .resync_info_get = resync_info_get,
14951573 .metadata_update_start = metadata_update_start,
14961574 .metadata_update_finish = metadata_update_finish,
14971575 .metadata_update_cancel = metadata_update_cancel,
....@@ -1502,6 +1580,7 @@
15021580 .remove_disk = remove_disk,
15031581 .load_bitmaps = load_bitmaps,
15041582 .gather_bitmaps = gather_bitmaps,
1583
+ .resize_bitmaps = resize_bitmaps,
15051584 .lock_all_bitmaps = lock_all_bitmaps,
15061585 .unlock_all_bitmaps = unlock_all_bitmaps,
15071586 .update_size = update_size,