2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/md/md-bitmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
  *
@@ -53,14 +54,7 @@
 {
 	unsigned char *mappage;
 
-	if (page >= bitmap->pages) {
-		/* This can happen if bitmap_start_sync goes beyond
-		 * End-of-device while looking for a whole page.
-		 * It is harmless.
-		 */
-		return -EINVAL;
-	}
-
+	WARN_ON_ONCE(page >= bitmap->pages);
 	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
 		return 0;
 
@@ -323,14 +317,6 @@
 	wake_up(&bitmap->write_wait);
 }
 
-/* copied from buffer.c */
-static void
-__clear_page_buffers(struct page *page)
-{
-	ClearPagePrivate(page);
-	set_page_private(page, 0);
-	put_page(page);
-}
 static void free_buffers(struct page *page)
 {
 	struct buffer_head *bh;
@@ -344,7 +330,7 @@
 		free_buffer_head(bh);
 		bh = next;
 	}
-	__clear_page_buffers(page);
+	detach_page_private(page);
 	put_page(page);
 }
 
@@ -363,33 +349,38 @@
 	int ret = 0;
 	struct inode *inode = file_inode(file);
 	struct buffer_head *bh;
-	sector_t block;
+	sector_t block, blk_cur;
+	unsigned long blocksize = i_blocksize(inode);
 
 	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
 		 (unsigned long long)index << PAGE_SHIFT);
 
-	bh = alloc_page_buffers(page, 1<<inode->i_blkbits, false);
+	bh = alloc_page_buffers(page, blocksize, false);
 	if (!bh) {
 		ret = -ENOMEM;
 		goto out;
 	}
-	attach_page_buffers(page, bh);
-	block = index << (PAGE_SHIFT - inode->i_blkbits);
+	attach_page_private(page, bh);
+	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
 	while (bh) {
+		block = blk_cur;
+
 		if (count == 0)
 			bh->b_blocknr = 0;
 		else {
-			bh->b_blocknr = bmap(inode, block);
-			if (bh->b_blocknr == 0) {
-				/* Cannot use this file! */
+			ret = bmap(inode, &block);
+			if (ret || !block) {
 				ret = -EINVAL;
+				bh->b_blocknr = 0;
 				goto out;
 			}
+
+			bh->b_blocknr = block;
 			bh->b_bdev = inode->i_sb->s_bdev;
-			if (count < (1<<inode->i_blkbits))
+			if (count < blocksize)
 				count = 0;
 			else
-				count -= (1<<inode->i_blkbits);
+				count -= blocksize;
 
 			bh->b_end_io = end_bitmap_write;
 			bh->b_private = bitmap;
@@ -398,7 +389,7 @@
 			set_buffer_mapped(bh);
 			submit_bh(REQ_OP_READ, 0, bh);
 		}
-		block++;
+		blk_cur++;
 		bh = bh->b_this_page;
 	}
 	page->index = index;
@@ -488,22 +479,22 @@
 	sb = kmap_atomic(bitmap->storage.sb_page);
 	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
 	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
-	pr_debug("       version: %d\n", le32_to_cpu(sb->version));
+	pr_debug("       version: %u\n", le32_to_cpu(sb->version));
 	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
-		 le32_to_cpu(*(__u32 *)(sb->uuid+0)),
-		 le32_to_cpu(*(__u32 *)(sb->uuid+4)),
-		 le32_to_cpu(*(__u32 *)(sb->uuid+8)),
-		 le32_to_cpu(*(__u32 *)(sb->uuid+12)));
+		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
+		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
+		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
+		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
 	pr_debug("        events: %llu\n",
 		 (unsigned long long) le64_to_cpu(sb->events));
 	pr_debug("events cleared: %llu\n",
 		 (unsigned long long) le64_to_cpu(sb->events_cleared));
 	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
-	pr_debug("     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
-	pr_debug("  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
+	pr_debug("     chunksize: %u B\n", le32_to_cpu(sb->chunksize));
+	pr_debug("  daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
 	pr_debug("     sync size: %llu KB\n",
 		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
-	pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
+	pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
 	kunmap_atomic(sb);
 }
 
@@ -608,8 +599,8 @@
 	if (bitmap->cluster_slot >= 0) {
 		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
 
-		sector_div(bm_blocks,
-			   bitmap->mddev->bitmap_info.chunksize >> 9);
+		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
+			   (bitmap->mddev->bitmap_info.chunksize >> 9));
 		/* bits to bytes */
 		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
 		/* to 4k blocks */
@@ -641,14 +632,6 @@
 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
 	write_behind = le32_to_cpu(sb->write_behind);
 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
-	/* Setup nodes/clustername only if bitmap version is
-	 * cluster-compatible
-	 */
-	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
-		nodes = le32_to_cpu(sb->nodes);
-		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
-			sb->cluster_name, 64);
-	}
 
 	/* verify that the bitmap-specific fields are valid */
 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +651,16 @@
 		pr_warn("%s: invalid bitmap file superblock: %s\n",
 			bmname(bitmap), reason);
 		goto out;
+	}
+
+	/*
+	 * Setup nodes/clustername only if bitmap version is
+	 * cluster-compatible
+	 */
+	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+		nodes = le32_to_cpu(sb->nodes);
+		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
+			sb->cluster_name, 64);
 	}
 
 	/* keep the array size field of the bitmap superblock up to date */
@@ -702,9 +695,9 @@
 
 out:
 	kunmap_atomic(sb);
-	/* Assigning chunksize is required for "re_read" */
-	bitmap->mddev->bitmap_info.chunksize = chunksize;
 	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+		/* Assigning chunksize is required for "re_read" */
+		bitmap->mddev->bitmap_info.chunksize = chunksize;
 		err = md_setup_cluster(bitmap->mddev, nodes);
 		if (err) {
 			pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -715,18 +708,18 @@
 		goto re_read;
 	}
 
-
 out_no_sb:
-	if (test_bit(BITMAP_STALE, &bitmap->flags))
-		bitmap->events_cleared = bitmap->mddev->events;
-	bitmap->mddev->bitmap_info.chunksize = chunksize;
-	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
-	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
-	bitmap->mddev->bitmap_info.nodes = nodes;
-	if (bitmap->mddev->bitmap_info.space == 0 ||
-	    bitmap->mddev->bitmap_info.space > sectors_reserved)
-		bitmap->mddev->bitmap_info.space = sectors_reserved;
-	if (err) {
+	if (err == 0) {
+		if (test_bit(BITMAP_STALE, &bitmap->flags))
+			bitmap->events_cleared = bitmap->mddev->events;
+		bitmap->mddev->bitmap_info.chunksize = chunksize;
+		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+		bitmap->mddev->bitmap_info.nodes = nodes;
+		if (bitmap->mddev->bitmap_info.space == 0 ||
+		    bitmap->mddev->bitmap_info.space > sectors_reserved)
+			bitmap->mddev->bitmap_info.space = sectors_reserved;
+	} else {
 		md_bitmap_print_sb(bitmap);
 		if (bitmap->cluster_slot < 0)
 			md_cluster_stop(bitmap->mddev);
@@ -1018,8 +1011,6 @@
 	/* look at each page to see if there are any set bits that need to be
 	 * flushed out to disk */
 	for (i = 0; i < bitmap->storage.file_pages; i++) {
-		if (!bitmap->storage.filemap)
-			return;
 		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
 		need_write = test_and_clear_page_attr(bitmap, i,
 						      BITMAP_PAGE_NEEDWRITE);
@@ -1337,7 +1328,8 @@
 					   BITMAP_PAGE_DIRTY))
 				/* bitmap_unplug will handle the rest */
 				break;
-			if (test_and_clear_page_attr(bitmap, j,
+			if (bitmap->storage.filemap &&
+			    test_and_clear_page_attr(bitmap, j,
 						     BITMAP_PAGE_NEEDWRITE)) {
 				write_page(bitmap, bitmap->storage.filemap[j], 0);
 			}
@@ -1366,6 +1358,14 @@
 	sector_t csize;
 	int err;
 
+	if (page >= bitmap->pages) {
+		/*
+		 * This can happen if bitmap_start_sync goes beyond
+		 * End-of-device while looking for a whole page or
+		 * user set a huge number to sysfs bitmap_set_bits.
+		 */
+		return NULL;
+	}
 	err = md_bitmap_checkpage(bitmap, page, create, 0);
 
 	if (bitmap->bp[page].hijacked ||
@@ -1437,7 +1437,7 @@
 		case 0:
 			md_bitmap_file_set_bit(bitmap, offset);
 			md_bitmap_count_page(&bitmap->counts, offset, 1);
-			/* fall through */
+			fallthrough;
 		case 1:
 			*bmc = 2;
 		}
@@ -1635,7 +1635,7 @@
 		s += blocks;
 	}
 	bitmap->last_end_sync = jiffies;
-	sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
+	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
 }
 EXPORT_SYMBOL(md_bitmap_cond_end_sync);
 
@@ -1791,6 +1791,8 @@
 		return;
 
 	md_bitmap_wait_behind_writes(mddev);
+	if (!mddev->serialize_policy)
+		mddev_destroy_serial_pool(mddev, NULL, true);
 
 	mutex_lock(&mddev->bitmap_info.mutex);
 	spin_lock(&mddev->lock);
@@ -1901,9 +1903,13 @@
 	sector_t start = 0;
 	sector_t sector = 0;
 	struct bitmap *bitmap = mddev->bitmap;
+	struct md_rdev *rdev;
 
 	if (!bitmap)
 		goto out;
+
+	rdev_for_each(rdev, mddev)
+		mddev_create_serial_pool(mddev, rdev, true);
 
 	if (mddev_is_clustered(mddev))
 		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
@@ -1949,6 +1955,7 @@
 }
 EXPORT_SYMBOL_GPL(md_bitmap_load);
 
+/* caller need to free returned bitmap with md_bitmap_free() */
 struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
 {
 	int rv = 0;
@@ -2012,6 +2019,7 @@
 	md_bitmap_unplug(mddev->bitmap);
 	*low = lo;
 	*high = hi;
+	md_bitmap_free(bitmap);
 
 	return rv;
 }
@@ -2099,7 +2107,8 @@
 			bytes = DIV_ROUND_UP(chunks, 8);
 			if (!bitmap->mddev->bitmap_info.external)
 				bytes += sizeof(bitmap_super_t);
-		} while (bytes > (space << 9));
+		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
+			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
 	} else
 		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
21052114
....@@ -2144,7 +2153,7 @@
21442153 bitmap->counts.missing_pages = pages;
21452154 bitmap->counts.chunkshift = chunkshift;
21462155 bitmap->counts.chunks = chunks;
2147
- bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
2156
+ bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
21482157 BITMAP_BLOCK_SHIFT);
21492158
21502159 blocks = min(old_counts.chunks << old_counts.chunkshift,
@@ -2170,8 +2179,8 @@
 		bitmap->counts.missing_pages = old_counts.pages;
 		bitmap->counts.chunkshift = old_counts.chunkshift;
 		bitmap->counts.chunks = old_counts.chunks;
-		bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
-							     BITMAP_BLOCK_SHIFT);
+		bitmap->mddev->bitmap_info.chunksize =
+			1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
 		blocks = old_counts.chunks << old_counts.chunkshift;
 		pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
 		break;
@@ -2189,20 +2198,23 @@
 
 		if (set) {
 			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
-			if (*bmc_new == 0) {
-				/* need to set on-disk bits too. */
-				sector_t end = block + new_blocks;
-				sector_t start = block >> chunkshift;
-				start <<= chunkshift;
-				while (start < end) {
-					md_bitmap_file_set_bit(bitmap, block);
-					start += 1 << chunkshift;
+			if (bmc_new) {
+				if (*bmc_new == 0) {
+					/* need to set on-disk bits too. */
+					sector_t end = block + new_blocks;
+					sector_t start = block >> chunkshift;
+
+					start <<= chunkshift;
+					while (start < end) {
+						md_bitmap_file_set_bit(bitmap, block);
+						start += 1 << chunkshift;
+					}
+					*bmc_new = 2;
+					md_bitmap_count_page(&bitmap->counts, block, 1);
+					md_bitmap_set_pending(&bitmap->counts, block);
 				}
-				*bmc_new = 2;
-				md_bitmap_count_page(&bitmap->counts, block, 1);
-				md_bitmap_set_pending(&bitmap->counts, block);
+				*bmc_new |= NEEDED_MASK;
 			}
-			*bmc_new |= NEEDED_MASK;
 			if (new_blocks < old_blocks)
 				old_blocks = new_blocks;
 		}
@@ -2290,9 +2302,9 @@
 			goto out;
 		}
 		if (mddev->pers) {
-			mddev->pers->quiesce(mddev, 1);
+			mddev_suspend(mddev);
 			md_bitmap_destroy(mddev);
-			mddev->pers->quiesce(mddev, 0);
+			mddev_resume(mddev);
 		}
 		mddev->bitmap_info.offset = 0;
 		if (mddev->bitmap_info.file) {
@@ -2329,8 +2341,8 @@
 			mddev->bitmap_info.offset = offset;
 			if (mddev->pers) {
 				struct bitmap *bitmap;
-				mddev->pers->quiesce(mddev, 1);
 				bitmap = md_bitmap_create(mddev, -1);
+				mddev_suspend(mddev);
 				if (IS_ERR(bitmap))
 					rv = PTR_ERR(bitmap);
 				else {
@@ -2339,11 +2351,12 @@
 					if (rv)
 						mddev->bitmap_info.offset = 0;
 				}
-				mddev->pers->quiesce(mddev, 0);
 				if (rv) {
 					md_bitmap_destroy(mddev);
+					mddev_resume(mddev);
 					goto out;
 				}
+				mddev_resume(mddev);
 			}
 		}
 	}
@@ -2462,12 +2475,50 @@
 backlog_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	unsigned long backlog;
+	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
+	struct md_rdev *rdev;
+	bool has_write_mostly = false;
 	int rv = kstrtoul(buf, 10, &backlog);
 	if (rv)
 		return rv;
 	if (backlog > COUNTER_MAX)
 		return -EINVAL;
+
+	rv = mddev_lock(mddev);
+	if (rv)
+		return rv;
+
+	/*
+	 * Without write mostly device, it doesn't make sense to set
+	 * backlog for max_write_behind.
+	 */
+	rdev_for_each(rdev, mddev) {
+		if (test_bit(WriteMostly, &rdev->flags)) {
+			has_write_mostly = true;
+			break;
+		}
+	}
+	if (!has_write_mostly) {
+		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
+				    mdname(mddev));
+		mddev_unlock(mddev);
+		return -EINVAL;
+	}
+
 	mddev->bitmap_info.max_write_behind = backlog;
+	if (!backlog && mddev->serial_info_pool) {
+		/* serial_info_pool is not needed if backlog is zero */
+		if (!mddev->serialize_policy)
+			mddev_destroy_serial_pool(mddev, NULL, false);
+	} else if (backlog && !mddev->serial_info_pool) {
+		/* serial_info_pool is needed since backlog is not zero */
+		rdev_for_each(rdev, mddev)
+			mddev_create_serial_pool(mddev, rdev, false);
+	}
+	if (old_mwb != backlog)
+		md_bitmap_update_sb(mddev->bitmap);
+
+	mddev_unlock(mddev);
 	return len;
 }
 
@@ -2494,6 +2545,9 @@
 	if (csize < 512 ||
 	    !is_power_of_2(csize))
 		return -EINVAL;
+	if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
+		sizeof(((bitmap_super_t *)0)->chunksize))))
+		return -EOVERFLOW;
 	mddev->bitmap_info.chunksize = csize;
 	return len;
 }
@@ -2600,4 +2654,3 @@
 	.name = "bitmap",
 	.attrs = md_bitmap_attrs,
 };
-
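
For reference, the attach_page_buffers()/__clear_page_buffers() pair removed above is replaced by the generic page-private helpers from <linux/pagemap.h>. The following is only a rough sketch of how those helpers behave, written from the removed __clear_page_buffers() shown in this diff and the generic helpers' documented behaviour; the *_sketch names are hypothetical and the real definitions live in include/linux/pagemap.h:

/* Illustrative sketch, not the kernel's actual implementation. */
static inline void attach_page_private_sketch(struct page *page, void *data)
{
	get_page(page);			/* hold a page reference for the private data */
	set_page_private(page, (unsigned long)data);
	SetPagePrivate(page);
}

static inline void *detach_page_private_sketch(struct page *page)
{
	void *data = (void *)page_private(page);

	if (!PagePrivate(page))
		return NULL;
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);			/* drop the reference taken at attach time */

	return data;
}

This mirrors the open-coded ClearPagePrivate()/set_page_private()/put_page() sequence that __clear_page_buffers() performed, with the page-reference handling centralized in the generic helpers, which is why read_page() and free_buffers() above can switch to attach_page_private()/detach_page_private() without changing behaviour.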