| .. | .. |
|---|
| 6 | 6 | #include <linux/blkdev.h> |
|---|
| 7 | 7 | #include <linux/ratelimit.h> |
|---|
| 8 | 8 | #include <linux/sched/mm.h> |
|---|
| 9 | +#include <crypto/hash.h> |
|---|
| 9 | 10 | #include "ctree.h" |
|---|
| 11 | +#include "discard.h" |
|---|
| 10 | 12 | #include "volumes.h" |
|---|
| 11 | 13 | #include "disk-io.h" |
|---|
| 12 | 14 | #include "ordered-data.h" |
|---|
| .. | .. |
|---|
| 17 | 19 | #include "check-integrity.h" |
|---|
| 18 | 20 | #include "rcu-string.h" |
|---|
| 19 | 21 | #include "raid56.h" |
|---|
| 22 | +#include "block-group.h" |
|---|
| 20 | 23 | |
|---|
| 21 | 24 | /* |
|---|
| 22 | 25 | * This is only the first step towards a full-features scrub. It reads all |
|---|
| .. | .. |
|---|
| 146 | 149 | */ |
|---|
| 147 | 150 | unsigned long *ebitmap; |
|---|
| 148 | 151 | |
|---|
| 149 | | - unsigned long bitmap[0]; |
|---|
| 152 | + unsigned long bitmap[]; |
|---|
| 150 | 153 | }; |
|---|
| 151 | 154 | |
|---|
| 152 | 155 | struct scrub_ctx { |
|---|
| .. | .. |
|---|
| 322 | 325 | struct rb_node *parent = NULL; |
|---|
| 323 | 326 | struct full_stripe_lock *entry; |
|---|
| 324 | 327 | struct full_stripe_lock *ret; |
|---|
| 325 | | - unsigned int nofs_flag; |
|---|
| 326 | 328 | |
|---|
| 327 | 329 | lockdep_assert_held(&locks_root->lock); |
|---|
| 328 | 330 | |
|---|
| .. | .. |
|---|
| 342 | 344 | |
|---|
| 343 | 345 | /* |
|---|
| 344 | 346 | * Insert new lock. |
|---|
| 345 | | - * |
|---|
| 346 | | - * We must use GFP_NOFS because the scrub task might be waiting for a |
|---|
| 347 | | - * worker task executing this function and in turn a transaction commit |
|---|
| 348 | | - * might be waiting the scrub task to pause (which needs to wait for all |
|---|
| 349 | | - * the worker tasks to complete before pausing). |
|---|
| 350 | 347 | */ |
|---|
| 351 | | - nofs_flag = memalloc_nofs_save(); |
|---|
| 352 | 348 | ret = kmalloc(sizeof(*ret), GFP_KERNEL); |
|---|
| 353 | | - memalloc_nofs_restore(nofs_flag); |
|---|
| 354 | 349 | if (!ret) |
|---|
| 355 | 350 | return ERR_PTR(-ENOMEM); |
|---|
| 356 | 351 | ret->logical = fstripe_logical; |
|---|
| .. | .. |
|---|
| 395 | 390 | * |
|---|
| 396 | 391 | * Caller must ensure @cache is a RAID56 block group. |
|---|
| 397 | 392 | */ |
|---|
| 398 | | -static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache, |
|---|
| 399 | | - u64 bytenr) |
|---|
| 393 | +static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr) |
|---|
| 400 | 394 | { |
|---|
| 401 | 395 | u64 ret; |
|---|
| 402 | 396 | |
|---|
| .. | .. |
|---|
| 410 | 404 | * round_down() can only handle power of 2, while RAID56 full |
|---|
| 411 | 405 | * stripe length can be 64KiB * n, so we need to manually round down. |
|---|
| 412 | 406 | */ |
|---|
| 413 | | - ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) * |
|---|
| 414 | | - cache->full_stripe_len + cache->key.objectid; |
|---|
| 407 | + ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) * |
|---|
| 408 | + cache->full_stripe_len + cache->start; |
|---|
| 415 | 409 | return ret; |
|---|
| 416 | 410 | } |
|---|
| 417 | 411 | |
|---|
| .. | .. |
|---|
| 429 | 423 | static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr, |
|---|
| 430 | 424 | bool *locked_ret) |
|---|
| 431 | 425 | { |
|---|
| 432 | | - struct btrfs_block_group_cache *bg_cache; |
|---|
| 426 | + struct btrfs_block_group *bg_cache; |
|---|
| 433 | 427 | struct btrfs_full_stripe_locks_tree *locks_root; |
|---|
| 434 | 428 | struct full_stripe_lock *existing; |
|---|
| 435 | 429 | u64 fstripe_start; |
|---|
| .. | .. |
|---|
| 476 | 470 | static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr, |
|---|
| 477 | 471 | bool locked) |
|---|
| 478 | 472 | { |
|---|
| 479 | | - struct btrfs_block_group_cache *bg_cache; |
|---|
| 473 | + struct btrfs_block_group *bg_cache; |
|---|
| 480 | 474 | struct btrfs_full_stripe_locks_tree *locks_root; |
|---|
| 481 | 475 | struct full_stripe_lock *fstripe_lock; |
|---|
| 482 | 476 | u64 fstripe_start; |
|---|
| .. | .. |
|---|
| 604 | 598 | sbio->index = i; |
|---|
| 605 | 599 | sbio->sctx = sctx; |
|---|
| 606 | 600 | sbio->page_count = 0; |
|---|
| 607 | | - btrfs_init_work(&sbio->work, btrfs_scrub_helper, |
|---|
| 608 | | - scrub_bio_end_io_worker, NULL, NULL); |
|---|
| 601 | + btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL, |
|---|
| 602 | + NULL); |
|---|
| 609 | 603 | |
|---|
| 610 | 604 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
|---|
| 611 | 605 | sctx->bios[i]->next_free = i + 1; |
|---|
| .. | .. |
|---|
| 653 | 647 | struct btrfs_fs_info *fs_info = swarn->dev->fs_info; |
|---|
| 654 | 648 | struct inode_fs_paths *ipath = NULL; |
|---|
| 655 | 649 | struct btrfs_root *local_root; |
|---|
| 656 | | - struct btrfs_key root_key; |
|---|
| 657 | 650 | struct btrfs_key key; |
|---|
| 658 | 651 | |
|---|
| 659 | | - root_key.objectid = root; |
|---|
| 660 | | - root_key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 661 | | - root_key.offset = (u64)-1; |
|---|
| 662 | | - local_root = btrfs_read_fs_root_no_name(fs_info, &root_key); |
|---|
| 652 | + local_root = btrfs_get_fs_root(fs_info, root, true); |
|---|
| 663 | 653 | if (IS_ERR(local_root)) { |
|---|
| 664 | 654 | ret = PTR_ERR(local_root); |
|---|
| 665 | 655 | goto err; |
|---|
| .. | .. |
|---|
| 674 | 664 | |
|---|
| 675 | 665 | ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0); |
|---|
| 676 | 666 | if (ret) { |
|---|
| 667 | + btrfs_put_root(local_root); |
|---|
| 677 | 668 | btrfs_release_path(swarn->path); |
|---|
| 678 | 669 | goto err; |
|---|
| 679 | 670 | } |
|---|
| .. | .. |
|---|
| 694 | 685 | ipath = init_ipath(4096, local_root, swarn->path); |
|---|
| 695 | 686 | memalloc_nofs_restore(nofs_flag); |
|---|
| 696 | 687 | if (IS_ERR(ipath)) { |
|---|
| 688 | + btrfs_put_root(local_root); |
|---|
| 697 | 689 | ret = PTR_ERR(ipath); |
|---|
| 698 | 690 | ipath = NULL; |
|---|
| 699 | 691 | goto err; |
|---|
| .. | .. |
|---|
| 717 | 709 | min(isize - offset, (u64)PAGE_SIZE), nlink, |
|---|
| 718 | 710 | (char *)(unsigned long)ipath->fspath->val[i]); |
|---|
| 719 | 711 | |
|---|
| 712 | + btrfs_put_root(local_root); |
|---|
| 720 | 713 | free_ipath(ipath); |
|---|
| 721 | 714 | return 0; |
|---|
| 722 | 715 | |
|---|
| .. | .. |
|---|
| 841 | 834 | int page_num; |
|---|
| 842 | 835 | int success; |
|---|
| 843 | 836 | bool full_stripe_locked; |
|---|
| 844 | | - static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
|---|
| 837 | + unsigned int nofs_flag; |
|---|
| 838 | + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, |
|---|
| 845 | 839 | DEFAULT_RATELIMIT_BURST); |
|---|
| 846 | 840 | |
|---|
| 847 | 841 | BUG_ON(sblock_to_check->page_count < 1); |
|---|
| .. | .. |
|---|
| 866 | 860 | dev = sblock_to_check->pagev[0]->dev; |
|---|
| 867 | 861 | |
|---|
| 868 | 862 | /* |
|---|
| 863 | + * We must use GFP_NOFS because the scrub task might be waiting for a |
|---|
| 864 | + * worker task executing this function and in turn a transaction commit |
|---|
| 865 | + * might be waiting the scrub task to pause (which needs to wait for all |
|---|
| 866 | + * the worker tasks to complete before pausing). |
|---|
| 867 | + * We do allocations in the workers through insert_full_stripe_lock() |
|---|
| 868 | + * and scrub_add_page_to_wr_bio(), which happens down the call chain of |
|---|
| 869 | + * this function. |
|---|
| 870 | + */ |
|---|
| 871 | + nofs_flag = memalloc_nofs_save(); |
|---|
| 872 | + /* |
|---|
| 869 | 873 | * For RAID5/6, race can happen for a different device scrub thread. |
|---|
| 870 | 874 | * For data corruption, Parity and Data threads will both try |
|---|
| 871 | 875 | * to recovery the data. |
|---|
| .. | .. |
|---|
| 874 | 878 | */ |
|---|
| 875 | 879 | ret = lock_full_stripe(fs_info, logical, &full_stripe_locked); |
|---|
| 876 | 880 | if (ret < 0) { |
|---|
| 881 | + memalloc_nofs_restore(nofs_flag); |
|---|
| 877 | 882 | spin_lock(&sctx->stat_lock); |
|---|
| 878 | 883 | if (ret == -ENOMEM) |
|---|
| 879 | 884 | sctx->stat.malloc_errors++; |
|---|
| .. | .. |
|---|
| 913 | 918 | */ |
|---|
| 914 | 919 | |
|---|
| 915 | 920 | sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS, |
|---|
| 916 | | - sizeof(*sblocks_for_recheck), GFP_NOFS); |
|---|
| 921 | + sizeof(*sblocks_for_recheck), GFP_KERNEL); |
|---|
| 917 | 922 | if (!sblocks_for_recheck) { |
|---|
| 918 | 923 | spin_lock(&sctx->stat_lock); |
|---|
| 919 | 924 | sctx->stat.malloc_errors++; |
|---|
| .. | .. |
|---|
| 964 | 969 | spin_lock(&sctx->stat_lock); |
|---|
| 965 | 970 | sctx->stat.read_errors++; |
|---|
| 966 | 971 | spin_unlock(&sctx->stat_lock); |
|---|
| 967 | | - if (__ratelimit(&_rs)) |
|---|
| 972 | + if (__ratelimit(&rs)) |
|---|
| 968 | 973 | scrub_print_warning("i/o error", sblock_to_check); |
|---|
| 969 | 974 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
|---|
| 970 | 975 | } else if (sblock_bad->checksum_error) { |
|---|
| 971 | 976 | spin_lock(&sctx->stat_lock); |
|---|
| 972 | 977 | sctx->stat.csum_errors++; |
|---|
| 973 | 978 | spin_unlock(&sctx->stat_lock); |
|---|
| 974 | | - if (__ratelimit(&_rs)) |
|---|
| 979 | + if (__ratelimit(&rs)) |
|---|
| 975 | 980 | scrub_print_warning("checksum error", sblock_to_check); |
|---|
| 976 | 981 | btrfs_dev_stat_inc_and_print(dev, |
|---|
| 977 | 982 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
|---|
| .. | .. |
|---|
| 979 | 984 | spin_lock(&sctx->stat_lock); |
|---|
| 980 | 985 | sctx->stat.verify_errors++; |
|---|
| 981 | 986 | spin_unlock(&sctx->stat_lock); |
|---|
| 982 | | - if (__ratelimit(&_rs)) |
|---|
| 987 | + if (__ratelimit(&rs)) |
|---|
| 983 | 988 | scrub_print_warning("checksum/header error", |
|---|
| 984 | 989 | sblock_to_check); |
|---|
| 985 | 990 | if (sblock_bad->generation_error) |
|---|
| .. | .. |
|---|
| 1133 | 1138 | |
|---|
| 1134 | 1139 | if (scrub_write_page_to_dev_replace(sblock_other, |
|---|
| 1135 | 1140 | page_num) != 0) { |
|---|
| 1136 | | - btrfs_dev_replace_stats_inc( |
|---|
| 1141 | + atomic64_inc( |
|---|
| 1137 | 1142 | &fs_info->dev_replace.num_write_errors); |
|---|
| 1138 | 1143 | success = 0; |
|---|
| 1139 | 1144 | } |
|---|
| .. | .. |
|---|
| 1211 | 1216 | } |
|---|
| 1212 | 1217 | |
|---|
| 1213 | 1218 | ret = unlock_full_stripe(fs_info, logical, full_stripe_locked); |
|---|
| 1219 | + memalloc_nofs_restore(nofs_flag); |
|---|
| 1214 | 1220 | if (ret < 0) |
|---|
| 1215 | 1221 | return ret; |
|---|
| 1216 | 1222 | return 0; |
|---|
| .. | .. |
|---|
| 1573 | 1579 | if (btrfsic_submit_bio_wait(bio)) { |
|---|
| 1574 | 1580 | btrfs_dev_stat_inc_and_print(page_bad->dev, |
|---|
| 1575 | 1581 | BTRFS_DEV_STAT_WRITE_ERRS); |
|---|
| 1576 | | - btrfs_dev_replace_stats_inc( |
|---|
| 1577 | | - &fs_info->dev_replace.num_write_errors); |
|---|
| 1582 | + atomic64_inc(&fs_info->dev_replace.num_write_errors); |
|---|
| 1578 | 1583 | bio_put(bio); |
|---|
| 1579 | 1584 | return -EIO; |
|---|
| 1580 | 1585 | } |
|---|
| .. | .. |
|---|
| 1601 | 1606 | |
|---|
| 1602 | 1607 | ret = scrub_write_page_to_dev_replace(sblock, page_num); |
|---|
| 1603 | 1608 | if (ret) |
|---|
| 1604 | | - btrfs_dev_replace_stats_inc( |
|---|
| 1605 | | - &fs_info->dev_replace.num_write_errors); |
|---|
| 1609 | + atomic64_inc(&fs_info->dev_replace.num_write_errors); |
|---|
| 1606 | 1610 | } |
|---|
| 1607 | 1611 | } |
|---|
| 1608 | 1612 | |
|---|
| .. | .. |
|---|
| 1612 | 1616 | struct scrub_page *spage = sblock->pagev[page_num]; |
|---|
| 1613 | 1617 | |
|---|
| 1614 | 1618 | BUG_ON(spage->page == NULL); |
|---|
| 1615 | | - if (spage->io_error) { |
|---|
| 1616 | | - void *mapped_buffer = kmap_atomic(spage->page); |
|---|
| 1619 | + if (spage->io_error) |
|---|
| 1620 | + clear_page(page_address(spage->page)); |
|---|
| 1617 | 1621 | |
|---|
| 1618 | | - clear_page(mapped_buffer); |
|---|
| 1619 | | - flush_dcache_page(spage->page); |
|---|
| 1620 | | - kunmap_atomic(mapped_buffer); |
|---|
| 1621 | | - } |
|---|
| 1622 | 1622 | return scrub_add_page_to_wr_bio(sblock->sctx, spage); |
|---|
| 1623 | 1623 | } |
|---|
| 1624 | 1624 | |
|---|
| .. | .. |
|---|
| 1631 | 1631 | mutex_lock(&sctx->wr_lock); |
|---|
| 1632 | 1632 | again: |
|---|
| 1633 | 1633 | if (!sctx->wr_curr_bio) { |
|---|
| 1634 | | - unsigned int nofs_flag; |
|---|
| 1635 | | - |
|---|
| 1636 | | - /* |
|---|
| 1637 | | - * We must use GFP_NOFS because the scrub task might be waiting |
|---|
| 1638 | | - * for a worker task executing this function and in turn a |
|---|
| 1639 | | - * transaction commit might be waiting the scrub task to pause |
|---|
| 1640 | | - * (which needs to wait for all the worker tasks to complete |
|---|
| 1641 | | - * before pausing). |
|---|
| 1642 | | - */ |
|---|
| 1643 | | - nofs_flag = memalloc_nofs_save(); |
|---|
| 1644 | 1634 | sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio), |
|---|
| 1645 | 1635 | GFP_KERNEL); |
|---|
| 1646 | | - memalloc_nofs_restore(nofs_flag); |
|---|
| 1647 | 1636 | if (!sctx->wr_curr_bio) { |
|---|
| 1648 | 1637 | mutex_unlock(&sctx->wr_lock); |
|---|
| 1649 | 1638 | return -ENOMEM; |
|---|
| .. | .. |
|---|
| 1726 | 1715 | sbio->status = bio->bi_status; |
|---|
| 1727 | 1716 | sbio->bio = bio; |
|---|
| 1728 | 1717 | |
|---|
| 1729 | | - btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, |
|---|
| 1730 | | - scrub_wr_bio_end_io_worker, NULL, NULL); |
|---|
| 1718 | + btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); |
|---|
| 1731 | 1719 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
|---|
| 1732 | 1720 | } |
|---|
| 1733 | 1721 | |
|---|
| .. | .. |
|---|
| 1746 | 1734 | struct scrub_page *spage = sbio->pagev[i]; |
|---|
| 1747 | 1735 | |
|---|
| 1748 | 1736 | spage->io_error = 1; |
|---|
| 1749 | | - btrfs_dev_replace_stats_inc(&dev_replace-> |
|---|
| 1750 | | - num_write_errors); |
|---|
| 1737 | + atomic64_inc(&dev_replace->num_write_errors); |
|---|
| 1751 | 1738 | } |
|---|
| 1752 | 1739 | } |
|---|
| 1753 | 1740 | |
|---|
| .. | .. |
|---|
| 1796 | 1783 | static int scrub_checksum_data(struct scrub_block *sblock) |
|---|
| 1797 | 1784 | { |
|---|
| 1798 | 1785 | struct scrub_ctx *sctx = sblock->sctx; |
|---|
| 1786 | + struct btrfs_fs_info *fs_info = sctx->fs_info; |
|---|
| 1787 | + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); |
|---|
| 1799 | 1788 | u8 csum[BTRFS_CSUM_SIZE]; |
|---|
| 1800 | | - u8 *on_disk_csum; |
|---|
| 1801 | | - struct page *page; |
|---|
| 1802 | | - void *buffer; |
|---|
| 1803 | | - u32 crc = ~(u32)0; |
|---|
| 1804 | | - u64 len; |
|---|
| 1805 | | - int index; |
|---|
| 1789 | + struct scrub_page *spage; |
|---|
| 1790 | + char *kaddr; |
|---|
| 1806 | 1791 | |
|---|
| 1807 | 1792 | BUG_ON(sblock->page_count < 1); |
|---|
| 1808 | | - if (!sblock->pagev[0]->have_csum) |
|---|
| 1793 | + spage = sblock->pagev[0]; |
|---|
| 1794 | + if (!spage->have_csum) |
|---|
| 1809 | 1795 | return 0; |
|---|
| 1810 | 1796 | |
|---|
| 1811 | | - on_disk_csum = sblock->pagev[0]->csum; |
|---|
| 1812 | | - page = sblock->pagev[0]->page; |
|---|
| 1813 | | - buffer = kmap_atomic(page); |
|---|
| 1797 | + kaddr = page_address(spage->page); |
|---|
| 1814 | 1798 | |
|---|
| 1815 | | - len = sctx->fs_info->sectorsize; |
|---|
| 1816 | | - index = 0; |
|---|
| 1817 | | - for (;;) { |
|---|
| 1818 | | - u64 l = min_t(u64, len, PAGE_SIZE); |
|---|
| 1799 | + shash->tfm = fs_info->csum_shash; |
|---|
| 1800 | + crypto_shash_init(shash); |
|---|
| 1801 | + crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum); |
|---|
| 1819 | 1802 | |
|---|
| 1820 | | - crc = btrfs_csum_data(buffer, crc, l); |
|---|
| 1821 | | - kunmap_atomic(buffer); |
|---|
| 1822 | | - len -= l; |
|---|
| 1823 | | - if (len == 0) |
|---|
| 1824 | | - break; |
|---|
| 1825 | | - index++; |
|---|
| 1826 | | - BUG_ON(index >= sblock->page_count); |
|---|
| 1827 | | - BUG_ON(!sblock->pagev[index]->page); |
|---|
| 1828 | | - page = sblock->pagev[index]->page; |
|---|
| 1829 | | - buffer = kmap_atomic(page); |
|---|
| 1830 | | - } |
|---|
| 1831 | | - |
|---|
| 1832 | | - btrfs_csum_final(crc, csum); |
|---|
| 1833 | | - if (memcmp(csum, on_disk_csum, sctx->csum_size)) |
|---|
| 1803 | + if (memcmp(csum, spage->csum, sctx->csum_size)) |
|---|
| 1834 | 1804 | sblock->checksum_error = 1; |
|---|
| 1835 | 1805 | |
|---|
| 1836 | 1806 | return sblock->checksum_error; |
|---|
| .. | .. |
|---|
| 1841 | 1811 | struct scrub_ctx *sctx = sblock->sctx; |
|---|
| 1842 | 1812 | struct btrfs_header *h; |
|---|
| 1843 | 1813 | struct btrfs_fs_info *fs_info = sctx->fs_info; |
|---|
| 1814 | + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); |
|---|
| 1844 | 1815 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
|---|
| 1845 | 1816 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
|---|
| 1846 | | - struct page *page; |
|---|
| 1847 | | - void *mapped_buffer; |
|---|
| 1848 | | - u64 mapped_size; |
|---|
| 1849 | | - void *p; |
|---|
| 1850 | | - u32 crc = ~(u32)0; |
|---|
| 1851 | | - u64 len; |
|---|
| 1852 | | - int index; |
|---|
| 1817 | + const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT; |
|---|
| 1818 | + int i; |
|---|
| 1819 | + struct scrub_page *spage; |
|---|
| 1820 | + char *kaddr; |
|---|
| 1853 | 1821 | |
|---|
| 1854 | 1822 | BUG_ON(sblock->page_count < 1); |
|---|
| 1855 | | - page = sblock->pagev[0]->page; |
|---|
| 1856 | | - mapped_buffer = kmap_atomic(page); |
|---|
| 1857 | | - h = (struct btrfs_header *)mapped_buffer; |
|---|
| 1823 | + spage = sblock->pagev[0]; |
|---|
| 1824 | + kaddr = page_address(spage->page); |
|---|
| 1825 | + h = (struct btrfs_header *)kaddr; |
|---|
| 1858 | 1826 | memcpy(on_disk_csum, h->csum, sctx->csum_size); |
|---|
| 1859 | 1827 | |
|---|
| 1860 | 1828 | /* |
|---|
| .. | .. |
|---|
| 1862 | 1830 | * a) don't have an extent buffer and |
|---|
| 1863 | 1831 | * b) the page is already kmapped |
|---|
| 1864 | 1832 | */ |
|---|
| 1865 | | - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) |
|---|
| 1833 | + if (spage->logical != btrfs_stack_header_bytenr(h)) |
|---|
| 1866 | 1834 | sblock->header_error = 1; |
|---|
| 1867 | 1835 | |
|---|
| 1868 | | - if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) { |
|---|
| 1836 | + if (spage->generation != btrfs_stack_header_generation(h)) { |
|---|
| 1869 | 1837 | sblock->header_error = 1; |
|---|
| 1870 | 1838 | sblock->generation_error = 1; |
|---|
| 1871 | 1839 | } |
|---|
| 1872 | 1840 | |
|---|
| 1873 | | - if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) |
|---|
| 1841 | + if (!scrub_check_fsid(h->fsid, spage)) |
|---|
| 1874 | 1842 | sblock->header_error = 1; |
|---|
| 1875 | 1843 | |
|---|
| 1876 | 1844 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
|---|
| 1877 | 1845 | BTRFS_UUID_SIZE)) |
|---|
| 1878 | 1846 | sblock->header_error = 1; |
|---|
| 1879 | 1847 | |
|---|
| 1880 | | - len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE; |
|---|
| 1881 | | - mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
|---|
| 1882 | | - p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
|---|
| 1883 | | - index = 0; |
|---|
| 1884 | | - for (;;) { |
|---|
| 1885 | | - u64 l = min_t(u64, len, mapped_size); |
|---|
| 1848 | + shash->tfm = fs_info->csum_shash; |
|---|
| 1849 | + crypto_shash_init(shash); |
|---|
| 1850 | + crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, |
|---|
| 1851 | + PAGE_SIZE - BTRFS_CSUM_SIZE); |
|---|
| 1886 | 1852 | |
|---|
| 1887 | | - crc = btrfs_csum_data(p, crc, l); |
|---|
| 1888 | | - kunmap_atomic(mapped_buffer); |
|---|
| 1889 | | - len -= l; |
|---|
| 1890 | | - if (len == 0) |
|---|
| 1891 | | - break; |
|---|
| 1892 | | - index++; |
|---|
| 1893 | | - BUG_ON(index >= sblock->page_count); |
|---|
| 1894 | | - BUG_ON(!sblock->pagev[index]->page); |
|---|
| 1895 | | - page = sblock->pagev[index]->page; |
|---|
| 1896 | | - mapped_buffer = kmap_atomic(page); |
|---|
| 1897 | | - mapped_size = PAGE_SIZE; |
|---|
| 1898 | | - p = mapped_buffer; |
|---|
| 1853 | + for (i = 1; i < num_pages; i++) { |
|---|
| 1854 | + kaddr = page_address(sblock->pagev[i]->page); |
|---|
| 1855 | + crypto_shash_update(shash, kaddr, PAGE_SIZE); |
|---|
| 1899 | 1856 | } |
|---|
| 1900 | 1857 | |
|---|
| 1901 | | - btrfs_csum_final(crc, calculated_csum); |
|---|
| 1858 | + crypto_shash_final(shash, calculated_csum); |
|---|
| 1902 | 1859 | if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) |
|---|
| 1903 | 1860 | sblock->checksum_error = 1; |
|---|
| 1904 | 1861 | |
|---|
| .. | .. |
|---|
| 1909 | 1866 | { |
|---|
| 1910 | 1867 | struct btrfs_super_block *s; |
|---|
| 1911 | 1868 | struct scrub_ctx *sctx = sblock->sctx; |
|---|
| 1869 | + struct btrfs_fs_info *fs_info = sctx->fs_info; |
|---|
| 1870 | + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); |
|---|
| 1912 | 1871 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
|---|
| 1913 | | - u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
|---|
| 1914 | | - struct page *page; |
|---|
| 1915 | | - void *mapped_buffer; |
|---|
| 1916 | | - u64 mapped_size; |
|---|
| 1917 | | - void *p; |
|---|
| 1918 | | - u32 crc = ~(u32)0; |
|---|
| 1872 | + struct scrub_page *spage; |
|---|
| 1873 | + char *kaddr; |
|---|
| 1919 | 1874 | int fail_gen = 0; |
|---|
| 1920 | 1875 | int fail_cor = 0; |
|---|
| 1921 | | - u64 len; |
|---|
| 1922 | | - int index; |
|---|
| 1923 | 1876 | |
|---|
| 1924 | 1877 | BUG_ON(sblock->page_count < 1); |
|---|
| 1925 | | - page = sblock->pagev[0]->page; |
|---|
| 1926 | | - mapped_buffer = kmap_atomic(page); |
|---|
| 1927 | | - s = (struct btrfs_super_block *)mapped_buffer; |
|---|
| 1928 | | - memcpy(on_disk_csum, s->csum, sctx->csum_size); |
|---|
| 1878 | + spage = sblock->pagev[0]; |
|---|
| 1879 | + kaddr = page_address(spage->page); |
|---|
| 1880 | + s = (struct btrfs_super_block *)kaddr; |
|---|
| 1929 | 1881 | |
|---|
| 1930 | | - if (sblock->pagev[0]->logical != btrfs_super_bytenr(s)) |
|---|
| 1882 | + if (spage->logical != btrfs_super_bytenr(s)) |
|---|
| 1931 | 1883 | ++fail_cor; |
|---|
| 1932 | 1884 | |
|---|
| 1933 | | - if (sblock->pagev[0]->generation != btrfs_super_generation(s)) |
|---|
| 1885 | + if (spage->generation != btrfs_super_generation(s)) |
|---|
| 1934 | 1886 | ++fail_gen; |
|---|
| 1935 | 1887 | |
|---|
| 1936 | | - if (!scrub_check_fsid(s->fsid, sblock->pagev[0])) |
|---|
| 1888 | + if (!scrub_check_fsid(s->fsid, spage)) |
|---|
| 1937 | 1889 | ++fail_cor; |
|---|
| 1938 | 1890 | |
|---|
| 1939 | | - len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; |
|---|
| 1940 | | - mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
|---|
| 1941 | | - p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
|---|
| 1942 | | - index = 0; |
|---|
| 1943 | | - for (;;) { |
|---|
| 1944 | | - u64 l = min_t(u64, len, mapped_size); |
|---|
| 1891 | + shash->tfm = fs_info->csum_shash; |
|---|
| 1892 | + crypto_shash_init(shash); |
|---|
| 1893 | + crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE, |
|---|
| 1894 | + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum); |
|---|
| 1945 | 1895 | |
|---|
| 1946 | | - crc = btrfs_csum_data(p, crc, l); |
|---|
| 1947 | | - kunmap_atomic(mapped_buffer); |
|---|
| 1948 | | - len -= l; |
|---|
| 1949 | | - if (len == 0) |
|---|
| 1950 | | - break; |
|---|
| 1951 | | - index++; |
|---|
| 1952 | | - BUG_ON(index >= sblock->page_count); |
|---|
| 1953 | | - BUG_ON(!sblock->pagev[index]->page); |
|---|
| 1954 | | - page = sblock->pagev[index]->page; |
|---|
| 1955 | | - mapped_buffer = kmap_atomic(page); |
|---|
| 1956 | | - mapped_size = PAGE_SIZE; |
|---|
| 1957 | | - p = mapped_buffer; |
|---|
| 1958 | | - } |
|---|
| 1959 | | - |
|---|
| 1960 | | - btrfs_csum_final(crc, calculated_csum); |
|---|
| 1961 | | - if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) |
|---|
| 1896 | + if (memcmp(calculated_csum, s->csum, sctx->csum_size)) |
|---|
| 1962 | 1897 | ++fail_cor; |
|---|
| 1963 | 1898 | |
|---|
| 1964 | 1899 | if (fail_cor + fail_gen) { |
|---|
| .. | .. |
|---|
| 1971 | 1906 | ++sctx->stat.super_errors; |
|---|
| 1972 | 1907 | spin_unlock(&sctx->stat_lock); |
|---|
| 1973 | 1908 | if (fail_cor) |
|---|
| 1974 | | - btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev, |
|---|
| 1909 | + btrfs_dev_stat_inc_and_print(spage->dev, |
|---|
| 1975 | 1910 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
|---|
| 1976 | 1911 | else |
|---|
| 1977 | | - btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev, |
|---|
| 1912 | + btrfs_dev_stat_inc_and_print(spage->dev, |
|---|
| 1978 | 1913 | BTRFS_DEV_STAT_GENERATION_ERRS); |
|---|
| 1979 | 1914 | } |
|---|
| 1980 | 1915 | |
|---|
| .. | .. |
|---|
| 2199 | 2134 | raid56_add_scrub_pages(rbio, spage->page, spage->logical); |
|---|
| 2200 | 2135 | } |
|---|
| 2201 | 2136 | |
|---|
| 2202 | | - btrfs_init_work(&sblock->work, btrfs_scrub_helper, |
|---|
| 2203 | | - scrub_missing_raid56_worker, NULL, NULL); |
|---|
| 2137 | + btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL); |
|---|
| 2204 | 2138 | scrub_block_get(sblock); |
|---|
| 2205 | 2139 | scrub_pending_bio_inc(sctx); |
|---|
| 2206 | 2140 | raid56_submit_missing_rbio(rbio); |
|---|
| .. | .. |
|---|
| 2456 | 2390 | ASSERT(index < UINT_MAX); |
|---|
| 2457 | 2391 | |
|---|
| 2458 | 2392 | num_sectors = sum->len / sctx->fs_info->sectorsize; |
|---|
| 2459 | | - memcpy(csum, sum->sums + index, sctx->csum_size); |
|---|
| 2393 | + memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size); |
|---|
| 2460 | 2394 | if (index == num_sectors - 1) { |
|---|
| 2461 | 2395 | list_del(&sum->list); |
|---|
| 2462 | 2396 | kfree(sum); |
|---|
| .. | .. |
|---|
| 2668 | 2602 | u64 last_offset; |
|---|
| 2669 | 2603 | u32 stripe_index; |
|---|
| 2670 | 2604 | u32 rot; |
|---|
| 2605 | + const int data_stripes = nr_data_stripes(map); |
|---|
| 2671 | 2606 | |
|---|
| 2672 | | - last_offset = (physical - map->stripes[num].physical) * |
|---|
| 2673 | | - nr_data_stripes(map); |
|---|
| 2607 | + last_offset = (physical - map->stripes[num].physical) * data_stripes; |
|---|
| 2674 | 2608 | if (stripe_start) |
|---|
| 2675 | 2609 | *stripe_start = last_offset; |
|---|
| 2676 | 2610 | |
|---|
| 2677 | 2611 | *offset = last_offset; |
|---|
| 2678 | | - for (i = 0; i < nr_data_stripes(map); i++) { |
|---|
| 2612 | + for (i = 0; i < data_stripes; i++) { |
|---|
| 2679 | 2613 | *offset = last_offset + i * map->stripe_len; |
|---|
| 2680 | 2614 | |
|---|
| 2681 | 2615 | stripe_nr = div64_u64(*offset, map->stripe_len); |
|---|
| 2682 | | - stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); |
|---|
| 2616 | + stripe_nr = div_u64(stripe_nr, data_stripes); |
|---|
| 2683 | 2617 | |
|---|
| 2684 | 2618 | /* Work out the disk rotation on this stripe-set */ |
|---|
| 2685 | 2619 | stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot); |
|---|
| .. | .. |
|---|
| 2738 | 2672 | |
|---|
| 2739 | 2673 | bio_put(bio); |
|---|
| 2740 | 2674 | |
|---|
| 2741 | | - btrfs_init_work(&sparity->work, btrfs_scrubparity_helper, |
|---|
| 2742 | | - scrub_parity_bio_endio_worker, NULL, NULL); |
|---|
| 2675 | + btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL, |
|---|
| 2676 | + NULL); |
|---|
| 2743 | 2677 | btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work); |
|---|
| 2744 | 2678 | } |
|---|
| 2745 | 2679 | |
|---|
| .. | .. |
|---|
| 3041 | 2975 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
|---|
| 3042 | 2976 | struct map_lookup *map, |
|---|
| 3043 | 2977 | struct btrfs_device *scrub_dev, |
|---|
| 3044 | | - int num, u64 base, u64 length) |
|---|
| 2978 | + int num, u64 base, u64 length, |
|---|
| 2979 | + struct btrfs_block_group *cache) |
|---|
| 3045 | 2980 | { |
|---|
| 3046 | 2981 | struct btrfs_path *path, *ppath; |
|---|
| 3047 | 2982 | struct btrfs_fs_info *fs_info = sctx->fs_info; |
|---|
| .. | .. |
|---|
| 3087 | 3022 | offset = map->stripe_len * (num / map->sub_stripes); |
|---|
| 3088 | 3023 | increment = map->stripe_len * factor; |
|---|
| 3089 | 3024 | mirror_num = num % map->sub_stripes + 1; |
|---|
| 3090 | | - } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
|---|
| 3025 | + } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) { |
|---|
| 3091 | 3026 | increment = map->stripe_len; |
|---|
| 3092 | 3027 | mirror_num = num % map->num_stripes + 1; |
|---|
| 3093 | 3028 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
|---|
| .. | .. |
|---|
| 3279 | 3214 | break; |
|---|
| 3280 | 3215 | } |
|---|
| 3281 | 3216 | |
|---|
| 3217 | + /* |
|---|
| 3218 | + * If our block group was removed in the meanwhile, just |
|---|
| 3219 | + * stop scrubbing since there is no point in continuing. |
|---|
| 3220 | + * Continuing would prevent reusing its device extents |
|---|
| 3221 | + * for new block groups for a long time. |
|---|
| 3222 | + */ |
|---|
| 3223 | + spin_lock(&cache->lock); |
|---|
| 3224 | + if (cache->removed) { |
|---|
| 3225 | + spin_unlock(&cache->lock); |
|---|
| 3226 | + ret = 0; |
|---|
| 3227 | + goto out; |
|---|
| 3228 | + } |
|---|
| 3229 | + spin_unlock(&cache->lock); |
|---|
| 3230 | + |
|---|
| 3282 | 3231 | extent = btrfs_item_ptr(l, slot, |
|---|
| 3283 | 3232 | struct btrfs_extent_item); |
|---|
| 3284 | 3233 | flags = btrfs_extent_flags(l, extent); |
|---|
| .. | .. |
|---|
| 3323 | 3272 | &extent_dev, |
|---|
| 3324 | 3273 | &extent_mirror_num); |
|---|
| 3325 | 3274 | |
|---|
| 3326 | | - ret = btrfs_lookup_csums_range(csum_root, |
|---|
| 3327 | | - extent_logical, |
|---|
| 3328 | | - extent_logical + |
|---|
| 3329 | | - extent_len - 1, |
|---|
| 3330 | | - &sctx->csum_list, 1); |
|---|
| 3331 | | - if (ret) |
|---|
| 3332 | | - goto out; |
|---|
| 3275 | + if (flags & BTRFS_EXTENT_FLAG_DATA) { |
|---|
| 3276 | + ret = btrfs_lookup_csums_range(csum_root, |
|---|
| 3277 | + extent_logical, |
|---|
| 3278 | + extent_logical + extent_len - 1, |
|---|
| 3279 | + &sctx->csum_list, 1); |
|---|
| 3280 | + if (ret) |
|---|
| 3281 | + goto out; |
|---|
| 3282 | + } |
|---|
| 3333 | 3283 | |
|---|
| 3334 | 3284 | ret = scrub_extent(sctx, map, extent_logical, extent_len, |
|---|
| 3335 | 3285 | extent_physical, extent_dev, flags, |
|---|
| .. | .. |
|---|
| 3415 | 3365 | struct btrfs_device *scrub_dev, |
|---|
| 3416 | 3366 | u64 chunk_offset, u64 length, |
|---|
| 3417 | 3367 | u64 dev_offset, |
|---|
| 3418 | | - struct btrfs_block_group_cache *cache) |
|---|
| 3368 | + struct btrfs_block_group *cache) |
|---|
| 3419 | 3369 | { |
|---|
| 3420 | 3370 | struct btrfs_fs_info *fs_info = sctx->fs_info; |
|---|
| 3421 | | - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
|---|
| 3371 | + struct extent_map_tree *map_tree = &fs_info->mapping_tree; |
|---|
| 3422 | 3372 | struct map_lookup *map; |
|---|
| 3423 | 3373 | struct extent_map *em; |
|---|
| 3424 | 3374 | int i; |
|---|
| 3425 | 3375 | int ret = 0; |
|---|
| 3426 | 3376 | |
|---|
| 3427 | | - read_lock(&map_tree->map_tree.lock); |
|---|
| 3428 | | - em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
|---|
| 3429 | | - read_unlock(&map_tree->map_tree.lock); |
|---|
| 3377 | + read_lock(&map_tree->lock); |
|---|
| 3378 | + em = lookup_extent_mapping(map_tree, chunk_offset, 1); |
|---|
| 3379 | + read_unlock(&map_tree->lock); |
|---|
| 3430 | 3380 | |
|---|
| 3431 | 3381 | if (!em) { |
|---|
| 3432 | 3382 | /* |
|---|
| .. | .. |
|---|
| 3452 | 3402 | if (map->stripes[i].dev->bdev == scrub_dev->bdev && |
|---|
| 3453 | 3403 | map->stripes[i].physical == dev_offset) { |
|---|
| 3454 | 3404 | ret = scrub_stripe(sctx, map, scrub_dev, i, |
|---|
| 3455 | | - chunk_offset, length); |
|---|
| 3405 | + chunk_offset, length, cache); |
|---|
| 3456 | 3406 | if (ret) |
|---|
| 3457 | 3407 | goto out; |
|---|
| 3458 | 3408 | } |
|---|
| .. | .. |
|---|
| 3479 | 3429 | struct extent_buffer *l; |
|---|
| 3480 | 3430 | struct btrfs_key key; |
|---|
| 3481 | 3431 | struct btrfs_key found_key; |
|---|
| 3482 | | - struct btrfs_block_group_cache *cache; |
|---|
| 3432 | + struct btrfs_block_group *cache; |
|---|
| 3483 | 3433 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
|---|
| 3484 | 3434 | |
|---|
| 3485 | 3435 | path = btrfs_alloc_path(); |
|---|
| .. | .. |
|---|
| 3550 | 3500 | goto skip; |
|---|
| 3551 | 3501 | |
|---|
| 3552 | 3502 | /* |
|---|
| 3503 | + * Make sure that while we are scrubbing the corresponding block |
|---|
| 3504 | + * group doesn't get its logical address and its device extents |
|---|
| 3505 | + * reused for another block group, which can possibly be of a |
|---|
| 3506 | + * different type and different profile. We do this to prevent |
|---|
| 3507 | + * false error detections and crashes due to bogus attempts to |
|---|
| 3508 | + * repair extents. |
|---|
| 3509 | + */ |
|---|
| 3510 | + spin_lock(&cache->lock); |
|---|
| 3511 | + if (cache->removed) { |
|---|
| 3512 | + spin_unlock(&cache->lock); |
|---|
| 3513 | + btrfs_put_block_group(cache); |
|---|
| 3514 | + goto skip; |
|---|
| 3515 | + } |
|---|
| 3516 | + btrfs_freeze_block_group(cache); |
|---|
| 3517 | + spin_unlock(&cache->lock); |
|---|
| 3518 | + |
|---|
| 3519 | + /* |
|---|
| 3553 | 3520 | * we need call btrfs_inc_block_group_ro() with scrubs_paused, |
|---|
| 3554 | 3521 | * to avoid deadlock caused by: |
|---|
| 3555 | 3522 | * btrfs_inc_block_group_ro() |
|---|
| .. | .. |
|---|
| 3558 | 3525 | * -> btrfs_scrub_pause() |
|---|
| 3559 | 3526 | */ |
|---|
| 3560 | 3527 | scrub_pause_on(fs_info); |
|---|
| 3561 | | - ret = btrfs_inc_block_group_ro(cache); |
|---|
| 3562 | | - if (!ret && sctx->is_dev_replace) { |
|---|
| 3563 | | - /* |
|---|
| 3564 | | - * If we are doing a device replace wait for any tasks |
|---|
| 3565 | | - * that started dellaloc right before we set the block |
|---|
| 3566 | | - * group to RO mode, as they might have just allocated |
|---|
| 3567 | | - * an extent from it or decided they could do a nocow |
|---|
| 3568 | | - * write. And if any such tasks did that, wait for their |
|---|
| 3569 | | - * ordered extents to complete and then commit the |
|---|
| 3570 | | - * current transaction, so that we can later see the new |
|---|
| 3571 | | - * extent items in the extent tree - the ordered extents |
|---|
| 3572 | | - * create delayed data references (for cow writes) when |
|---|
| 3573 | | - * they complete, which will be run and insert the |
|---|
| 3574 | | - * corresponding extent items into the extent tree when |
|---|
| 3575 | | - * we commit the transaction they used when running |
|---|
| 3576 | | - * inode.c:btrfs_finish_ordered_io(). We later use |
|---|
| 3577 | | - * the commit root of the extent tree to find extents |
|---|
| 3578 | | - * to copy from the srcdev into the tgtdev, and we don't |
|---|
| 3579 | | - * want to miss any new extents. |
|---|
| 3580 | | - */ |
|---|
| 3581 | | - btrfs_wait_block_group_reservations(cache); |
|---|
| 3582 | | - btrfs_wait_nocow_writers(cache); |
|---|
| 3583 | | - ret = btrfs_wait_ordered_roots(fs_info, U64_MAX, |
|---|
| 3584 | | - cache->key.objectid, |
|---|
| 3585 | | - cache->key.offset); |
|---|
| 3586 | | - if (ret > 0) { |
|---|
| 3587 | | - struct btrfs_trans_handle *trans; |
|---|
| 3588 | 3528 | |
|---|
| 3589 | | - trans = btrfs_join_transaction(root); |
|---|
| 3590 | | - if (IS_ERR(trans)) |
|---|
| 3591 | | - ret = PTR_ERR(trans); |
|---|
| 3592 | | - else |
|---|
| 3593 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 3594 | | - if (ret) { |
|---|
| 3595 | | - scrub_pause_off(fs_info); |
|---|
| 3596 | | - btrfs_put_block_group(cache); |
|---|
| 3597 | | - break; |
|---|
| 3598 | | - } |
|---|
| 3599 | | - } |
|---|
| 3600 | | - } |
|---|
| 3601 | | - scrub_pause_off(fs_info); |
|---|
| 3602 | | - |
|---|
| 3529 | + /* |
|---|
| 3530 | + * Don't do chunk preallocation for scrub. |
|---|
| 3531 | + * |
|---|
| 3532 | + * This is especially important for SYSTEM bgs, or we can hit |
|---|
| 3533 | + * -EFBIG from btrfs_finish_chunk_alloc() like: |
|---|
| 3534 | + * 1. The only SYSTEM bg is marked RO. |
|---|
| 3535 | + * Since SYSTEM bg is small, that's pretty common. |
|---|
| 3536 | + * 2. New SYSTEM bg will be allocated |
|---|
| 3537 | + * Because the regular version will allocate a new chunk. |
|---|
| 3538 | + * 3. New SYSTEM bg is empty and will get cleaned up |
|---|
| 3539 | + * Before cleanup really happens, it's marked RO again. |
|---|
| 3540 | + * 4. Empty SYSTEM bg gets scrubbed |
|---|
| 3541 | + * We go back to 2. |
|---|
| 3542 | + * |
|---|
| 3543 | + * This can easily boost the number of SYSTEM chunks if the cleaner |
|---|
| 3544 | + * thread can't be triggered fast enough, and use up all space |
|---|
| 3545 | + * of btrfs_super_block::sys_chunk_array |
|---|
| 3546 | + * |
|---|
| 3547 | + * While for dev replace, we need to try our best to mark block |
|---|
| 3548 | + * group RO, to prevent race between: |
|---|
| 3549 | + * - Write duplication |
|---|
| 3550 | + * Contains latest data |
|---|
| 3551 | + * - Scrub copy |
|---|
| 3552 | + * Contains data from commit tree |
|---|
| 3553 | + * |
|---|
| 3554 | + * If target block group is not marked RO, nocow writes can |
|---|
| 3555 | + * be overwritten by scrub copy, causing data corruption. |
|---|
| 3556 | + * So for dev-replace, it's not allowed to continue if a block |
|---|
| 3557 | + * group is not RO. |
|---|
| 3558 | + */ |
|---|
| 3559 | + ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace); |
|---|
| 3603 | 3560 | if (ret == 0) { |
|---|
| 3604 | 3561 | ro_set = 1; |
|---|
| 3605 | | - } else if (ret == -ENOSPC) { |
|---|
| 3562 | + } else if (ret == -ENOSPC && !sctx->is_dev_replace) { |
|---|
| 3606 | 3563 | /* |
|---|
| 3607 | 3564 | * btrfs_inc_block_group_ro return -ENOSPC when it |
|---|
| 3608 | 3565 | * failed in creating new chunk for metadata. |
|---|
| 3609 | | - * It is not a problem for scrub/replace, because |
|---|
| 3566 | + * It is not a problem for scrub, because |
|---|
| 3610 | 3567 | * metadata are always cowed, and our scrub paused |
|---|
| 3611 | 3568 | * commit_transactions. |
|---|
| 3612 | 3569 | */ |
|---|
| 3613 | 3570 | ro_set = 0; |
|---|
| 3571 | + } else if (ret == -ETXTBSY) { |
|---|
| 3572 | + btrfs_warn(fs_info, |
|---|
| 3573 | + "skipping scrub of block group %llu due to active swapfile", |
|---|
| 3574 | + cache->start); |
|---|
| 3575 | + scrub_pause_off(fs_info); |
|---|
| 3576 | + ret = 0; |
|---|
| 3577 | + goto skip_unfreeze; |
|---|
| 3614 | 3578 | } else { |
|---|
| 3615 | 3579 | btrfs_warn(fs_info, |
|---|
| 3616 | 3580 | "failed setting block group ro: %d", ret); |
|---|
| 3581 | + btrfs_unfreeze_block_group(cache); |
|---|
| 3617 | 3582 | btrfs_put_block_group(cache); |
|---|
| 3583 | + scrub_pause_off(fs_info); |
|---|
| 3618 | 3584 | break; |
|---|
| 3619 | 3585 | } |
|---|
| 3620 | 3586 | |
|---|
| 3621 | | - btrfs_dev_replace_write_lock(&fs_info->dev_replace); |
|---|
| 3587 | + /* |
|---|
| 3588 | + * Now the target block is marked RO, wait for nocow writes to |
|---|
| 3589 | + * finish before dev-replace. |
|---|
| 3590 | + * COW is fine, as COW never overwrites extents in commit tree. |
|---|
| 3591 | + */ |
|---|
| 3592 | + if (sctx->is_dev_replace) { |
|---|
| 3593 | + btrfs_wait_nocow_writers(cache); |
|---|
| 3594 | + btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, |
|---|
| 3595 | + cache->length); |
|---|
| 3596 | + } |
|---|
| 3597 | + |
|---|
| 3598 | + scrub_pause_off(fs_info); |
|---|
| 3599 | + down_write(&dev_replace->rwsem); |
|---|
| 3622 | 3600 | dev_replace->cursor_right = found_key.offset + length; |
|---|
| 3623 | 3601 | dev_replace->cursor_left = found_key.offset; |
|---|
| 3624 | 3602 | dev_replace->item_needs_writeback = 1; |
|---|
| 3625 | | - btrfs_dev_replace_write_unlock(&fs_info->dev_replace); |
|---|
| 3603 | + up_write(&dev_replace->rwsem); |
|---|
| 3604 | + |
|---|
| 3626 | 3605 | ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, |
|---|
| 3627 | 3606 | found_key.offset, cache); |
|---|
| 3628 | 3607 | |
|---|
| .. | .. |
|---|
| 3658 | 3637 | |
|---|
| 3659 | 3638 | scrub_pause_off(fs_info); |
|---|
| 3660 | 3639 | |
|---|
| 3661 | | - btrfs_dev_replace_write_lock(&fs_info->dev_replace); |
|---|
| 3640 | + down_write(&dev_replace->rwsem); |
|---|
| 3662 | 3641 | dev_replace->cursor_left = dev_replace->cursor_right; |
|---|
| 3663 | 3642 | dev_replace->item_needs_writeback = 1; |
|---|
| 3664 | | - btrfs_dev_replace_write_unlock(&fs_info->dev_replace); |
|---|
| 3643 | + up_write(&dev_replace->rwsem); |
|---|
| 3665 | 3644 | |
|---|
| 3666 | 3645 | if (ro_set) |
|---|
| 3667 | 3646 | btrfs_dec_block_group_ro(cache); |
|---|
| .. | .. |
|---|
| 3675 | 3654 | */ |
|---|
| 3676 | 3655 | spin_lock(&cache->lock); |
|---|
| 3677 | 3656 | if (!cache->removed && !cache->ro && cache->reserved == 0 && |
|---|
| 3678 | | - btrfs_block_group_used(&cache->item) == 0) { |
|---|
| 3657 | + cache->used == 0) { |
|---|
| 3679 | 3658 | spin_unlock(&cache->lock); |
|---|
| 3680 | | - btrfs_mark_bg_unused(cache); |
|---|
| 3659 | + if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) |
|---|
| 3660 | + btrfs_discard_queue_work(&fs_info->discard_ctl, |
|---|
| 3661 | + cache); |
|---|
| 3662 | + else |
|---|
| 3663 | + btrfs_mark_bg_unused(cache); |
|---|
| 3681 | 3664 | } else { |
|---|
| 3682 | 3665 | spin_unlock(&cache->lock); |
|---|
| 3683 | 3666 | } |
|---|
| 3684 | | - |
|---|
| 3667 | +skip_unfreeze: |
|---|
| 3668 | + btrfs_unfreeze_block_group(cache); |
|---|
| 3685 | 3669 | btrfs_put_block_group(cache); |
|---|
| 3686 | 3670 | if (ret) |
|---|
| 3687 | 3671 | break; |
|---|
| .. | .. |
|---|
| 3714 | 3698 | struct btrfs_fs_info *fs_info = sctx->fs_info; |
|---|
| 3715 | 3699 | |
|---|
| 3716 | 3700 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
|---|
| 3717 | | - return -EIO; |
|---|
| 3701 | + return -EROFS; |
|---|
| 3718 | 3702 | |
|---|
| 3719 | 3703 | /* Seed devices of a new filesystem has their own generation. */ |
|---|
| 3720 | 3704 | if (scrub_dev->fs_devices != fs_info->fs_devices) |
|---|
| .. | .. |
|---|
| 3739 | 3723 | return 0; |
|---|
| 3740 | 3724 | } |
|---|
| 3741 | 3725 | |
|---|
| 3726 | +static void scrub_workers_put(struct btrfs_fs_info *fs_info) |
|---|
| 3727 | +{ |
|---|
| 3728 | + if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt, |
|---|
| 3729 | + &fs_info->scrub_lock)) { |
|---|
| 3730 | + struct btrfs_workqueue *scrub_workers = NULL; |
|---|
| 3731 | + struct btrfs_workqueue *scrub_wr_comp = NULL; |
|---|
| 3732 | + struct btrfs_workqueue *scrub_parity = NULL; |
|---|
| 3733 | + |
|---|
| 3734 | + scrub_workers = fs_info->scrub_workers; |
|---|
| 3735 | + scrub_wr_comp = fs_info->scrub_wr_completion_workers; |
|---|
| 3736 | + scrub_parity = fs_info->scrub_parity_workers; |
|---|
| 3737 | + |
|---|
| 3738 | + fs_info->scrub_workers = NULL; |
|---|
| 3739 | + fs_info->scrub_wr_completion_workers = NULL; |
|---|
| 3740 | + fs_info->scrub_parity_workers = NULL; |
|---|
| 3741 | + mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3742 | + |
|---|
| 3743 | + btrfs_destroy_workqueue(scrub_workers); |
|---|
| 3744 | + btrfs_destroy_workqueue(scrub_wr_comp); |
|---|
| 3745 | + btrfs_destroy_workqueue(scrub_parity); |
|---|
| 3746 | + } |
|---|
| 3747 | +} |
|---|
| 3748 | + |
|---|
| 3742 | 3749 | /* |
|---|
| 3743 | 3750 | * get a reference count on fs_info->scrub_workers. start worker if necessary |
|---|
| 3744 | 3751 | */ |
|---|
| 3745 | 3752 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, |
|---|
| 3746 | 3753 | int is_dev_replace) |
|---|
| 3747 | 3754 | { |
|---|
| 3755 | + struct btrfs_workqueue *scrub_workers = NULL; |
|---|
| 3756 | + struct btrfs_workqueue *scrub_wr_comp = NULL; |
|---|
| 3757 | + struct btrfs_workqueue *scrub_parity = NULL; |
|---|
| 3748 | 3758 | unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; |
|---|
| 3749 | 3759 | int max_active = fs_info->thread_pool_size; |
|---|
| 3760 | + int ret = -ENOMEM; |
|---|
| 3750 | 3761 | |
|---|
| 3751 | | - if (fs_info->scrub_workers_refcnt == 0) { |
|---|
| 3752 | | - fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", |
|---|
| 3753 | | - flags, is_dev_replace ? 1 : max_active, 4); |
|---|
| 3754 | | - if (!fs_info->scrub_workers) |
|---|
| 3755 | | - goto fail_scrub_workers; |
|---|
| 3762 | + if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt)) |
|---|
| 3763 | + return 0; |
|---|
| 3756 | 3764 | |
|---|
| 3757 | | - fs_info->scrub_wr_completion_workers = |
|---|
| 3758 | | - btrfs_alloc_workqueue(fs_info, "scrubwrc", flags, |
|---|
| 3765 | + scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags, |
|---|
| 3766 | + is_dev_replace ? 1 : max_active, 4); |
|---|
| 3767 | + if (!scrub_workers) |
|---|
| 3768 | + goto fail_scrub_workers; |
|---|
| 3769 | + |
|---|
| 3770 | + scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags, |
|---|
| 3759 | 3771 | max_active, 2); |
|---|
| 3760 | | - if (!fs_info->scrub_wr_completion_workers) |
|---|
| 3761 | | - goto fail_scrub_wr_completion_workers; |
|---|
| 3772 | + if (!scrub_wr_comp) |
|---|
| 3773 | + goto fail_scrub_wr_completion_workers; |
|---|
| 3762 | 3774 | |
|---|
| 3763 | | - fs_info->scrub_parity_workers = |
|---|
| 3764 | | - btrfs_alloc_workqueue(fs_info, "scrubparity", flags, |
|---|
| 3765 | | - max_active, 2); |
|---|
| 3766 | | - if (!fs_info->scrub_parity_workers) |
|---|
| 3767 | | - goto fail_scrub_parity_workers; |
|---|
| 3775 | + scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags, |
|---|
| 3776 | + max_active, 2); |
|---|
| 3777 | + if (!scrub_parity) |
|---|
| 3778 | + goto fail_scrub_parity_workers; |
|---|
| 3779 | + |
|---|
| 3780 | + mutex_lock(&fs_info->scrub_lock); |
|---|
| 3781 | + if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) { |
|---|
| 3782 | + ASSERT(fs_info->scrub_workers == NULL && |
|---|
| 3783 | + fs_info->scrub_wr_completion_workers == NULL && |
|---|
| 3784 | + fs_info->scrub_parity_workers == NULL); |
|---|
| 3785 | + fs_info->scrub_workers = scrub_workers; |
|---|
| 3786 | + fs_info->scrub_wr_completion_workers = scrub_wr_comp; |
|---|
| 3787 | + fs_info->scrub_parity_workers = scrub_parity; |
|---|
| 3788 | + refcount_set(&fs_info->scrub_workers_refcnt, 1); |
|---|
| 3789 | + mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3790 | + return 0; |
|---|
| 3768 | 3791 | } |
|---|
| 3769 | | - ++fs_info->scrub_workers_refcnt; |
|---|
| 3770 | | - return 0; |
|---|
| 3792 | + /* Another thread raced in and created the workers for us */ |
|---|
| 3793 | + refcount_inc(&fs_info->scrub_workers_refcnt); |
|---|
| 3794 | + mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3771 | 3795 | |
|---|
| 3796 | + ret = 0; |
|---|
| 3797 | + btrfs_destroy_workqueue(scrub_parity); |
|---|
| 3772 | 3798 | fail_scrub_parity_workers: |
|---|
| 3773 | | - btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); |
|---|
| 3799 | + btrfs_destroy_workqueue(scrub_wr_comp); |
|---|
| 3774 | 3800 | fail_scrub_wr_completion_workers: |
|---|
| 3775 | | - btrfs_destroy_workqueue(fs_info->scrub_workers); |
|---|
| 3801 | + btrfs_destroy_workqueue(scrub_workers); |
|---|
| 3776 | 3802 | fail_scrub_workers: |
|---|
| 3777 | | - return -ENOMEM; |
|---|
| 3803 | + return ret; |
|---|
| 3778 | 3804 | } |
|---|
| 3779 | 3805 | |
|---|
| 3780 | 3806 | int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, |
|---|
| .. | .. |
|---|
| 3785 | 3811 | int ret; |
|---|
| 3786 | 3812 | struct btrfs_device *dev; |
|---|
| 3787 | 3813 | unsigned int nofs_flag; |
|---|
| 3788 | | - struct btrfs_workqueue *scrub_workers = NULL; |
|---|
| 3789 | | - struct btrfs_workqueue *scrub_wr_comp = NULL; |
|---|
| 3790 | | - struct btrfs_workqueue *scrub_parity = NULL; |
|---|
| 3814 | + bool need_commit = false; |
|---|
| 3791 | 3815 | |
|---|
| 3792 | 3816 | if (btrfs_fs_closing(fs_info)) |
|---|
| 3793 | | - return -EINVAL; |
|---|
| 3817 | + return -EAGAIN; |
|---|
| 3794 | 3818 | |
|---|
| 3795 | 3819 | if (fs_info->nodesize > BTRFS_STRIPE_LEN) { |
|---|
| 3796 | 3820 | /* |
|---|
| .. | .. |
|---|
| 3834 | 3858 | if (IS_ERR(sctx)) |
|---|
| 3835 | 3859 | return PTR_ERR(sctx); |
|---|
| 3836 | 3860 | |
|---|
| 3861 | + ret = scrub_workers_get(fs_info, is_dev_replace); |
|---|
| 3862 | + if (ret) |
|---|
| 3863 | + goto out_free_ctx; |
|---|
| 3864 | + |
|---|
| 3837 | 3865 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3838 | 3866 | dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true); |
|---|
| 3839 | 3867 | if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) && |
|---|
| 3840 | 3868 | !is_dev_replace)) { |
|---|
| 3841 | 3869 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3842 | 3870 | ret = -ENODEV; |
|---|
| 3843 | | - goto out_free_ctx; |
|---|
| 3871 | + goto out; |
|---|
| 3844 | 3872 | } |
|---|
| 3845 | 3873 | |
|---|
| 3846 | 3874 | if (!is_dev_replace && !readonly && |
|---|
| 3847 | 3875 | !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { |
|---|
| 3848 | 3876 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3849 | | - btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable", |
|---|
| 3850 | | - rcu_str_deref(dev->name)); |
|---|
| 3877 | + btrfs_err_in_rcu(fs_info, |
|---|
| 3878 | + "scrub on devid %llu: filesystem on %s is not writable", |
|---|
| 3879 | + devid, rcu_str_deref(dev->name)); |
|---|
| 3851 | 3880 | ret = -EROFS; |
|---|
| 3852 | | - goto out_free_ctx; |
|---|
| 3881 | + goto out; |
|---|
| 3853 | 3882 | } |
|---|
| 3854 | 3883 | |
|---|
| 3855 | 3884 | mutex_lock(&fs_info->scrub_lock); |
|---|
| .. | .. |
|---|
| 3858 | 3887 | mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3859 | 3888 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3860 | 3889 | ret = -EIO; |
|---|
| 3861 | | - goto out_free_ctx; |
|---|
| 3890 | + goto out; |
|---|
| 3862 | 3891 | } |
|---|
| 3863 | 3892 | |
|---|
| 3864 | | - btrfs_dev_replace_read_lock(&fs_info->dev_replace); |
|---|
| 3893 | + down_read(&fs_info->dev_replace.rwsem); |
|---|
| 3865 | 3894 | if (dev->scrub_ctx || |
|---|
| 3866 | 3895 | (!is_dev_replace && |
|---|
| 3867 | 3896 | btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { |
|---|
| 3868 | | - btrfs_dev_replace_read_unlock(&fs_info->dev_replace); |
|---|
| 3897 | + up_read(&fs_info->dev_replace.rwsem); |
|---|
| 3869 | 3898 | mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3870 | 3899 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3871 | 3900 | ret = -EINPROGRESS; |
|---|
| 3872 | | - goto out_free_ctx; |
|---|
| 3901 | + goto out; |
|---|
| 3873 | 3902 | } |
|---|
| 3874 | | - btrfs_dev_replace_read_unlock(&fs_info->dev_replace); |
|---|
| 3875 | | - |
|---|
| 3876 | | - ret = scrub_workers_get(fs_info, is_dev_replace); |
|---|
| 3877 | | - if (ret) { |
|---|
| 3878 | | - mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3879 | | - mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3880 | | - goto out_free_ctx; |
|---|
| 3881 | | - } |
|---|
| 3903 | + up_read(&fs_info->dev_replace.rwsem); |
|---|
| 3882 | 3904 | |
|---|
| 3883 | 3905 | sctx->readonly = readonly; |
|---|
| 3884 | 3906 | dev->scrub_ctx = sctx; |
|---|
| .. | .. |
|---|
| 3903 | 3925 | */ |
|---|
| 3904 | 3926 | nofs_flag = memalloc_nofs_save(); |
|---|
| 3905 | 3927 | if (!is_dev_replace) { |
|---|
| 3928 | + u64 old_super_errors; |
|---|
| 3929 | + |
|---|
| 3930 | + spin_lock(&sctx->stat_lock); |
|---|
| 3931 | + old_super_errors = sctx->stat.super_errors; |
|---|
| 3932 | + spin_unlock(&sctx->stat_lock); |
|---|
| 3933 | + |
|---|
| 3934 | + btrfs_info(fs_info, "scrub: started on devid %llu", devid); |
|---|
| 3906 | 3935 | /* |
|---|
| 3907 | 3936 | * by holding device list mutex, we can |
|---|
| 3908 | 3937 | * kick off writing super in log tree sync. |
|---|
| .. | .. |
|---|
| 3910 | 3939 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3911 | 3940 | ret = scrub_supers(sctx, dev); |
|---|
| 3912 | 3941 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 3942 | + |
|---|
| 3943 | + spin_lock(&sctx->stat_lock); |
|---|
| 3944 | + /* |
|---|
| 3945 | + * Super block errors found, but we cannot commit a transaction |
|---|
| 3946 | + * in the current context, since btrfs_commit_transaction() needs |
|---|
| 3947 | + * to pause the currently running scrub (held by ourselves). |
|---|
| 3948 | + */ |
|---|
| 3949 | + if (sctx->stat.super_errors > old_super_errors && !sctx->readonly) |
|---|
| 3950 | + need_commit = true; |
|---|
| 3951 | + spin_unlock(&sctx->stat_lock); |
|---|
| 3913 | 3952 | } |
|---|
| 3914 | 3953 | |
|---|
| 3915 | 3954 | if (!ret) |
|---|
| .. | .. |
|---|
| 3925 | 3964 | if (progress) |
|---|
| 3926 | 3965 | memcpy(progress, &sctx->stat, sizeof(*progress)); |
|---|
| 3927 | 3966 | |
|---|
| 3967 | + if (!is_dev_replace) |
|---|
| 3968 | + btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d", |
|---|
| 3969 | + ret ? "not finished" : "finished", devid, ret); |
|---|
| 3970 | + |
|---|
| 3928 | 3971 | mutex_lock(&fs_info->scrub_lock); |
|---|
| 3929 | 3972 | dev->scrub_ctx = NULL; |
|---|
| 3930 | | - if (--fs_info->scrub_workers_refcnt == 0) { |
|---|
| 3931 | | - scrub_workers = fs_info->scrub_workers; |
|---|
| 3932 | | - scrub_wr_comp = fs_info->scrub_wr_completion_workers; |
|---|
| 3933 | | - scrub_parity = fs_info->scrub_parity_workers; |
|---|
| 3934 | | - } |
|---|
| 3935 | 3973 | mutex_unlock(&fs_info->scrub_lock); |
|---|
| 3936 | 3974 | |
|---|
| 3937 | | - btrfs_destroy_workqueue(scrub_workers); |
|---|
| 3938 | | - btrfs_destroy_workqueue(scrub_wr_comp); |
|---|
| 3939 | | - btrfs_destroy_workqueue(scrub_parity); |
|---|
| 3975 | + scrub_workers_put(fs_info); |
|---|
| 3940 | 3976 | scrub_put_ctx(sctx); |
|---|
| 3941 | 3977 | |
|---|
| 3942 | | - return ret; |
|---|
| 3978 | + /* |
|---|
| 3979 | + * We found some super block errors before, now try to force a |
|---|
| 3980 | + * transaction commit, as scrub has finished. |
|---|
| 3981 | + */ |
|---|
| 3982 | + if (need_commit) { |
|---|
| 3983 | + struct btrfs_trans_handle *trans; |
|---|
| 3943 | 3984 | |
|---|
| 3985 | + trans = btrfs_start_transaction(fs_info->tree_root, 0); |
|---|
| 3986 | + if (IS_ERR(trans)) { |
|---|
| 3987 | + ret = PTR_ERR(trans); |
|---|
| 3988 | + btrfs_err(fs_info, |
|---|
| 3989 | + "scrub: failed to start transaction to fix super block errors: %d", ret); |
|---|
| 3990 | + return ret; |
|---|
| 3991 | + } |
|---|
| 3992 | + ret = btrfs_commit_transaction(trans); |
|---|
| 3993 | + if (ret < 0) |
|---|
| 3994 | + btrfs_err(fs_info, |
|---|
| 3995 | + "scrub: failed to commit transaction to fix super block errors: %d", ret); |
|---|
| 3996 | + } |
|---|
| 3997 | + return ret; |
|---|
| 3998 | +out: |
|---|
| 3999 | + scrub_workers_put(fs_info); |
|---|
| 3944 | 4000 | out_free_ctx: |
|---|
| 3945 | 4001 | scrub_free_ctx(sctx); |
|---|
| 3946 | 4002 | |
|---|
| .. | .. |
|---|
| 3989 | 4045 | return 0; |
|---|
| 3990 | 4046 | } |
|---|
| 3991 | 4047 | |
|---|
| 3992 | | -int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, |
|---|
| 3993 | | - struct btrfs_device *dev) |
|---|
| 4048 | +int btrfs_scrub_cancel_dev(struct btrfs_device *dev) |
|---|
| 3994 | 4049 | { |
|---|
| 4050 | + struct btrfs_fs_info *fs_info = dev->fs_info; |
|---|
| 3995 | 4051 | struct scrub_ctx *sctx; |
|---|
| 3996 | 4052 | |
|---|
| 3997 | 4053 | mutex_lock(&fs_info->scrub_lock); |
|---|