.. | .. |
---|
16 | 16 | #include <linux/slab.h> |
---|
17 | 17 | #include <linux/nospec.h> |
---|
18 | 18 | #include <linux/backing-dev.h> |
---|
| 19 | +#include <linux/freezer.h> |
---|
19 | 20 | #include <trace/events/ext4.h> |
---|
20 | | - |
---|
21 | | -#ifdef CONFIG_EXT4_DEBUG |
---|
22 | | -ushort ext4_mballoc_debug __read_mostly; |
---|
23 | | - |
---|
24 | | -module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644); |
---|
25 | | -MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc"); |
---|
26 | | -#endif |
---|
27 | 21 | |
---|
28 | 22 | /* |
---|
29 | 23 | * MUSTDO: |
---|
.. | .. |
---|
131 | 125 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in |
---|
132 | 126 | * terms of number of blocks. If we have mounted the file system with -O |
---|
133 | 127 | * stripe=<value> option the group prealloc request is normalized to the |
---|
134 | | - * the smallest multiple of the stripe value (sbi->s_stripe) which is |
---|
| 128 | + * smallest multiple of the stripe value (sbi->s_stripe) which is |
---|
135 | 129 | * greater than the default mb_group_prealloc. |
---|
136 | 130 | * |
---|
137 | 131 | * The regular allocator (using the buddy cache) supports a few tunables. |
---|
.. | .. |
---|
356 | 350 | ext4_group_t group); |
---|
357 | 351 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
---|
358 | 352 | ext4_group_t group); |
---|
| 353 | +static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac); |
---|
| 354 | + |
---|
| 355 | +/* |
---|
| 356 | + * The algorithm using this percpu seq counter is as follows: |
---|
| 357 | + * 1. We sample the percpu discard_pa_seq counter before trying for block |
---|
| 358 | + * allocation in ext4_mb_new_blocks(). |
---|
| 359 | + * 2. We increment this percpu discard_pa_seq counter when we either allocate |
---|
| 360 | + * or free these blocks i.e. while marking those blocks as used/free in |
---|
| 361 | + * mb_mark_used()/mb_free_blocks(). |
---|
| 362 | + * 3. We also increment this percpu seq counter when we successfully identify |
---|
| 363 | + * that the bb_prealloc_list is not empty and hence proceed for discarding |
---|
| 364 | + * of those PAs inside ext4_mb_discard_group_preallocations(). |
---|
| 365 | + * |
---|
| 366 | + * Now to make sure that the regular fast path of block allocation is not |
---|
| 367 | + * affected, as a small optimization we only sample the percpu seq counter |
---|
| 368 | + * on that cpu. Only when the block allocation fails and no freed blocks |
---|
| 369 | + * were found do we sample the percpu seq counter for all cpus, using the |
---|
| 370 | + * function ext4_get_discard_pa_seq_sum() below. This happens after making |
---|
| 371 | + * sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty. |
---|
| 372 | + */ |
---|
| 373 | +static DEFINE_PER_CPU(u64, discard_pa_seq); |
---|
| 374 | +static inline u64 ext4_get_discard_pa_seq_sum(void) |
---|
| 375 | +{ |
---|
| 376 | + int __cpu; |
---|
| 377 | + u64 __seq = 0; |
---|
| 378 | + |
---|
| 379 | + for_each_possible_cpu(__cpu) |
---|
| 380 | + __seq += per_cpu(discard_pa_seq, __cpu); |
---|
| 381 | + return __seq; |
---|
| 382 | +} |
---|
359 | 383 | |
---|
360 | 384 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
---|
361 | 385 | { |
---|
.. | .. |
---|
493 | 517 | |
---|
494 | 518 | static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) |
---|
495 | 519 | { |
---|
| 520 | + if (unlikely(e4b->bd_info->bb_bitmap == NULL)) |
---|
| 521 | + return; |
---|
496 | 522 | if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) { |
---|
497 | 523 | unsigned char *b1, *b2; |
---|
498 | 524 | int i; |
---|
.. | .. |
---|
511 | 537 | } |
---|
512 | 538 | } |
---|
513 | 539 | |
---|
| 540 | +static void mb_group_bb_bitmap_alloc(struct super_block *sb, |
---|
| 541 | + struct ext4_group_info *grp, ext4_group_t group) |
---|
| 542 | +{ |
---|
| 543 | + struct buffer_head *bh; |
---|
| 544 | + |
---|
| 545 | + grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS); |
---|
| 546 | + if (!grp->bb_bitmap) |
---|
| 547 | + return; |
---|
| 548 | + |
---|
| 549 | + bh = ext4_read_block_bitmap(sb, group); |
---|
| 550 | + if (IS_ERR_OR_NULL(bh)) { |
---|
| 551 | + kfree(grp->bb_bitmap); |
---|
| 552 | + grp->bb_bitmap = NULL; |
---|
| 553 | + return; |
---|
| 554 | + } |
---|
| 555 | + |
---|
| 556 | + memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize); |
---|
| 557 | + put_bh(bh); |
---|
| 558 | +} |
---|
| 559 | + |
---|
| 560 | +static void mb_group_bb_bitmap_free(struct ext4_group_info *grp) |
---|
| 561 | +{ |
---|
| 562 | + kfree(grp->bb_bitmap); |
---|
| 563 | +} |
---|
| 564 | + |
---|
514 | 565 | #else |
---|
515 | 566 | static inline void mb_free_blocks_double(struct inode *inode, |
---|
516 | 567 | struct ext4_buddy *e4b, int first, int count) |
---|
.. | .. |
---|
523 | 574 | return; |
---|
524 | 575 | } |
---|
525 | 576 | static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) |
---|
| 577 | +{ |
---|
| 578 | + return; |
---|
| 579 | +} |
---|
| 580 | + |
---|
| 581 | +static inline void mb_group_bb_bitmap_alloc(struct super_block *sb, |
---|
| 582 | + struct ext4_group_info *grp, ext4_group_t group) |
---|
| 583 | +{ |
---|
| 584 | + return; |
---|
| 585 | +} |
---|
| 586 | + |
---|
| 587 | +static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp) |
---|
526 | 588 | { |
---|
527 | 589 | return; |
---|
528 | 590 | } |
---|
.. | .. |
---|
558 | 620 | void *buddy; |
---|
559 | 621 | void *buddy2; |
---|
560 | 622 | |
---|
561 | | - { |
---|
562 | | - static int mb_check_counter; |
---|
563 | | - if (mb_check_counter++ % 100 != 0) |
---|
564 | | - return 0; |
---|
565 | | - } |
---|
| 623 | + if (e4b->bd_info->bb_check_counter++ % 10) |
---|
| 624 | + return 0; |
---|
566 | 625 | |
---|
567 | 626 | while (order > 1) { |
---|
568 | 627 | buddy = mb_find_buddy(e4b, order, &max); |
---|
.. | .. |
---|
626 | 685 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
---|
627 | 686 | |
---|
628 | 687 | grp = ext4_get_group_info(sb, e4b->bd_group); |
---|
| 688 | + if (!grp) |
---|
| 689 | + return NULL; |
---|
629 | 690 | list_for_each(cur, &grp->bb_prealloc_list) { |
---|
630 | 691 | ext4_group_t groupnr; |
---|
631 | 692 | struct ext4_prealloc_space *pa; |
---|
.. | .. |
---|
709 | 770 | |
---|
710 | 771 | static noinline_for_stack |
---|
711 | 772 | void ext4_mb_generate_buddy(struct super_block *sb, |
---|
712 | | - void *buddy, void *bitmap, ext4_group_t group) |
---|
| 773 | + void *buddy, void *bitmap, ext4_group_t group, |
---|
| 774 | + struct ext4_group_info *grp) |
---|
713 | 775 | { |
---|
714 | | - struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
---|
715 | 776 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
716 | 777 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
---|
717 | 778 | ext4_grpblk_t i = 0; |
---|
.. | .. |
---|
758 | 819 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
---|
759 | 820 | |
---|
760 | 821 | period = get_cycles() - period; |
---|
761 | | - spin_lock(&sbi->s_bal_lock); |
---|
762 | | - sbi->s_mb_buddies_generated++; |
---|
763 | | - sbi->s_mb_generation_time += period; |
---|
764 | | - spin_unlock(&sbi->s_bal_lock); |
---|
765 | | -} |
---|
766 | | - |
---|
767 | | -static void mb_regenerate_buddy(struct ext4_buddy *e4b) |
---|
768 | | -{ |
---|
769 | | - int count; |
---|
770 | | - int order = 1; |
---|
771 | | - void *buddy; |
---|
772 | | - |
---|
773 | | - while ((buddy = mb_find_buddy(e4b, order++, &count))) { |
---|
774 | | - ext4_set_bits(buddy, 0, count); |
---|
775 | | - } |
---|
776 | | - e4b->bd_info->bb_fragments = 0; |
---|
777 | | - memset(e4b->bd_info->bb_counters, 0, |
---|
778 | | - sizeof(*e4b->bd_info->bb_counters) * |
---|
779 | | - (e4b->bd_sb->s_blocksize_bits + 2)); |
---|
780 | | - |
---|
781 | | - ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, |
---|
782 | | - e4b->bd_bitmap, e4b->bd_group); |
---|
| 822 | + atomic_inc(&sbi->s_mb_buddies_generated); |
---|
| 823 | + atomic64_add(period, &sbi->s_mb_generation_time); |
---|
783 | 824 | } |
---|
784 | 825 | |
---|
785 | 826 | /* The buddy information is attached to the buddy cache inode |
---|
.. | .. |
---|
820 | 861 | char *bitmap; |
---|
821 | 862 | struct ext4_group_info *grinfo; |
---|
822 | 863 | |
---|
823 | | - mb_debug(1, "init page %lu\n", page->index); |
---|
824 | | - |
---|
825 | 864 | inode = page->mapping->host; |
---|
826 | 865 | sb = inode->i_sb; |
---|
827 | 866 | ngroups = ext4_get_groups_count(sb); |
---|
828 | 867 | blocksize = i_blocksize(inode); |
---|
829 | 868 | blocks_per_page = PAGE_SIZE / blocksize; |
---|
| 869 | + |
---|
| 870 | + mb_debug(sb, "init page %lu\n", page->index); |
---|
830 | 871 | |
---|
831 | 872 | groups_per_page = blocks_per_page >> 1; |
---|
832 | 873 | if (groups_per_page == 0) |
---|
.. | .. |
---|
851 | 892 | break; |
---|
852 | 893 | |
---|
853 | 894 | grinfo = ext4_get_group_info(sb, group); |
---|
| 895 | + if (!grinfo) |
---|
| 896 | + continue; |
---|
854 | 897 | /* |
---|
855 | 898 | * If page is uptodate then we came here after online resize |
---|
856 | 899 | * which added some new uninitialized group info structs, so |
---|
.. | .. |
---|
861 | 904 | bh[i] = NULL; |
---|
862 | 905 | continue; |
---|
863 | 906 | } |
---|
864 | | - bh[i] = ext4_read_block_bitmap_nowait(sb, group); |
---|
| 907 | + bh[i] = ext4_read_block_bitmap_nowait(sb, group, false); |
---|
865 | 908 | if (IS_ERR(bh[i])) { |
---|
866 | 909 | err = PTR_ERR(bh[i]); |
---|
867 | 910 | bh[i] = NULL; |
---|
868 | 911 | goto out; |
---|
869 | 912 | } |
---|
870 | | - mb_debug(1, "read bitmap for group %u\n", group); |
---|
| 913 | + mb_debug(sb, "read bitmap for group %u\n", group); |
---|
871 | 914 | } |
---|
872 | 915 | |
---|
873 | 916 | /* wait for I/O completion */ |
---|
.. | .. |
---|
912 | 955 | if ((first_block + i) & 1) { |
---|
913 | 956 | /* this is block of buddy */ |
---|
914 | 957 | BUG_ON(incore == NULL); |
---|
915 | | - mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
---|
| 958 | + mb_debug(sb, "put buddy for group %u in page %lu/%x\n", |
---|
916 | 959 | group, page->index, i * blocksize); |
---|
917 | 960 | trace_ext4_mb_buddy_bitmap_load(sb, group); |
---|
918 | 961 | grinfo = ext4_get_group_info(sb, group); |
---|
| 962 | + if (!grinfo) { |
---|
| 963 | + err = -EFSCORRUPTED; |
---|
| 964 | + goto out; |
---|
| 965 | + } |
---|
919 | 966 | grinfo->bb_fragments = 0; |
---|
920 | 967 | memset(grinfo->bb_counters, 0, |
---|
921 | 968 | sizeof(*grinfo->bb_counters) * |
---|
.. | .. |
---|
926 | 973 | ext4_lock_group(sb, group); |
---|
927 | 974 | /* init the buddy */ |
---|
928 | 975 | memset(data, 0xff, blocksize); |
---|
929 | | - ext4_mb_generate_buddy(sb, data, incore, group); |
---|
| 976 | + ext4_mb_generate_buddy(sb, data, incore, group, grinfo); |
---|
930 | 977 | ext4_unlock_group(sb, group); |
---|
931 | 978 | incore = NULL; |
---|
932 | 979 | } else { |
---|
933 | 980 | /* this is block of bitmap */ |
---|
934 | 981 | BUG_ON(incore != NULL); |
---|
935 | | - mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
---|
| 982 | + mb_debug(sb, "put bitmap for group %u in page %lu/%x\n", |
---|
936 | 983 | group, page->index, i * blocksize); |
---|
937 | 984 | trace_ext4_mb_bitmap_load(sb, group); |
---|
938 | 985 | |
---|
.. | .. |
---|
1038 | 1085 | int ret = 0; |
---|
1039 | 1086 | |
---|
1040 | 1087 | might_sleep(); |
---|
1041 | | - mb_debug(1, "init group %u\n", group); |
---|
| 1088 | + mb_debug(sb, "init group %u\n", group); |
---|
1042 | 1089 | this_grp = ext4_get_group_info(sb, group); |
---|
| 1090 | + if (!this_grp) |
---|
| 1091 | + return -EFSCORRUPTED; |
---|
| 1092 | + |
---|
1043 | 1093 | /* |
---|
1044 | 1094 | * This ensures that we don't reinit the buddy cache |
---|
1045 | 1095 | * page which map to the group from which we are already |
---|
.. | .. |
---|
1110 | 1160 | struct inode *inode = sbi->s_buddy_cache; |
---|
1111 | 1161 | |
---|
1112 | 1162 | might_sleep(); |
---|
1113 | | - mb_debug(1, "load group %u\n", group); |
---|
| 1163 | + mb_debug(sb, "load group %u\n", group); |
---|
1114 | 1164 | |
---|
1115 | 1165 | blocks_per_page = PAGE_SIZE / sb->s_blocksize; |
---|
1116 | 1166 | grp = ext4_get_group_info(sb, group); |
---|
| 1167 | + if (!grp) |
---|
| 1168 | + return -EFSCORRUPTED; |
---|
1117 | 1169 | |
---|
1118 | 1170 | e4b->bd_blkbits = sb->s_blocksize_bits; |
---|
1119 | 1171 | e4b->bd_info = grp; |
---|
.. | .. |
---|
1217 | 1269 | /* Pages marked accessed already */ |
---|
1218 | 1270 | e4b->bd_buddy_page = page; |
---|
1219 | 1271 | e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); |
---|
1220 | | - |
---|
1221 | | - BUG_ON(e4b->bd_bitmap_page == NULL); |
---|
1222 | | - BUG_ON(e4b->bd_buddy_page == NULL); |
---|
1223 | 1272 | |
---|
1224 | 1273 | return 0; |
---|
1225 | 1274 | |
---|
.. | .. |
---|
1336 | 1385 | } |
---|
1337 | 1386 | } |
---|
1338 | 1387 | |
---|
1339 | | -/* |
---|
1340 | | - * _________________________________________________________________ */ |
---|
1341 | | - |
---|
1342 | 1388 | static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side) |
---|
1343 | 1389 | { |
---|
1344 | 1390 | if (mb_test_bit(*bit + side, bitmap)) { |
---|
.. | .. |
---|
1430 | 1476 | mb_check_buddy(e4b); |
---|
1431 | 1477 | mb_free_blocks_double(inode, e4b, first, count); |
---|
1432 | 1478 | |
---|
| 1479 | + this_cpu_inc(discard_pa_seq); |
---|
1433 | 1480 | e4b->bd_info->bb_free += count; |
---|
1434 | 1481 | if (first < e4b->bd_info->bb_first_free) |
---|
1435 | 1482 | e4b->bd_info->bb_first_free = first; |
---|
.. | .. |
---|
1449 | 1496 | |
---|
1450 | 1497 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
---|
1451 | 1498 | blocknr += EXT4_C2B(sbi, block); |
---|
1452 | | - ext4_grp_locked_error(sb, e4b->bd_group, |
---|
1453 | | - inode ? inode->i_ino : 0, |
---|
1454 | | - blocknr, |
---|
1455 | | - "freeing already freed block " |
---|
1456 | | - "(bit %u); block bitmap corrupt.", |
---|
1457 | | - block); |
---|
1458 | | - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, |
---|
| 1499 | + if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { |
---|
| 1500 | + ext4_grp_locked_error(sb, e4b->bd_group, |
---|
| 1501 | + inode ? inode->i_ino : 0, |
---|
| 1502 | + blocknr, |
---|
| 1503 | + "freeing already freed block (bit %u); block bitmap corrupt.", |
---|
| 1504 | + block); |
---|
| 1505 | + ext4_mark_group_bitmap_corrupted( |
---|
| 1506 | + sb, e4b->bd_group, |
---|
1459 | 1507 | EXT4_GROUP_INFO_BBITMAP_CORRUPT); |
---|
1460 | | - mb_regenerate_buddy(e4b); |
---|
| 1508 | + } |
---|
1461 | 1509 | goto done; |
---|
1462 | 1510 | } |
---|
1463 | 1511 | |
---|
.. | .. |
---|
1572 | 1620 | mb_check_buddy(e4b); |
---|
1573 | 1621 | mb_mark_used_double(e4b, start, len); |
---|
1574 | 1622 | |
---|
| 1623 | + this_cpu_inc(discard_pa_seq); |
---|
1575 | 1624 | e4b->bd_info->bb_free -= len; |
---|
1576 | 1625 | if (e4b->bd_info->bb_first_free == start) |
---|
1577 | 1626 | e4b->bd_info->bb_first_free += len; |
---|
.. | .. |
---|
1671 | 1720 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; |
---|
1672 | 1721 | spin_unlock(&sbi->s_md_lock); |
---|
1673 | 1722 | } |
---|
1674 | | -} |
---|
| 1723 | + /* |
---|
| 1724 | + * As we've just preallocated more space than |
---|
| 1725 | + * user requested originally, we store allocated |
---|
| 1726 | + * space in a special descriptor. |
---|
| 1727 | + */ |
---|
| 1728 | + if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
---|
| 1729 | + ext4_mb_new_preallocation(ac); |
---|
1675 | 1730 | |
---|
1676 | | -/* |
---|
1677 | | - * regular allocator, for general purposes allocation |
---|
1678 | | - */ |
---|
| 1731 | +} |
---|
1679 | 1732 | |
---|
1680 | 1733 | static void ext4_mb_check_limits(struct ext4_allocation_context *ac, |
---|
1681 | 1734 | struct ext4_buddy *e4b, |
---|
.. | .. |
---|
1825 | 1878 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
---|
1826 | 1879 | struct ext4_free_extent ex; |
---|
1827 | 1880 | |
---|
1828 | | - if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) |
---|
| 1881 | + if (!grp) |
---|
| 1882 | + return -EFSCORRUPTED; |
---|
| 1883 | + if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) |
---|
1829 | 1884 | return 0; |
---|
1830 | 1885 | if (grp->bb_free == 0) |
---|
1831 | 1886 | return 0; |
---|
.. | .. |
---|
1919 | 1974 | |
---|
1920 | 1975 | ext4_mb_use_best_found(ac, e4b); |
---|
1921 | 1976 | |
---|
1922 | | - BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len); |
---|
| 1977 | + BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len); |
---|
1923 | 1978 | |
---|
1924 | 1979 | if (EXT4_SB(sb)->s_mb_stats) |
---|
1925 | 1980 | atomic_inc(&EXT4_SB(sb)->s_bal_2orders); |
---|
.. | .. |
---|
1956 | 2011 | /* |
---|
1957 | 2012 | * If we have a corrupt bitmap, we won't find any |
---|
1958 | 2013 | * free blocks even though group info says we |
---|
1959 | | - * we have free blocks |
---|
| 2014 | + * have free blocks |
---|
1960 | 2015 | */ |
---|
1961 | 2016 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
---|
1962 | 2017 | "%d free clusters as per " |
---|
.. | .. |
---|
2036 | 2091 | } |
---|
2037 | 2092 | |
---|
2038 | 2093 | /* |
---|
2039 | | - * This is now called BEFORE we load the buddy bitmap. |
---|
| 2094 | + * This is also called BEFORE we load the buddy bitmap. |
---|
2040 | 2095 | * Returns either 1 or 0 indicating that the group is either suitable |
---|
2041 | | - * for the allocation or not. In addition it can also return negative |
---|
2042 | | - * error code when something goes wrong. |
---|
| 2096 | + * for the allocation or not. |
---|
2043 | 2097 | */ |
---|
2044 | | -static int ext4_mb_good_group(struct ext4_allocation_context *ac, |
---|
| 2098 | +static bool ext4_mb_good_group(struct ext4_allocation_context *ac, |
---|
2045 | 2099 | ext4_group_t group, int cr) |
---|
2046 | 2100 | { |
---|
2047 | | - unsigned free, fragments; |
---|
| 2101 | + ext4_grpblk_t free, fragments; |
---|
2048 | 2102 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
---|
2049 | 2103 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
---|
2050 | 2104 | |
---|
2051 | 2105 | BUG_ON(cr < 0 || cr >= 4); |
---|
2052 | 2106 | |
---|
| 2107 | + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
| 2108 | + return false; |
---|
| 2109 | + |
---|
2053 | 2110 | free = grp->bb_free; |
---|
2054 | 2111 | if (free == 0) |
---|
2055 | | - return 0; |
---|
2056 | | - if (cr <= 2 && free < ac->ac_g_ex.fe_len) |
---|
2057 | | - return 0; |
---|
2058 | | - |
---|
2059 | | - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
2060 | | - return 0; |
---|
2061 | | - |
---|
2062 | | - /* We only do this if the grp has never been initialized */ |
---|
2063 | | - if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
---|
2064 | | - int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS); |
---|
2065 | | - if (ret) |
---|
2066 | | - return ret; |
---|
2067 | | - } |
---|
| 2112 | + return false; |
---|
2068 | 2113 | |
---|
2069 | 2114 | fragments = grp->bb_fragments; |
---|
2070 | 2115 | if (fragments == 0) |
---|
2071 | | - return 0; |
---|
| 2116 | + return false; |
---|
2072 | 2117 | |
---|
2073 | 2118 | switch (cr) { |
---|
2074 | 2119 | case 0: |
---|
.. | .. |
---|
2078 | 2123 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
---|
2079 | 2124 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && |
---|
2080 | 2125 | ((group % flex_size) == 0)) |
---|
2081 | | - return 0; |
---|
| 2126 | + return false; |
---|
2082 | 2127 | |
---|
2083 | | - if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) || |
---|
2084 | | - (free / fragments) >= ac->ac_g_ex.fe_len) |
---|
2085 | | - return 1; |
---|
| 2128 | + if (free < ac->ac_g_ex.fe_len) |
---|
| 2129 | + return false; |
---|
| 2130 | + |
---|
| 2131 | + if (ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) |
---|
| 2132 | + return true; |
---|
2086 | 2133 | |
---|
2087 | 2134 | if (grp->bb_largest_free_order < ac->ac_2order) |
---|
2088 | | - return 0; |
---|
| 2135 | + return false; |
---|
2089 | 2136 | |
---|
2090 | | - return 1; |
---|
| 2137 | + return true; |
---|
2091 | 2138 | case 1: |
---|
2092 | 2139 | if ((free / fragments) >= ac->ac_g_ex.fe_len) |
---|
2093 | | - return 1; |
---|
| 2140 | + return true; |
---|
2094 | 2141 | break; |
---|
2095 | 2142 | case 2: |
---|
2096 | 2143 | if (free >= ac->ac_g_ex.fe_len) |
---|
2097 | | - return 1; |
---|
| 2144 | + return true; |
---|
2098 | 2145 | break; |
---|
2099 | 2146 | case 3: |
---|
2100 | | - return 1; |
---|
| 2147 | + return true; |
---|
2101 | 2148 | default: |
---|
2102 | 2149 | BUG(); |
---|
2103 | 2150 | } |
---|
2104 | 2151 | |
---|
2105 | | - return 0; |
---|
| 2152 | + return false; |
---|
| 2153 | +} |
---|
| 2154 | + |
---|
| 2155 | +/* |
---|
| 2156 | + * This could return a negative error code if something goes wrong |
---|
| 2157 | + * during ext4_mb_init_group(). This should not be called with |
---|
| 2158 | + * ext4_lock_group() held. |
---|
| 2159 | + */ |
---|
| 2160 | +static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, |
---|
| 2161 | + ext4_group_t group, int cr) |
---|
| 2162 | +{ |
---|
| 2163 | + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
---|
| 2164 | + struct super_block *sb = ac->ac_sb; |
---|
| 2165 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
| 2166 | + bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK; |
---|
| 2167 | + ext4_grpblk_t free; |
---|
| 2168 | + int ret = 0; |
---|
| 2169 | + |
---|
| 2170 | + if (!grp) |
---|
| 2171 | + return -EFSCORRUPTED; |
---|
| 2172 | + if (sbi->s_mb_stats) |
---|
| 2173 | + atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); |
---|
| 2174 | + if (should_lock) |
---|
| 2175 | + ext4_lock_group(sb, group); |
---|
| 2176 | + free = grp->bb_free; |
---|
| 2177 | + if (free == 0) |
---|
| 2178 | + goto out; |
---|
| 2179 | + if (cr <= 2 && free < ac->ac_g_ex.fe_len) |
---|
| 2180 | + goto out; |
---|
| 2181 | + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
| 2182 | + goto out; |
---|
| 2183 | + if (should_lock) |
---|
| 2184 | + ext4_unlock_group(sb, group); |
---|
| 2185 | + |
---|
| 2186 | + /* We only do this if the grp has never been initialized */ |
---|
| 2187 | + if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
---|
| 2188 | + struct ext4_group_desc *gdp = |
---|
| 2189 | + ext4_get_group_desc(sb, group, NULL); |
---|
| 2190 | + int ret; |
---|
| 2191 | + |
---|
| 2192 | + /* cr=0/1 is a very optimistic search to find large |
---|
| 2193 | + * good chunks almost for free. If buddy data is not |
---|
| 2194 | + * ready, then this optimization makes no sense. But |
---|
| 2195 | + * we never skip the first block group in a flex_bg, |
---|
| 2196 | + * since this gets used for metadata block allocation, |
---|
| 2197 | + * and we want to make sure we locate metadata blocks |
---|
| 2198 | + * in the first block group in the flex_bg if possible. |
---|
| 2199 | + */ |
---|
| 2200 | + if (cr < 2 && |
---|
| 2201 | + (!sbi->s_log_groups_per_flex || |
---|
| 2202 | + ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && |
---|
| 2203 | + !(ext4_has_group_desc_csum(sb) && |
---|
| 2204 | + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) |
---|
| 2205 | + return 0; |
---|
| 2206 | + ret = ext4_mb_init_group(sb, group, GFP_NOFS); |
---|
| 2207 | + if (ret) |
---|
| 2208 | + return ret; |
---|
| 2209 | + } |
---|
| 2210 | + |
---|
| 2211 | + if (should_lock) |
---|
| 2212 | + ext4_lock_group(sb, group); |
---|
| 2213 | + ret = ext4_mb_good_group(ac, group, cr); |
---|
| 2214 | +out: |
---|
| 2215 | + if (should_lock) |
---|
| 2216 | + ext4_unlock_group(sb, group); |
---|
| 2217 | + return ret; |
---|
| 2218 | +} |
---|
| 2219 | + |
---|
| 2220 | +/* |
---|
| 2221 | + * Start prefetching @nr block bitmaps starting at @group. |
---|
| 2222 | + * Return the next group which needs to be prefetched. |
---|
| 2223 | + */ |
---|
| 2224 | +ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, |
---|
| 2225 | + unsigned int nr, int *cnt) |
---|
| 2226 | +{ |
---|
| 2227 | + ext4_group_t ngroups = ext4_get_groups_count(sb); |
---|
| 2228 | + struct buffer_head *bh; |
---|
| 2229 | + struct blk_plug plug; |
---|
| 2230 | + |
---|
| 2231 | + blk_start_plug(&plug); |
---|
| 2232 | + while (nr-- > 0) { |
---|
| 2233 | + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, |
---|
| 2234 | + NULL); |
---|
| 2235 | + struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
---|
| 2236 | + |
---|
| 2237 | + /* |
---|
| 2238 | + * Prefetch block groups with free blocks; but don't |
---|
| 2239 | + * bother if it is marked uninitialized on disk, since |
---|
| 2240 | + * it won't require I/O to read. Also only try to |
---|
| 2241 | + * prefetch once, so we avoid getblk() call, which can |
---|
| 2242 | + * be expensive. |
---|
| 2243 | + */ |
---|
| 2244 | + if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && |
---|
| 2245 | + EXT4_MB_GRP_NEED_INIT(grp) && |
---|
| 2246 | + ext4_free_group_clusters(sb, gdp) > 0 && |
---|
| 2247 | + !(ext4_has_group_desc_csum(sb) && |
---|
| 2248 | + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { |
---|
| 2249 | + bh = ext4_read_block_bitmap_nowait(sb, group, true); |
---|
| 2250 | + if (bh && !IS_ERR(bh)) { |
---|
| 2251 | + if (!buffer_uptodate(bh) && cnt) |
---|
| 2252 | + (*cnt)++; |
---|
| 2253 | + brelse(bh); |
---|
| 2254 | + } |
---|
| 2255 | + } |
---|
| 2256 | + if (++group >= ngroups) |
---|
| 2257 | + group = 0; |
---|
| 2258 | + } |
---|
| 2259 | + blk_finish_plug(&plug); |
---|
| 2260 | + return group; |
---|
| 2261 | +} |
---|
| 2262 | + |
---|
| 2263 | +/* |
---|
| 2264 | + * Prefetching reads the block bitmap into the buffer cache; but we |
---|
| 2265 | + * need to make sure that the buddy bitmap in the page cache has been |
---|
| 2266 | + * initialized. Note that ext4_mb_init_group() will block if the I/O |
---|
| 2267 | + * is not yet completed, or indeed if it was not initiated by |
---|
| 2268 | + * ext4_mb_prefetch(). |
---|
| 2269 | + * |
---|
| 2270 | + * TODO: We should actually kick off the buddy bitmap setup in a work |
---|
| 2271 | + * queue when the buffer I/O is completed, so that we don't block |
---|
| 2272 | + * waiting for the block allocation bitmap read to finish when |
---|
| 2273 | + * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). |
---|
| 2274 | + */ |
---|
| 2275 | +void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, |
---|
| 2276 | + unsigned int nr) |
---|
| 2277 | +{ |
---|
| 2278 | + while (nr-- > 0) { |
---|
| 2279 | + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, |
---|
| 2280 | + NULL); |
---|
| 2281 | + struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
---|
| 2282 | + |
---|
| 2283 | + if (!group) |
---|
| 2284 | + group = ext4_get_groups_count(sb); |
---|
| 2285 | + group--; |
---|
| 2286 | + grp = ext4_get_group_info(sb, group); |
---|
| 2287 | + |
---|
| 2288 | + if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && |
---|
| 2289 | + ext4_free_group_clusters(sb, gdp) > 0 && |
---|
| 2290 | + !(ext4_has_group_desc_csum(sb) && |
---|
| 2291 | + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { |
---|
| 2292 | + if (ext4_mb_init_group(sb, group, GFP_NOFS)) |
---|
| 2293 | + break; |
---|
| 2294 | + } |
---|
| 2295 | + } |
---|
2106 | 2296 | } |
---|
2107 | 2297 | |
---|
2108 | 2298 | static noinline_for_stack int |
---|
2109 | 2299 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
---|
2110 | 2300 | { |
---|
2111 | | - ext4_group_t ngroups, group, i; |
---|
2112 | | - int cr; |
---|
| 2301 | + ext4_group_t prefetch_grp = 0, ngroups, group, i; |
---|
| 2302 | + int cr = -1; |
---|
2113 | 2303 | int err = 0, first_err = 0; |
---|
| 2304 | + unsigned int nr = 0, prefetch_ios = 0; |
---|
2114 | 2305 | struct ext4_sb_info *sbi; |
---|
2115 | 2306 | struct super_block *sb; |
---|
2116 | 2307 | struct ext4_buddy e4b; |
---|
| 2308 | + int lost; |
---|
2117 | 2309 | |
---|
2118 | 2310 | sb = ac->ac_sb; |
---|
2119 | 2311 | sbi = EXT4_SB(sb); |
---|
.. | .. |
---|
2133 | 2325 | goto out; |
---|
2134 | 2326 | |
---|
2135 | 2327 | /* |
---|
2136 | | - * ac->ac2_order is set only if the fe_len is a power of 2 |
---|
2137 | | - * if ac2_order is set we also set criteria to 0 so that we |
---|
| 2328 | + * ac->ac_2order is set only if the fe_len is a power of 2 |
---|
| 2329 | + * if ac->ac_2order is set we also set criteria to 0 so that we |
---|
2138 | 2330 | * try exact allocation using buddy. |
---|
2139 | 2331 | */ |
---|
2140 | 2332 | i = fls(ac->ac_g_ex.fe_len); |
---|
.. | .. |
---|
2178 | 2370 | * from the goal value specified |
---|
2179 | 2371 | */ |
---|
2180 | 2372 | group = ac->ac_g_ex.fe_group; |
---|
| 2373 | + prefetch_grp = group; |
---|
2181 | 2374 | |
---|
2182 | 2375 | for (i = 0; i < ngroups; group++, i++) { |
---|
2183 | 2376 | int ret = 0; |
---|
.. | .. |
---|
2189 | 2382 | if (group >= ngroups) |
---|
2190 | 2383 | group = 0; |
---|
2191 | 2384 | |
---|
| 2385 | + /* |
---|
| 2386 | + * Batch reads of the block allocation bitmaps |
---|
| 2387 | + * to get multiple READs in flight; limit |
---|
| 2388 | + * prefetching at cr=0/1, otherwise mballoc can |
---|
| 2389 | + * spend a lot of time loading imperfect groups |
---|
| 2390 | + */ |
---|
| 2391 | + if ((prefetch_grp == group) && |
---|
| 2392 | + (cr > 1 || |
---|
| 2393 | + prefetch_ios < sbi->s_mb_prefetch_limit)) { |
---|
| 2394 | + unsigned int curr_ios = prefetch_ios; |
---|
| 2395 | + |
---|
| 2396 | + nr = sbi->s_mb_prefetch; |
---|
| 2397 | + if (ext4_has_feature_flex_bg(sb)) { |
---|
| 2398 | + nr = 1 << sbi->s_log_groups_per_flex; |
---|
| 2399 | + nr -= group & (nr - 1); |
---|
| 2400 | + nr = min(nr, sbi->s_mb_prefetch); |
---|
| 2401 | + } |
---|
| 2402 | + prefetch_grp = ext4_mb_prefetch(sb, group, |
---|
| 2403 | + nr, &prefetch_ios); |
---|
| 2404 | + if (prefetch_ios == curr_ios) |
---|
| 2405 | + nr = 0; |
---|
| 2406 | + } |
---|
| 2407 | + |
---|
2192 | 2408 | /* This now checks without needing the buddy page */ |
---|
2193 | | - ret = ext4_mb_good_group(ac, group, cr); |
---|
| 2409 | + ret = ext4_mb_good_group_nolock(ac, group, cr); |
---|
2194 | 2410 | if (ret <= 0) { |
---|
2195 | 2411 | if (!first_err) |
---|
2196 | 2412 | first_err = ret; |
---|
.. | .. |
---|
2208 | 2424 | * block group |
---|
2209 | 2425 | */ |
---|
2210 | 2426 | ret = ext4_mb_good_group(ac, group, cr); |
---|
2211 | | - if (ret <= 0) { |
---|
| 2427 | + if (ret == 0) { |
---|
2212 | 2428 | ext4_unlock_group(sb, group); |
---|
2213 | 2429 | ext4_mb_unload_buddy(&e4b); |
---|
2214 | | - if (!first_err) |
---|
2215 | | - first_err = ret; |
---|
2216 | 2430 | continue; |
---|
2217 | 2431 | } |
---|
2218 | 2432 | |
---|
.. | .. |
---|
2231 | 2445 | if (ac->ac_status != AC_STATUS_CONTINUE) |
---|
2232 | 2446 | break; |
---|
2233 | 2447 | } |
---|
| 2448 | + /* Processed all groups and haven't found blocks */ |
---|
| 2449 | + if (sbi->s_mb_stats && i == ngroups) |
---|
| 2450 | + atomic64_inc(&sbi->s_bal_cX_failed[cr]); |
---|
2234 | 2451 | } |
---|
2235 | 2452 | |
---|
2236 | 2453 | if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && |
---|
.. | .. |
---|
2239 | 2456 | * We've been searching too long. Let's try to allocate |
---|
2240 | 2457 | * the best chunk we've found so far |
---|
2241 | 2458 | */ |
---|
2242 | | - |
---|
2243 | 2459 | ext4_mb_try_best_found(ac, &e4b); |
---|
2244 | 2460 | if (ac->ac_status != AC_STATUS_FOUND) { |
---|
2245 | 2461 | /* |
---|
2246 | 2462 | * Someone more lucky has already allocated it. |
---|
2247 | 2463 | * The only thing we can do is just take first |
---|
2248 | 2464 | * found block(s) |
---|
2249 | | - printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n"); |
---|
2250 | 2465 | */ |
---|
| 2466 | + lost = atomic_inc_return(&sbi->s_mb_lost_chunks); |
---|
| 2467 | + mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n", |
---|
| 2468 | + ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, |
---|
| 2469 | + ac->ac_b_ex.fe_len, lost); |
---|
| 2470 | + |
---|
2251 | 2471 | ac->ac_b_ex.fe_group = 0; |
---|
2252 | 2472 | ac->ac_b_ex.fe_start = 0; |
---|
2253 | 2473 | ac->ac_b_ex.fe_len = 0; |
---|
2254 | 2474 | ac->ac_status = AC_STATUS_CONTINUE; |
---|
2255 | 2475 | ac->ac_flags |= EXT4_MB_HINT_FIRST; |
---|
2256 | 2476 | cr = 3; |
---|
2257 | | - atomic_inc(&sbi->s_mb_lost_chunks); |
---|
2258 | 2477 | goto repeat; |
---|
2259 | 2478 | } |
---|
2260 | 2479 | } |
---|
| 2480 | + |
---|
| 2481 | + if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND) |
---|
| 2482 | + atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]); |
---|
2261 | 2483 | out: |
---|
2262 | 2484 | if (!err && ac->ac_status != AC_STATUS_FOUND && first_err) |
---|
2263 | 2485 | err = first_err; |
---|
| 2486 | + |
---|
| 2487 | + mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n", |
---|
| 2488 | + ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status, |
---|
| 2489 | + ac->ac_flags, cr, err); |
---|
| 2490 | + |
---|
| 2491 | + if (nr) |
---|
| 2492 | + ext4_mb_prefetch_fini(sb, prefetch_grp, nr); |
---|
| 2493 | + |
---|
2264 | 2494 | return err; |
---|
2265 | 2495 | } |
---|
2266 | 2496 | |
---|
.. | .. |
---|
2313 | 2543 | sizeof(struct ext4_group_info); |
---|
2314 | 2544 | |
---|
2315 | 2545 | grinfo = ext4_get_group_info(sb, group); |
---|
| 2546 | + if (!grinfo) |
---|
| 2547 | + return 0; |
---|
2316 | 2548 | /* Load the group info in memory only if not already loaded. */ |
---|
2317 | 2549 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { |
---|
2318 | 2550 | err = ext4_mb_load_buddy(sb, group, &e4b); |
---|
.. | .. |
---|
2323 | 2555 | buddy_loaded = 1; |
---|
2324 | 2556 | } |
---|
2325 | 2557 | |
---|
2326 | | - memcpy(&sg, ext4_get_group_info(sb, group), i); |
---|
| 2558 | + memcpy(&sg, grinfo, i); |
---|
2327 | 2559 | |
---|
2328 | 2560 | if (buddy_loaded) |
---|
2329 | 2561 | ext4_mb_unload_buddy(&e4b); |
---|
.. | .. |
---|
2333 | 2565 | for (i = 0; i <= 13; i++) |
---|
2334 | 2566 | seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? |
---|
2335 | 2567 | sg.info.bb_counters[i] : 0); |
---|
2336 | | - seq_printf(seq, " ]\n"); |
---|
| 2568 | + seq_puts(seq, " ]\n"); |
---|
2337 | 2569 | |
---|
2338 | 2570 | return 0; |
---|
2339 | 2571 | } |
---|
.. | .. |
---|
2348 | 2580 | .stop = ext4_mb_seq_groups_stop, |
---|
2349 | 2581 | .show = ext4_mb_seq_groups_show, |
---|
2350 | 2582 | }; |
---|
| 2583 | + |
---|
| 2584 | +int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) |
---|
| 2585 | +{ |
---|
| 2586 | + struct super_block *sb = (struct super_block *)seq->private; |
---|
| 2587 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
| 2588 | + |
---|
| 2589 | + seq_puts(seq, "mballoc:\n"); |
---|
| 2590 | + if (!sbi->s_mb_stats) { |
---|
| 2591 | + seq_puts(seq, "\tmb stats collection turned off.\n"); |
---|
| 2592 | + seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); |
---|
| 2593 | + return 0; |
---|
| 2594 | + } |
---|
| 2595 | + seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); |
---|
| 2596 | + seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); |
---|
| 2597 | + |
---|
| 2598 | + seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); |
---|
| 2599 | + |
---|
| 2600 | + seq_puts(seq, "\tcr0_stats:\n"); |
---|
| 2601 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0])); |
---|
| 2602 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2603 | + atomic64_read(&sbi->s_bal_cX_groups_considered[0])); |
---|
| 2604 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2605 | + atomic64_read(&sbi->s_bal_cX_failed[0])); |
---|
| 2606 | + |
---|
| 2607 | + seq_puts(seq, "\tcr1_stats:\n"); |
---|
| 2608 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1])); |
---|
| 2609 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2610 | + atomic64_read(&sbi->s_bal_cX_groups_considered[1])); |
---|
| 2611 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2612 | + atomic64_read(&sbi->s_bal_cX_failed[1])); |
---|
| 2613 | + |
---|
| 2614 | + seq_puts(seq, "\tcr2_stats:\n"); |
---|
| 2615 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2])); |
---|
| 2616 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2617 | + atomic64_read(&sbi->s_bal_cX_groups_considered[2])); |
---|
| 2618 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2619 | + atomic64_read(&sbi->s_bal_cX_failed[2])); |
---|
| 2620 | + |
---|
| 2621 | + seq_puts(seq, "\tcr3_stats:\n"); |
---|
| 2622 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3])); |
---|
| 2623 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2624 | + atomic64_read(&sbi->s_bal_cX_groups_considered[3])); |
---|
| 2625 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2626 | + atomic64_read(&sbi->s_bal_cX_failed[3])); |
---|
| 2627 | + seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); |
---|
| 2628 | + seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); |
---|
| 2629 | + seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); |
---|
| 2630 | + seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); |
---|
| 2631 | + seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); |
---|
| 2632 | + |
---|
| 2633 | + seq_printf(seq, "\tbuddies_generated: %u/%u\n", |
---|
| 2634 | + atomic_read(&sbi->s_mb_buddies_generated), |
---|
| 2635 | + ext4_get_groups_count(sb)); |
---|
| 2636 | + seq_printf(seq, "\tbuddies_time_used: %llu\n", |
---|
| 2637 | + atomic64_read(&sbi->s_mb_generation_time)); |
---|
| 2638 | + seq_printf(seq, "\tpreallocated: %u\n", |
---|
| 2639 | + atomic_read(&sbi->s_mb_preallocated)); |
---|
| 2640 | + seq_printf(seq, "\tdiscarded: %u\n", |
---|
| 2641 | + atomic_read(&sbi->s_mb_discarded)); |
---|
| 2642 | + return 0; |
---|
| 2643 | +} |
---|
2351 | 2644 | |
---|
2352 | 2645 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) |
---|
2353 | 2646 | { |
---|
.. | .. |
---|
2453 | 2746 | meta_group_info[i]->bb_free_root = RB_ROOT; |
---|
2454 | 2747 | meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ |
---|
2455 | 2748 | |
---|
2456 | | -#ifdef DOUBLE_CHECK |
---|
2457 | | - { |
---|
2458 | | - struct buffer_head *bh; |
---|
2459 | | - meta_group_info[i]->bb_bitmap = |
---|
2460 | | - kmalloc(sb->s_blocksize, GFP_NOFS); |
---|
2461 | | - BUG_ON(meta_group_info[i]->bb_bitmap == NULL); |
---|
2462 | | - bh = ext4_read_block_bitmap(sb, group); |
---|
2463 | | - BUG_ON(IS_ERR_OR_NULL(bh)); |
---|
2464 | | - memcpy(meta_group_info[i]->bb_bitmap, bh->b_data, |
---|
2465 | | - sb->s_blocksize); |
---|
2466 | | - put_bh(bh); |
---|
2467 | | - } |
---|
2468 | | -#endif |
---|
2469 | | - |
---|
| 2749 | + mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); |
---|
2470 | 2750 | return 0; |
---|
2471 | 2751 | |
---|
2472 | 2752 | exit_group_info: |
---|
.. | .. |
---|
2510 | 2790 | sbi->s_buddy_cache->i_ino = EXT4_BAD_INO; |
---|
2511 | 2791 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
---|
2512 | 2792 | for (i = 0; i < ngroups; i++) { |
---|
| 2793 | + cond_resched(); |
---|
2513 | 2794 | desc = ext4_get_group_desc(sb, i, NULL); |
---|
2514 | 2795 | if (desc == NULL) { |
---|
2515 | 2796 | ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i); |
---|
.. | .. |
---|
2519 | 2800 | goto err_freebuddy; |
---|
2520 | 2801 | } |
---|
2521 | 2802 | |
---|
| 2803 | + if (ext4_has_feature_flex_bg(sb)) { |
---|
| 2804 | + /* a single flex group is supposed to be read by a single IO. |
---|
| 2805 | + * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is |
---|
| 2806 | + * unsigned integer, so the maximum shift is 32. |
---|
| 2807 | + */ |
---|
| 2808 | + if (sbi->s_es->s_log_groups_per_flex >= 32) { |
---|
| 2809 | + ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group"); |
---|
| 2810 | + goto err_freebuddy; |
---|
| 2811 | + } |
---|
| 2812 | + sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex, |
---|
| 2813 | + BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); |
---|
| 2814 | + sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ |
---|
| 2815 | + } else { |
---|
| 2816 | + sbi->s_mb_prefetch = 32; |
---|
| 2817 | + } |
---|
| 2818 | + if (sbi->s_mb_prefetch > ext4_get_groups_count(sb)) |
---|
| 2819 | + sbi->s_mb_prefetch = ext4_get_groups_count(sb); |
---|
| 2820 | + /* how many real IOs to prefetch within a single allocation at cr=0 |
---|
| 2821 | + * given cr=0 is a CPU-related optimization we shouldn't try to |
---|
| 2822 | + * load too many groups, at some point we should start to use what |
---|
| 2823 | + * we've got in memory. |
---|
| 2824 | + * with an average random access time 5ms, it'd take a second to get |
---|
| 2825 | + * 200 groups (* N with flex_bg), so let's make this limit 4 |
---|
| 2826 | + */ |
---|
| 2827 | + sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4; |
---|
| 2828 | + if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb)) |
---|
| 2829 | + sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb); |
---|
| 2830 | + |
---|
2522 | 2831 | return 0; |
---|
2523 | 2832 | |
---|
2524 | 2833 | err_freebuddy: |
---|
2525 | 2834 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
---|
2526 | | - while (i-- > 0) |
---|
2527 | | - kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
---|
| 2835 | + while (i-- > 0) { |
---|
| 2836 | + struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
---|
| 2837 | + |
---|
| 2838 | + if (grp) |
---|
| 2839 | + kmem_cache_free(cachep, grp); |
---|
| 2840 | + } |
---|
2528 | 2841 | i = sbi->s_group_info_size; |
---|
2529 | 2842 | rcu_read_lock(); |
---|
2530 | 2843 | group_info = rcu_dereference(sbi->s_group_info); |
---|
.. | .. |
---|
2633 | 2946 | } while (i <= sb->s_blocksize_bits + 1); |
---|
2634 | 2947 | |
---|
2635 | 2948 | spin_lock_init(&sbi->s_md_lock); |
---|
2636 | | - spin_lock_init(&sbi->s_bal_lock); |
---|
2637 | 2949 | sbi->s_mb_free_pending = 0; |
---|
2638 | 2950 | INIT_LIST_HEAD(&sbi->s_freed_data_list); |
---|
2639 | 2951 | |
---|
.. | .. |
---|
2642 | 2954 | sbi->s_mb_stats = MB_DEFAULT_STATS; |
---|
2643 | 2955 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; |
---|
2644 | 2956 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; |
---|
| 2957 | + sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC; |
---|
2645 | 2958 | /* |
---|
2646 | 2959 | * The default group preallocation is 512, which for 4k block |
---|
2647 | 2960 | * sizes translates to 2 megabytes. However for bigalloc file |
---|
.. | .. |
---|
2702 | 3015 | } |
---|
2703 | 3016 | |
---|
2704 | 3017 | /* needs to be called with the ext4 group lock held */
---|
2705 | | -static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) |
---|
| 3018 | +static int ext4_mb_cleanup_pa(struct ext4_group_info *grp) |
---|
2706 | 3019 | { |
---|
2707 | 3020 | struct ext4_prealloc_space *pa; |
---|
2708 | 3021 | struct list_head *cur, *tmp; |
---|
.. | .. |
---|
2714 | 3027 | count++; |
---|
2715 | 3028 | kmem_cache_free(ext4_pspace_cachep, pa); |
---|
2716 | 3029 | } |
---|
2717 | | - if (count) |
---|
2718 | | - mb_debug(1, "mballoc: %u PAs left\n", count); |
---|
2719 | | - |
---|
| 3030 | + return count; |
---|
2720 | 3031 | } |
---|
2721 | 3032 | |
---|
2722 | 3033 | int ext4_mb_release(struct super_block *sb) |
---|
.. | .. |
---|
2727 | 3038 | struct ext4_group_info *grinfo, ***group_info; |
---|
2728 | 3039 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
2729 | 3040 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
---|
| 3041 | + int count; |
---|
2730 | 3042 | |
---|
2731 | 3043 | if (sbi->s_group_info) { |
---|
2732 | 3044 | for (i = 0; i < ngroups; i++) { |
---|
| 3045 | + cond_resched(); |
---|
2733 | 3046 | grinfo = ext4_get_group_info(sb, i); |
---|
2734 | | -#ifdef DOUBLE_CHECK |
---|
2735 | | - kfree(grinfo->bb_bitmap); |
---|
2736 | | -#endif |
---|
| 3047 | + if (!grinfo) |
---|
| 3048 | + continue; |
---|
| 3049 | + mb_group_bb_bitmap_free(grinfo); |
---|
2737 | 3050 | ext4_lock_group(sb, i); |
---|
2738 | | - ext4_mb_cleanup_pa(grinfo); |
---|
| 3051 | + count = ext4_mb_cleanup_pa(grinfo); |
---|
| 3052 | + if (count) |
---|
| 3053 | + mb_debug(sb, "mballoc: %d PAs left\n", |
---|
| 3054 | + count); |
---|
2739 | 3055 | ext4_unlock_group(sb, i); |
---|
2740 | 3056 | kmem_cache_free(cachep, grinfo); |
---|
2741 | 3057 | } |
---|
.. | .. |
---|
2759 | 3075 | atomic_read(&sbi->s_bal_reqs), |
---|
2760 | 3076 | atomic_read(&sbi->s_bal_success)); |
---|
2761 | 3077 | ext4_msg(sb, KERN_INFO, |
---|
2762 | | - "mballoc: %u extents scanned, %u goal hits, " |
---|
| 3078 | + "mballoc: %u extents scanned, %u groups scanned, %u goal hits, " |
---|
2763 | 3079 | "%u 2^N hits, %u breaks, %u lost", |
---|
2764 | 3080 | atomic_read(&sbi->s_bal_ex_scanned), |
---|
| 3081 | + atomic_read(&sbi->s_bal_groups_scanned), |
---|
2765 | 3082 | atomic_read(&sbi->s_bal_goals), |
---|
2766 | 3083 | atomic_read(&sbi->s_bal_2orders), |
---|
2767 | 3084 | atomic_read(&sbi->s_bal_breaks), |
---|
2768 | 3085 | atomic_read(&sbi->s_mb_lost_chunks)); |
---|
2769 | 3086 | ext4_msg(sb, KERN_INFO, |
---|
2770 | | - "mballoc: %lu generated and it took %Lu", |
---|
2771 | | - sbi->s_mb_buddies_generated, |
---|
2772 | | - sbi->s_mb_generation_time); |
---|
| 3087 | + "mballoc: %u generated and it took %llu", |
---|
| 3088 | + atomic_read(&sbi->s_mb_buddies_generated), |
---|
| 3089 | + atomic64_read(&sbi->s_mb_generation_time)); |
---|
2773 | 3090 | ext4_msg(sb, KERN_INFO, |
---|
2774 | 3091 | "mballoc: %u preallocated, %u discarded", |
---|
2775 | 3092 | atomic_read(&sbi->s_mb_preallocated), |
---|
.. | .. |
---|
2808 | 3125 | struct ext4_group_info *db; |
---|
2809 | 3126 | int err, count = 0, count2 = 0; |
---|
2810 | 3127 | |
---|
2811 | | - mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
---|
| 3128 | + mb_debug(sb, "gonna free %u blocks in group %u (0x%p):", |
---|
2812 | 3129 | entry->efd_count, entry->efd_group, entry); |
---|
2813 | 3130 | |
---|
2814 | 3131 | err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); |
---|
.. | .. |
---|
2848 | 3165 | kmem_cache_free(ext4_free_data_cachep, entry); |
---|
2849 | 3166 | ext4_mb_unload_buddy(&e4b); |
---|
2850 | 3167 | |
---|
2851 | | - mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
---|
| 3168 | + mb_debug(sb, "freed %d blocks in %d structures\n", count, |
---|
| 3169 | + count2); |
---|
2852 | 3170 | } |
---|
2853 | 3171 | |
---|
2854 | 3172 | /* |
---|
.. | .. |
---|
2908 | 3226 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
---|
2909 | 3227 | SLAB_RECLAIM_ACCOUNT); |
---|
2910 | 3228 | if (ext4_pspace_cachep == NULL) |
---|
2911 | | - return -ENOMEM; |
---|
| 3229 | + goto out; |
---|
2912 | 3230 | |
---|
2913 | 3231 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
---|
2914 | 3232 | SLAB_RECLAIM_ACCOUNT); |
---|
2915 | | - if (ext4_ac_cachep == NULL) { |
---|
2916 | | - kmem_cache_destroy(ext4_pspace_cachep); |
---|
2917 | | - return -ENOMEM; |
---|
2918 | | - } |
---|
| 3233 | + if (ext4_ac_cachep == NULL) |
---|
| 3234 | + goto out_pa_free; |
---|
2919 | 3235 | |
---|
2920 | 3236 | ext4_free_data_cachep = KMEM_CACHE(ext4_free_data, |
---|
2921 | 3237 | SLAB_RECLAIM_ACCOUNT); |
---|
2922 | | - if (ext4_free_data_cachep == NULL) { |
---|
2923 | | - kmem_cache_destroy(ext4_pspace_cachep); |
---|
2924 | | - kmem_cache_destroy(ext4_ac_cachep); |
---|
2925 | | - return -ENOMEM; |
---|
2926 | | - } |
---|
| 3238 | + if (ext4_free_data_cachep == NULL) |
---|
| 3239 | + goto out_ac_free; |
---|
| 3240 | + |
---|
2927 | 3241 | return 0; |
---|
| 3242 | + |
---|
| 3243 | +out_ac_free: |
---|
| 3244 | + kmem_cache_destroy(ext4_ac_cachep); |
---|
| 3245 | +out_pa_free: |
---|
| 3246 | + kmem_cache_destroy(ext4_pspace_cachep); |
---|
| 3247 | +out: |
---|
| 3248 | + return -ENOMEM; |
---|
2928 | 3249 | } |
---|
2929 | 3250 | |
---|
2930 | 3251 | void ext4_exit_mballoc(void) |
---|
.. | .. |
---|
3061 | 3382 | } |
---|
3062 | 3383 | |
---|
3063 | 3384 | /* |
---|
| 3385 | + * Idempotent helper for Ext4 fast commit replay path to set the state of |
---|
| 3386 | + * blocks in bitmaps and update counters. |
---|
| 3387 | + */ |
---|
| 3388 | +void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, |
---|
| 3389 | + int len, int state) |
---|
| 3390 | +{ |
---|
| 3391 | + struct buffer_head *bitmap_bh = NULL; |
---|
| 3392 | + struct ext4_group_desc *gdp; |
---|
| 3393 | + struct buffer_head *gdp_bh; |
---|
| 3394 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
| 3395 | + ext4_group_t group; |
---|
| 3396 | + ext4_grpblk_t blkoff; |
---|
| 3397 | + int i, err; |
---|
| 3398 | + int already; |
---|
| 3399 | + unsigned int clen, clen_changed, thisgrp_len; |
---|
| 3400 | + |
---|
| 3401 | + while (len > 0) { |
---|
| 3402 | + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); |
---|
| 3403 | + |
---|
| 3404 | + /* |
---|
| 3405 | + * Check to see if we are freeing blocks across a group |
---|
| 3406 | + * boundary. |
---|
| 3407 | + * In case of flex_bg, this can happen that (block, len) may |
---|
| 3408 | + * span across more than one group. In that case we need to |
---|
| 3409 | + * get the corresponding group metadata to work with. |
---|
| 3410 | + * For this we have goto again loop. |
---|
| 3411 | + */ |
---|
| 3412 | + thisgrp_len = min_t(unsigned int, (unsigned int)len, |
---|
| 3413 | + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); |
---|
| 3414 | + clen = EXT4_NUM_B2C(sbi, thisgrp_len); |
---|
| 3415 | + |
---|
| 3416 | + bitmap_bh = ext4_read_block_bitmap(sb, group); |
---|
| 3417 | + if (IS_ERR(bitmap_bh)) { |
---|
| 3418 | + err = PTR_ERR(bitmap_bh); |
---|
| 3419 | + bitmap_bh = NULL; |
---|
| 3420 | + break; |
---|
| 3421 | + } |
---|
| 3422 | + |
---|
| 3423 | + err = -EIO; |
---|
| 3424 | + gdp = ext4_get_group_desc(sb, group, &gdp_bh); |
---|
| 3425 | + if (!gdp) |
---|
| 3426 | + break; |
---|
| 3427 | + |
---|
| 3428 | + ext4_lock_group(sb, group); |
---|
| 3429 | + already = 0; |
---|
| 3430 | + for (i = 0; i < clen; i++) |
---|
| 3431 | + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == |
---|
| 3432 | + !state) |
---|
| 3433 | + already++; |
---|
| 3434 | + |
---|
| 3435 | + clen_changed = clen - already; |
---|
| 3436 | + if (state) |
---|
| 3437 | + ext4_set_bits(bitmap_bh->b_data, blkoff, clen); |
---|
| 3438 | + else |
---|
| 3439 | + mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); |
---|
| 3440 | + if (ext4_has_group_desc_csum(sb) && |
---|
| 3441 | + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { |
---|
| 3442 | + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
---|
| 3443 | + ext4_free_group_clusters_set(sb, gdp, |
---|
| 3444 | + ext4_free_clusters_after_init(sb, group, gdp)); |
---|
| 3445 | + } |
---|
| 3446 | + if (state) |
---|
| 3447 | + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; |
---|
| 3448 | + else |
---|
| 3449 | + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; |
---|
| 3450 | + |
---|
| 3451 | + ext4_free_group_clusters_set(sb, gdp, clen); |
---|
| 3452 | + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); |
---|
| 3453 | + ext4_group_desc_csum_set(sb, group, gdp); |
---|
| 3454 | + |
---|
| 3455 | + ext4_unlock_group(sb, group); |
---|
| 3456 | + |
---|
| 3457 | + if (sbi->s_log_groups_per_flex) { |
---|
| 3458 | + ext4_group_t flex_group = ext4_flex_group(sbi, group); |
---|
| 3459 | + struct flex_groups *fg = sbi_array_rcu_deref(sbi, |
---|
| 3460 | + s_flex_groups, flex_group); |
---|
| 3461 | + |
---|
| 3462 | + if (state) |
---|
| 3463 | + atomic64_sub(clen_changed, &fg->free_clusters); |
---|
| 3464 | + else |
---|
| 3465 | + atomic64_add(clen_changed, &fg->free_clusters); |
---|
| 3466 | + |
---|
| 3467 | + } |
---|
| 3468 | + |
---|
| 3469 | + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); |
---|
| 3470 | + if (err) |
---|
| 3471 | + break; |
---|
| 3472 | + sync_dirty_buffer(bitmap_bh); |
---|
| 3473 | + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); |
---|
| 3474 | + sync_dirty_buffer(gdp_bh); |
---|
| 3475 | + if (err) |
---|
| 3476 | + break; |
---|
| 3477 | + |
---|
| 3478 | + block += thisgrp_len; |
---|
| 3479 | + len -= thisgrp_len; |
---|
| 3480 | + brelse(bitmap_bh); |
---|
| 3481 | + BUG_ON(len < 0); |
---|
| 3482 | + } |
---|
| 3483 | + |
---|
| 3484 | + if (err) |
---|
| 3485 | + brelse(bitmap_bh); |
---|
| 3486 | +} |
---|
| 3487 | + |
---|
| 3488 | +/* |
---|
3064 | 3489 | * here we normalize request for locality group |
---|
3065 | 3490 | * Group request are normalized to s_mb_group_prealloc, which goes to |
---|
3066 | 3491 | * s_strip if we set the same via mount option. |
---|
.. | .. |
---|
3076 | 3501 | |
---|
3077 | 3502 | BUG_ON(lg == NULL); |
---|
3078 | 3503 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
---|
3079 | | - mb_debug(1, "#%u: goal %u blocks for locality group\n", |
---|
3080 | | - current->pid, ac->ac_g_ex.fe_len); |
---|
| 3504 | + mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len); |
---|
3081 | 3505 | } |
---|
3082 | 3506 | |
---|
3083 | 3507 | /* |
---|
.. | .. |
---|
3089 | 3513 | struct ext4_allocation_request *ar) |
---|
3090 | 3514 | { |
---|
3091 | 3515 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
---|
| 3516 | + struct ext4_super_block *es = sbi->s_es; |
---|
3092 | 3517 | int bsbits, max; |
---|
3093 | 3518 | ext4_lblk_t end; |
---|
3094 | 3519 | loff_t size, start_off; |
---|
.. | .. |
---|
3169 | 3594 | } |
---|
3170 | 3595 | size = size >> bsbits; |
---|
3171 | 3596 | start = start_off >> bsbits; |
---|
| 3597 | + |
---|
| 3598 | + /* |
---|
| 3599 | + * For tiny groups (smaller than 8MB) the chosen allocation |
---|
| 3600 | + * alignment may be larger than group size. Make sure the |
---|
| 3601 | + * alignment does not move allocation to a different group which |
---|
| 3602 | + * makes mballoc fail assertions later. |
---|
| 3603 | + */ |
---|
| 3604 | + start = max(start, rounddown(ac->ac_o_ex.fe_logical, |
---|
| 3605 | + (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); |
---|
3172 | 3606 | |
---|
3173 | 3607 | /* don't cover already allocated blocks in selected range */ |
---|
3174 | 3608 | if (ar->pleft && start <= ar->lleft) { |
---|
.. | .. |
---|
3260 | 3694 | ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size); |
---|
3261 | 3695 | |
---|
3262 | 3696 | /* define goal start in order to merge */ |
---|
3263 | | - if (ar->pright && (ar->lright == (start + size))) { |
---|
| 3697 | + if (ar->pright && (ar->lright == (start + size)) && |
---|
| 3698 | + ar->pright >= size && |
---|
| 3699 | + ar->pright - size >= le32_to_cpu(es->s_first_data_block)) { |
---|
3264 | 3700 | /* merge to the right */ |
---|
3265 | 3701 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size, |
---|
3266 | | - &ac->ac_f_ex.fe_group, |
---|
3267 | | - &ac->ac_f_ex.fe_start); |
---|
| 3702 | + &ac->ac_g_ex.fe_group, |
---|
| 3703 | + &ac->ac_g_ex.fe_start); |
---|
3268 | 3704 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
---|
3269 | 3705 | } |
---|
3270 | | - if (ar->pleft && (ar->lleft + 1 == start)) { |
---|
| 3706 | + if (ar->pleft && (ar->lleft + 1 == start) && |
---|
| 3707 | + ar->pleft + 1 < ext4_blocks_count(es)) { |
---|
3271 | 3708 | /* merge to the left */ |
---|
3272 | 3709 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1, |
---|
3273 | | - &ac->ac_f_ex.fe_group, |
---|
3274 | | - &ac->ac_f_ex.fe_start); |
---|
| 3710 | + &ac->ac_g_ex.fe_group, |
---|
| 3711 | + &ac->ac_g_ex.fe_start); |
---|
3275 | 3712 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
---|
3276 | 3713 | } |
---|
3277 | 3714 | |
---|
3278 | | - mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size, |
---|
3279 | | - (unsigned) orig_size, (unsigned) start); |
---|
| 3715 | + mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size, |
---|
| 3716 | + orig_size, start); |
---|
3280 | 3717 | } |
---|
3281 | 3718 | |
---|
3282 | 3719 | static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) |
---|
3283 | 3720 | { |
---|
3284 | 3721 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
---|
3285 | 3722 | |
---|
3286 | | - if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { |
---|
| 3723 | + if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) { |
---|
3287 | 3724 | atomic_inc(&sbi->s_bal_reqs); |
---|
3288 | 3725 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); |
---|
3289 | 3726 | if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) |
---|
3290 | 3727 | atomic_inc(&sbi->s_bal_success); |
---|
3291 | 3728 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); |
---|
| 3729 | + atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned); |
---|
3292 | 3730 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && |
---|
3293 | 3731 | ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) |
---|
3294 | 3732 | atomic_inc(&sbi->s_bal_goals); |
---|
.. | .. |
---|
3363 | 3801 | BUG_ON(start < pa->pa_pstart); |
---|
3364 | 3802 | BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len)); |
---|
3365 | 3803 | BUG_ON(pa->pa_free < len); |
---|
| 3804 | + BUG_ON(ac->ac_b_ex.fe_len <= 0); |
---|
3366 | 3805 | pa->pa_free -= len; |
---|
3367 | 3806 | |
---|
3368 | | - mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); |
---|
| 3807 | + mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa); |
---|
3369 | 3808 | } |
---|
3370 | 3809 | |
---|
3371 | 3810 | /* |
---|
.. | .. |
---|
3389 | 3828 | * in on-disk bitmap -- see ext4_mb_release_context() |
---|
3390 | 3829 | * Other CPUs are prevented from allocating from this pa by lg_mutex |
---|
3391 | 3830 | */ |
---|
3392 | | - mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); |
---|
| 3831 | + mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n", |
---|
| 3832 | + pa->pa_lstart-len, len, pa); |
---|
3393 | 3833 | } |
---|
3394 | 3834 | |
---|
3395 | 3835 | /* |
---|
.. | .. |
---|
3424 | 3864 | /* |
---|
3425 | 3865 | * search goal blocks in preallocated space |
---|
3426 | 3866 | */ |
---|
3427 | | -static noinline_for_stack int |
---|
| 3867 | +static noinline_for_stack bool |
---|
3428 | 3868 | ext4_mb_use_preallocated(struct ext4_allocation_context *ac) |
---|
3429 | 3869 | { |
---|
3430 | 3870 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
---|
.. | .. |
---|
3436 | 3876 | |
---|
3437 | 3877 | /* only data can be preallocated */ |
---|
3438 | 3878 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
---|
3439 | | - return 0; |
---|
| 3879 | + return false; |
---|
3440 | 3880 | |
---|
3441 | 3881 | /* first, try per-file preallocation */ |
---|
3442 | 3882 | rcu_read_lock(); |
---|
.. | .. |
---|
3463 | 3903 | spin_unlock(&pa->pa_lock); |
---|
3464 | 3904 | ac->ac_criteria = 10; |
---|
3465 | 3905 | rcu_read_unlock(); |
---|
3466 | | - return 1; |
---|
| 3906 | + return true; |
---|
3467 | 3907 | } |
---|
3468 | 3908 | spin_unlock(&pa->pa_lock); |
---|
3469 | 3909 | } |
---|
.. | .. |
---|
3471 | 3911 | |
---|
3472 | 3912 | /* can we use group allocation? */ |
---|
3473 | 3913 | if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)) |
---|
3474 | | - return 0; |
---|
| 3914 | + return false; |
---|
3475 | 3915 | |
---|
3476 | 3916 | /* inode may have no locality group for some reason */ |
---|
3477 | 3917 | lg = ac->ac_lg; |
---|
3478 | 3918 | if (lg == NULL) |
---|
3479 | | - return 0; |
---|
| 3919 | + return false; |
---|
3480 | 3920 | order = fls(ac->ac_o_ex.fe_len) - 1; |
---|
3481 | 3921 | if (order > PREALLOC_TB_SIZE - 1) |
---|
3482 | 3922 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
---|
.. | .. |
---|
3505 | 3945 | if (cpa) { |
---|
3506 | 3946 | ext4_mb_use_group_pa(ac, cpa); |
---|
3507 | 3947 | ac->ac_criteria = 20; |
---|
3508 | | - return 1; |
---|
| 3948 | + return true; |
---|
3509 | 3949 | } |
---|
3510 | | - return 0; |
---|
| 3950 | + return false; |
---|
3511 | 3951 | } |
---|
3512 | 3952 | |
---|
3513 | 3953 | /* |
---|
.. | .. |
---|
3524 | 3964 | struct ext4_free_data *entry; |
---|
3525 | 3965 | |
---|
3526 | 3966 | grp = ext4_get_group_info(sb, group); |
---|
| 3967 | + if (!grp) |
---|
| 3968 | + return; |
---|
3527 | 3969 | n = rb_first(&(grp->bb_free_root)); |
---|
3528 | 3970 | |
---|
3529 | 3971 | while (n) { |
---|
.. | .. |
---|
3551 | 3993 | int preallocated = 0; |
---|
3552 | 3994 | int len; |
---|
3553 | 3995 | |
---|
| 3996 | + if (!grp) |
---|
| 3997 | + return; |
---|
| 3998 | + |
---|
3554 | 3999 | /* all form of preallocation discards first load group, |
---|
3555 | 4000 | * so the only competing code is preallocation use. |
---|
3556 | 4001 | * we don't need any locking here |
---|
.. | .. |
---|
3572 | 4017 | ext4_set_bits(bitmap, start, len); |
---|
3573 | 4018 | preallocated += len; |
---|
3574 | 4019 | } |
---|
3575 | | - mb_debug(1, "preallocated %u for group %u\n", preallocated, group); |
---|
| 4020 | + mb_debug(sb, "preallocated %d for group %u\n", preallocated, group); |
---|
| 4021 | +} |
---|
| 4022 | + |
---|
| 4023 | +static void ext4_mb_mark_pa_deleted(struct super_block *sb, |
---|
| 4024 | + struct ext4_prealloc_space *pa) |
---|
| 4025 | +{ |
---|
| 4026 | + struct ext4_inode_info *ei; |
---|
| 4027 | + |
---|
| 4028 | + if (pa->pa_deleted) { |
---|
| 4029 | + ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n", |
---|
| 4030 | + pa->pa_type, pa->pa_pstart, pa->pa_lstart, |
---|
| 4031 | + pa->pa_len); |
---|
| 4032 | + return; |
---|
| 4033 | + } |
---|
| 4034 | + |
---|
| 4035 | + pa->pa_deleted = 1; |
---|
| 4036 | + |
---|
| 4037 | + if (pa->pa_type == MB_INODE_PA) { |
---|
| 4038 | + ei = EXT4_I(pa->pa_inode); |
---|
| 4039 | + atomic_dec(&ei->i_prealloc_active); |
---|
| 4040 | + } |
---|
3576 | 4041 | } |
---|
3577 | 4042 | |
---|
3578 | 4043 | static void ext4_mb_pa_callback(struct rcu_head *head) |
---|
.. | .. |
---|
3607 | 4072 | return; |
---|
3608 | 4073 | } |
---|
3609 | 4074 | |
---|
3610 | | - pa->pa_deleted = 1; |
---|
| 4075 | + ext4_mb_mark_pa_deleted(sb, pa); |
---|
3611 | 4076 | spin_unlock(&pa->pa_lock); |
---|
3612 | 4077 | |
---|
3613 | 4078 | grp_blk = pa->pa_pstart; |
---|
.. | .. |
---|
3648 | 4113 | /* |
---|
3649 | 4114 | * creates new preallocated space for given inode |
---|
3650 | 4115 | */ |
---|
3651 | | -static noinline_for_stack int |
---|
| 4116 | +static noinline_for_stack void |
---|
3652 | 4117 | ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) |
---|
3653 | 4118 | { |
---|
3654 | 4119 | struct super_block *sb = ac->ac_sb; |
---|
.. | .. |
---|
3661 | 4126 | BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); |
---|
3662 | 4127 | BUG_ON(ac->ac_status != AC_STATUS_FOUND); |
---|
3663 | 4128 | BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); |
---|
| 4129 | + BUG_ON(ac->ac_pa == NULL); |
---|
3664 | 4130 | |
---|
3665 | | - pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS); |
---|
3666 | | - if (pa == NULL) |
---|
3667 | | - return -ENOMEM; |
---|
| 4131 | + pa = ac->ac_pa; |
---|
3668 | 4132 | |
---|
3669 | 4133 | if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { |
---|
3670 | | - int winl; |
---|
3671 | | - int wins; |
---|
3672 | | - int win; |
---|
3673 | | - int offs; |
---|
| 4134 | + int new_bex_start; |
---|
| 4135 | + int new_bex_end; |
---|
3674 | 4136 | |
---|
3675 | 4137 | /* we can't allocate as much as normalizer wants. |
---|
3676 | 4138 | * so, found space must get proper lstart |
---|
.. | .. |
---|
3678 | 4140 | BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); |
---|
3679 | 4141 | BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); |
---|
3680 | 4142 | |
---|
3681 | | - /* we're limited by original request in that |
---|
3682 | | - * logical block must be covered any way |
---|
3683 | | - * winl is window we can move our chunk within */ |
---|
3684 | | - winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; |
---|
| 4143 | + /* |
---|
| 4144 | + * Use the below logic for adjusting best extent as it keeps |
---|
| 4145 | + * fragmentation in check while ensuring logical range of best |
---|
| 4146 | + * extent doesn't overflow out of goal extent: |
---|
| 4147 | + * |
---|
| 4148 | + * 1. Check if best ex can be kept at end of goal and still |
---|
| 4149 | + * cover original start |
---|
| 4150 | + * 2. Else, check if best ex can be kept at start of goal and |
---|
| 4151 | + * still cover original start |
---|
| 4152 | + * 3. Else, keep the best ex at start of original request. |
---|
| 4153 | + */ |
---|
| 4154 | + new_bex_end = ac->ac_g_ex.fe_logical + |
---|
| 4155 | + EXT4_C2B(sbi, ac->ac_g_ex.fe_len); |
---|
| 4156 | + new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
| 4157 | + if (ac->ac_o_ex.fe_logical >= new_bex_start) |
---|
| 4158 | + goto adjust_bex; |
---|
3685 | 4159 | |
---|
3686 | | - /* also, we should cover whole original request */ |
---|
3687 | | - wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len); |
---|
| 4160 | + new_bex_start = ac->ac_g_ex.fe_logical; |
---|
| 4161 | + new_bex_end = |
---|
| 4162 | + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
| 4163 | + if (ac->ac_o_ex.fe_logical < new_bex_end) |
---|
| 4164 | + goto adjust_bex; |
---|
3688 | 4165 | |
---|
3689 | | - /* the smallest one defines real window */ |
---|
3690 | | - win = min(winl, wins); |
---|
| 4166 | + new_bex_start = ac->ac_o_ex.fe_logical; |
---|
| 4167 | + new_bex_end = |
---|
| 4168 | + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
3691 | 4169 | |
---|
3692 | | - offs = ac->ac_o_ex.fe_logical % |
---|
3693 | | - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
3694 | | - if (offs && offs < win) |
---|
3695 | | - win = offs; |
---|
| 4170 | +adjust_bex: |
---|
| 4171 | + ac->ac_b_ex.fe_logical = new_bex_start; |
---|
3696 | 4172 | |
---|
3697 | | - ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - |
---|
3698 | | - EXT4_NUM_B2C(sbi, win); |
---|
3699 | 4173 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); |
---|
3700 | 4174 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); |
---|
| 4175 | + BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + |
---|
| 4176 | + EXT4_C2B(sbi, ac->ac_g_ex.fe_len))); |
---|
3701 | 4177 | } |
---|
3702 | 4178 | |
---|
3703 | 4179 | /* preallocation can change ac_b_ex, thus we store actually |
---|
.. | .. |
---|
3708 | 4184 | pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); |
---|
3709 | 4185 | pa->pa_len = ac->ac_b_ex.fe_len; |
---|
3710 | 4186 | pa->pa_free = pa->pa_len; |
---|
3711 | | - atomic_set(&pa->pa_count, 1); |
---|
3712 | 4187 | spin_lock_init(&pa->pa_lock); |
---|
3713 | 4188 | INIT_LIST_HEAD(&pa->pa_inode_list); |
---|
3714 | 4189 | INIT_LIST_HEAD(&pa->pa_group_list); |
---|
3715 | 4190 | pa->pa_deleted = 0; |
---|
3716 | 4191 | pa->pa_type = MB_INODE_PA; |
---|
3717 | 4192 | |
---|
3718 | | - mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa, |
---|
3719 | | - pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
---|
| 4193 | + mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart, |
---|
| 4194 | + pa->pa_len, pa->pa_lstart); |
---|
3720 | 4195 | trace_ext4_mb_new_inode_pa(ac, pa); |
---|
3721 | 4196 | |
---|
3722 | 4197 | ext4_mb_use_inode_pa(ac, pa); |
---|
.. | .. |
---|
3724 | 4199 | |
---|
3725 | 4200 | ei = EXT4_I(ac->ac_inode); |
---|
3726 | 4201 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
---|
| 4202 | + if (!grp) |
---|
| 4203 | + return; |
---|
3727 | 4204 | |
---|
3728 | 4205 | pa->pa_obj_lock = &ei->i_prealloc_lock; |
---|
3729 | 4206 | pa->pa_inode = ac->ac_inode; |
---|
3730 | 4207 | |
---|
3731 | | - ext4_lock_group(sb, ac->ac_b_ex.fe_group); |
---|
3732 | 4208 | list_add(&pa->pa_group_list, &grp->bb_prealloc_list); |
---|
3733 | | - ext4_unlock_group(sb, ac->ac_b_ex.fe_group); |
---|
3734 | 4209 | |
---|
3735 | 4210 | spin_lock(pa->pa_obj_lock); |
---|
3736 | 4211 | list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list); |
---|
3737 | 4212 | spin_unlock(pa->pa_obj_lock); |
---|
3738 | | - |
---|
3739 | | - return 0; |
---|
| 4213 | + atomic_inc(&ei->i_prealloc_active); |
---|
3740 | 4214 | } |
---|
3741 | 4215 | |
---|
3742 | 4216 | /* |
---|
3743 | 4217 | * creates new preallocated space for locality group inodes belongs to |
---|
3744 | 4218 | */ |
---|
3745 | | -static noinline_for_stack int |
---|
| 4219 | +static noinline_for_stack void |
---|
3746 | 4220 | ext4_mb_new_group_pa(struct ext4_allocation_context *ac) |
---|
3747 | 4221 | { |
---|
3748 | 4222 | struct super_block *sb = ac->ac_sb; |
---|
.. | .. |
---|
3754 | 4228 | BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); |
---|
3755 | 4229 | BUG_ON(ac->ac_status != AC_STATUS_FOUND); |
---|
3756 | 4230 | BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); |
---|
| 4231 | + BUG_ON(ac->ac_pa == NULL); |
---|
3757 | 4232 | |
---|
3758 | | - BUG_ON(ext4_pspace_cachep == NULL); |
---|
3759 | | - pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS); |
---|
3760 | | - if (pa == NULL) |
---|
3761 | | - return -ENOMEM; |
---|
| 4233 | + pa = ac->ac_pa; |
---|
3762 | 4234 | |
---|
3763 | 4235 | /* preallocation can change ac_b_ex, thus we store actually |
---|
3764 | 4236 | * allocated blocks for history */ |
---|
.. | .. |
---|
3768 | 4240 | pa->pa_lstart = pa->pa_pstart; |
---|
3769 | 4241 | pa->pa_len = ac->ac_b_ex.fe_len; |
---|
3770 | 4242 | pa->pa_free = pa->pa_len; |
---|
3771 | | - atomic_set(&pa->pa_count, 1); |
---|
3772 | 4243 | spin_lock_init(&pa->pa_lock); |
---|
3773 | 4244 | INIT_LIST_HEAD(&pa->pa_inode_list); |
---|
3774 | 4245 | INIT_LIST_HEAD(&pa->pa_group_list); |
---|
3775 | 4246 | pa->pa_deleted = 0; |
---|
3776 | 4247 | pa->pa_type = MB_GROUP_PA; |
---|
3777 | 4248 | |
---|
3778 | | - mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa, |
---|
3779 | | - pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
---|
| 4249 | + mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart, |
---|
| 4250 | + pa->pa_len, pa->pa_lstart); |
---|
3780 | 4251 | trace_ext4_mb_new_group_pa(ac, pa); |
---|
3781 | 4252 | |
---|
3782 | 4253 | ext4_mb_use_group_pa(ac, pa); |
---|
3783 | 4254 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
---|
3784 | 4255 | |
---|
3785 | 4256 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
---|
| 4257 | + if (!grp) |
---|
| 4258 | + return; |
---|
3786 | 4259 | lg = ac->ac_lg; |
---|
3787 | 4260 | BUG_ON(lg == NULL); |
---|
3788 | 4261 | |
---|
3789 | 4262 | pa->pa_obj_lock = &lg->lg_prealloc_lock; |
---|
3790 | 4263 | pa->pa_inode = NULL; |
---|
3791 | 4264 | |
---|
3792 | | - ext4_lock_group(sb, ac->ac_b_ex.fe_group); |
---|
3793 | 4265 | list_add(&pa->pa_group_list, &grp->bb_prealloc_list); |
---|
3794 | | - ext4_unlock_group(sb, ac->ac_b_ex.fe_group); |
---|
3795 | 4266 | |
---|
3796 | 4267 | /* |
---|
3797 | 4268 | * We will later add the new pa to the right bucket |
---|
3798 | 4269 | * after updating the pa_free in ext4_mb_release_context |
---|
3799 | 4270 | */ |
---|
3800 | | - return 0; |
---|
3801 | 4271 | } |
---|
3802 | 4272 | |
---|
3803 | | -static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) |
---|
| 4273 | +static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac) |
---|
3804 | 4274 | { |
---|
3805 | | - int err; |
---|
3806 | | - |
---|
3807 | 4275 | if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) |
---|
3808 | | - err = ext4_mb_new_group_pa(ac); |
---|
| 4276 | + ext4_mb_new_group_pa(ac); |
---|
3809 | 4277 | else |
---|
3810 | | - err = ext4_mb_new_inode_pa(ac); |
---|
3811 | | - return err; |
---|
| 4278 | + ext4_mb_new_inode_pa(ac); |
---|
3812 | 4279 | } |
---|
3813 | 4280 | |
---|
3814 | 4281 | /* |
---|
.. | .. |
---|
3843 | 4310 | if (bit >= end) |
---|
3844 | 4311 | break; |
---|
3845 | 4312 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
---|
3846 | | - mb_debug(1, " free preallocated %u/%u in group %u\n", |
---|
| 4313 | + mb_debug(sb, "free preallocated %u/%u in group %u\n", |
---|
3847 | 4314 | (unsigned) ext4_group_first_block_no(sb, group) + bit, |
---|
3848 | 4315 | (unsigned) next - bit, (unsigned) group); |
---|
3849 | 4316 | free += next - bit; |
---|
.. | .. |
---|
3857 | 4324 | } |
---|
3858 | 4325 | if (free != pa->pa_free) { |
---|
3859 | 4326 | ext4_msg(e4b->bd_sb, KERN_CRIT, |
---|
3860 | | - "pa %p: logic %lu, phys. %lu, len %lu", |
---|
| 4327 | + "pa %p: logic %lu, phys. %lu, len %d", |
---|
3861 | 4328 | pa, (unsigned long) pa->pa_lstart, |
---|
3862 | 4329 | (unsigned long) pa->pa_pstart, |
---|
3863 | | - (unsigned long) pa->pa_len); |
---|
| 4330 | + pa->pa_len); |
---|
3864 | 4331 | ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", |
---|
3865 | 4332 | free, pa->pa_free); |
---|
3866 | 4333 | /* |
---|
.. | .. |
---|
3884 | 4351 | trace_ext4_mb_release_group_pa(sb, pa); |
---|
3885 | 4352 | BUG_ON(pa->pa_deleted == 0); |
---|
3886 | 4353 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
---|
3887 | | - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
---|
| 4354 | + if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { |
---|
| 4355 | + ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", |
---|
| 4356 | + e4b->bd_group, group, pa->pa_pstart); |
---|
| 4357 | + return 0; |
---|
| 4358 | + } |
---|
3888 | 4359 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
---|
3889 | 4360 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
---|
3890 | 4361 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); |
---|
.. | .. |
---|
3903 | 4374 | */ |
---|
3904 | 4375 | static noinline_for_stack int |
---|
3905 | 4376 | ext4_mb_discard_group_preallocations(struct super_block *sb, |
---|
3906 | | - ext4_group_t group, int needed) |
---|
| 4377 | + ext4_group_t group, int *busy) |
---|
3907 | 4378 | { |
---|
3908 | 4379 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
---|
3909 | 4380 | struct buffer_head *bitmap_bh = NULL; |
---|
.. | .. |
---|
3911 | 4382 | struct list_head list; |
---|
3912 | 4383 | struct ext4_buddy e4b; |
---|
3913 | 4384 | int err; |
---|
3914 | | - int busy = 0; |
---|
3915 | 4385 | int free = 0; |
---|
3916 | 4386 | |
---|
3917 | | - mb_debug(1, "discard preallocation for group %u\n", group); |
---|
3918 | | - |
---|
3919 | | - if (list_empty(&grp->bb_prealloc_list)) |
---|
| 4387 | + if (!grp) |
---|
3920 | 4388 | return 0; |
---|
| 4389 | + mb_debug(sb, "discard preallocation for group %u\n", group); |
---|
| 4390 | + if (list_empty(&grp->bb_prealloc_list)) |
---|
| 4391 | + goto out_dbg; |
---|
3921 | 4392 | |
---|
3922 | 4393 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
---|
3923 | 4394 | if (IS_ERR(bitmap_bh)) { |
---|
3924 | 4395 | err = PTR_ERR(bitmap_bh); |
---|
3925 | | - ext4_error(sb, "Error %d reading block bitmap for %u", |
---|
3926 | | - err, group); |
---|
3927 | | - return 0; |
---|
| 4396 | + ext4_error_err(sb, -err, |
---|
| 4397 | + "Error %d reading block bitmap for %u", |
---|
| 4398 | + err, group); |
---|
| 4399 | + goto out_dbg; |
---|
3928 | 4400 | } |
---|
3929 | 4401 | |
---|
3930 | 4402 | err = ext4_mb_load_buddy(sb, group, &e4b); |
---|
.. | .. |
---|
3932 | 4404 | ext4_warning(sb, "Error %d loading buddy information for %u", |
---|
3933 | 4405 | err, group); |
---|
3934 | 4406 | put_bh(bitmap_bh); |
---|
3935 | | - return 0; |
---|
| 4407 | + goto out_dbg; |
---|
3936 | 4408 | } |
---|
3937 | 4409 | |
---|
3938 | | - if (needed == 0) |
---|
3939 | | - needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; |
---|
3940 | | - |
---|
3941 | 4410 | INIT_LIST_HEAD(&list); |
---|
3942 | | -repeat: |
---|
3943 | 4411 | ext4_lock_group(sb, group); |
---|
3944 | 4412 | list_for_each_entry_safe(pa, tmp, |
---|
3945 | 4413 | &grp->bb_prealloc_list, pa_group_list) { |
---|
3946 | 4414 | spin_lock(&pa->pa_lock); |
---|
3947 | 4415 | if (atomic_read(&pa->pa_count)) { |
---|
3948 | 4416 | spin_unlock(&pa->pa_lock); |
---|
3949 | | - busy = 1; |
---|
| 4417 | + *busy = 1; |
---|
3950 | 4418 | continue; |
---|
3951 | 4419 | } |
---|
3952 | 4420 | if (pa->pa_deleted) { |
---|
.. | .. |
---|
3955 | 4423 | } |
---|
3956 | 4424 | |
---|
3957 | 4425 | /* seems this one can be freed ... */ |
---|
3958 | | - pa->pa_deleted = 1; |
---|
| 4426 | + ext4_mb_mark_pa_deleted(sb, pa); |
---|
| 4427 | + |
---|
| 4428 | + if (!free) |
---|
| 4429 | + this_cpu_inc(discard_pa_seq); |
---|
3959 | 4430 | |
---|
3960 | 4431 | /* we can trust pa_free ... */ |
---|
3961 | 4432 | free += pa->pa_free; |
---|
.. | .. |
---|
3964 | 4435 | |
---|
3965 | 4436 | list_del(&pa->pa_group_list); |
---|
3966 | 4437 | list_add(&pa->u.pa_tmp_list, &list); |
---|
3967 | | - } |
---|
3968 | | - |
---|
3969 | | - /* if we still need more blocks and some PAs were used, try again */ |
---|
3970 | | - if (free < needed && busy) { |
---|
3971 | | - busy = 0; |
---|
3972 | | - ext4_unlock_group(sb, group); |
---|
3973 | | - cond_resched(); |
---|
3974 | | - goto repeat; |
---|
3975 | | - } |
---|
3976 | | - |
---|
3977 | | - /* found anything to free? */ |
---|
3978 | | - if (list_empty(&list)) { |
---|
3979 | | - BUG_ON(free != 0); |
---|
3980 | | - goto out; |
---|
3981 | 4438 | } |
---|
3982 | 4439 | |
---|
3983 | 4440 | /* now free all selected PAs */ |
---|
.. | .. |
---|
3997 | 4454 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
---|
3998 | 4455 | } |
---|
3999 | 4456 | |
---|
4000 | | -out: |
---|
4001 | 4457 | ext4_unlock_group(sb, group); |
---|
4002 | 4458 | ext4_mb_unload_buddy(&e4b); |
---|
4003 | 4459 | put_bh(bitmap_bh); |
---|
| 4460 | +out_dbg: |
---|
| 4461 | + mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n", |
---|
| 4462 | + free, group, grp->bb_free); |
---|
4004 | 4463 | return free; |
---|
4005 | 4464 | } |
---|
4006 | 4465 | |
---|
.. | .. |
---|
4013 | 4472 | * |
---|
4014 | 4473 | * FIXME!! Make sure it is valid at all the call sites |
---|
4015 | 4474 | */ |
---|
4016 | | -void ext4_discard_preallocations(struct inode *inode) |
---|
| 4475 | +void ext4_discard_preallocations(struct inode *inode, unsigned int needed) |
---|
4017 | 4476 | { |
---|
4018 | 4477 | struct ext4_inode_info *ei = EXT4_I(inode); |
---|
4019 | 4478 | struct super_block *sb = inode->i_sb; |
---|
.. | .. |
---|
4029 | 4488 | return; |
---|
4030 | 4489 | } |
---|
4031 | 4490 | |
---|
4032 | | - mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino); |
---|
4033 | | - trace_ext4_discard_preallocations(inode); |
---|
| 4491 | + if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) |
---|
| 4492 | + return; |
---|
| 4493 | + |
---|
| 4494 | + mb_debug(sb, "discard preallocation for inode %lu\n", |
---|
| 4495 | + inode->i_ino); |
---|
| 4496 | + trace_ext4_discard_preallocations(inode, |
---|
| 4497 | + atomic_read(&ei->i_prealloc_active), needed); |
---|
4034 | 4498 | |
---|
4035 | 4499 | INIT_LIST_HEAD(&list); |
---|
| 4500 | + |
---|
| 4501 | + if (needed == 0) |
---|
| 4502 | + needed = UINT_MAX; |
---|
4036 | 4503 | |
---|
4037 | 4504 | repeat: |
---|
4038 | 4505 | /* first, collect all pa's in the inode */ |
---|
4039 | 4506 | spin_lock(&ei->i_prealloc_lock); |
---|
4040 | | - while (!list_empty(&ei->i_prealloc_list)) { |
---|
4041 | | - pa = list_entry(ei->i_prealloc_list.next, |
---|
| 4507 | + while (!list_empty(&ei->i_prealloc_list) && needed) { |
---|
| 4508 | + pa = list_entry(ei->i_prealloc_list.prev, |
---|
4042 | 4509 | struct ext4_prealloc_space, pa_inode_list); |
---|
4043 | 4510 | BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock); |
---|
4044 | 4511 | spin_lock(&pa->pa_lock); |
---|
.. | .. |
---|
4055 | 4522 | |
---|
4056 | 4523 | } |
---|
4057 | 4524 | if (pa->pa_deleted == 0) { |
---|
4058 | | - pa->pa_deleted = 1; |
---|
| 4525 | + ext4_mb_mark_pa_deleted(sb, pa); |
---|
4059 | 4526 | spin_unlock(&pa->pa_lock); |
---|
4060 | 4527 | list_del_rcu(&pa->pa_inode_list); |
---|
4061 | 4528 | list_add(&pa->u.pa_tmp_list, &list); |
---|
| 4529 | + needed--; |
---|
4062 | 4530 | continue; |
---|
4063 | 4531 | } |
---|
4064 | 4532 | |
---|
.. | .. |
---|
4090 | 4558 | err = ext4_mb_load_buddy_gfp(sb, group, &e4b, |
---|
4091 | 4559 | GFP_NOFS|__GFP_NOFAIL); |
---|
4092 | 4560 | if (err) { |
---|
4093 | | - ext4_error(sb, "Error %d loading buddy information for %u", |
---|
4094 | | - err, group); |
---|
| 4561 | + ext4_error_err(sb, -err, "Error %d loading buddy information for %u", |
---|
| 4562 | + err, group); |
---|
4095 | 4563 | continue; |
---|
4096 | 4564 | } |
---|
4097 | 4565 | |
---|
4098 | 4566 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
---|
4099 | 4567 | if (IS_ERR(bitmap_bh)) { |
---|
4100 | 4568 | err = PTR_ERR(bitmap_bh); |
---|
4101 | | - ext4_error(sb, "Error %d reading block bitmap for %u", |
---|
4102 | | - err, group); |
---|
| 4569 | + ext4_error_err(sb, -err, "Error %d reading block bitmap for %u", |
---|
| 4570 | + err, group); |
---|
4103 | 4571 | ext4_mb_unload_buddy(&e4b); |
---|
4104 | 4572 | continue; |
---|
4105 | 4573 | } |
---|
.. | .. |
---|
4117 | 4585 | } |
---|
4118 | 4586 | } |
---|
4119 | 4587 | |
---|
| 4588 | +static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac) |
---|
| 4589 | +{ |
---|
| 4590 | + struct ext4_prealloc_space *pa; |
---|
| 4591 | + |
---|
| 4592 | + BUG_ON(ext4_pspace_cachep == NULL); |
---|
| 4593 | + pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS); |
---|
| 4594 | + if (!pa) |
---|
| 4595 | + return -ENOMEM; |
---|
| 4596 | + atomic_set(&pa->pa_count, 1); |
---|
| 4597 | + ac->ac_pa = pa; |
---|
| 4598 | + return 0; |
---|
| 4599 | +} |
---|
| 4600 | + |
---|
| 4601 | +static void ext4_mb_pa_free(struct ext4_allocation_context *ac) |
---|
| 4602 | +{ |
---|
| 4603 | + struct ext4_prealloc_space *pa = ac->ac_pa; |
---|
| 4604 | + |
---|
| 4605 | + BUG_ON(!pa); |
---|
| 4606 | + ac->ac_pa = NULL; |
---|
| 4607 | + WARN_ON(!atomic_dec_and_test(&pa->pa_count)); |
---|
| 4608 | + kmem_cache_free(ext4_pspace_cachep, pa); |
---|
| 4609 | +} |
---|
| 4610 | + |
---|
4120 | 4611 | #ifdef CONFIG_EXT4_DEBUG |
---|
| 4612 | +static inline void ext4_mb_show_pa(struct super_block *sb) |
---|
| 4613 | +{ |
---|
| 4614 | + ext4_group_t i, ngroups; |
---|
| 4615 | + |
---|
| 4616 | + if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) |
---|
| 4617 | + return; |
---|
| 4618 | + |
---|
| 4619 | + ngroups = ext4_get_groups_count(sb); |
---|
| 4620 | + mb_debug(sb, "groups: "); |
---|
| 4621 | + for (i = 0; i < ngroups; i++) { |
---|
| 4622 | + struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
---|
| 4623 | + struct ext4_prealloc_space *pa; |
---|
| 4624 | + ext4_grpblk_t start; |
---|
| 4625 | + struct list_head *cur; |
---|
| 4626 | + |
---|
| 4627 | + if (!grp) |
---|
| 4628 | + continue; |
---|
| 4629 | + ext4_lock_group(sb, i); |
---|
| 4630 | + list_for_each(cur, &grp->bb_prealloc_list) { |
---|
| 4631 | + pa = list_entry(cur, struct ext4_prealloc_space, |
---|
| 4632 | + pa_group_list); |
---|
| 4633 | + spin_lock(&pa->pa_lock); |
---|
| 4634 | + ext4_get_group_no_and_offset(sb, pa->pa_pstart, |
---|
| 4635 | + NULL, &start); |
---|
| 4636 | + spin_unlock(&pa->pa_lock); |
---|
| 4637 | + mb_debug(sb, "PA:%u:%d:%d\n", i, start, |
---|
| 4638 | + pa->pa_len); |
---|
| 4639 | + } |
---|
| 4640 | + ext4_unlock_group(sb, i); |
---|
| 4641 | + mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free, |
---|
| 4642 | + grp->bb_fragments); |
---|
| 4643 | + } |
---|
| 4644 | +} |
---|
| 4645 | + |
---|
4121 | 4646 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
---|
4122 | 4647 | { |
---|
4123 | 4648 | struct super_block *sb = ac->ac_sb; |
---|
4124 | | - ext4_group_t ngroups, i; |
---|
4125 | 4649 | |
---|
4126 | | - if (!ext4_mballoc_debug || |
---|
4127 | | - (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) |
---|
| 4650 | + if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) |
---|
4128 | 4651 | return; |
---|
4129 | 4652 | |
---|
4130 | | - ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:" |
---|
| 4653 | + mb_debug(sb, "Can't allocate:" |
---|
4131 | 4654 | " Allocation context details:"); |
---|
4132 | | - ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d", |
---|
| 4655 | + mb_debug(sb, "status %u flags 0x%x", |
---|
4133 | 4656 | ac->ac_status, ac->ac_flags); |
---|
4134 | | - ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, " |
---|
4135 | | - "goal %lu/%lu/%lu@%lu, " |
---|
| 4657 | + mb_debug(sb, "orig %lu/%lu/%lu@%lu, " |
---|
| 4658 | + "goal %lu/%lu/%lu@%lu, " |
---|
4136 | 4659 | "best %lu/%lu/%lu@%lu cr %d", |
---|
4137 | 4660 | (unsigned long)ac->ac_o_ex.fe_group, |
---|
4138 | 4661 | (unsigned long)ac->ac_o_ex.fe_start, |
---|
.. | .. |
---|
4147 | 4670 | (unsigned long)ac->ac_b_ex.fe_len, |
---|
4148 | 4671 | (unsigned long)ac->ac_b_ex.fe_logical, |
---|
4149 | 4672 | (int)ac->ac_criteria); |
---|
4150 | | - ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found); |
---|
4151 | | - ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); |
---|
4152 | | - ngroups = ext4_get_groups_count(sb); |
---|
4153 | | - for (i = 0; i < ngroups; i++) { |
---|
4154 | | - struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
---|
4155 | | - struct ext4_prealloc_space *pa; |
---|
4156 | | - ext4_grpblk_t start; |
---|
4157 | | - struct list_head *cur; |
---|
4158 | | - ext4_lock_group(sb, i); |
---|
4159 | | - list_for_each(cur, &grp->bb_prealloc_list) { |
---|
4160 | | - pa = list_entry(cur, struct ext4_prealloc_space, |
---|
4161 | | - pa_group_list); |
---|
4162 | | - spin_lock(&pa->pa_lock); |
---|
4163 | | - ext4_get_group_no_and_offset(sb, pa->pa_pstart, |
---|
4164 | | - NULL, &start); |
---|
4165 | | - spin_unlock(&pa->pa_lock); |
---|
4166 | | - printk(KERN_ERR "PA:%u:%d:%u \n", i, |
---|
4167 | | - start, pa->pa_len); |
---|
4168 | | - } |
---|
4169 | | - ext4_unlock_group(sb, i); |
---|
4170 | | - |
---|
4171 | | - if (grp->bb_free == 0) |
---|
4172 | | - continue; |
---|
4173 | | - printk(KERN_ERR "%u: %d/%d \n", |
---|
4174 | | - i, grp->bb_free, grp->bb_fragments); |
---|
4175 | | - } |
---|
4176 | | - printk(KERN_ERR "\n"); |
---|
| 4673 | + mb_debug(sb, "%u found", ac->ac_found); |
---|
| 4674 | + ext4_mb_show_pa(sb); |
---|
4177 | 4675 | } |
---|
4178 | 4676 | #else |
---|
| 4677 | +static inline void ext4_mb_show_pa(struct super_block *sb) |
---|
| 4678 | +{ |
---|
| 4679 | + return; |
---|
| 4680 | +} |
---|
4179 | 4681 | static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
---|
4180 | 4682 | { |
---|
| 4683 | + ext4_mb_show_pa(ac->ac_sb); |
---|
4181 | 4684 | return; |
---|
4182 | 4685 | } |
---|
4183 | 4686 | #endif |
---|
.. | .. |
---|
4205 | 4708 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
---|
4206 | 4709 | >> bsbits; |
---|
4207 | 4710 | |
---|
4208 | | - if ((size == isize) && |
---|
4209 | | - !ext4_fs_is_busy(sbi) && |
---|
4210 | | - (atomic_read(&ac->ac_inode->i_writecount) == 0)) { |
---|
| 4711 | + if ((size == isize) && !ext4_fs_is_busy(sbi) && |
---|
| 4712 | + !inode_is_open_for_write(ac->ac_inode)) { |
---|
4211 | 4713 | ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; |
---|
4212 | 4714 | return; |
---|
4213 | 4715 | } |
---|
.. | .. |
---|
4277 | 4779 | ac->ac_g_ex = ac->ac_o_ex; |
---|
4278 | 4780 | ac->ac_flags = ar->flags; |
---|
4279 | 4781 | |
---|
4280 | | - /* we have to define context: we'll we work with a file or |
---|
| 4782 | + /* we have to define context: we'll work with a file or |
---|
4281 | 4783 | * locality group. this is a policy, actually */ |
---|
4282 | 4784 | ext4_mb_group_or_file(ac); |
---|
4283 | 4785 | |
---|
4284 | | - mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " |
---|
| 4786 | + mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, " |
---|
4285 | 4787 | "left: %u/%u, right %u/%u to %swritable\n", |
---|
4286 | 4788 | (unsigned) ar->len, (unsigned) ar->logical, |
---|
4287 | 4789 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, |
---|
4288 | 4790 | (unsigned) ar->lleft, (unsigned) ar->pleft, |
---|
4289 | 4791 | (unsigned) ar->lright, (unsigned) ar->pright, |
---|
4290 | | - atomic_read(&ar->inode->i_writecount) ? "" : "non-"); |
---|
| 4792 | + inode_is_open_for_write(ar->inode) ? "" : "non-"); |
---|
4291 | 4793 | return 0; |
---|
4292 | 4794 | |
---|
4293 | 4795 | } |
---|
.. | .. |
---|
4302 | 4804 | struct list_head discard_list; |
---|
4303 | 4805 | struct ext4_prealloc_space *pa, *tmp; |
---|
4304 | 4806 | |
---|
4305 | | - mb_debug(1, "discard locality group preallocation\n"); |
---|
| 4807 | + mb_debug(sb, "discard locality group preallocation\n"); |
---|
4306 | 4808 | |
---|
4307 | 4809 | INIT_LIST_HEAD(&discard_list); |
---|
4308 | 4810 | |
---|
4309 | 4811 | spin_lock(&lg->lg_prealloc_lock); |
---|
4310 | 4812 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
---|
4311 | | - pa_inode_list) { |
---|
| 4813 | + pa_inode_list, |
---|
| 4814 | + lockdep_is_held(&lg->lg_prealloc_lock)) { |
---|
4312 | 4815 | spin_lock(&pa->pa_lock); |
---|
4313 | 4816 | if (atomic_read(&pa->pa_count)) { |
---|
4314 | 4817 | /* |
---|
.. | .. |
---|
4327 | 4830 | BUG_ON(pa->pa_type != MB_GROUP_PA); |
---|
4328 | 4831 | |
---|
4329 | 4832 | /* seems this one can be freed ... */ |
---|
4330 | | - pa->pa_deleted = 1; |
---|
| 4833 | + ext4_mb_mark_pa_deleted(sb, pa); |
---|
4331 | 4834 | spin_unlock(&pa->pa_lock); |
---|
4332 | 4835 | |
---|
4333 | 4836 | list_del_rcu(&pa->pa_inode_list); |
---|
.. | .. |
---|
4353 | 4856 | err = ext4_mb_load_buddy_gfp(sb, group, &e4b, |
---|
4354 | 4857 | GFP_NOFS|__GFP_NOFAIL); |
---|
4355 | 4858 | if (err) { |
---|
4356 | | - ext4_error(sb, "Error %d loading buddy information for %u", |
---|
4357 | | - err, group); |
---|
| 4859 | + ext4_error_err(sb, -err, "Error %d loading buddy information for %u", |
---|
| 4860 | + err, group); |
---|
4358 | 4861 | continue; |
---|
4359 | 4862 | } |
---|
4360 | 4863 | ext4_lock_group(sb, group); |
---|
.. | .. |
---|
4391 | 4894 | /* Add the prealloc space to lg */ |
---|
4392 | 4895 | spin_lock(&lg->lg_prealloc_lock); |
---|
4393 | 4896 | list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], |
---|
4394 | | - pa_inode_list) { |
---|
| 4897 | + pa_inode_list, |
---|
| 4898 | + lockdep_is_held(&lg->lg_prealloc_lock)) { |
---|
4395 | 4899 | spin_lock(&tmp_pa->pa_lock); |
---|
4396 | 4900 | if (tmp_pa->pa_deleted) { |
---|
4397 | 4901 | spin_unlock(&tmp_pa->pa_lock); |
---|
.. | .. |
---|
4425 | 4929 | } |
---|
4426 | 4930 | |
---|
4427 | 4931 | /* |
---|
| 4932 | + * if per-inode prealloc list is too long, trim some PA |
---|
| 4933 | + */ |
---|
| 4934 | +static void ext4_mb_trim_inode_pa(struct inode *inode) |
---|
| 4935 | +{ |
---|
| 4936 | + struct ext4_inode_info *ei = EXT4_I(inode); |
---|
| 4937 | + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
---|
| 4938 | + int count, delta; |
---|
| 4939 | + |
---|
| 4940 | + count = atomic_read(&ei->i_prealloc_active); |
---|
| 4941 | + delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1; |
---|
| 4942 | + if (count > sbi->s_mb_max_inode_prealloc + delta) { |
---|
| 4943 | + count -= sbi->s_mb_max_inode_prealloc; |
---|
| 4944 | + ext4_discard_preallocations(inode, count); |
---|
| 4945 | + } |
---|
| 4946 | +} |
---|
| 4947 | + |
---|
| 4948 | +/* |
---|
4428 | 4949 | * release all resource we used in allocation |
---|
4429 | 4950 | */ |
---|
4430 | 4951 | static int ext4_mb_release_context(struct ext4_allocation_context *ac) |
---|
4431 | 4952 | { |
---|
| 4953 | + struct inode *inode = ac->ac_inode; |
---|
| 4954 | + struct ext4_inode_info *ei = EXT4_I(inode); |
---|
4432 | 4955 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
---|
4433 | 4956 | struct ext4_prealloc_space *pa = ac->ac_pa; |
---|
4434 | 4957 | if (pa) { |
---|
.. | .. |
---|
4440 | 4963 | pa->pa_free -= ac->ac_b_ex.fe_len; |
---|
4441 | 4964 | pa->pa_len -= ac->ac_b_ex.fe_len; |
---|
4442 | 4965 | spin_unlock(&pa->pa_lock); |
---|
| 4966 | + |
---|
| 4967 | + /* |
---|
| 4968 | + * We want to add the pa to the right bucket. |
---|
| 4969 | + * Remove it from the list and while adding |
---|
| 4970 | + * make sure the list to which we are adding |
---|
| 4971 | + * doesn't grow big. |
---|
| 4972 | + */ |
---|
| 4973 | + if (likely(pa->pa_free)) { |
---|
| 4974 | + spin_lock(pa->pa_obj_lock); |
---|
| 4975 | + list_del_rcu(&pa->pa_inode_list); |
---|
| 4976 | + spin_unlock(pa->pa_obj_lock); |
---|
| 4977 | + ext4_mb_add_n_trim(ac); |
---|
| 4978 | + } |
---|
4443 | 4979 | } |
---|
4444 | | - } |
---|
4445 | | - if (pa) { |
---|
4446 | | - /* |
---|
4447 | | - * We want to add the pa to the right bucket. |
---|
4448 | | - * Remove it from the list and while adding |
---|
4449 | | - * make sure the list to which we are adding |
---|
4450 | | - * doesn't grow big. |
---|
4451 | | - */ |
---|
4452 | | - if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { |
---|
| 4980 | + |
---|
| 4981 | + if (pa->pa_type == MB_INODE_PA) { |
---|
| 4982 | + /* |
---|
| 4983 | + * treat per-inode prealloc list as a lru list, then try |
---|
| 4984 | + * to trim the least recently used PA. |
---|
| 4985 | + */ |
---|
4453 | 4986 | spin_lock(pa->pa_obj_lock); |
---|
4454 | | - list_del_rcu(&pa->pa_inode_list); |
---|
| 4987 | + list_move(&pa->pa_inode_list, &ei->i_prealloc_list); |
---|
4455 | 4988 | spin_unlock(pa->pa_obj_lock); |
---|
4456 | | - ext4_mb_add_n_trim(ac); |
---|
4457 | 4989 | } |
---|
| 4990 | + |
---|
4458 | 4991 | ext4_mb_put_pa(ac, ac->ac_sb, pa); |
---|
4459 | 4992 | } |
---|
4460 | 4993 | if (ac->ac_bitmap_page) |
---|
.. | .. |
---|
4464 | 4997 | if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) |
---|
4465 | 4998 | mutex_unlock(&ac->ac_lg->lg_mutex); |
---|
4466 | 4999 | ext4_mb_collect_stats(ac); |
---|
| 5000 | + ext4_mb_trim_inode_pa(inode); |
---|
4467 | 5001 | return 0; |
---|
4468 | 5002 | } |
---|
4469 | 5003 | |
---|
.. | .. |
---|
4471 | 5005 | { |
---|
4472 | 5006 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
---|
4473 | 5007 | int ret; |
---|
4474 | | - int freed = 0; |
---|
| 5008 | + int freed = 0, busy = 0; |
---|
| 5009 | + int retry = 0; |
---|
4475 | 5010 | |
---|
4476 | 5011 | trace_ext4_mb_discard_preallocations(sb, needed); |
---|
| 5012 | + |
---|
| 5013 | + if (needed == 0) |
---|
| 5014 | + needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; |
---|
| 5015 | + repeat: |
---|
4477 | 5016 | for (i = 0; i < ngroups && needed > 0; i++) { |
---|
4478 | | - ret = ext4_mb_discard_group_preallocations(sb, i, needed); |
---|
| 5017 | + ret = ext4_mb_discard_group_preallocations(sb, i, &busy); |
---|
4479 | 5018 | freed += ret; |
---|
4480 | 5019 | needed -= ret; |
---|
| 5020 | + cond_resched(); |
---|
| 5021 | + } |
---|
| 5022 | + |
---|
| 5023 | + if (needed > 0 && busy && ++retry < 3) { |
---|
| 5024 | + busy = 0; |
---|
| 5025 | + goto repeat; |
---|
4481 | 5026 | } |
---|
4482 | 5027 | |
---|
4483 | 5028 | return freed; |
---|
4484 | 5029 | } |
---|
| 5030 | + |
---|
| 5031 | +static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, |
---|
| 5032 | + struct ext4_allocation_context *ac, u64 *seq) |
---|
| 5033 | +{ |
---|
| 5034 | + int freed; |
---|
| 5035 | + u64 seq_retry = 0; |
---|
| 5036 | + bool ret = false; |
---|
| 5037 | + |
---|
| 5038 | + freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); |
---|
| 5039 | + if (freed) { |
---|
| 5040 | + ret = true; |
---|
| 5041 | + goto out_dbg; |
---|
| 5042 | + } |
---|
| 5043 | + seq_retry = ext4_get_discard_pa_seq_sum(); |
---|
| 5044 | + if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) { |
---|
| 5045 | + ac->ac_flags |= EXT4_MB_STRICT_CHECK; |
---|
| 5046 | + *seq = seq_retry; |
---|
| 5047 | + ret = true; |
---|
| 5048 | + } |
---|
| 5049 | + |
---|
| 5050 | +out_dbg: |
---|
| 5051 | + mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no"); |
---|
| 5052 | + return ret; |
---|
| 5053 | +} |
---|
| 5054 | + |
---|
| 5055 | +static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, |
---|
| 5056 | + struct ext4_allocation_request *ar, int *errp); |
---|
4485 | 5057 | |
---|
4486 | 5058 | /* |
---|
4487 | 5059 | * Main entry point into mballoc to allocate blocks |
---|
.. | .. |
---|
4491 | 5063 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
---|
4492 | 5064 | struct ext4_allocation_request *ar, int *errp) |
---|
4493 | 5065 | { |
---|
4494 | | - int freed; |
---|
4495 | 5066 | struct ext4_allocation_context *ac = NULL; |
---|
4496 | 5067 | struct ext4_sb_info *sbi; |
---|
4497 | 5068 | struct super_block *sb; |
---|
4498 | 5069 | ext4_fsblk_t block = 0; |
---|
4499 | 5070 | unsigned int inquota = 0; |
---|
4500 | 5071 | unsigned int reserv_clstrs = 0; |
---|
| 5072 | + int retries = 0; |
---|
| 5073 | + u64 seq; |
---|
4501 | 5074 | |
---|
4502 | 5075 | might_sleep(); |
---|
4503 | 5076 | sb = ar->inode->i_sb; |
---|
4504 | 5077 | sbi = EXT4_SB(sb); |
---|
4505 | 5078 | |
---|
4506 | 5079 | trace_ext4_request_blocks(ar); |
---|
| 5080 | + if (sbi->s_mount_state & EXT4_FC_REPLAY) |
---|
| 5081 | + return ext4_mb_new_blocks_simple(handle, ar, errp); |
---|
4507 | 5082 | |
---|
4508 | 5083 | /* Allow to use superuser reservation for quota file */ |
---|
4509 | 5084 | if (ext4_is_quota_file(ar->inode)) |
---|
.. | .. |
---|
4522 | 5097 | ar->len = ar->len >> 1; |
---|
4523 | 5098 | } |
---|
4524 | 5099 | if (!ar->len) { |
---|
| 5100 | + ext4_mb_show_pa(sb); |
---|
4525 | 5101 | *errp = -ENOSPC; |
---|
4526 | 5102 | return 0; |
---|
4527 | 5103 | } |
---|
.. | .. |
---|
4559 | 5135 | } |
---|
4560 | 5136 | |
---|
4561 | 5137 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; |
---|
| 5138 | + seq = this_cpu_read(discard_pa_seq); |
---|
4562 | 5139 | if (!ext4_mb_use_preallocated(ac)) { |
---|
4563 | 5140 | ac->ac_op = EXT4_MB_HISTORY_ALLOC; |
---|
4564 | 5141 | ext4_mb_normalize_request(ac, ar); |
---|
| 5142 | + |
---|
| 5143 | + *errp = ext4_mb_pa_alloc(ac); |
---|
| 5144 | + if (*errp) |
---|
| 5145 | + goto errout; |
---|
4565 | 5146 | repeat: |
---|
4566 | 5147 | /* allocate space in core */ |
---|
4567 | 5148 | *errp = ext4_mb_regular_allocator(ac); |
---|
4568 | | - if (*errp) |
---|
4569 | | - goto discard_and_exit; |
---|
4570 | | - |
---|
4571 | | - /* as we've just preallocated more space than |
---|
4572 | | - * user requested originally, we store allocated |
---|
4573 | | - * space in a special descriptor */ |
---|
4574 | | - if (ac->ac_status == AC_STATUS_FOUND && |
---|
4575 | | - ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
---|
4576 | | - *errp = ext4_mb_new_preallocation(ac); |
---|
| 5149 | + /* |
---|
| 5150 | + * pa allocated above is added to grp->bb_prealloc_list only |
---|
| 5151 | + * when we were able to allocate some block i.e. when |
---|
| 5152 | + * ac->ac_status == AC_STATUS_FOUND. |
---|
| 5153 | + * And error from above mean ac->ac_status != AC_STATUS_FOUND |
---|
| 5154 | + * So we have to free this pa here itself. |
---|
| 5155 | + */ |
---|
4577 | 5156 | if (*errp) { |
---|
4578 | | - discard_and_exit: |
---|
| 5157 | + ext4_mb_pa_free(ac); |
---|
4579 | 5158 | ext4_discard_allocated_blocks(ac); |
---|
4580 | 5159 | goto errout; |
---|
4581 | 5160 | } |
---|
| 5161 | + if (ac->ac_status == AC_STATUS_FOUND && |
---|
| 5162 | + ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len) |
---|
| 5163 | + ext4_mb_pa_free(ac); |
---|
4582 | 5164 | } |
---|
4583 | 5165 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
---|
4584 | 5166 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
---|
.. | .. |
---|
4590 | 5172 | ar->len = ac->ac_b_ex.fe_len; |
---|
4591 | 5173 | } |
---|
4592 | 5174 | } else { |
---|
4593 | | - freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); |
---|
4594 | | - if (freed) |
---|
| 5175 | + if (++retries < 3 && |
---|
| 5176 | + ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) |
---|
4595 | 5177 | goto repeat; |
---|
| 5178 | + /* |
---|
| 5179 | + * If block allocation fails then the pa allocated above |
---|
| 5180 | + * needs to be freed here itself. |
---|
| 5181 | + */ |
---|
| 5182 | + ext4_mb_pa_free(ac); |
---|
4596 | 5183 | *errp = -ENOSPC; |
---|
4597 | 5184 | } |
---|
4598 | 5185 | |
---|
.. | .. |
---|
4721 | 5308 | return 0; |
---|
4722 | 5309 | } |
---|
4723 | 5310 | |
---|
| 5311 | +/* |
---|
| 5312 | + * Simple allocator for Ext4 fast commit replay path. It searches for blocks |
---|
| 5313 | + * linearly starting at the goal block and also excludes the blocks which |
---|
| 5314 | + * are going to be in use after fast commit replay. |
---|
| 5315 | + */ |
---|
| 5316 | +static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, |
---|
| 5317 | + struct ext4_allocation_request *ar, int *errp) |
---|
| 5318 | +{ |
---|
| 5319 | + struct buffer_head *bitmap_bh; |
---|
| 5320 | + struct super_block *sb = ar->inode->i_sb; |
---|
| 5321 | + ext4_group_t group; |
---|
| 5322 | + ext4_grpblk_t blkoff; |
---|
| 5323 | + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
---|
| 5324 | + ext4_grpblk_t i = 0; |
---|
| 5325 | + ext4_fsblk_t goal, block; |
---|
| 5326 | + struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
---|
| 5327 | + |
---|
| 5328 | + goal = ar->goal; |
---|
| 5329 | + if (goal < le32_to_cpu(es->s_first_data_block) || |
---|
| 5330 | + goal >= ext4_blocks_count(es)) |
---|
| 5331 | + goal = le32_to_cpu(es->s_first_data_block); |
---|
| 5332 | + |
---|
| 5333 | + ar->len = 0; |
---|
| 5334 | + ext4_get_group_no_and_offset(sb, goal, &group, &blkoff); |
---|
| 5335 | + for (; group < ext4_get_groups_count(sb); group++) { |
---|
| 5336 | + bitmap_bh = ext4_read_block_bitmap(sb, group); |
---|
| 5337 | + if (IS_ERR(bitmap_bh)) { |
---|
| 5338 | + *errp = PTR_ERR(bitmap_bh); |
---|
| 5339 | + pr_warn("Failed to read block bitmap\n"); |
---|
| 5340 | + return 0; |
---|
| 5341 | + } |
---|
| 5342 | + |
---|
| 5343 | + ext4_get_group_no_and_offset(sb, |
---|
| 5344 | + max(ext4_group_first_block_no(sb, group), goal), |
---|
| 5345 | + NULL, &blkoff); |
---|
| 5346 | + while (1) { |
---|
| 5347 | + i = mb_find_next_zero_bit(bitmap_bh->b_data, max, |
---|
| 5348 | + blkoff); |
---|
| 5349 | + if (i >= max) |
---|
| 5350 | + break; |
---|
| 5351 | + if (ext4_fc_replay_check_excluded(sb, |
---|
| 5352 | + ext4_group_first_block_no(sb, group) + i)) { |
---|
| 5353 | + blkoff = i + 1; |
---|
| 5354 | + } else |
---|
| 5355 | + break; |
---|
| 5356 | + } |
---|
| 5357 | + brelse(bitmap_bh); |
---|
| 5358 | + if (i < max) |
---|
| 5359 | + break; |
---|
| 5360 | + } |
---|
| 5361 | + |
---|
| 5362 | + if (group >= ext4_get_groups_count(sb) || i >= max) { |
---|
| 5363 | + *errp = -ENOSPC; |
---|
| 5364 | + return 0; |
---|
| 5365 | + } |
---|
| 5366 | + |
---|
| 5367 | + block = ext4_group_first_block_no(sb, group) + i; |
---|
| 5368 | + ext4_mb_mark_bb(sb, block, 1, 1); |
---|
| 5369 | + ar->len = 1; |
---|
| 5370 | + |
---|
| 5371 | + return block; |
---|
| 5372 | +} |
---|
| 5373 | + |
---|
| 5374 | +static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, |
---|
| 5375 | + unsigned long count) |
---|
| 5376 | +{ |
---|
| 5377 | + struct buffer_head *bitmap_bh; |
---|
| 5378 | + struct super_block *sb = inode->i_sb; |
---|
| 5379 | + struct ext4_group_desc *gdp; |
---|
| 5380 | + struct buffer_head *gdp_bh; |
---|
| 5381 | + ext4_group_t group; |
---|
| 5382 | + ext4_grpblk_t blkoff; |
---|
| 5383 | + int already_freed = 0, err, i; |
---|
| 5384 | + |
---|
| 5385 | + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); |
---|
| 5386 | + bitmap_bh = ext4_read_block_bitmap(sb, group); |
---|
| 5387 | + if (IS_ERR(bitmap_bh)) { |
---|
| 5388 | + err = PTR_ERR(bitmap_bh); |
---|
| 5389 | + pr_warn("Failed to read block bitmap\n"); |
---|
| 5390 | + return; |
---|
| 5391 | + } |
---|
| 5392 | + gdp = ext4_get_group_desc(sb, group, &gdp_bh); |
---|
| 5393 | + if (!gdp) |
---|
| 5394 | + return; |
---|
| 5395 | + |
---|
| 5396 | + for (i = 0; i < count; i++) { |
---|
| 5397 | + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data)) |
---|
| 5398 | + already_freed++; |
---|
| 5399 | + } |
---|
| 5400 | + mb_clear_bits(bitmap_bh->b_data, blkoff, count); |
---|
| 5401 | + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); |
---|
| 5402 | + if (err) |
---|
| 5403 | + return; |
---|
| 5404 | + ext4_free_group_clusters_set( |
---|
| 5405 | + sb, gdp, ext4_free_group_clusters(sb, gdp) + |
---|
| 5406 | + count - already_freed); |
---|
| 5407 | + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); |
---|
| 5408 | + ext4_group_desc_csum_set(sb, group, gdp); |
---|
| 5409 | + ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); |
---|
| 5410 | + sync_dirty_buffer(bitmap_bh); |
---|
| 5411 | + sync_dirty_buffer(gdp_bh); |
---|
| 5412 | + brelse(bitmap_bh); |
---|
| 5413 | +} |
---|
| 5414 | + |
---|
4724 | 5415 | /** |
---|
4725 | | - * ext4_free_blocks() -- Free given blocks and update quota |
---|
| 5416 | + * ext4_mb_clear_bb() -- helper function for freeing blocks. |
---|
| 5417 | + * Used by ext4_free_blocks() |
---|
4726 | 5418 | * @handle: handle for this transaction |
---|
4727 | 5419 | * @inode: inode |
---|
4728 | | - * @block: start physical block to free |
---|
4729 | | - * @count: number of blocks to count |
---|
| 5420 | + * @bh: optional buffer of the block to be freed |
---|
| 5421 | + * @block: starting physical block to be freed |
---|
| 5422 | + * @count: number of blocks to be freed |
---|
4730 | 5423 | * @flags: flags used by ext4_free_blocks |
---|
4731 | 5424 | */ |
---|
4732 | | -void ext4_free_blocks(handle_t *handle, struct inode *inode, |
---|
4733 | | - struct buffer_head *bh, ext4_fsblk_t block, |
---|
4734 | | - unsigned long count, int flags) |
---|
| 5425 | +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, |
---|
| 5426 | + ext4_fsblk_t block, unsigned long count, |
---|
| 5427 | + int flags) |
---|
4735 | 5428 | { |
---|
4736 | 5429 | struct buffer_head *bitmap_bh = NULL; |
---|
4737 | 5430 | struct super_block *sb = inode->i_sb; |
---|
4738 | 5431 | struct ext4_group_desc *gdp; |
---|
| 5432 | + struct ext4_group_info *grp; |
---|
4739 | 5433 | unsigned int overflow; |
---|
4740 | 5434 | ext4_grpblk_t bit; |
---|
4741 | 5435 | struct buffer_head *gd_bh; |
---|
.. | .. |
---|
4746 | 5440 | int err = 0; |
---|
4747 | 5441 | int ret; |
---|
4748 | 5442 | |
---|
4749 | | - might_sleep(); |
---|
4750 | | - if (bh) { |
---|
4751 | | - if (block) |
---|
4752 | | - BUG_ON(block != bh->b_blocknr); |
---|
4753 | | - else |
---|
4754 | | - block = bh->b_blocknr; |
---|
4755 | | - } |
---|
4756 | | - |
---|
4757 | 5443 | sbi = EXT4_SB(sb); |
---|
| 5444 | + |
---|
4758 | 5445 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
---|
4759 | 5446 | !ext4_inode_block_valid(inode, block, count)) { |
---|
4760 | | - ext4_error(sb, "Freeing blocks not in datazone - " |
---|
4761 | | - "block = %llu, count = %lu", block, count); |
---|
| 5447 | + ext4_error(sb, "Freeing blocks in system zone - " |
---|
| 5448 | + "Block = %llu, count = %lu", block, count); |
---|
| 5449 | + /* err = 0. ext4_std_error should be a no op */ |
---|
4762 | 5450 | goto error_return; |
---|
4763 | 5451 | } |
---|
4764 | | - |
---|
4765 | | - ext4_debug("freeing block %llu\n", block); |
---|
4766 | | - trace_ext4_free_blocks(inode, block, count, flags); |
---|
4767 | | - |
---|
4768 | | - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
4769 | | - BUG_ON(count > 1); |
---|
4770 | | - |
---|
4771 | | - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
---|
4772 | | - inode, bh, block); |
---|
4773 | | - } |
---|
4774 | | - |
---|
4775 | | - /* |
---|
4776 | | - * If the extent to be freed does not begin on a cluster |
---|
4777 | | - * boundary, we need to deal with partial clusters at the |
---|
4778 | | - * beginning and end of the extent. Normally we will free |
---|
4779 | | - * blocks at the beginning or the end unless we are explicitly |
---|
4780 | | - * requested to avoid doing so. |
---|
4781 | | - */ |
---|
4782 | | - overflow = EXT4_PBLK_COFF(sbi, block); |
---|
4783 | | - if (overflow) { |
---|
4784 | | - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
---|
4785 | | - overflow = sbi->s_cluster_ratio - overflow; |
---|
4786 | | - block += overflow; |
---|
4787 | | - if (count > overflow) |
---|
4788 | | - count -= overflow; |
---|
4789 | | - else |
---|
4790 | | - return; |
---|
4791 | | - } else { |
---|
4792 | | - block -= overflow; |
---|
4793 | | - count += overflow; |
---|
4794 | | - } |
---|
4795 | | - } |
---|
4796 | | - overflow = EXT4_LBLK_COFF(sbi, count); |
---|
4797 | | - if (overflow) { |
---|
4798 | | - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
---|
4799 | | - if (count > overflow) |
---|
4800 | | - count -= overflow; |
---|
4801 | | - else |
---|
4802 | | - return; |
---|
4803 | | - } else |
---|
4804 | | - count += sbi->s_cluster_ratio - overflow; |
---|
4805 | | - } |
---|
4806 | | - |
---|
4807 | | - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
4808 | | - int i; |
---|
4809 | | - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; |
---|
4810 | | - |
---|
4811 | | - for (i = 0; i < count; i++) { |
---|
4812 | | - cond_resched(); |
---|
4813 | | - if (is_metadata) |
---|
4814 | | - bh = sb_find_get_block(inode->i_sb, block + i); |
---|
4815 | | - ext4_forget(handle, is_metadata, inode, bh, block + i); |
---|
4816 | | - } |
---|
4817 | | - } |
---|
| 5452 | + flags |= EXT4_FREE_BLOCKS_VALIDATED; |
---|
4818 | 5453 | |
---|
4819 | 5454 | do_more: |
---|
4820 | 5455 | overflow = 0; |
---|
4821 | 5456 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
---|
4822 | 5457 | |
---|
4823 | | - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( |
---|
4824 | | - ext4_get_group_info(sb, block_group)))) |
---|
| 5458 | + grp = ext4_get_group_info(sb, block_group); |
---|
| 5459 | + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
4825 | 5460 | return; |
---|
4826 | 5461 | |
---|
4827 | 5462 | /* |
---|
.. | .. |
---|
4832 | 5467 | overflow = EXT4_C2B(sbi, bit) + count - |
---|
4833 | 5468 | EXT4_BLOCKS_PER_GROUP(sb); |
---|
4834 | 5469 | count -= overflow; |
---|
| 5470 | + /* The range changed so it's no longer validated */ |
---|
| 5471 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
4835 | 5472 | } |
---|
4836 | 5473 | count_clusters = EXT4_NUM_B2C(sbi, count); |
---|
4837 | 5474 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
---|
.. | .. |
---|
4846 | 5483 | goto error_return; |
---|
4847 | 5484 | } |
---|
4848 | 5485 | |
---|
4849 | | - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || |
---|
4850 | | - in_range(ext4_inode_bitmap(sb, gdp), block, count) || |
---|
4851 | | - in_range(block, ext4_inode_table(sb, gdp), |
---|
4852 | | - sbi->s_itb_per_group) || |
---|
4853 | | - in_range(block + count - 1, ext4_inode_table(sb, gdp), |
---|
4854 | | - sbi->s_itb_per_group)) { |
---|
4855 | | - |
---|
| 5486 | + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
---|
| 5487 | + !ext4_inode_block_valid(inode, block, count)) { |
---|
4856 | 5488 | ext4_error(sb, "Freeing blocks in system zone - " |
---|
4857 | 5489 | "Block = %llu, count = %lu", block, count); |
---|
4858 | 5490 | /* err = 0. ext4_std_error should be a no op */ |
---|
.. | .. |
---|
4918 | 5550 | * them with group lock_held |
---|
4919 | 5551 | */ |
---|
4920 | 5552 | if (test_opt(sb, DISCARD)) { |
---|
4921 | | - err = ext4_issue_discard(sb, block_group, bit, count, |
---|
4922 | | - NULL); |
---|
| 5553 | + err = ext4_issue_discard(sb, block_group, bit, |
---|
| 5554 | + count_clusters, NULL); |
---|
4923 | 5555 | if (err && err != -EOPNOTSUPP) |
---|
4924 | 5556 | ext4_msg(sb, KERN_WARNING, "discard request in" |
---|
4925 | | - " group:%d block:%d count:%lu failed" |
---|
| 5557 | + " group:%u block:%d count:%lu failed" |
---|
4926 | 5558 | " with %d", block_group, bit, count, |
---|
4927 | 5559 | err); |
---|
4928 | 5560 | } else |
---|
.. | .. |
---|
4946 | 5578 | flex_group)->free_clusters); |
---|
4947 | 5579 | } |
---|
4948 | 5580 | |
---|
4949 | | - if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) |
---|
4950 | | - dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); |
---|
4951 | | - percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); |
---|
| 5581 | + /* |
---|
| 5582 | + * on a bigalloc file system, defer the s_freeclusters_counter |
---|
| 5583 | + * update to the caller (ext4_remove_space and friends) so they |
---|
| 5584 | + * can determine if a cluster freed here should be rereserved |
---|
| 5585 | + */ |
---|
| 5586 | + if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) { |
---|
| 5587 | + if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) |
---|
| 5588 | + dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); |
---|
| 5589 | + percpu_counter_add(&sbi->s_freeclusters_counter, |
---|
| 5590 | + count_clusters); |
---|
| 5591 | + } |
---|
4952 | 5592 | |
---|
4953 | 5593 | ext4_mb_unload_buddy(&e4b); |
---|
4954 | 5594 | |
---|
.. | .. |
---|
4966 | 5606 | block += count; |
---|
4967 | 5607 | count = overflow; |
---|
4968 | 5608 | put_bh(bitmap_bh); |
---|
| 5609 | + /* The range changed so it's no longer validated */ |
---|
| 5610 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
4969 | 5611 | goto do_more; |
---|
4970 | 5612 | } |
---|
4971 | 5613 | error_return: |
---|
4972 | 5614 | brelse(bitmap_bh); |
---|
4973 | 5615 | ext4_std_error(sb, err); |
---|
| 5616 | + return; |
---|
| 5617 | +} |
---|
| 5618 | + |
---|
| 5619 | +/** |
---|
| 5620 | + * ext4_free_blocks() -- Free given blocks and update quota |
---|
| 5621 | + * @handle: handle for this transaction |
---|
| 5622 | + * @inode: inode |
---|
| 5623 | + * @bh: optional buffer of the block to be freed |
---|
| 5624 | + * @block: starting physical block to be freed |
---|
| 5625 | + * @count: number of blocks to be freed |
---|
| 5626 | + * @flags: flags used by ext4_free_blocks |
---|
| 5627 | + */ |
---|
| 5628 | +void ext4_free_blocks(handle_t *handle, struct inode *inode, |
---|
| 5629 | + struct buffer_head *bh, ext4_fsblk_t block, |
---|
| 5630 | + unsigned long count, int flags) |
---|
| 5631 | +{ |
---|
| 5632 | + struct super_block *sb = inode->i_sb; |
---|
| 5633 | + unsigned int overflow; |
---|
| 5634 | + struct ext4_sb_info *sbi; |
---|
| 5635 | + |
---|
| 5636 | + sbi = EXT4_SB(sb); |
---|
| 5637 | + |
---|
| 5638 | + if (bh) { |
---|
| 5639 | + if (block) |
---|
| 5640 | + BUG_ON(block != bh->b_blocknr); |
---|
| 5641 | + else |
---|
| 5642 | + block = bh->b_blocknr; |
---|
| 5643 | + } |
---|
| 5644 | + |
---|
| 5645 | + if (sbi->s_mount_state & EXT4_FC_REPLAY) { |
---|
| 5646 | + ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count)); |
---|
| 5647 | + return; |
---|
| 5648 | + } |
---|
| 5649 | + |
---|
| 5650 | + might_sleep(); |
---|
| 5651 | + |
---|
| 5652 | + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
---|
| 5653 | + !ext4_inode_block_valid(inode, block, count)) { |
---|
| 5654 | + ext4_error(sb, "Freeing blocks not in datazone - " |
---|
| 5655 | + "block = %llu, count = %lu", block, count); |
---|
| 5656 | + return; |
---|
| 5657 | + } |
---|
| 5658 | + flags |= EXT4_FREE_BLOCKS_VALIDATED; |
---|
| 5659 | + |
---|
| 5660 | + ext4_debug("freeing block %llu\n", block); |
---|
| 5661 | + trace_ext4_free_blocks(inode, block, count, flags); |
---|
| 5662 | + |
---|
| 5663 | + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
| 5664 | + BUG_ON(count > 1); |
---|
| 5665 | + |
---|
| 5666 | + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
---|
| 5667 | + inode, bh, block); |
---|
| 5668 | + } |
---|
| 5669 | + |
---|
| 5670 | + /* |
---|
| 5671 | + * If the extent to be freed does not begin on a cluster |
---|
| 5672 | + * boundary, we need to deal with partial clusters at the |
---|
| 5673 | + * beginning and end of the extent. Normally we will free |
---|
| 5674 | + * blocks at the beginning or the end unless we are explicitly |
---|
| 5675 | + * requested to avoid doing so. |
---|
| 5676 | + */ |
---|
| 5677 | + overflow = EXT4_PBLK_COFF(sbi, block); |
---|
| 5678 | + if (overflow) { |
---|
| 5679 | + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
---|
| 5680 | + overflow = sbi->s_cluster_ratio - overflow; |
---|
| 5681 | + block += overflow; |
---|
| 5682 | + if (count > overflow) |
---|
| 5683 | + count -= overflow; |
---|
| 5684 | + else |
---|
| 5685 | + return; |
---|
| 5686 | + } else { |
---|
| 5687 | + block -= overflow; |
---|
| 5688 | + count += overflow; |
---|
| 5689 | + } |
---|
| 5690 | + /* The range changed so it's no longer validated */ |
---|
| 5691 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
| 5692 | + } |
---|
| 5693 | + overflow = EXT4_LBLK_COFF(sbi, count); |
---|
| 5694 | + if (overflow) { |
---|
| 5695 | + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
---|
| 5696 | + if (count > overflow) |
---|
| 5697 | + count -= overflow; |
---|
| 5698 | + else |
---|
| 5699 | + return; |
---|
| 5700 | + } else |
---|
| 5701 | + count += sbi->s_cluster_ratio - overflow; |
---|
| 5702 | + /* The range changed so it's no longer validated */ |
---|
| 5703 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
| 5704 | + } |
---|
| 5705 | + |
---|
| 5706 | + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
| 5707 | + int i; |
---|
| 5708 | + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; |
---|
| 5709 | + |
---|
| 5710 | + for (i = 0; i < count; i++) { |
---|
| 5711 | + cond_resched(); |
---|
| 5712 | + if (is_metadata) |
---|
| 5713 | + bh = sb_find_get_block(inode->i_sb, block + i); |
---|
| 5714 | + ext4_forget(handle, is_metadata, inode, bh, block + i); |
---|
| 5715 | + } |
---|
| 5716 | + } |
---|
| 5717 | + |
---|
| 5718 | + ext4_mb_clear_bb(handle, inode, block, count, flags); |
---|
4974 | 5719 | return; |
---|
4975 | 5720 | } |
---|
4976 | 5721 | |
---|
.. | .. |
---|
5030 | 5775 | goto error_return; |
---|
5031 | 5776 | } |
---|
5032 | 5777 | |
---|
5033 | | - if (in_range(ext4_block_bitmap(sb, desc), block, count) || |
---|
5034 | | - in_range(ext4_inode_bitmap(sb, desc), block, count) || |
---|
5035 | | - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
---|
5036 | | - in_range(block + count - 1, ext4_inode_table(sb, desc), |
---|
5037 | | - sbi->s_itb_per_group)) { |
---|
| 5778 | + if (!ext4_sb_block_valid(sb, NULL, block, count)) { |
---|
5038 | 5779 | ext4_error(sb, "Adding blocks in system zones - " |
---|
5039 | 5780 | "Block = %llu, count = %lu", |
---|
5040 | 5781 | block, count); |
---|
.. | .. |
---|
5119 | 5860 | * @sb: super block for the file system |
---|
5120 | 5861 | * @start: starting block of the free extent in the alloc. group |
---|
5121 | 5862 | * @count: number of blocks to TRIM |
---|
5122 | | - * @group: alloc. group we are working with |
---|
5123 | 5863 | * @e4b: ext4 buddy for the group |
---|
5124 | 5864 | * |
---|
5125 | 5865 | * Trim "count" blocks starting at "start" in the "group". To assure that no |
---|
5126 | 5866 | * one will allocate those blocks, mark it as used in buddy bitmap. This must |
---|
5127 | 5867 | * be called with under the group lock. |
---|
5128 | 5868 | */ |
---|
5129 | | -static int ext4_trim_extent(struct super_block *sb, int start, int count, |
---|
5130 | | - ext4_group_t group, struct ext4_buddy *e4b) |
---|
| 5869 | +static int ext4_trim_extent(struct super_block *sb, |
---|
| 5870 | + int start, int count, struct ext4_buddy *e4b) |
---|
5131 | 5871 | __releases(bitlock) |
---|
5132 | 5872 | __acquires(bitlock) |
---|
5133 | 5873 | { |
---|
5134 | 5874 | struct ext4_free_extent ex; |
---|
| 5875 | + ext4_group_t group = e4b->bd_group; |
---|
5135 | 5876 | int ret = 0; |
---|
5136 | 5877 | |
---|
5137 | 5878 | trace_ext4_trim_extent(sb, group, start, count); |
---|
.. | .. |
---|
5152 | 5893 | ext4_lock_group(sb, group); |
---|
5153 | 5894 | mb_free_blocks(NULL, e4b, start, ex.fe_len); |
---|
5154 | 5895 | return ret; |
---|
| 5896 | +} |
---|
| 5897 | + |
---|
| 5898 | +static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, |
---|
| 5899 | + ext4_group_t grp) |
---|
| 5900 | +{ |
---|
| 5901 | + if (grp < ext4_get_groups_count(sb)) |
---|
| 5902 | + return EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
---|
| 5903 | + return (ext4_blocks_count(EXT4_SB(sb)->s_es) - |
---|
| 5904 | + ext4_group_first_block_no(sb, grp) - 1) >> |
---|
| 5905 | + EXT4_CLUSTER_BITS(sb); |
---|
| 5906 | +} |
---|
| 5907 | + |
---|
| 5908 | +static bool ext4_trim_interrupted(void) |
---|
| 5909 | +{ |
---|
| 5910 | + return fatal_signal_pending(current) || freezing(current); |
---|
| 5911 | +} |
---|
| 5912 | + |
---|
| 5913 | +static int ext4_try_to_trim_range(struct super_block *sb, |
---|
| 5914 | + struct ext4_buddy *e4b, ext4_grpblk_t start, |
---|
| 5915 | + ext4_grpblk_t max, ext4_grpblk_t minblocks) |
---|
| 5916 | +{ |
---|
| 5917 | + ext4_grpblk_t next, count, free_count; |
---|
| 5918 | + bool set_trimmed = false; |
---|
| 5919 | + void *bitmap; |
---|
| 5920 | + |
---|
| 5921 | + bitmap = e4b->bd_bitmap; |
---|
| 5922 | + if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) |
---|
| 5923 | + set_trimmed = true; |
---|
| 5924 | + start = max(e4b->bd_info->bb_first_free, start); |
---|
| 5925 | + count = 0; |
---|
| 5926 | + free_count = 0; |
---|
| 5927 | + |
---|
| 5928 | + while (start <= max) { |
---|
| 5929 | + start = mb_find_next_zero_bit(bitmap, max + 1, start); |
---|
| 5930 | + if (start > max) |
---|
| 5931 | + break; |
---|
| 5932 | + next = mb_find_next_bit(bitmap, max + 1, start); |
---|
| 5933 | + |
---|
| 5934 | + if ((next - start) >= minblocks) { |
---|
| 5935 | + int ret = ext4_trim_extent(sb, start, next - start, e4b); |
---|
| 5936 | + |
---|
| 5937 | + if (ret && ret != -EOPNOTSUPP) |
---|
| 5938 | + return count; |
---|
| 5939 | + count += next - start; |
---|
| 5940 | + } |
---|
| 5941 | + free_count += next - start; |
---|
| 5942 | + start = next + 1; |
---|
| 5943 | + |
---|
| 5944 | + if (ext4_trim_interrupted()) |
---|
| 5945 | + return count; |
---|
| 5946 | + |
---|
| 5947 | + if (need_resched()) { |
---|
| 5948 | + ext4_unlock_group(sb, e4b->bd_group); |
---|
| 5949 | + cond_resched(); |
---|
| 5950 | + ext4_lock_group(sb, e4b->bd_group); |
---|
| 5951 | + } |
---|
| 5952 | + |
---|
| 5953 | + if ((e4b->bd_info->bb_free - free_count) < minblocks) |
---|
| 5954 | + break; |
---|
| 5955 | + } |
---|
| 5956 | + |
---|
| 5957 | + if (set_trimmed) |
---|
| 5958 | + EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); |
---|
| 5959 | + |
---|
| 5960 | + return count; |
---|
5155 | 5961 | } |
---|
5156 | 5962 | |
---|
5157 | 5963 | /** |
---|
.. | .. |
---|
5177 | 5983 | ext4_grpblk_t start, ext4_grpblk_t max, |
---|
5178 | 5984 | ext4_grpblk_t minblocks) |
---|
5179 | 5985 | { |
---|
5180 | | - void *bitmap; |
---|
5181 | | - ext4_grpblk_t next, count = 0, free_count = 0; |
---|
5182 | 5986 | struct ext4_buddy e4b; |
---|
5183 | | - int ret = 0; |
---|
| 5987 | + int ret; |
---|
5184 | 5988 | |
---|
5185 | 5989 | trace_ext4_trim_all_free(sb, group, start, max); |
---|
5186 | 5990 | |
---|
.. | .. |
---|
5190 | 5994 | ret, group); |
---|
5191 | 5995 | return ret; |
---|
5192 | 5996 | } |
---|
5193 | | - bitmap = e4b.bd_bitmap; |
---|
5194 | 5997 | |
---|
5195 | 5998 | ext4_lock_group(sb, group); |
---|
5196 | | - if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && |
---|
5197 | | - minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) |
---|
5198 | | - goto out; |
---|
5199 | 5999 | |
---|
5200 | | - start = (e4b.bd_info->bb_first_free > start) ? |
---|
5201 | | - e4b.bd_info->bb_first_free : start; |
---|
| 6000 | + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || |
---|
| 6001 | + minblocks < EXT4_SB(sb)->s_last_trim_minblks) |
---|
| 6002 | + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); |
---|
| 6003 | + else |
---|
| 6004 | + ret = 0; |
---|
5202 | 6005 | |
---|
5203 | | - while (start <= max) { |
---|
5204 | | - start = mb_find_next_zero_bit(bitmap, max + 1, start); |
---|
5205 | | - if (start > max) |
---|
5206 | | - break; |
---|
5207 | | - next = mb_find_next_bit(bitmap, max + 1, start); |
---|
5208 | | - |
---|
5209 | | - if ((next - start) >= minblocks) { |
---|
5210 | | - ret = ext4_trim_extent(sb, start, |
---|
5211 | | - next - start, group, &e4b); |
---|
5212 | | - if (ret && ret != -EOPNOTSUPP) |
---|
5213 | | - break; |
---|
5214 | | - ret = 0; |
---|
5215 | | - count += next - start; |
---|
5216 | | - } |
---|
5217 | | - free_count += next - start; |
---|
5218 | | - start = next + 1; |
---|
5219 | | - |
---|
5220 | | - if (fatal_signal_pending(current)) { |
---|
5221 | | - count = -ERESTARTSYS; |
---|
5222 | | - break; |
---|
5223 | | - } |
---|
5224 | | - |
---|
5225 | | - if (need_resched()) { |
---|
5226 | | - ext4_unlock_group(sb, group); |
---|
5227 | | - cond_resched(); |
---|
5228 | | - ext4_lock_group(sb, group); |
---|
5229 | | - } |
---|
5230 | | - |
---|
5231 | | - if ((e4b.bd_info->bb_free - free_count) < minblocks) |
---|
5232 | | - break; |
---|
5233 | | - } |
---|
5234 | | - |
---|
5235 | | - if (!ret) { |
---|
5236 | | - ret = count; |
---|
5237 | | - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); |
---|
5238 | | - } |
---|
5239 | | -out: |
---|
5240 | 6006 | ext4_unlock_group(sb, group); |
---|
5241 | 6007 | ext4_mb_unload_buddy(&e4b); |
---|
5242 | 6008 | |
---|
5243 | 6009 | ext4_debug("trimmed %d blocks in the group %d\n", |
---|
5244 | | - count, group); |
---|
| 6010 | + ret, group); |
---|
5245 | 6011 | |
---|
5246 | 6012 | return ret; |
---|
5247 | 6013 | } |
---|
.. | .. |
---|
5286 | 6052 | if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) |
---|
5287 | 6053 | goto out; |
---|
5288 | 6054 | } |
---|
5289 | | - if (end >= max_blks) |
---|
| 6055 | + if (end >= max_blks - 1) |
---|
5290 | 6056 | end = max_blks - 1; |
---|
5291 | 6057 | if (end <= first_data_blk) |
---|
5292 | 6058 | goto out; |
---|
.. | .. |
---|
5303 | 6069 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
---|
5304 | 6070 | |
---|
5305 | 6071 | for (group = first_group; group <= last_group; group++) { |
---|
| 6072 | + if (ext4_trim_interrupted()) |
---|
| 6073 | + break; |
---|
5306 | 6074 | grp = ext4_get_group_info(sb, group); |
---|
| 6075 | + if (!grp) |
---|
| 6076 | + continue; |
---|
5307 | 6077 | /* We only do this if the grp has never been initialized */ |
---|
5308 | 6078 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
---|
5309 | 6079 | ret = ext4_mb_init_group(sb, group, GFP_NOFS); |
---|
.. | .. |
---|
5319 | 6089 | */ |
---|
5320 | 6090 | if (group == last_group) |
---|
5321 | 6091 | end = last_cluster; |
---|
5322 | | - |
---|
5323 | 6092 | if (grp->bb_free >= minlen) { |
---|
5324 | 6093 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
---|
5325 | | - end, minlen); |
---|
| 6094 | + end, minlen); |
---|
5326 | 6095 | if (cnt < 0) { |
---|
5327 | 6096 | ret = cnt; |
---|
5328 | 6097 | break; |
---|
.. | .. |
---|
5338 | 6107 | } |
---|
5339 | 6108 | |
---|
5340 | 6109 | if (!ret) |
---|
5341 | | - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
---|
| 6110 | + EXT4_SB(sb)->s_last_trim_minblks = minlen; |
---|
5342 | 6111 | |
---|
5343 | 6112 | out: |
---|
5344 | 6113 | range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; |
---|
.. | .. |
---|
5367 | 6136 | |
---|
5368 | 6137 | ext4_lock_group(sb, group); |
---|
5369 | 6138 | |
---|
5370 | | - start = (e4b.bd_info->bb_first_free > start) ? |
---|
5371 | | - e4b.bd_info->bb_first_free : start; |
---|
| 6139 | + start = max(e4b.bd_info->bb_first_free, start); |
---|
5372 | 6140 | if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) |
---|
5373 | 6141 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
---|
5374 | 6142 | |
---|