.. | .. |
---|
16 | 16 | #include <linux/slab.h> |
---|
17 | 17 | #include <linux/nospec.h> |
---|
18 | 18 | #include <linux/backing-dev.h> |
---|
| 19 | +#include <linux/freezer.h> |
---|
19 | 20 | #include <trace/events/ext4.h> |
---|
20 | 21 | |
---|
21 | 22 | /* |
---|
.. | .. |
---|
684 | 685 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
---|
685 | 686 | |
---|
686 | 687 | grp = ext4_get_group_info(sb, e4b->bd_group); |
---|
| 688 | + if (!grp) |
---|
| 689 | + return NULL; |
---|
687 | 690 | list_for_each(cur, &grp->bb_prealloc_list) { |
---|
688 | 691 | ext4_group_t groupnr; |
---|
689 | 692 | struct ext4_prealloc_space *pa; |
---|
.. | .. |
---|
767 | 770 | |
---|
768 | 771 | static noinline_for_stack |
---|
769 | 772 | void ext4_mb_generate_buddy(struct super_block *sb, |
---|
770 | | - void *buddy, void *bitmap, ext4_group_t group) |
---|
| 773 | + void *buddy, void *bitmap, ext4_group_t group, |
---|
| 774 | + struct ext4_group_info *grp) |
---|
771 | 775 | { |
---|
772 | | - struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
---|
773 | 776 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
774 | 777 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
---|
775 | 778 | ext4_grpblk_t i = 0; |
---|
.. | .. |
---|
816 | 819 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
---|
817 | 820 | |
---|
818 | 821 | period = get_cycles() - period; |
---|
819 | | - spin_lock(&sbi->s_bal_lock); |
---|
820 | | - sbi->s_mb_buddies_generated++; |
---|
821 | | - sbi->s_mb_generation_time += period; |
---|
822 | | - spin_unlock(&sbi->s_bal_lock); |
---|
823 | | -} |
---|
824 | | - |
---|
825 | | -static void mb_regenerate_buddy(struct ext4_buddy *e4b) |
---|
826 | | -{ |
---|
827 | | - int count; |
---|
828 | | - int order = 1; |
---|
829 | | - void *buddy; |
---|
830 | | - |
---|
831 | | - while ((buddy = mb_find_buddy(e4b, order++, &count))) { |
---|
832 | | - ext4_set_bits(buddy, 0, count); |
---|
833 | | - } |
---|
834 | | - e4b->bd_info->bb_fragments = 0; |
---|
835 | | - memset(e4b->bd_info->bb_counters, 0, |
---|
836 | | - sizeof(*e4b->bd_info->bb_counters) * |
---|
837 | | - (e4b->bd_sb->s_blocksize_bits + 2)); |
---|
838 | | - |
---|
839 | | - ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, |
---|
840 | | - e4b->bd_bitmap, e4b->bd_group); |
---|
| 822 | + atomic_inc(&sbi->s_mb_buddies_generated); |
---|
| 823 | + atomic64_add(period, &sbi->s_mb_generation_time); |
---|
841 | 824 | } |
---|
842 | 825 | |
---|
843 | 826 | /* The buddy information is attached the buddy cache inode |
---|
.. | .. |
---|
909 | 892 | break; |
---|
910 | 893 | |
---|
911 | 894 | grinfo = ext4_get_group_info(sb, group); |
---|
| 895 | + if (!grinfo) |
---|
| 896 | + continue; |
---|
912 | 897 | /* |
---|
913 | 898 | * If page is uptodate then we came here after online resize |
---|
914 | 899 | * which added some new uninitialized group info structs, so |
---|
.. | .. |
---|
974 | 959 | group, page->index, i * blocksize); |
---|
975 | 960 | trace_ext4_mb_buddy_bitmap_load(sb, group); |
---|
976 | 961 | grinfo = ext4_get_group_info(sb, group); |
---|
| 962 | + if (!grinfo) { |
---|
| 963 | + err = -EFSCORRUPTED; |
---|
| 964 | + goto out; |
---|
| 965 | + } |
---|
977 | 966 | grinfo->bb_fragments = 0; |
---|
978 | 967 | memset(grinfo->bb_counters, 0, |
---|
979 | 968 | sizeof(*grinfo->bb_counters) * |
---|
.. | .. |
---|
984 | 973 | ext4_lock_group(sb, group); |
---|
985 | 974 | /* init the buddy */ |
---|
986 | 975 | memset(data, 0xff, blocksize); |
---|
987 | | - ext4_mb_generate_buddy(sb, data, incore, group); |
---|
| 976 | + ext4_mb_generate_buddy(sb, data, incore, group, grinfo); |
---|
988 | 977 | ext4_unlock_group(sb, group); |
---|
989 | 978 | incore = NULL; |
---|
990 | 979 | } else { |
---|
.. | .. |
---|
1098 | 1087 | might_sleep(); |
---|
1099 | 1088 | mb_debug(sb, "init group %u\n", group); |
---|
1100 | 1089 | this_grp = ext4_get_group_info(sb, group); |
---|
| 1090 | + if (!this_grp) |
---|
| 1091 | + return -EFSCORRUPTED; |
---|
| 1092 | + |
---|
1101 | 1093 | /* |
---|
1102 | 1094 | * This ensures that we don't reinit the buddy cache |
---|
1103 | 1095 | * page which map to the group from which we are already |
---|
.. | .. |
---|
1172 | 1164 | |
---|
1173 | 1165 | blocks_per_page = PAGE_SIZE / sb->s_blocksize; |
---|
1174 | 1166 | grp = ext4_get_group_info(sb, group); |
---|
| 1167 | + if (!grp) |
---|
| 1168 | + return -EFSCORRUPTED; |
---|
1175 | 1169 | |
---|
1176 | 1170 | e4b->bd_blkbits = sb->s_blocksize_bits; |
---|
1177 | 1171 | e4b->bd_info = grp; |
---|
.. | .. |
---|
1512 | 1506 | sb, e4b->bd_group, |
---|
1513 | 1507 | EXT4_GROUP_INFO_BBITMAP_CORRUPT); |
---|
1514 | 1508 | } |
---|
1515 | | - mb_regenerate_buddy(e4b); |
---|
1516 | 1509 | goto done; |
---|
1517 | 1510 | } |
---|
1518 | 1511 | |
---|
.. | .. |
---|
1885 | 1878 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
---|
1886 | 1879 | struct ext4_free_extent ex; |
---|
1887 | 1880 | |
---|
1888 | | - if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) |
---|
| 1881 | + if (!grp) |
---|
| 1882 | + return -EFSCORRUPTED; |
---|
| 1883 | + if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) |
---|
1889 | 1884 | return 0; |
---|
1890 | 1885 | if (grp->bb_free == 0) |
---|
1891 | 1886 | return 0; |
---|
.. | .. |
---|
2109 | 2104 | |
---|
2110 | 2105 | BUG_ON(cr < 0 || cr >= 4); |
---|
2111 | 2106 | |
---|
2112 | | - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
| 2107 | + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
2113 | 2108 | return false; |
---|
2114 | 2109 | |
---|
2115 | 2110 | free = grp->bb_free; |
---|
.. | .. |
---|
2172 | 2167 | ext4_grpblk_t free; |
---|
2173 | 2168 | int ret = 0; |
---|
2174 | 2169 | |
---|
| 2170 | + if (!grp) |
---|
| 2171 | + return -EFSCORRUPTED; |
---|
| 2172 | + if (sbi->s_mb_stats) |
---|
| 2173 | + atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); |
---|
2175 | 2174 | if (should_lock) |
---|
2176 | 2175 | ext4_lock_group(sb, group); |
---|
2177 | 2176 | free = grp->bb_free; |
---|
.. | .. |
---|
2242 | 2241 | * prefetch once, so we avoid getblk() call, which can |
---|
2243 | 2242 | * be expensive. |
---|
2244 | 2243 | */ |
---|
2245 | | - if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && |
---|
| 2244 | + if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && |
---|
2246 | 2245 | EXT4_MB_GRP_NEED_INIT(grp) && |
---|
2247 | 2246 | ext4_free_group_clusters(sb, gdp) > 0 && |
---|
2248 | 2247 | !(ext4_has_group_desc_csum(sb) && |
---|
.. | .. |
---|
2286 | 2285 | group--; |
---|
2287 | 2286 | grp = ext4_get_group_info(sb, group); |
---|
2288 | 2287 | |
---|
2289 | | - if (EXT4_MB_GRP_NEED_INIT(grp) && |
---|
| 2288 | + if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && |
---|
2290 | 2289 | ext4_free_group_clusters(sb, gdp) > 0 && |
---|
2291 | 2290 | !(ext4_has_group_desc_csum(sb) && |
---|
2292 | 2291 | (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { |
---|
.. | .. |
---|
2446 | 2445 | if (ac->ac_status != AC_STATUS_CONTINUE) |
---|
2447 | 2446 | break; |
---|
2448 | 2447 | } |
---|
| 2448 | + /* Processed all groups and haven't found blocks */ |
---|
| 2449 | + if (sbi->s_mb_stats && i == ngroups) |
---|
| 2450 | + atomic64_inc(&sbi->s_bal_cX_failed[cr]); |
---|
2449 | 2451 | } |
---|
2450 | 2452 | |
---|
2451 | 2453 | if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && |
---|
.. | .. |
---|
2475 | 2477 | goto repeat; |
---|
2476 | 2478 | } |
---|
2477 | 2479 | } |
---|
| 2480 | + |
---|
| 2481 | + if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND) |
---|
| 2482 | + atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]); |
---|
2478 | 2483 | out: |
---|
2479 | 2484 | if (!err && ac->ac_status != AC_STATUS_FOUND && first_err) |
---|
2480 | 2485 | err = first_err; |
---|
.. | .. |
---|
2538 | 2543 | sizeof(struct ext4_group_info); |
---|
2539 | 2544 | |
---|
2540 | 2545 | grinfo = ext4_get_group_info(sb, group); |
---|
| 2546 | + if (!grinfo) |
---|
| 2547 | + return 0; |
---|
2541 | 2548 | /* Load the group info in memory only if not already loaded. */ |
---|
2542 | 2549 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { |
---|
2543 | 2550 | err = ext4_mb_load_buddy(sb, group, &e4b); |
---|
.. | .. |
---|
2548 | 2555 | buddy_loaded = 1; |
---|
2549 | 2556 | } |
---|
2550 | 2557 | |
---|
2551 | | - memcpy(&sg, ext4_get_group_info(sb, group), i); |
---|
| 2558 | + memcpy(&sg, grinfo, i); |
---|
2552 | 2559 | |
---|
2553 | 2560 | if (buddy_loaded) |
---|
2554 | 2561 | ext4_mb_unload_buddy(&e4b); |
---|
.. | .. |
---|
2573 | 2580 | .stop = ext4_mb_seq_groups_stop, |
---|
2574 | 2581 | .show = ext4_mb_seq_groups_show, |
---|
2575 | 2582 | }; |
---|
| 2583 | + |
---|
| 2584 | +int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) |
---|
| 2585 | +{ |
---|
| 2586 | + struct super_block *sb = (struct super_block *)seq->private; |
---|
| 2587 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
---|
| 2588 | + |
---|
| 2589 | + seq_puts(seq, "mballoc:\n"); |
---|
| 2590 | + if (!sbi->s_mb_stats) { |
---|
| 2591 | + seq_puts(seq, "\tmb stats collection turned off.\n"); |
---|
| 2592 | + seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); |
---|
| 2593 | + return 0; |
---|
| 2594 | + } |
---|
| 2595 | + seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); |
---|
| 2596 | + seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); |
---|
| 2597 | + |
---|
| 2598 | + seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); |
---|
| 2599 | + |
---|
| 2600 | + seq_puts(seq, "\tcr0_stats:\n"); |
---|
| 2601 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0])); |
---|
| 2602 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2603 | + atomic64_read(&sbi->s_bal_cX_groups_considered[0])); |
---|
| 2604 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2605 | + atomic64_read(&sbi->s_bal_cX_failed[0])); |
---|
| 2606 | + |
---|
| 2607 | + seq_puts(seq, "\tcr1_stats:\n"); |
---|
| 2608 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1])); |
---|
| 2609 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2610 | + atomic64_read(&sbi->s_bal_cX_groups_considered[1])); |
---|
| 2611 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2612 | + atomic64_read(&sbi->s_bal_cX_failed[1])); |
---|
| 2613 | + |
---|
| 2614 | + seq_puts(seq, "\tcr2_stats:\n"); |
---|
| 2615 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2])); |
---|
| 2616 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2617 | + atomic64_read(&sbi->s_bal_cX_groups_considered[2])); |
---|
| 2618 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2619 | + atomic64_read(&sbi->s_bal_cX_failed[2])); |
---|
| 2620 | + |
---|
| 2621 | + seq_puts(seq, "\tcr3_stats:\n"); |
---|
| 2622 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3])); |
---|
| 2623 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
---|
| 2624 | + atomic64_read(&sbi->s_bal_cX_groups_considered[3])); |
---|
| 2625 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
---|
| 2626 | + atomic64_read(&sbi->s_bal_cX_failed[3])); |
---|
| 2627 | + seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); |
---|
| 2628 | + seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); |
---|
| 2629 | + seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); |
---|
| 2630 | + seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); |
---|
| 2631 | + seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); |
---|
| 2632 | + |
---|
| 2633 | + seq_printf(seq, "\tbuddies_generated: %u/%u\n", |
---|
| 2634 | + atomic_read(&sbi->s_mb_buddies_generated), |
---|
| 2635 | + ext4_get_groups_count(sb)); |
---|
| 2636 | + seq_printf(seq, "\tbuddies_time_used: %llu\n", |
---|
| 2637 | + atomic64_read(&sbi->s_mb_generation_time)); |
---|
| 2638 | + seq_printf(seq, "\tpreallocated: %u\n", |
---|
| 2639 | + atomic_read(&sbi->s_mb_preallocated)); |
---|
| 2640 | + seq_printf(seq, "\tdiscarded: %u\n", |
---|
| 2641 | + atomic_read(&sbi->s_mb_discarded)); |
---|
| 2642 | + return 0; |
---|
| 2643 | +} |
---|
2576 | 2644 | |
---|
2577 | 2645 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) |
---|
2578 | 2646 | { |
---|
.. | .. |
---|
2764 | 2832 | |
---|
2765 | 2833 | err_freebuddy: |
---|
2766 | 2834 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
---|
2767 | | - while (i-- > 0) |
---|
2768 | | - kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
---|
| 2835 | + while (i-- > 0) { |
---|
| 2836 | + struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
---|
| 2837 | + |
---|
| 2838 | + if (grp) |
---|
| 2839 | + kmem_cache_free(cachep, grp); |
---|
| 2840 | + } |
---|
2769 | 2841 | i = sbi->s_group_info_size; |
---|
2770 | 2842 | rcu_read_lock(); |
---|
2771 | 2843 | group_info = rcu_dereference(sbi->s_group_info); |
---|
.. | .. |
---|
2874 | 2946 | } while (i <= sb->s_blocksize_bits + 1); |
---|
2875 | 2947 | |
---|
2876 | 2948 | spin_lock_init(&sbi->s_md_lock); |
---|
2877 | | - spin_lock_init(&sbi->s_bal_lock); |
---|
2878 | 2949 | sbi->s_mb_free_pending = 0; |
---|
2879 | 2950 | INIT_LIST_HEAD(&sbi->s_freed_data_list); |
---|
2880 | 2951 | |
---|
.. | .. |
---|
2973 | 3044 | for (i = 0; i < ngroups; i++) { |
---|
2974 | 3045 | cond_resched(); |
---|
2975 | 3046 | grinfo = ext4_get_group_info(sb, i); |
---|
| 3047 | + if (!grinfo) |
---|
| 3048 | + continue; |
---|
2976 | 3049 | mb_group_bb_bitmap_free(grinfo); |
---|
2977 | 3050 | ext4_lock_group(sb, i); |
---|
2978 | 3051 | count = ext4_mb_cleanup_pa(grinfo); |
---|
.. | .. |
---|
3002 | 3075 | atomic_read(&sbi->s_bal_reqs), |
---|
3003 | 3076 | atomic_read(&sbi->s_bal_success)); |
---|
3004 | 3077 | ext4_msg(sb, KERN_INFO, |
---|
3005 | | - "mballoc: %u extents scanned, %u goal hits, " |
---|
| 3078 | + "mballoc: %u extents scanned, %u groups scanned, %u goal hits, " |
---|
3006 | 3079 | "%u 2^N hits, %u breaks, %u lost", |
---|
3007 | 3080 | atomic_read(&sbi->s_bal_ex_scanned), |
---|
| 3081 | + atomic_read(&sbi->s_bal_groups_scanned), |
---|
3008 | 3082 | atomic_read(&sbi->s_bal_goals), |
---|
3009 | 3083 | atomic_read(&sbi->s_bal_2orders), |
---|
3010 | 3084 | atomic_read(&sbi->s_bal_breaks), |
---|
3011 | 3085 | atomic_read(&sbi->s_mb_lost_chunks)); |
---|
3012 | 3086 | ext4_msg(sb, KERN_INFO, |
---|
3013 | | - "mballoc: %lu generated and it took %Lu", |
---|
3014 | | - sbi->s_mb_buddies_generated, |
---|
3015 | | - sbi->s_mb_generation_time); |
---|
| 3087 | + "mballoc: %u generated and it took %llu", |
---|
| 3088 | + atomic_read(&sbi->s_mb_buddies_generated), |
---|
| 3089 | + atomic64_read(&sbi->s_mb_generation_time)); |
---|
3016 | 3090 | ext4_msg(sb, KERN_INFO, |
---|
3017 | 3091 | "mballoc: %u preallocated, %u discarded", |
---|
3018 | 3092 | atomic_read(&sbi->s_mb_preallocated), |
---|
.. | .. |
---|
3439 | 3513 | struct ext4_allocation_request *ar) |
---|
3440 | 3514 | { |
---|
3441 | 3515 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
---|
| 3516 | + struct ext4_super_block *es = sbi->s_es; |
---|
3442 | 3517 | int bsbits, max; |
---|
3443 | 3518 | ext4_lblk_t end; |
---|
3444 | 3519 | loff_t size, start_off; |
---|
.. | .. |
---|
3619 | 3694 | ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size); |
---|
3620 | 3695 | |
---|
3621 | 3696 | /* define goal start in order to merge */ |
---|
3622 | | - if (ar->pright && (ar->lright == (start + size))) { |
---|
| 3697 | + if (ar->pright && (ar->lright == (start + size)) && |
---|
| 3698 | + ar->pright >= size && |
---|
| 3699 | + ar->pright - size >= le32_to_cpu(es->s_first_data_block)) { |
---|
3623 | 3700 | /* merge to the right */ |
---|
3624 | 3701 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size, |
---|
3625 | | - &ac->ac_f_ex.fe_group, |
---|
3626 | | - &ac->ac_f_ex.fe_start); |
---|
| 3702 | + &ac->ac_g_ex.fe_group, |
---|
| 3703 | + &ac->ac_g_ex.fe_start); |
---|
3627 | 3704 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
---|
3628 | 3705 | } |
---|
3629 | | - if (ar->pleft && (ar->lleft + 1 == start)) { |
---|
| 3706 | + if (ar->pleft && (ar->lleft + 1 == start) && |
---|
| 3707 | + ar->pleft + 1 < ext4_blocks_count(es)) { |
---|
3630 | 3708 | /* merge to the left */ |
---|
3631 | 3709 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1, |
---|
3632 | | - &ac->ac_f_ex.fe_group, |
---|
3633 | | - &ac->ac_f_ex.fe_start); |
---|
| 3710 | + &ac->ac_g_ex.fe_group, |
---|
| 3711 | + &ac->ac_g_ex.fe_start); |
---|
3634 | 3712 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
---|
3635 | 3713 | } |
---|
3636 | 3714 | |
---|
.. | .. |
---|
3642 | 3720 | { |
---|
3643 | 3721 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
---|
3644 | 3722 | |
---|
3645 | | - if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { |
---|
| 3723 | + if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) { |
---|
3646 | 3724 | atomic_inc(&sbi->s_bal_reqs); |
---|
3647 | 3725 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); |
---|
3648 | 3726 | if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) |
---|
3649 | 3727 | atomic_inc(&sbi->s_bal_success); |
---|
3650 | 3728 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); |
---|
| 3729 | + atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned); |
---|
3651 | 3730 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && |
---|
3652 | 3731 | ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) |
---|
3653 | 3732 | atomic_inc(&sbi->s_bal_goals); |
---|
.. | .. |
---|
3722 | 3801 | BUG_ON(start < pa->pa_pstart); |
---|
3723 | 3802 | BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len)); |
---|
3724 | 3803 | BUG_ON(pa->pa_free < len); |
---|
| 3804 | + BUG_ON(ac->ac_b_ex.fe_len <= 0); |
---|
3725 | 3805 | pa->pa_free -= len; |
---|
3726 | 3806 | |
---|
3727 | 3807 | mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa); |
---|
.. | .. |
---|
3884 | 3964 | struct ext4_free_data *entry; |
---|
3885 | 3965 | |
---|
3886 | 3966 | grp = ext4_get_group_info(sb, group); |
---|
| 3967 | + if (!grp) |
---|
| 3968 | + return; |
---|
3887 | 3969 | n = rb_first(&(grp->bb_free_root)); |
---|
3888 | 3970 | |
---|
3889 | 3971 | while (n) { |
---|
.. | .. |
---|
3910 | 3992 | ext4_grpblk_t start; |
---|
3911 | 3993 | int preallocated = 0; |
---|
3912 | 3994 | int len; |
---|
| 3995 | + |
---|
| 3996 | + if (!grp) |
---|
| 3997 | + return; |
---|
3913 | 3998 | |
---|
3914 | 3999 | /* all form of preallocation discards first load group, |
---|
3915 | 4000 | * so the only competing code is preallocation use. |
---|
.. | .. |
---|
4046 | 4131 | pa = ac->ac_pa; |
---|
4047 | 4132 | |
---|
4048 | 4133 | if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { |
---|
4049 | | - int winl; |
---|
4050 | | - int wins; |
---|
4051 | | - int win; |
---|
4052 | | - int offs; |
---|
| 4134 | + int new_bex_start; |
---|
| 4135 | + int new_bex_end; |
---|
4053 | 4136 | |
---|
4054 | 4137 | /* we can't allocate as much as normalizer wants. |
---|
4055 | 4138 | * so, found space must get proper lstart |
---|
.. | .. |
---|
4057 | 4140 | BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); |
---|
4058 | 4141 | BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); |
---|
4059 | 4142 | |
---|
4060 | | - /* we're limited by original request in that |
---|
4061 | | - * logical block must be covered any way |
---|
4062 | | - * winl is window we can move our chunk within */ |
---|
4063 | | - winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; |
---|
| 4143 | + /* |
---|
| 4144 | + * Use the below logic for adjusting best extent as it keeps |
---|
| 4145 | + * fragmentation in check while ensuring logical range of best |
---|
| 4146 | + * extent doesn't overflow out of goal extent: |
---|
| 4147 | + * |
---|
| 4148 | + * 1. Check if best ex can be kept at end of goal and still |
---|
| 4149 | + * cover original start |
---|
| 4150 | + * 2. Else, check if best ex can be kept at start of goal and |
---|
| 4151 | + * still cover original start |
---|
| 4152 | + * 3. Else, keep the best ex at start of original request. |
---|
| 4153 | + */ |
---|
| 4154 | + new_bex_end = ac->ac_g_ex.fe_logical + |
---|
| 4155 | + EXT4_C2B(sbi, ac->ac_g_ex.fe_len); |
---|
| 4156 | + new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
| 4157 | + if (ac->ac_o_ex.fe_logical >= new_bex_start) |
---|
| 4158 | + goto adjust_bex; |
---|
4064 | 4159 | |
---|
4065 | | - /* also, we should cover whole original request */ |
---|
4066 | | - wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len); |
---|
| 4160 | + new_bex_start = ac->ac_g_ex.fe_logical; |
---|
| 4161 | + new_bex_end = |
---|
| 4162 | + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
| 4163 | + if (ac->ac_o_ex.fe_logical < new_bex_end) |
---|
| 4164 | + goto adjust_bex; |
---|
4067 | 4165 | |
---|
4068 | | - /* the smallest one defines real window */ |
---|
4069 | | - win = min(winl, wins); |
---|
| 4166 | + new_bex_start = ac->ac_o_ex.fe_logical; |
---|
| 4167 | + new_bex_end = |
---|
| 4168 | + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
4070 | 4169 | |
---|
4071 | | - offs = ac->ac_o_ex.fe_logical % |
---|
4072 | | - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
---|
4073 | | - if (offs && offs < win) |
---|
4074 | | - win = offs; |
---|
| 4170 | +adjust_bex: |
---|
| 4171 | + ac->ac_b_ex.fe_logical = new_bex_start; |
---|
4075 | 4172 | |
---|
4076 | | - ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - |
---|
4077 | | - EXT4_NUM_B2C(sbi, win); |
---|
4078 | 4173 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); |
---|
4079 | 4174 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); |
---|
| 4175 | + BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + |
---|
| 4176 | + EXT4_C2B(sbi, ac->ac_g_ex.fe_len))); |
---|
4080 | 4177 | } |
---|
4081 | 4178 | |
---|
4082 | 4179 | /* preallocation can change ac_b_ex, thus we store actually |
---|
.. | .. |
---|
4102 | 4199 | |
---|
4103 | 4200 | ei = EXT4_I(ac->ac_inode); |
---|
4104 | 4201 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
---|
| 4202 | + if (!grp) |
---|
| 4203 | + return; |
---|
4105 | 4204 | |
---|
4106 | 4205 | pa->pa_obj_lock = &ei->i_prealloc_lock; |
---|
4107 | 4206 | pa->pa_inode = ac->ac_inode; |
---|
.. | .. |
---|
4155 | 4254 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
---|
4156 | 4255 | |
---|
4157 | 4256 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
---|
| 4257 | + if (!grp) |
---|
| 4258 | + return; |
---|
4158 | 4259 | lg = ac->ac_lg; |
---|
4159 | 4260 | BUG_ON(lg == NULL); |
---|
4160 | 4261 | |
---|
.. | .. |
---|
4250 | 4351 | trace_ext4_mb_release_group_pa(sb, pa); |
---|
4251 | 4352 | BUG_ON(pa->pa_deleted == 0); |
---|
4252 | 4353 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
---|
4253 | | - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
---|
| 4354 | + if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { |
---|
| 4355 | + ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", |
---|
| 4356 | + e4b->bd_group, group, pa->pa_pstart); |
---|
| 4357 | + return 0; |
---|
| 4358 | + } |
---|
4254 | 4359 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
---|
4255 | 4360 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
---|
4256 | 4361 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); |
---|
.. | .. |
---|
4279 | 4384 | int err; |
---|
4280 | 4385 | int free = 0; |
---|
4281 | 4386 | |
---|
| 4387 | + if (!grp) |
---|
| 4388 | + return 0; |
---|
4282 | 4389 | mb_debug(sb, "discard preallocation for group %u\n", group); |
---|
4283 | 4390 | if (list_empty(&grp->bb_prealloc_list)) |
---|
4284 | 4391 | goto out_dbg; |
---|
.. | .. |
---|
4516 | 4623 | struct ext4_prealloc_space *pa; |
---|
4517 | 4624 | ext4_grpblk_t start; |
---|
4518 | 4625 | struct list_head *cur; |
---|
| 4626 | + |
---|
| 4627 | + if (!grp) |
---|
| 4628 | + continue; |
---|
4519 | 4629 | ext4_lock_group(sb, i); |
---|
4520 | 4630 | list_for_each(cur, &grp->bb_prealloc_list) { |
---|
4521 | 4631 | pa = list_entry(cur, struct ext4_prealloc_space, |
---|
.. | .. |
---|
5303 | 5413 | } |
---|
5304 | 5414 | |
---|
5305 | 5415 | /** |
---|
5306 | | - * ext4_free_blocks() -- Free given blocks and update quota |
---|
| 5416 | + * ext4_mb_clear_bb() -- helper function for freeing blocks. |
---|
| 5417 | + * Used by ext4_free_blocks() |
---|
5307 | 5418 | * @handle: handle for this transaction |
---|
5308 | 5419 | * @inode: inode |
---|
5309 | 5420 | * @bh: optional buffer of the block to be freed |
---|
.. | .. |
---|
5311 | 5422 | * @count: number of blocks to be freed |
---|
5312 | 5423 | * @flags: flags used by ext4_free_blocks |
---|
5313 | 5424 | */ |
---|
5314 | | -void ext4_free_blocks(handle_t *handle, struct inode *inode, |
---|
5315 | | - struct buffer_head *bh, ext4_fsblk_t block, |
---|
5316 | | - unsigned long count, int flags) |
---|
| 5425 | +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, |
---|
| 5426 | + ext4_fsblk_t block, unsigned long count, |
---|
| 5427 | + int flags) |
---|
5317 | 5428 | { |
---|
5318 | 5429 | struct buffer_head *bitmap_bh = NULL; |
---|
5319 | 5430 | struct super_block *sb = inode->i_sb; |
---|
5320 | 5431 | struct ext4_group_desc *gdp; |
---|
| 5432 | + struct ext4_group_info *grp; |
---|
5321 | 5433 | unsigned int overflow; |
---|
5322 | 5434 | ext4_grpblk_t bit; |
---|
5323 | 5435 | struct buffer_head *gd_bh; |
---|
.. | .. |
---|
5330 | 5442 | |
---|
5331 | 5443 | sbi = EXT4_SB(sb); |
---|
5332 | 5444 | |
---|
5333 | | - if (sbi->s_mount_state & EXT4_FC_REPLAY) { |
---|
5334 | | - ext4_free_blocks_simple(inode, block, count); |
---|
5335 | | - return; |
---|
5336 | | - } |
---|
5337 | | - |
---|
5338 | | - might_sleep(); |
---|
5339 | | - if (bh) { |
---|
5340 | | - if (block) |
---|
5341 | | - BUG_ON(block != bh->b_blocknr); |
---|
5342 | | - else |
---|
5343 | | - block = bh->b_blocknr; |
---|
5344 | | - } |
---|
5345 | | - |
---|
5346 | 5445 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
---|
5347 | 5446 | !ext4_inode_block_valid(inode, block, count)) { |
---|
5348 | | - ext4_error(sb, "Freeing blocks not in datazone - " |
---|
5349 | | - "block = %llu, count = %lu", block, count); |
---|
| 5447 | + ext4_error(sb, "Freeing blocks in system zone - " |
---|
| 5448 | + "Block = %llu, count = %lu", block, count); |
---|
| 5449 | + /* err = 0. ext4_std_error should be a no op */ |
---|
5350 | 5450 | goto error_return; |
---|
5351 | 5451 | } |
---|
5352 | | - |
---|
5353 | | - ext4_debug("freeing block %llu\n", block); |
---|
5354 | | - trace_ext4_free_blocks(inode, block, count, flags); |
---|
5355 | | - |
---|
5356 | | - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
5357 | | - BUG_ON(count > 1); |
---|
5358 | | - |
---|
5359 | | - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
---|
5360 | | - inode, bh, block); |
---|
5361 | | - } |
---|
5362 | | - |
---|
5363 | | - /* |
---|
5364 | | - * If the extent to be freed does not begin on a cluster |
---|
5365 | | - * boundary, we need to deal with partial clusters at the |
---|
5366 | | - * beginning and end of the extent. Normally we will free |
---|
5367 | | - * blocks at the beginning or the end unless we are explicitly |
---|
5368 | | - * requested to avoid doing so. |
---|
5369 | | - */ |
---|
5370 | | - overflow = EXT4_PBLK_COFF(sbi, block); |
---|
5371 | | - if (overflow) { |
---|
5372 | | - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
---|
5373 | | - overflow = sbi->s_cluster_ratio - overflow; |
---|
5374 | | - block += overflow; |
---|
5375 | | - if (count > overflow) |
---|
5376 | | - count -= overflow; |
---|
5377 | | - else |
---|
5378 | | - return; |
---|
5379 | | - } else { |
---|
5380 | | - block -= overflow; |
---|
5381 | | - count += overflow; |
---|
5382 | | - } |
---|
5383 | | - } |
---|
5384 | | - overflow = EXT4_LBLK_COFF(sbi, count); |
---|
5385 | | - if (overflow) { |
---|
5386 | | - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
---|
5387 | | - if (count > overflow) |
---|
5388 | | - count -= overflow; |
---|
5389 | | - else |
---|
5390 | | - return; |
---|
5391 | | - } else |
---|
5392 | | - count += sbi->s_cluster_ratio - overflow; |
---|
5393 | | - } |
---|
5394 | | - |
---|
5395 | | - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
5396 | | - int i; |
---|
5397 | | - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; |
---|
5398 | | - |
---|
5399 | | - for (i = 0; i < count; i++) { |
---|
5400 | | - cond_resched(); |
---|
5401 | | - if (is_metadata) |
---|
5402 | | - bh = sb_find_get_block(inode->i_sb, block + i); |
---|
5403 | | - ext4_forget(handle, is_metadata, inode, bh, block + i); |
---|
5404 | | - } |
---|
5405 | | - } |
---|
| 5452 | + flags |= EXT4_FREE_BLOCKS_VALIDATED; |
---|
5406 | 5453 | |
---|
5407 | 5454 | do_more: |
---|
5408 | 5455 | overflow = 0; |
---|
5409 | 5456 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
---|
5410 | 5457 | |
---|
5411 | | - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( |
---|
5412 | | - ext4_get_group_info(sb, block_group)))) |
---|
| 5458 | + grp = ext4_get_group_info(sb, block_group); |
---|
| 5459 | + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
---|
5413 | 5460 | return; |
---|
5414 | 5461 | |
---|
5415 | 5462 | /* |
---|
.. | .. |
---|
5420 | 5467 | overflow = EXT4_C2B(sbi, bit) + count - |
---|
5421 | 5468 | EXT4_BLOCKS_PER_GROUP(sb); |
---|
5422 | 5469 | count -= overflow; |
---|
| 5470 | + /* The range changed so it's no longer validated */ |
---|
| 5471 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
5423 | 5472 | } |
---|
5424 | 5473 | count_clusters = EXT4_NUM_B2C(sbi, count); |
---|
5425 | 5474 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
---|
.. | .. |
---|
5434 | 5483 | goto error_return; |
---|
5435 | 5484 | } |
---|
5436 | 5485 | |
---|
5437 | | - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || |
---|
5438 | | - in_range(ext4_inode_bitmap(sb, gdp), block, count) || |
---|
5439 | | - in_range(block, ext4_inode_table(sb, gdp), |
---|
5440 | | - sbi->s_itb_per_group) || |
---|
5441 | | - in_range(block + count - 1, ext4_inode_table(sb, gdp), |
---|
5442 | | - sbi->s_itb_per_group)) { |
---|
5443 | | - |
---|
| 5486 | + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
---|
| 5487 | + !ext4_inode_block_valid(inode, block, count)) { |
---|
5444 | 5488 | ext4_error(sb, "Freeing blocks in system zone - " |
---|
5445 | 5489 | "Block = %llu, count = %lu", block, count); |
---|
5446 | 5490 | /* err = 0. ext4_std_error should be a no op */ |
---|
.. | .. |
---|
5506 | 5550 | * them with group lock_held |
---|
5507 | 5551 | */ |
---|
5508 | 5552 | if (test_opt(sb, DISCARD)) { |
---|
5509 | | - err = ext4_issue_discard(sb, block_group, bit, count, |
---|
5510 | | - NULL); |
---|
| 5553 | + err = ext4_issue_discard(sb, block_group, bit, |
---|
| 5554 | + count_clusters, NULL); |
---|
5511 | 5555 | if (err && err != -EOPNOTSUPP) |
---|
5512 | 5556 | ext4_msg(sb, KERN_WARNING, "discard request in" |
---|
5513 | | - " group:%d block:%d count:%lu failed" |
---|
| 5557 | + " group:%u block:%d count:%lu failed" |
---|
5514 | 5558 | " with %d", block_group, bit, count, |
---|
5515 | 5559 | err); |
---|
5516 | 5560 | } else |
---|
.. | .. |
---|
5562 | 5606 | block += count; |
---|
5563 | 5607 | count = overflow; |
---|
5564 | 5608 | put_bh(bitmap_bh); |
---|
| 5609 | + /* The range changed so it's no longer validated */ |
---|
| 5610 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
5565 | 5611 | goto do_more; |
---|
5566 | 5612 | } |
---|
5567 | 5613 | error_return: |
---|
5568 | 5614 | brelse(bitmap_bh); |
---|
5569 | 5615 | ext4_std_error(sb, err); |
---|
| 5616 | + return; |
---|
| 5617 | +} |
---|
| 5618 | + |
---|
| 5619 | +/** |
---|
| 5620 | + * ext4_free_blocks() -- Free given blocks and update quota |
---|
| 5621 | + * @handle: handle for this transaction |
---|
| 5622 | + * @inode: inode |
---|
| 5623 | + * @bh: optional buffer of the block to be freed |
---|
| 5624 | + * @block: starting physical block to be freed |
---|
| 5625 | + * @count: number of blocks to be freed |
---|
| 5626 | + * @flags: flags used by ext4_free_blocks |
---|
| 5627 | + */ |
---|
| 5628 | +void ext4_free_blocks(handle_t *handle, struct inode *inode, |
---|
| 5629 | + struct buffer_head *bh, ext4_fsblk_t block, |
---|
| 5630 | + unsigned long count, int flags) |
---|
| 5631 | +{ |
---|
| 5632 | + struct super_block *sb = inode->i_sb; |
---|
| 5633 | + unsigned int overflow; |
---|
| 5634 | + struct ext4_sb_info *sbi; |
---|
| 5635 | + |
---|
| 5636 | + sbi = EXT4_SB(sb); |
---|
| 5637 | + |
---|
| 5638 | + if (bh) { |
---|
| 5639 | + if (block) |
---|
| 5640 | + BUG_ON(block != bh->b_blocknr); |
---|
| 5641 | + else |
---|
| 5642 | + block = bh->b_blocknr; |
---|
| 5643 | + } |
---|
| 5644 | + |
---|
| 5645 | + if (sbi->s_mount_state & EXT4_FC_REPLAY) { |
---|
| 5646 | + ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count)); |
---|
| 5647 | + return; |
---|
| 5648 | + } |
---|
| 5649 | + |
---|
| 5650 | + might_sleep(); |
---|
| 5651 | + |
---|
| 5652 | + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
---|
| 5653 | + !ext4_inode_block_valid(inode, block, count)) { |
---|
| 5654 | + ext4_error(sb, "Freeing blocks not in datazone - " |
---|
| 5655 | + "block = %llu, count = %lu", block, count); |
---|
| 5656 | + return; |
---|
| 5657 | + } |
---|
| 5658 | + flags |= EXT4_FREE_BLOCKS_VALIDATED; |
---|
| 5659 | + |
---|
| 5660 | + ext4_debug("freeing block %llu\n", block); |
---|
| 5661 | + trace_ext4_free_blocks(inode, block, count, flags); |
---|
| 5662 | + |
---|
| 5663 | + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
| 5664 | + BUG_ON(count > 1); |
---|
| 5665 | + |
---|
| 5666 | + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
---|
| 5667 | + inode, bh, block); |
---|
| 5668 | + } |
---|
| 5669 | + |
---|
| 5670 | + /* |
---|
| 5671 | + * If the extent to be freed does not begin on a cluster |
---|
| 5672 | + * boundary, we need to deal with partial clusters at the |
---|
| 5673 | + * beginning and end of the extent. Normally we will free |
---|
| 5674 | + * blocks at the beginning or the end unless we are explicitly |
---|
| 5675 | + * requested to avoid doing so. |
---|
| 5676 | + */ |
---|
| 5677 | + overflow = EXT4_PBLK_COFF(sbi, block); |
---|
| 5678 | + if (overflow) { |
---|
| 5679 | + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
---|
| 5680 | + overflow = sbi->s_cluster_ratio - overflow; |
---|
| 5681 | + block += overflow; |
---|
| 5682 | + if (count > overflow) |
---|
| 5683 | + count -= overflow; |
---|
| 5684 | + else |
---|
| 5685 | + return; |
---|
| 5686 | + } else { |
---|
| 5687 | + block -= overflow; |
---|
| 5688 | + count += overflow; |
---|
| 5689 | + } |
---|
| 5690 | + /* The range changed so it's no longer validated */ |
---|
| 5691 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
| 5692 | + } |
---|
| 5693 | + overflow = EXT4_LBLK_COFF(sbi, count); |
---|
| 5694 | + if (overflow) { |
---|
| 5695 | + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
---|
| 5696 | + if (count > overflow) |
---|
| 5697 | + count -= overflow; |
---|
| 5698 | + else |
---|
| 5699 | + return; |
---|
| 5700 | + } else |
---|
| 5701 | + count += sbi->s_cluster_ratio - overflow; |
---|
| 5702 | + /* The range changed so it's no longer validated */ |
---|
| 5703 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
---|
| 5704 | + } |
---|
| 5705 | + |
---|
| 5706 | + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
---|
| 5707 | + int i; |
---|
| 5708 | + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; |
---|
| 5709 | + |
---|
| 5710 | + for (i = 0; i < count; i++) { |
---|
| 5711 | + cond_resched(); |
---|
| 5712 | + if (is_metadata) |
---|
| 5713 | + bh = sb_find_get_block(inode->i_sb, block + i); |
---|
| 5714 | + ext4_forget(handle, is_metadata, inode, bh, block + i); |
---|
| 5715 | + } |
---|
| 5716 | + } |
---|
| 5717 | + |
---|
| 5718 | + ext4_mb_clear_bb(handle, inode, block, count, flags); |
---|
5570 | 5719 | return; |
---|
5571 | 5720 | } |
---|
5572 | 5721 | |
---|
.. | .. |
---|
5626 | 5775 | goto error_return; |
---|
5627 | 5776 | } |
---|
5628 | 5777 | |
---|
5629 | | - if (in_range(ext4_block_bitmap(sb, desc), block, count) || |
---|
5630 | | - in_range(ext4_inode_bitmap(sb, desc), block, count) || |
---|
5631 | | - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
---|
5632 | | - in_range(block + count - 1, ext4_inode_table(sb, desc), |
---|
5633 | | - sbi->s_itb_per_group)) { |
---|
| 5778 | + if (!ext4_sb_block_valid(sb, NULL, block, count)) { |
---|
5634 | 5779 | ext4_error(sb, "Adding blocks in system zones - " |
---|
5635 | 5780 | "Block = %llu, count = %lu", |
---|
5636 | 5781 | block, count); |
---|
.. | .. |
---|
5715 | 5860 | * @sb: super block for the file system |
---|
5716 | 5861 | * @start: starting block of the free extent in the alloc. group |
---|
5717 | 5862 | * @count: number of blocks to TRIM |
---|
5718 | | - * @group: alloc. group we are working with |
---|
5719 | 5863 | * @e4b: ext4 buddy for the group |
---|
5720 | 5864 | * |
---|
5721 | 5865 | * Trim "count" blocks starting at "start" in the "group". To assure that no |
---|
5722 | 5866 | * one will allocate those blocks, mark it as used in buddy bitmap. This must |
---|
5723 | 5867 | * be called with under the group lock. |
---|
5724 | 5868 | */ |
---|
5725 | | -static int ext4_trim_extent(struct super_block *sb, int start, int count, |
---|
5726 | | - ext4_group_t group, struct ext4_buddy *e4b) |
---|
| 5869 | +static int ext4_trim_extent(struct super_block *sb, |
---|
| 5870 | + int start, int count, struct ext4_buddy *e4b) |
---|
5727 | 5871 | __releases(bitlock) |
---|
5728 | 5872 | __acquires(bitlock) |
---|
5729 | 5873 | { |
---|
5730 | 5874 | struct ext4_free_extent ex; |
---|
| 5875 | + ext4_group_t group = e4b->bd_group; |
---|
5731 | 5876 | int ret = 0; |
---|
5732 | 5877 | |
---|
5733 | 5878 | trace_ext4_trim_extent(sb, group, start, count); |
---|
.. | .. |
---|
5748 | 5893 | ext4_lock_group(sb, group); |
---|
5749 | 5894 | mb_free_blocks(NULL, e4b, start, ex.fe_len); |
---|
5750 | 5895 | return ret; |
---|
| 5896 | +} |
---|
| 5897 | + |
---|
| 5898 | +static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, |
---|
| 5899 | + ext4_group_t grp) |
---|
| 5900 | +{ |
---|
| 5901 | + if (grp < ext4_get_groups_count(sb)) |
---|
| 5902 | + return EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
---|
| 5903 | + return (ext4_blocks_count(EXT4_SB(sb)->s_es) - |
---|
| 5904 | + ext4_group_first_block_no(sb, grp) - 1) >> |
---|
| 5905 | + EXT4_CLUSTER_BITS(sb); |
---|
| 5906 | +} |
---|
| 5907 | + |
---|
| 5908 | +static bool ext4_trim_interrupted(void) |
---|
| 5909 | +{ |
---|
| 5910 | + return fatal_signal_pending(current) || freezing(current); |
---|
| 5911 | +} |
---|
| 5912 | + |
---|
| 5913 | +static int ext4_try_to_trim_range(struct super_block *sb, |
---|
| 5914 | + struct ext4_buddy *e4b, ext4_grpblk_t start, |
---|
| 5915 | + ext4_grpblk_t max, ext4_grpblk_t minblocks) |
---|
| 5916 | +{ |
---|
| 5917 | + ext4_grpblk_t next, count, free_count; |
---|
| 5918 | + bool set_trimmed = false; |
---|
| 5919 | + void *bitmap; |
---|
| 5920 | + |
---|
| 5921 | + bitmap = e4b->bd_bitmap; |
---|
| 5922 | + if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) |
---|
| 5923 | + set_trimmed = true; |
---|
| 5924 | + start = max(e4b->bd_info->bb_first_free, start); |
---|
| 5925 | + count = 0; |
---|
| 5926 | + free_count = 0; |
---|
| 5927 | + |
---|
| 5928 | + while (start <= max) { |
---|
| 5929 | + start = mb_find_next_zero_bit(bitmap, max + 1, start); |
---|
| 5930 | + if (start > max) |
---|
| 5931 | + break; |
---|
| 5932 | + next = mb_find_next_bit(bitmap, max + 1, start); |
---|
| 5933 | + |
---|
| 5934 | + if ((next - start) >= minblocks) { |
---|
| 5935 | + int ret = ext4_trim_extent(sb, start, next - start, e4b); |
---|
| 5936 | + |
---|
| 5937 | + if (ret && ret != -EOPNOTSUPP) |
---|
| 5938 | + return count; |
---|
| 5939 | + count += next - start; |
---|
| 5940 | + } |
---|
| 5941 | + free_count += next - start; |
---|
| 5942 | + start = next + 1; |
---|
| 5943 | + |
---|
| 5944 | + if (ext4_trim_interrupted()) |
---|
| 5945 | + return count; |
---|
| 5946 | + |
---|
| 5947 | + if (need_resched()) { |
---|
| 5948 | + ext4_unlock_group(sb, e4b->bd_group); |
---|
| 5949 | + cond_resched(); |
---|
| 5950 | + ext4_lock_group(sb, e4b->bd_group); |
---|
| 5951 | + } |
---|
| 5952 | + |
---|
| 5953 | + if ((e4b->bd_info->bb_free - free_count) < minblocks) |
---|
| 5954 | + break; |
---|
| 5955 | + } |
---|
| 5956 | + |
---|
| 5957 | + if (set_trimmed) |
---|
| 5958 | + EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); |
---|
| 5959 | + |
---|
| 5960 | + return count; |
---|
5751 | 5961 | } |
---|
5752 | 5962 | |
---|
5753 | 5963 | /** |
---|
.. | .. |
---|
5773 | 5983 | ext4_grpblk_t start, ext4_grpblk_t max, |
---|
5774 | 5984 | ext4_grpblk_t minblocks) |
---|
5775 | 5985 | { |
---|
5776 | | - void *bitmap; |
---|
5777 | | - ext4_grpblk_t next, count = 0, free_count = 0; |
---|
5778 | 5986 | struct ext4_buddy e4b; |
---|
5779 | | - int ret = 0; |
---|
| 5987 | + int ret; |
---|
5780 | 5988 | |
---|
5781 | 5989 | trace_ext4_trim_all_free(sb, group, start, max); |
---|
5782 | 5990 | |
---|
.. | .. |
---|
5786 | 5994 | ret, group); |
---|
5787 | 5995 | return ret; |
---|
5788 | 5996 | } |
---|
5789 | | - bitmap = e4b.bd_bitmap; |
---|
5790 | 5997 | |
---|
5791 | 5998 | ext4_lock_group(sb, group); |
---|
5792 | | - if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && |
---|
5793 | | - minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) |
---|
5794 | | - goto out; |
---|
5795 | 5999 | |
---|
5796 | | - start = (e4b.bd_info->bb_first_free > start) ? |
---|
5797 | | - e4b.bd_info->bb_first_free : start; |
---|
| 6000 | + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || |
---|
| 6001 | + minblocks < EXT4_SB(sb)->s_last_trim_minblks) |
---|
| 6002 | + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); |
---|
| 6003 | + else |
---|
| 6004 | + ret = 0; |
---|
5798 | 6005 | |
---|
5799 | | - while (start <= max) { |
---|
5800 | | - start = mb_find_next_zero_bit(bitmap, max + 1, start); |
---|
5801 | | - if (start > max) |
---|
5802 | | - break; |
---|
5803 | | - next = mb_find_next_bit(bitmap, max + 1, start); |
---|
5804 | | - |
---|
5805 | | - if ((next - start) >= minblocks) { |
---|
5806 | | - ret = ext4_trim_extent(sb, start, |
---|
5807 | | - next - start, group, &e4b); |
---|
5808 | | - if (ret && ret != -EOPNOTSUPP) |
---|
5809 | | - break; |
---|
5810 | | - ret = 0; |
---|
5811 | | - count += next - start; |
---|
5812 | | - } |
---|
5813 | | - free_count += next - start; |
---|
5814 | | - start = next + 1; |
---|
5815 | | - |
---|
5816 | | - if (fatal_signal_pending(current)) { |
---|
5817 | | - count = -ERESTARTSYS; |
---|
5818 | | - break; |
---|
5819 | | - } |
---|
5820 | | - |
---|
5821 | | - if (need_resched()) { |
---|
5822 | | - ext4_unlock_group(sb, group); |
---|
5823 | | - cond_resched(); |
---|
5824 | | - ext4_lock_group(sb, group); |
---|
5825 | | - } |
---|
5826 | | - |
---|
5827 | | - if ((e4b.bd_info->bb_free - free_count) < minblocks) |
---|
5828 | | - break; |
---|
5829 | | - } |
---|
5830 | | - |
---|
5831 | | - if (!ret) { |
---|
5832 | | - ret = count; |
---|
5833 | | - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); |
---|
5834 | | - } |
---|
5835 | | -out: |
---|
5836 | 6006 | ext4_unlock_group(sb, group); |
---|
5837 | 6007 | ext4_mb_unload_buddy(&e4b); |
---|
5838 | 6008 | |
---|
5839 | 6009 | ext4_debug("trimmed %d blocks in the group %d\n", |
---|
5840 | | - count, group); |
---|
| 6010 | + ret, group); |
---|
5841 | 6011 | |
---|
5842 | 6012 | return ret; |
---|
5843 | 6013 | } |
---|
.. | .. |
---|
5882 | 6052 | if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) |
---|
5883 | 6053 | goto out; |
---|
5884 | 6054 | } |
---|
5885 | | - if (end >= max_blks) |
---|
| 6055 | + if (end >= max_blks - 1) |
---|
5886 | 6056 | end = max_blks - 1; |
---|
5887 | 6057 | if (end <= first_data_blk) |
---|
5888 | 6058 | goto out; |
---|
.. | .. |
---|
5899 | 6069 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
---|
5900 | 6070 | |
---|
5901 | 6071 | for (group = first_group; group <= last_group; group++) { |
---|
| 6072 | + if (ext4_trim_interrupted()) |
---|
| 6073 | + break; |
---|
5902 | 6074 | grp = ext4_get_group_info(sb, group); |
---|
| 6075 | + if (!grp) |
---|
| 6076 | + continue; |
---|
5903 | 6077 | /* We only do this if the grp has never been initialized */ |
---|
5904 | 6078 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
---|
5905 | 6079 | ret = ext4_mb_init_group(sb, group, GFP_NOFS); |
---|
.. | .. |
---|
5915 | 6089 | */ |
---|
5916 | 6090 | if (group == last_group) |
---|
5917 | 6091 | end = last_cluster; |
---|
5918 | | - |
---|
5919 | 6092 | if (grp->bb_free >= minlen) { |
---|
5920 | 6093 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
---|
5921 | | - end, minlen); |
---|
| 6094 | + end, minlen); |
---|
5922 | 6095 | if (cnt < 0) { |
---|
5923 | 6096 | ret = cnt; |
---|
5924 | 6097 | break; |
---|
.. | .. |
---|
5934 | 6107 | } |
---|
5935 | 6108 | |
---|
5936 | 6109 | if (!ret) |
---|
5937 | | - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
---|
| 6110 | + EXT4_SB(sb)->s_last_trim_minblks = minlen; |
---|
5938 | 6111 | |
---|
5939 | 6112 | out: |
---|
5940 | 6113 | range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; |
---|
.. | .. |
---|
5963 | 6136 | |
---|
5964 | 6137 | ext4_lock_group(sb, group); |
---|
5965 | 6138 | |
---|
5966 | | - start = (e4b.bd_info->bb_first_free > start) ? |
---|
5967 | | - e4b.bd_info->bb_first_free : start; |
---|
| 6139 | + start = max(e4b.bd_info->bb_first_free, start); |
---|
5968 | 6140 | if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) |
---|
5969 | 6141 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
---|
5970 | 6142 | |
---|