| .. | .. |
|---|
| 16 | 16 | #include <linux/slab.h> |
|---|
| 17 | 17 | #include <linux/nospec.h> |
|---|
| 18 | 18 | #include <linux/backing-dev.h> |
|---|
| 19 | +#include <linux/freezer.h> |
|---|
| 19 | 20 | #include <trace/events/ext4.h> |
|---|
| 20 | 21 | |
|---|
| 21 | 22 | /* |
|---|
| .. | .. |
|---|
| 684 | 685 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
|---|
| 685 | 686 | |
|---|
| 686 | 687 | grp = ext4_get_group_info(sb, e4b->bd_group); |
|---|
| 688 | + if (!grp) |
|---|
| 689 | + return NULL; |
|---|
| 687 | 690 | list_for_each(cur, &grp->bb_prealloc_list) { |
|---|
| 688 | 691 | ext4_group_t groupnr; |
|---|
| 689 | 692 | struct ext4_prealloc_space *pa; |
|---|
| .. | .. |
|---|
| 767 | 770 | |
|---|
| 768 | 771 | static noinline_for_stack |
|---|
| 769 | 772 | void ext4_mb_generate_buddy(struct super_block *sb, |
|---|
| 770 | | - void *buddy, void *bitmap, ext4_group_t group) |
|---|
| 773 | + void *buddy, void *bitmap, ext4_group_t group, |
|---|
| 774 | + struct ext4_group_info *grp) |
|---|
| 771 | 775 | { |
|---|
| 772 | | - struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
|---|
| 773 | 776 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
|---|
| 774 | 777 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
|---|
| 775 | 778 | ext4_grpblk_t i = 0; |
|---|
| .. | .. |
|---|
| 816 | 819 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
|---|
| 817 | 820 | |
|---|
| 818 | 821 | period = get_cycles() - period; |
|---|
| 819 | | - spin_lock(&sbi->s_bal_lock); |
|---|
| 820 | | - sbi->s_mb_buddies_generated++; |
|---|
| 821 | | - sbi->s_mb_generation_time += period; |
|---|
| 822 | | - spin_unlock(&sbi->s_bal_lock); |
|---|
| 823 | | -} |
|---|
| 824 | | - |
|---|
| 825 | | -static void mb_regenerate_buddy(struct ext4_buddy *e4b) |
|---|
| 826 | | -{ |
|---|
| 827 | | - int count; |
|---|
| 828 | | - int order = 1; |
|---|
| 829 | | - void *buddy; |
|---|
| 830 | | - |
|---|
| 831 | | - while ((buddy = mb_find_buddy(e4b, order++, &count))) { |
|---|
| 832 | | - ext4_set_bits(buddy, 0, count); |
|---|
| 833 | | - } |
|---|
| 834 | | - e4b->bd_info->bb_fragments = 0; |
|---|
| 835 | | - memset(e4b->bd_info->bb_counters, 0, |
|---|
| 836 | | - sizeof(*e4b->bd_info->bb_counters) * |
|---|
| 837 | | - (e4b->bd_sb->s_blocksize_bits + 2)); |
|---|
| 838 | | - |
|---|
| 839 | | - ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, |
|---|
| 840 | | - e4b->bd_bitmap, e4b->bd_group); |
|---|
| 822 | + atomic_inc(&sbi->s_mb_buddies_generated); |
|---|
| 823 | + atomic64_add(period, &sbi->s_mb_generation_time); |
|---|
| 841 | 824 | } |
|---|
| 842 | 825 | |
|---|
| 843 | 826 | /* The buddy information is attached the buddy cache inode |
|---|
| .. | .. |
|---|
| 909 | 892 | break; |
|---|
| 910 | 893 | |
|---|
| 911 | 894 | grinfo = ext4_get_group_info(sb, group); |
|---|
| 895 | + if (!grinfo) |
|---|
| 896 | + continue; |
|---|
| 912 | 897 | /* |
|---|
| 913 | 898 | * If page is uptodate then we came here after online resize |
|---|
| 914 | 899 | * which added some new uninitialized group info structs, so |
|---|
| .. | .. |
|---|
| 974 | 959 | group, page->index, i * blocksize); |
|---|
| 975 | 960 | trace_ext4_mb_buddy_bitmap_load(sb, group); |
|---|
| 976 | 961 | grinfo = ext4_get_group_info(sb, group); |
|---|
| 962 | + if (!grinfo) { |
|---|
| 963 | + err = -EFSCORRUPTED; |
|---|
| 964 | + goto out; |
|---|
| 965 | + } |
|---|
| 977 | 966 | grinfo->bb_fragments = 0; |
|---|
| 978 | 967 | memset(grinfo->bb_counters, 0, |
|---|
| 979 | 968 | sizeof(*grinfo->bb_counters) * |
|---|
| .. | .. |
|---|
| 984 | 973 | ext4_lock_group(sb, group); |
|---|
| 985 | 974 | /* init the buddy */ |
|---|
| 986 | 975 | memset(data, 0xff, blocksize); |
|---|
| 987 | | - ext4_mb_generate_buddy(sb, data, incore, group); |
|---|
| 976 | + ext4_mb_generate_buddy(sb, data, incore, group, grinfo); |
|---|
| 988 | 977 | ext4_unlock_group(sb, group); |
|---|
| 989 | 978 | incore = NULL; |
|---|
| 990 | 979 | } else { |
|---|
| .. | .. |
|---|
| 1098 | 1087 | might_sleep(); |
|---|
| 1099 | 1088 | mb_debug(sb, "init group %u\n", group); |
|---|
| 1100 | 1089 | this_grp = ext4_get_group_info(sb, group); |
|---|
| 1090 | + if (!this_grp) |
|---|
| 1091 | + return -EFSCORRUPTED; |
|---|
| 1092 | + |
|---|
| 1101 | 1093 | /* |
|---|
| 1102 | 1094 | * This ensures that we don't reinit the buddy cache |
|---|
| 1103 | 1095 | * page which map to the group from which we are already |
|---|
| .. | .. |
|---|
| 1172 | 1164 | |
|---|
| 1173 | 1165 | blocks_per_page = PAGE_SIZE / sb->s_blocksize; |
|---|
| 1174 | 1166 | grp = ext4_get_group_info(sb, group); |
|---|
| 1167 | + if (!grp) |
|---|
| 1168 | + return -EFSCORRUPTED; |
|---|
| 1175 | 1169 | |
|---|
| 1176 | 1170 | e4b->bd_blkbits = sb->s_blocksize_bits; |
|---|
| 1177 | 1171 | e4b->bd_info = grp; |
|---|
| .. | .. |
|---|
| 1512 | 1506 | sb, e4b->bd_group, |
|---|
| 1513 | 1507 | EXT4_GROUP_INFO_BBITMAP_CORRUPT); |
|---|
| 1514 | 1508 | } |
|---|
| 1515 | | - mb_regenerate_buddy(e4b); |
|---|
| 1516 | 1509 | goto done; |
|---|
| 1517 | 1510 | } |
|---|
| 1518 | 1511 | |
|---|
| .. | .. |
|---|
| 1885 | 1878 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
|---|
| 1886 | 1879 | struct ext4_free_extent ex; |
|---|
| 1887 | 1880 | |
|---|
| 1888 | | - if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) |
|---|
| 1881 | + if (!grp) |
|---|
| 1882 | + return -EFSCORRUPTED; |
|---|
| 1883 | + if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) |
|---|
| 1889 | 1884 | return 0; |
|---|
| 1890 | 1885 | if (grp->bb_free == 0) |
|---|
| 1891 | 1886 | return 0; |
|---|
| .. | .. |
|---|
| 2109 | 2104 | |
|---|
| 2110 | 2105 | BUG_ON(cr < 0 || cr >= 4); |
|---|
| 2111 | 2106 | |
|---|
| 2112 | | - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
|---|
| 2107 | + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
|---|
| 2113 | 2108 | return false; |
|---|
| 2114 | 2109 | |
|---|
| 2115 | 2110 | free = grp->bb_free; |
|---|
| .. | .. |
|---|
| 2172 | 2167 | ext4_grpblk_t free; |
|---|
| 2173 | 2168 | int ret = 0; |
|---|
| 2174 | 2169 | |
|---|
| 2170 | + if (!grp) |
|---|
| 2171 | + return -EFSCORRUPTED; |
|---|
| 2172 | + if (sbi->s_mb_stats) |
|---|
| 2173 | + atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); |
|---|
| 2175 | 2174 | if (should_lock) |
|---|
| 2176 | 2175 | ext4_lock_group(sb, group); |
|---|
| 2177 | 2176 | free = grp->bb_free; |
|---|
| .. | .. |
|---|
| 2242 | 2241 | * prefetch once, so we avoid getblk() call, which can |
|---|
| 2243 | 2242 | * be expensive. |
|---|
| 2244 | 2243 | */ |
|---|
| 2245 | | - if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && |
|---|
| 2244 | + if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && |
|---|
| 2246 | 2245 | EXT4_MB_GRP_NEED_INIT(grp) && |
|---|
| 2247 | 2246 | ext4_free_group_clusters(sb, gdp) > 0 && |
|---|
| 2248 | 2247 | !(ext4_has_group_desc_csum(sb) && |
|---|
| .. | .. |
|---|
| 2286 | 2285 | group--; |
|---|
| 2287 | 2286 | grp = ext4_get_group_info(sb, group); |
|---|
| 2288 | 2287 | |
|---|
| 2289 | | - if (EXT4_MB_GRP_NEED_INIT(grp) && |
|---|
| 2288 | + if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && |
|---|
| 2290 | 2289 | ext4_free_group_clusters(sb, gdp) > 0 && |
|---|
| 2291 | 2290 | !(ext4_has_group_desc_csum(sb) && |
|---|
| 2292 | 2291 | (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { |
|---|
| .. | .. |
|---|
| 2446 | 2445 | if (ac->ac_status != AC_STATUS_CONTINUE) |
|---|
| 2447 | 2446 | break; |
|---|
| 2448 | 2447 | } |
|---|
| 2448 | + /* Processed all groups and haven't found blocks */ |
|---|
| 2449 | + if (sbi->s_mb_stats && i == ngroups) |
|---|
| 2450 | + atomic64_inc(&sbi->s_bal_cX_failed[cr]); |
|---|
| 2449 | 2451 | } |
|---|
| 2450 | 2452 | |
|---|
| 2451 | 2453 | if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && |
|---|
| .. | .. |
|---|
| 2475 | 2477 | goto repeat; |
|---|
| 2476 | 2478 | } |
|---|
| 2477 | 2479 | } |
|---|
| 2480 | + |
|---|
| 2481 | + if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND) |
|---|
| 2482 | + atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]); |
|---|
| 2478 | 2483 | out: |
|---|
| 2479 | 2484 | if (!err && ac->ac_status != AC_STATUS_FOUND && first_err) |
|---|
| 2480 | 2485 | err = first_err; |
|---|
| .. | .. |
|---|
| 2538 | 2543 | sizeof(struct ext4_group_info); |
|---|
| 2539 | 2544 | |
|---|
| 2540 | 2545 | grinfo = ext4_get_group_info(sb, group); |
|---|
| 2546 | + if (!grinfo) |
|---|
| 2547 | + return 0; |
|---|
| 2541 | 2548 | /* Load the group info in memory only if not already loaded. */ |
|---|
| 2542 | 2549 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { |
|---|
| 2543 | 2550 | err = ext4_mb_load_buddy(sb, group, &e4b); |
|---|
| .. | .. |
|---|
| 2548 | 2555 | buddy_loaded = 1; |
|---|
| 2549 | 2556 | } |
|---|
| 2550 | 2557 | |
|---|
| 2551 | | - memcpy(&sg, ext4_get_group_info(sb, group), i); |
|---|
| 2558 | + memcpy(&sg, grinfo, i); |
|---|
| 2552 | 2559 | |
|---|
| 2553 | 2560 | if (buddy_loaded) |
|---|
| 2554 | 2561 | ext4_mb_unload_buddy(&e4b); |
|---|
| .. | .. |
|---|
| 2573 | 2580 | .stop = ext4_mb_seq_groups_stop, |
|---|
| 2574 | 2581 | .show = ext4_mb_seq_groups_show, |
|---|
| 2575 | 2582 | }; |
|---|
| 2583 | + |
|---|
| 2584 | +int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) |
|---|
| 2585 | +{ |
|---|
| 2586 | + struct super_block *sb = (struct super_block *)seq->private; |
|---|
| 2587 | + struct ext4_sb_info *sbi = EXT4_SB(sb); |
|---|
| 2588 | + |
|---|
| 2589 | + seq_puts(seq, "mballoc:\n"); |
|---|
| 2590 | + if (!sbi->s_mb_stats) { |
|---|
| 2591 | + seq_puts(seq, "\tmb stats collection turned off.\n"); |
|---|
| 2592 | + seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); |
|---|
| 2593 | + return 0; |
|---|
| 2594 | + } |
|---|
| 2595 | + seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); |
|---|
| 2596 | + seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); |
|---|
| 2597 | + |
|---|
| 2598 | + seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); |
|---|
| 2599 | + |
|---|
| 2600 | + seq_puts(seq, "\tcr0_stats:\n"); |
|---|
| 2601 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0])); |
|---|
| 2602 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
|---|
| 2603 | + atomic64_read(&sbi->s_bal_cX_groups_considered[0])); |
|---|
| 2604 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
|---|
| 2605 | + atomic64_read(&sbi->s_bal_cX_failed[0])); |
|---|
| 2606 | + |
|---|
| 2607 | + seq_puts(seq, "\tcr1_stats:\n"); |
|---|
| 2608 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1])); |
|---|
| 2609 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
|---|
| 2610 | + atomic64_read(&sbi->s_bal_cX_groups_considered[1])); |
|---|
| 2611 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
|---|
| 2612 | + atomic64_read(&sbi->s_bal_cX_failed[1])); |
|---|
| 2613 | + |
|---|
| 2614 | + seq_puts(seq, "\tcr2_stats:\n"); |
|---|
| 2615 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2])); |
|---|
| 2616 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
|---|
| 2617 | + atomic64_read(&sbi->s_bal_cX_groups_considered[2])); |
|---|
| 2618 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
|---|
| 2619 | + atomic64_read(&sbi->s_bal_cX_failed[2])); |
|---|
| 2620 | + |
|---|
| 2621 | + seq_puts(seq, "\tcr3_stats:\n"); |
|---|
| 2622 | + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3])); |
|---|
| 2623 | + seq_printf(seq, "\t\tgroups_considered: %llu\n", |
|---|
| 2624 | + atomic64_read(&sbi->s_bal_cX_groups_considered[3])); |
|---|
| 2625 | + seq_printf(seq, "\t\tuseless_loops: %llu\n", |
|---|
| 2626 | + atomic64_read(&sbi->s_bal_cX_failed[3])); |
|---|
| 2627 | + seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); |
|---|
| 2628 | + seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); |
|---|
| 2629 | + seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); |
|---|
| 2630 | + seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); |
|---|
| 2631 | + seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); |
|---|
| 2632 | + |
|---|
| 2633 | + seq_printf(seq, "\tbuddies_generated: %u/%u\n", |
|---|
| 2634 | + atomic_read(&sbi->s_mb_buddies_generated), |
|---|
| 2635 | + ext4_get_groups_count(sb)); |
|---|
| 2636 | + seq_printf(seq, "\tbuddies_time_used: %llu\n", |
|---|
| 2637 | + atomic64_read(&sbi->s_mb_generation_time)); |
|---|
| 2638 | + seq_printf(seq, "\tpreallocated: %u\n", |
|---|
| 2639 | + atomic_read(&sbi->s_mb_preallocated)); |
|---|
| 2640 | + seq_printf(seq, "\tdiscarded: %u\n", |
|---|
| 2641 | + atomic_read(&sbi->s_mb_discarded)); |
|---|
| 2642 | + return 0; |
|---|
| 2643 | +} |
|---|
| 2576 | 2644 | |
|---|
| 2577 | 2645 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) |
|---|
| 2578 | 2646 | { |
|---|
| .. | .. |
|---|
| 2764 | 2832 | |
|---|
| 2765 | 2833 | err_freebuddy: |
|---|
| 2766 | 2834 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
|---|
| 2767 | | - while (i-- > 0) |
|---|
| 2768 | | - kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
|---|
| 2835 | + while (i-- > 0) { |
|---|
| 2836 | + struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
|---|
| 2837 | + |
|---|
| 2838 | + if (grp) |
|---|
| 2839 | + kmem_cache_free(cachep, grp); |
|---|
| 2840 | + } |
|---|
| 2769 | 2841 | i = sbi->s_group_info_size; |
|---|
| 2770 | 2842 | rcu_read_lock(); |
|---|
| 2771 | 2843 | group_info = rcu_dereference(sbi->s_group_info); |
|---|
| .. | .. |
|---|
| 2874 | 2946 | } while (i <= sb->s_blocksize_bits + 1); |
|---|
| 2875 | 2947 | |
|---|
| 2876 | 2948 | spin_lock_init(&sbi->s_md_lock); |
|---|
| 2877 | | - spin_lock_init(&sbi->s_bal_lock); |
|---|
| 2878 | 2949 | sbi->s_mb_free_pending = 0; |
|---|
| 2879 | 2950 | INIT_LIST_HEAD(&sbi->s_freed_data_list); |
|---|
| 2880 | 2951 | |
|---|
| .. | .. |
|---|
| 2973 | 3044 | for (i = 0; i < ngroups; i++) { |
|---|
| 2974 | 3045 | cond_resched(); |
|---|
| 2975 | 3046 | grinfo = ext4_get_group_info(sb, i); |
|---|
| 3047 | + if (!grinfo) |
|---|
| 3048 | + continue; |
|---|
| 2976 | 3049 | mb_group_bb_bitmap_free(grinfo); |
|---|
| 2977 | 3050 | ext4_lock_group(sb, i); |
|---|
| 2978 | 3051 | count = ext4_mb_cleanup_pa(grinfo); |
|---|
| .. | .. |
|---|
| 3002 | 3075 | atomic_read(&sbi->s_bal_reqs), |
|---|
| 3003 | 3076 | atomic_read(&sbi->s_bal_success)); |
|---|
| 3004 | 3077 | ext4_msg(sb, KERN_INFO, |
|---|
| 3005 | | - "mballoc: %u extents scanned, %u goal hits, " |
|---|
| 3078 | + "mballoc: %u extents scanned, %u groups scanned, %u goal hits, " |
|---|
| 3006 | 3079 | "%u 2^N hits, %u breaks, %u lost", |
|---|
| 3007 | 3080 | atomic_read(&sbi->s_bal_ex_scanned), |
|---|
| 3081 | + atomic_read(&sbi->s_bal_groups_scanned), |
|---|
| 3008 | 3082 | atomic_read(&sbi->s_bal_goals), |
|---|
| 3009 | 3083 | atomic_read(&sbi->s_bal_2orders), |
|---|
| 3010 | 3084 | atomic_read(&sbi->s_bal_breaks), |
|---|
| 3011 | 3085 | atomic_read(&sbi->s_mb_lost_chunks)); |
|---|
| 3012 | 3086 | ext4_msg(sb, KERN_INFO, |
|---|
| 3013 | | - "mballoc: %lu generated and it took %Lu", |
|---|
| 3014 | | - sbi->s_mb_buddies_generated, |
|---|
| 3015 | | - sbi->s_mb_generation_time); |
|---|
| 3087 | + "mballoc: %u generated and it took %llu", |
|---|
| 3088 | + atomic_read(&sbi->s_mb_buddies_generated), |
|---|
| 3089 | + atomic64_read(&sbi->s_mb_generation_time)); |
|---|
| 3016 | 3090 | ext4_msg(sb, KERN_INFO, |
|---|
| 3017 | 3091 | "mballoc: %u preallocated, %u discarded", |
|---|
| 3018 | 3092 | atomic_read(&sbi->s_mb_preallocated), |
|---|
| .. | .. |
|---|
| 3439 | 3513 | struct ext4_allocation_request *ar) |
|---|
| 3440 | 3514 | { |
|---|
| 3441 | 3515 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
|---|
| 3516 | + struct ext4_super_block *es = sbi->s_es; |
|---|
| 3442 | 3517 | int bsbits, max; |
|---|
| 3443 | 3518 | ext4_lblk_t end; |
|---|
| 3444 | 3519 | loff_t size, start_off; |
|---|
| .. | .. |
|---|
| 3619 | 3694 | ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size); |
|---|
| 3620 | 3695 | |
|---|
| 3621 | 3696 | /* define goal start in order to merge */ |
|---|
| 3622 | | - if (ar->pright && (ar->lright == (start + size))) { |
|---|
| 3697 | + if (ar->pright && (ar->lright == (start + size)) && |
|---|
| 3698 | + ar->pright >= size && |
|---|
| 3699 | + ar->pright - size >= le32_to_cpu(es->s_first_data_block)) { |
|---|
| 3623 | 3700 | /* merge to the right */ |
|---|
| 3624 | 3701 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size, |
|---|
| 3625 | | - &ac->ac_f_ex.fe_group, |
|---|
| 3626 | | - &ac->ac_f_ex.fe_start); |
|---|
| 3702 | + &ac->ac_g_ex.fe_group, |
|---|
| 3703 | + &ac->ac_g_ex.fe_start); |
|---|
| 3627 | 3704 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
|---|
| 3628 | 3705 | } |
|---|
| 3629 | | - if (ar->pleft && (ar->lleft + 1 == start)) { |
|---|
| 3706 | + if (ar->pleft && (ar->lleft + 1 == start) && |
|---|
| 3707 | + ar->pleft + 1 < ext4_blocks_count(es)) { |
|---|
| 3630 | 3708 | /* merge to the left */ |
|---|
| 3631 | 3709 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1, |
|---|
| 3632 | | - &ac->ac_f_ex.fe_group, |
|---|
| 3633 | | - &ac->ac_f_ex.fe_start); |
|---|
| 3710 | + &ac->ac_g_ex.fe_group, |
|---|
| 3711 | + &ac->ac_g_ex.fe_start); |
|---|
| 3634 | 3712 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
|---|
| 3635 | 3713 | } |
|---|
| 3636 | 3714 | |
|---|
| .. | .. |
|---|
| 3642 | 3720 | { |
|---|
| 3643 | 3721 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
|---|
| 3644 | 3722 | |
|---|
| 3645 | | - if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { |
|---|
| 3723 | + if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) { |
|---|
| 3646 | 3724 | atomic_inc(&sbi->s_bal_reqs); |
|---|
| 3647 | 3725 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); |
|---|
| 3648 | 3726 | if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) |
|---|
| 3649 | 3727 | atomic_inc(&sbi->s_bal_success); |
|---|
| 3650 | 3728 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); |
|---|
| 3729 | + atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned); |
|---|
| 3651 | 3730 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && |
|---|
| 3652 | 3731 | ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) |
|---|
| 3653 | 3732 | atomic_inc(&sbi->s_bal_goals); |
|---|
| .. | .. |
|---|
| 3722 | 3801 | BUG_ON(start < pa->pa_pstart); |
|---|
| 3723 | 3802 | BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len)); |
|---|
| 3724 | 3803 | BUG_ON(pa->pa_free < len); |
|---|
| 3804 | + BUG_ON(ac->ac_b_ex.fe_len <= 0); |
|---|
| 3725 | 3805 | pa->pa_free -= len; |
|---|
| 3726 | 3806 | |
|---|
| 3727 | 3807 | mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa); |
|---|
| .. | .. |
|---|
| 3884 | 3964 | struct ext4_free_data *entry; |
|---|
| 3885 | 3965 | |
|---|
| 3886 | 3966 | grp = ext4_get_group_info(sb, group); |
|---|
| 3967 | + if (!grp) |
|---|
| 3968 | + return; |
|---|
| 3887 | 3969 | n = rb_first(&(grp->bb_free_root)); |
|---|
| 3888 | 3970 | |
|---|
| 3889 | 3971 | while (n) { |
|---|
| .. | .. |
|---|
| 3910 | 3992 | ext4_grpblk_t start; |
|---|
| 3911 | 3993 | int preallocated = 0; |
|---|
| 3912 | 3994 | int len; |
|---|
| 3995 | + |
|---|
| 3996 | + if (!grp) |
|---|
| 3997 | + return; |
|---|
| 3913 | 3998 | |
|---|
| 3914 | 3999 | /* all form of preallocation discards first load group, |
|---|
| 3915 | 4000 | * so the only competing code is preallocation use. |
|---|
| .. | .. |
|---|
| 4046 | 4131 | pa = ac->ac_pa; |
|---|
| 4047 | 4132 | |
|---|
| 4048 | 4133 | if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { |
|---|
| 4049 | | - int winl; |
|---|
| 4050 | | - int wins; |
|---|
| 4051 | | - int win; |
|---|
| 4052 | | - int offs; |
|---|
| 4134 | + int new_bex_start; |
|---|
| 4135 | + int new_bex_end; |
|---|
| 4053 | 4136 | |
|---|
| 4054 | 4137 | /* we can't allocate as much as normalizer wants. |
|---|
| 4055 | 4138 | * so, found space must get proper lstart |
|---|
| .. | .. |
|---|
| 4057 | 4140 | BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); |
|---|
| 4058 | 4141 | BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); |
|---|
| 4059 | 4142 | |
|---|
| 4060 | | - /* we're limited by original request in that |
|---|
| 4061 | | - * logical block must be covered any way |
|---|
| 4062 | | - * winl is window we can move our chunk within */ |
|---|
| 4063 | | - winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; |
|---|
| 4143 | + /* |
|---|
| 4144 | + * Use the below logic for adjusting best extent as it keeps |
|---|
| 4145 | + * fragmentation in check while ensuring logical range of best |
|---|
| 4146 | + * extent doesn't overflow out of goal extent: |
|---|
| 4147 | + * |
|---|
| 4148 | + * 1. Check if best ex can be kept at end of goal and still |
|---|
| 4149 | + * cover original start |
|---|
| 4150 | + * 2. Else, check if best ex can be kept at start of goal and |
|---|
| 4151 | + * still cover original start |
|---|
| 4152 | + * 3. Else, keep the best ex at start of original request. |
|---|
| 4153 | + */ |
|---|
| 4154 | + new_bex_end = ac->ac_g_ex.fe_logical + |
|---|
| 4155 | + EXT4_C2B(sbi, ac->ac_g_ex.fe_len); |
|---|
| 4156 | + new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
|---|
| 4157 | + if (ac->ac_o_ex.fe_logical >= new_bex_start) |
|---|
| 4158 | + goto adjust_bex; |
|---|
| 4064 | 4159 | |
|---|
| 4065 | | - /* also, we should cover whole original request */ |
|---|
| 4066 | | - wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len); |
|---|
| 4160 | + new_bex_start = ac->ac_g_ex.fe_logical; |
|---|
| 4161 | + new_bex_end = |
|---|
| 4162 | + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
|---|
| 4163 | + if (ac->ac_o_ex.fe_logical < new_bex_end) |
|---|
| 4164 | + goto adjust_bex; |
|---|
| 4067 | 4165 | |
|---|
| 4068 | | - /* the smallest one defines real window */ |
|---|
| 4069 | | - win = min(winl, wins); |
|---|
| 4166 | + new_bex_start = ac->ac_o_ex.fe_logical; |
|---|
| 4167 | + new_bex_end = |
|---|
| 4168 | + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
|---|
| 4070 | 4169 | |
|---|
| 4071 | | - offs = ac->ac_o_ex.fe_logical % |
|---|
| 4072 | | - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
|---|
| 4073 | | - if (offs && offs < win) |
|---|
| 4074 | | - win = offs; |
|---|
| 4170 | +adjust_bex: |
|---|
| 4171 | + ac->ac_b_ex.fe_logical = new_bex_start; |
|---|
| 4075 | 4172 | |
|---|
| 4076 | | - ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - |
|---|
| 4077 | | - EXT4_NUM_B2C(sbi, win); |
|---|
| 4078 | 4173 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); |
|---|
| 4079 | 4174 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); |
|---|
| 4175 | + BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + |
|---|
| 4176 | + EXT4_C2B(sbi, ac->ac_g_ex.fe_len))); |
|---|
| 4080 | 4177 | } |
|---|
| 4081 | 4178 | |
|---|
| 4082 | 4179 | /* preallocation can change ac_b_ex, thus we store actually |
|---|
| .. | .. |
|---|
| 4102 | 4199 | |
|---|
| 4103 | 4200 | ei = EXT4_I(ac->ac_inode); |
|---|
| 4104 | 4201 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
|---|
| 4202 | + if (!grp) |
|---|
| 4203 | + return; |
|---|
| 4105 | 4204 | |
|---|
| 4106 | 4205 | pa->pa_obj_lock = &ei->i_prealloc_lock; |
|---|
| 4107 | 4206 | pa->pa_inode = ac->ac_inode; |
|---|
| .. | .. |
|---|
| 4155 | 4254 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
|---|
| 4156 | 4255 | |
|---|
| 4157 | 4256 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
|---|
| 4257 | + if (!grp) |
|---|
| 4258 | + return; |
|---|
| 4158 | 4259 | lg = ac->ac_lg; |
|---|
| 4159 | 4260 | BUG_ON(lg == NULL); |
|---|
| 4160 | 4261 | |
|---|
| .. | .. |
|---|
| 4250 | 4351 | trace_ext4_mb_release_group_pa(sb, pa); |
|---|
| 4251 | 4352 | BUG_ON(pa->pa_deleted == 0); |
|---|
| 4252 | 4353 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
|---|
| 4253 | | - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
|---|
| 4354 | + if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { |
|---|
| 4355 | + ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", |
|---|
| 4356 | + e4b->bd_group, group, pa->pa_pstart); |
|---|
| 4357 | + return 0; |
|---|
| 4358 | + } |
|---|
| 4254 | 4359 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
|---|
| 4255 | 4360 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
|---|
| 4256 | 4361 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); |
|---|
| .. | .. |
|---|
| 4279 | 4384 | int err; |
|---|
| 4280 | 4385 | int free = 0; |
|---|
| 4281 | 4386 | |
|---|
| 4387 | + if (!grp) |
|---|
| 4388 | + return 0; |
|---|
| 4282 | 4389 | mb_debug(sb, "discard preallocation for group %u\n", group); |
|---|
| 4283 | 4390 | if (list_empty(&grp->bb_prealloc_list)) |
|---|
| 4284 | 4391 | goto out_dbg; |
|---|
| .. | .. |
|---|
| 4516 | 4623 | struct ext4_prealloc_space *pa; |
|---|
| 4517 | 4624 | ext4_grpblk_t start; |
|---|
| 4518 | 4625 | struct list_head *cur; |
|---|
| 4626 | + |
|---|
| 4627 | + if (!grp) |
|---|
| 4628 | + continue; |
|---|
| 4519 | 4629 | ext4_lock_group(sb, i); |
|---|
| 4520 | 4630 | list_for_each(cur, &grp->bb_prealloc_list) { |
|---|
| 4521 | 4631 | pa = list_entry(cur, struct ext4_prealloc_space, |
|---|
| .. | .. |
|---|
| 5303 | 5413 | } |
|---|
| 5304 | 5414 | |
|---|
| 5305 | 5415 | /** |
|---|
| 5306 | | - * ext4_free_blocks() -- Free given blocks and update quota |
|---|
| 5416 | + * ext4_mb_clear_bb() -- helper function for freeing blocks. |
|---|
| 5417 | + * Used by ext4_free_blocks() |
|---|
| 5307 | 5418 | * @handle: handle for this transaction |
|---|
| 5308 | 5419 | * @inode: inode |
|---|
| 5309 | 5420 | * @bh: optional buffer of the block to be freed |
|---|
| .. | .. |
|---|
| 5311 | 5422 | * @count: number of blocks to be freed |
|---|
| 5312 | 5423 | * @flags: flags used by ext4_free_blocks |
|---|
| 5313 | 5424 | */ |
|---|
| 5314 | | -void ext4_free_blocks(handle_t *handle, struct inode *inode, |
|---|
| 5315 | | - struct buffer_head *bh, ext4_fsblk_t block, |
|---|
| 5316 | | - unsigned long count, int flags) |
|---|
| 5425 | +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, |
|---|
| 5426 | + ext4_fsblk_t block, unsigned long count, |
|---|
| 5427 | + int flags) |
|---|
| 5317 | 5428 | { |
|---|
| 5318 | 5429 | struct buffer_head *bitmap_bh = NULL; |
|---|
| 5319 | 5430 | struct super_block *sb = inode->i_sb; |
|---|
| 5320 | 5431 | struct ext4_group_desc *gdp; |
|---|
| 5432 | + struct ext4_group_info *grp; |
|---|
| 5321 | 5433 | unsigned int overflow; |
|---|
| 5322 | 5434 | ext4_grpblk_t bit; |
|---|
| 5323 | 5435 | struct buffer_head *gd_bh; |
|---|
| .. | .. |
|---|
| 5330 | 5442 | |
|---|
| 5331 | 5443 | sbi = EXT4_SB(sb); |
|---|
| 5332 | 5444 | |
|---|
| 5333 | | - if (sbi->s_mount_state & EXT4_FC_REPLAY) { |
|---|
| 5334 | | - ext4_free_blocks_simple(inode, block, count); |
|---|
| 5335 | | - return; |
|---|
| 5336 | | - } |
|---|
| 5337 | | - |
|---|
| 5338 | | - might_sleep(); |
|---|
| 5339 | | - if (bh) { |
|---|
| 5340 | | - if (block) |
|---|
| 5341 | | - BUG_ON(block != bh->b_blocknr); |
|---|
| 5342 | | - else |
|---|
| 5343 | | - block = bh->b_blocknr; |
|---|
| 5344 | | - } |
|---|
| 5345 | | - |
|---|
| 5346 | 5445 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
|---|
| 5347 | 5446 | !ext4_inode_block_valid(inode, block, count)) { |
|---|
| 5348 | | - ext4_error(sb, "Freeing blocks not in datazone - " |
|---|
| 5349 | | - "block = %llu, count = %lu", block, count); |
|---|
| 5447 | + ext4_error(sb, "Freeing blocks in system zone - " |
|---|
| 5448 | + "Block = %llu, count = %lu", block, count); |
|---|
| 5449 | + /* err = 0. ext4_std_error should be a no op */ |
|---|
| 5350 | 5450 | goto error_return; |
|---|
| 5351 | 5451 | } |
|---|
| 5352 | | - |
|---|
| 5353 | | - ext4_debug("freeing block %llu\n", block); |
|---|
| 5354 | | - trace_ext4_free_blocks(inode, block, count, flags); |
|---|
| 5355 | | - |
|---|
| 5356 | | - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
|---|
| 5357 | | - BUG_ON(count > 1); |
|---|
| 5358 | | - |
|---|
| 5359 | | - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
|---|
| 5360 | | - inode, bh, block); |
|---|
| 5361 | | - } |
|---|
| 5362 | | - |
|---|
| 5363 | | - /* |
|---|
| 5364 | | - * If the extent to be freed does not begin on a cluster |
|---|
| 5365 | | - * boundary, we need to deal with partial clusters at the |
|---|
| 5366 | | - * beginning and end of the extent. Normally we will free |
|---|
| 5367 | | - * blocks at the beginning or the end unless we are explicitly |
|---|
| 5368 | | - * requested to avoid doing so. |
|---|
| 5369 | | - */ |
|---|
| 5370 | | - overflow = EXT4_PBLK_COFF(sbi, block); |
|---|
| 5371 | | - if (overflow) { |
|---|
| 5372 | | - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
|---|
| 5373 | | - overflow = sbi->s_cluster_ratio - overflow; |
|---|
| 5374 | | - block += overflow; |
|---|
| 5375 | | - if (count > overflow) |
|---|
| 5376 | | - count -= overflow; |
|---|
| 5377 | | - else |
|---|
| 5378 | | - return; |
|---|
| 5379 | | - } else { |
|---|
| 5380 | | - block -= overflow; |
|---|
| 5381 | | - count += overflow; |
|---|
| 5382 | | - } |
|---|
| 5383 | | - } |
|---|
| 5384 | | - overflow = EXT4_LBLK_COFF(sbi, count); |
|---|
| 5385 | | - if (overflow) { |
|---|
| 5386 | | - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
|---|
| 5387 | | - if (count > overflow) |
|---|
| 5388 | | - count -= overflow; |
|---|
| 5389 | | - else |
|---|
| 5390 | | - return; |
|---|
| 5391 | | - } else |
|---|
| 5392 | | - count += sbi->s_cluster_ratio - overflow; |
|---|
| 5393 | | - } |
|---|
| 5394 | | - |
|---|
| 5395 | | - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
|---|
| 5396 | | - int i; |
|---|
| 5397 | | - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; |
|---|
| 5398 | | - |
|---|
| 5399 | | - for (i = 0; i < count; i++) { |
|---|
| 5400 | | - cond_resched(); |
|---|
| 5401 | | - if (is_metadata) |
|---|
| 5402 | | - bh = sb_find_get_block(inode->i_sb, block + i); |
|---|
| 5403 | | - ext4_forget(handle, is_metadata, inode, bh, block + i); |
|---|
| 5404 | | - } |
|---|
| 5405 | | - } |
|---|
| 5452 | + flags |= EXT4_FREE_BLOCKS_VALIDATED; |
|---|
| 5406 | 5453 | |
|---|
| 5407 | 5454 | do_more: |
|---|
| 5408 | 5455 | overflow = 0; |
|---|
| 5409 | 5456 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
|---|
| 5410 | 5457 | |
|---|
| 5411 | | - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( |
|---|
| 5412 | | - ext4_get_group_info(sb, block_group)))) |
|---|
| 5458 | + grp = ext4_get_group_info(sb, block_group); |
|---|
| 5459 | + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) |
|---|
| 5413 | 5460 | return; |
|---|
| 5414 | 5461 | |
|---|
| 5415 | 5462 | /* |
|---|
| .. | .. |
|---|
| 5420 | 5467 | overflow = EXT4_C2B(sbi, bit) + count - |
|---|
| 5421 | 5468 | EXT4_BLOCKS_PER_GROUP(sb); |
|---|
| 5422 | 5469 | count -= overflow; |
|---|
| 5470 | + /* The range changed so it's no longer validated */ |
|---|
| 5471 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
|---|
| 5423 | 5472 | } |
|---|
| 5424 | 5473 | count_clusters = EXT4_NUM_B2C(sbi, count); |
|---|
| 5425 | 5474 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
|---|
| .. | .. |
|---|
| 5434 | 5483 | goto error_return; |
|---|
| 5435 | 5484 | } |
|---|
| 5436 | 5485 | |
|---|
| 5437 | | - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || |
|---|
| 5438 | | - in_range(ext4_inode_bitmap(sb, gdp), block, count) || |
|---|
| 5439 | | - in_range(block, ext4_inode_table(sb, gdp), |
|---|
| 5440 | | - sbi->s_itb_per_group) || |
|---|
| 5441 | | - in_range(block + count - 1, ext4_inode_table(sb, gdp), |
|---|
| 5442 | | - sbi->s_itb_per_group)) { |
|---|
| 5443 | | - |
|---|
| 5486 | + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
|---|
| 5487 | + !ext4_inode_block_valid(inode, block, count)) { |
|---|
| 5444 | 5488 | ext4_error(sb, "Freeing blocks in system zone - " |
|---|
| 5445 | 5489 | "Block = %llu, count = %lu", block, count); |
|---|
| 5446 | 5490 | /* err = 0. ext4_std_error should be a no op */ |
|---|
| .. | .. |
|---|
| 5506 | 5550 | * them with group lock_held |
|---|
| 5507 | 5551 | */ |
|---|
| 5508 | 5552 | if (test_opt(sb, DISCARD)) { |
|---|
| 5509 | | - err = ext4_issue_discard(sb, block_group, bit, count, |
|---|
| 5510 | | - NULL); |
|---|
| 5553 | + err = ext4_issue_discard(sb, block_group, bit, |
|---|
| 5554 | + count_clusters, NULL); |
|---|
| 5511 | 5555 | if (err && err != -EOPNOTSUPP) |
|---|
| 5512 | 5556 | ext4_msg(sb, KERN_WARNING, "discard request in" |
|---|
| 5513 | | - " group:%d block:%d count:%lu failed" |
|---|
| 5557 | + " group:%u block:%d count:%lu failed" |
|---|
| 5514 | 5558 | " with %d", block_group, bit, count, |
|---|
| 5515 | 5559 | err); |
|---|
| 5516 | 5560 | } else |
|---|
| .. | .. |
|---|
| 5562 | 5606 | block += count; |
|---|
| 5563 | 5607 | count = overflow; |
|---|
| 5564 | 5608 | put_bh(bitmap_bh); |
|---|
| 5609 | + /* The range changed so it's no longer validated */ |
|---|
| 5610 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
|---|
| 5565 | 5611 | goto do_more; |
|---|
| 5566 | 5612 | } |
|---|
| 5567 | 5613 | error_return: |
|---|
| 5568 | 5614 | brelse(bitmap_bh); |
|---|
| 5569 | 5615 | ext4_std_error(sb, err); |
|---|
| 5616 | + return; |
|---|
| 5617 | +} |
|---|
| 5618 | + |
|---|
| 5619 | +/** |
|---|
| 5620 | + * ext4_free_blocks() -- Free given blocks and update quota |
|---|
| 5621 | + * @handle: handle for this transaction |
|---|
| 5622 | + * @inode: inode |
|---|
| 5623 | + * @bh: optional buffer of the block to be freed |
|---|
| 5624 | + * @block: starting physical block to be freed |
|---|
| 5625 | + * @count: number of blocks to be freed |
|---|
| 5626 | + * @flags: flags used by ext4_free_blocks |
|---|
| 5627 | + */ |
|---|
| 5628 | +void ext4_free_blocks(handle_t *handle, struct inode *inode, |
|---|
| 5629 | + struct buffer_head *bh, ext4_fsblk_t block, |
|---|
| 5630 | + unsigned long count, int flags) |
|---|
| 5631 | +{ |
|---|
| 5632 | + struct super_block *sb = inode->i_sb; |
|---|
| 5633 | + unsigned int overflow; |
|---|
| 5634 | + struct ext4_sb_info *sbi; |
|---|
| 5635 | + |
|---|
| 5636 | + sbi = EXT4_SB(sb); |
|---|
| 5637 | + /* A supplied buffer defines the block: a non-zero @block must match it, a zero @block is taken from it. */ |
|---|
| 5638 | + if (bh) { |
|---|
| 5639 | + if (block) |
|---|
| 5640 | + BUG_ON(block != bh->b_blocknr); |
|---|
| 5641 | + else |
|---|
| 5642 | + block = bh->b_blocknr; |
|---|
| 5643 | + } |
|---|
| 5644 | + /* With EXT4_FC_REPLAY set, hand the range (count converted by EXT4_NUM_B2C) to ext4_free_blocks_simple() and do nothing else. */ |
|---|
| 5645 | + if (sbi->s_mount_state & EXT4_FC_REPLAY) { |
|---|
| 5646 | + ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count)); |
|---|
| 5647 | + return; |
|---|
| 5648 | + } |
|---|
| 5649 | + |
|---|
| 5650 | + might_sleep(); |
|---|
| 5651 | + /* Unless the caller already set EXT4_FREE_BLOCKS_VALIDATED, check the range and refuse to free it when the check fails. */ |
|---|
| 5652 | + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
|---|
| 5653 | + !ext4_inode_block_valid(inode, block, count)) { |
|---|
| 5654 | + ext4_error(sb, "Freeing blocks not in datazone - " |
|---|
| 5655 | + "block = %llu, count = %lu", block, count); |
|---|
| 5656 | + return; |
|---|
| 5657 | + } |
|---|
| 5658 | + flags |= EXT4_FREE_BLOCKS_VALIDATED; /* cleared again below if the range gets cluster-adjusted */ |
|---|
| 5659 | + |
|---|
| 5660 | + ext4_debug("freeing block %llu\n", block); |
|---|
| 5661 | + trace_ext4_free_blocks(inode, block, count, flags); |
|---|
| 5662 | + /* With a buffer and EXT4_FREE_BLOCKS_FORGET at most one block may be freed; that buffer goes to ext4_forget(). */ |
|---|
| 5663 | + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
|---|
| 5664 | + BUG_ON(count > 1); |
|---|
| 5665 | + |
|---|
| 5666 | + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
|---|
| 5667 | + inode, bh, block); |
|---|
| 5668 | + } |
|---|
| 5669 | + |
|---|
| 5670 | + /* |
|---|
| 5671 | + * If the extent to be freed does not begin on a cluster |
|---|
| 5672 | + * boundary, we need to deal with partial clusters at the |
|---|
| 5673 | + * beginning and end of the extent. Normally we will free |
|---|
| 5674 | + * blocks at the beginning or the end unless we are explicitly |
|---|
| 5675 | + * requested to avoid doing so. |
|---|
| 5676 | + */ |
|---|
| 5677 | + overflow = EXT4_PBLK_COFF(sbi, block); |
|---|
| 5678 | + if (overflow) { |
|---|
| 5679 | + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
|---|
| 5680 | + overflow = sbi->s_cluster_ratio - overflow; |
|---|
| 5681 | + block += overflow; |
|---|
| 5682 | + if (count > overflow) |
|---|
| 5683 | + count -= overflow; |
|---|
| 5684 | + else |
|---|
| 5685 | + return; |
|---|
| 5686 | + } else { |
|---|
| 5687 | + block -= overflow; |
|---|
| 5688 | + count += overflow; |
|---|
| 5689 | + } |
|---|
| 5690 | + /* The range changed so it's no longer validated */ |
|---|
| 5691 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
|---|
| 5692 | + } |
|---|
| 5693 | + overflow = EXT4_LBLK_COFF(sbi, count); |
|---|
| 5694 | + if (overflow) { |
|---|
| 5695 | + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
|---|
| 5696 | + if (count > overflow) |
|---|
| 5697 | + count -= overflow; |
|---|
| 5698 | + else |
|---|
| 5699 | + return; |
|---|
| 5700 | + } else |
|---|
| 5701 | + count += sbi->s_cluster_ratio - overflow; |
|---|
| 5702 | + /* The range changed so it's no longer validated */ |
|---|
| 5703 | + flags &= ~EXT4_FREE_BLOCKS_VALIDATED; |
|---|
| 5704 | + } |
|---|
| 5705 | + /* No buffer supplied: call ext4_forget() for every block of the (possibly adjusted) range. */ |
|---|
| 5706 | + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { |
|---|
| 5707 | + int i; |
|---|
| 5708 | + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; |
|---|
| 5709 | + /* bh is NULL on entry to this branch; a buffer is looked up only for metadata blocks. */ |
|---|
| 5710 | + for (i = 0; i < count; i++) { |
|---|
| 5711 | + cond_resched(); |
|---|
| 5712 | + if (is_metadata) |
|---|
| 5713 | + bh = sb_find_get_block(inode->i_sb, block + i); |
|---|
| 5714 | + ext4_forget(handle, is_metadata, inode, bh, block + i); |
|---|
| 5715 | + } |
|---|
| 5716 | + } |
|---|
| 5717 | + /* EXT4_FREE_BLOCKS_VALIDATED is still set in flags only if neither adjustment above changed the range. */ |
|---|
| 5718 | + ext4_mb_clear_bb(handle, inode, block, count, flags); |
|---|
| 5570 | 5719 | return; |
|---|
| 5571 | 5720 | } |
|---|
| 5572 | 5721 | |
|---|
| .. | .. |
|---|
| 5626 | 5775 | goto error_return; |
|---|
| 5627 | 5776 | } |
|---|
| 5628 | 5777 | |
|---|
| 5629 | | - if (in_range(ext4_block_bitmap(sb, desc), block, count) || |
|---|
| 5630 | | - in_range(ext4_inode_bitmap(sb, desc), block, count) || |
|---|
| 5631 | | - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
|---|
| 5632 | | - in_range(block + count - 1, ext4_inode_table(sb, desc), |
|---|
| 5633 | | - sbi->s_itb_per_group)) { |
|---|
| 5778 | + if (!ext4_sb_block_valid(sb, NULL, block, count)) { |
|---|
| 5634 | 5779 | ext4_error(sb, "Adding blocks in system zones - " |
|---|
| 5635 | 5780 | "Block = %llu, count = %lu", |
|---|
| 5636 | 5781 | block, count); |
|---|
| .. | .. |
|---|
| 5715 | 5860 | * @sb: super block for the file system |
|---|
| 5716 | 5861 | * @start: starting block of the free extent in the alloc. group |
|---|
| 5717 | 5862 | * @count: number of blocks to TRIM |
|---|
| 5718 | | - * @group: alloc. group we are working with |
|---|
| 5719 | 5863 | * @e4b: ext4 buddy for the group |
|---|
| 5720 | 5864 | * |
|---|
| 5721 | 5865 | * Trim "count" blocks starting at "start" in the group of "e4b". To ensure that no |
|---|
| 5722 | 5866 | * one will allocate those blocks, mark them as used in the buddy bitmap. This must |
|---|
| 5723 | 5867 | * be called under the group lock. |
|---|
| 5724 | 5868 | */ |
|---|
| 5725 | | -static int ext4_trim_extent(struct super_block *sb, int start, int count, |
|---|
| 5726 | | - ext4_group_t group, struct ext4_buddy *e4b) |
|---|
| 5869 | +static int ext4_trim_extent(struct super_block *sb, |
|---|
| 5870 | + int start, int count, struct ext4_buddy *e4b) |
|---|
| 5727 | 5871 | __releases(bitlock) |
|---|
| 5728 | 5872 | __acquires(bitlock) |
|---|
| 5729 | 5873 | { |
|---|
| 5730 | 5874 | struct ext4_free_extent ex; |
|---|
| 5875 | + ext4_group_t group = e4b->bd_group; |
|---|
| 5731 | 5876 | int ret = 0; |
|---|
| 5732 | 5877 | |
|---|
| 5733 | 5878 | trace_ext4_trim_extent(sb, group, start, count); |
|---|
| .. | .. |
|---|
| 5748 | 5893 | ext4_lock_group(sb, group); |
|---|
| 5749 | 5894 | mb_free_blocks(NULL, e4b, start, ex.fe_len); |
|---|
| 5750 | 5895 | return ret; |
|---|
| 5896 | +} |
|---|
| 5897 | +/* Return the index of the last cluster in group @grp. Groups before the final one span EXT4_CLUSTERS_PER_GROUP(sb) clusters; the final group ends at the last block of the filesystem and may be shorter. The test must be against count - 1: with "grp < count" every valid group took the first branch and the final-group computation was unreachable. */ |
|---|
| 5898 | +static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, |
|---|
| 5899 | + ext4_group_t grp) |
|---|
| 5900 | +{ |
|---|
| 5901 | + if (grp < ext4_get_groups_count(sb) - 1) /* not the final group, so full-sized */ |
|---|
| 5902 | + return EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
|---|
| 5903 | + return (ext4_blocks_count(EXT4_SB(sb)->s_es) - /* offset of the last block within the final group ... */ |
|---|
| 5904 | + ext4_group_first_block_no(sb, grp) - 1) >> |
|---|
| 5905 | + EXT4_CLUSTER_BITS(sb); /* ... shifted down to a cluster index */ |
|---|
| 5906 | +} |
|---|
| 5907 | +/* Tell the trim loops to stop early: true when fatal_signal_pending() or freezing() holds for the current task. */ |
|---|
| 5908 | +static bool ext4_trim_interrupted(void) |
|---|
| 5909 | +{ |
|---|
| 5910 | + return fatal_signal_pending(current) || freezing(current); |
|---|
| 5911 | +} |
|---|
| 5912 | +/* Walk e4b's bitmap over [@start, @max] and pass every run of zero bits that is at least @minblocks long to ext4_trim_extent(). Returns the total length of the runs passed on without a hard error. The visible caller holds the group lock; it is dropped and retaken here around rescheduling. */ |
|---|
| 5913 | +static int ext4_try_to_trim_range(struct super_block *sb, |
|---|
| 5914 | + struct ext4_buddy *e4b, ext4_grpblk_t start, |
|---|
| 5915 | + ext4_grpblk_t max, ext4_grpblk_t minblocks) |
|---|
| 5916 | +{ |
|---|
| 5917 | + ext4_grpblk_t next, count, free_count; |
|---|
| 5918 | + bool set_trimmed = false; |
|---|
| 5919 | + void *bitmap; |
|---|
| 5920 | + /* Only a request covering the whole group (from 0 up to its last cluster) may mark the group as trimmed afterwards. */ |
|---|
| 5921 | + bitmap = e4b->bd_bitmap; |
|---|
| 5922 | + if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) |
|---|
| 5923 | + set_trimmed = true; |
|---|
| 5924 | + start = max(e4b->bd_info->bb_first_free, start); /* max() is the helper, not the parameter "max"; nothing below bb_first_free is scanned */ |
|---|
| 5925 | + count = 0; |
|---|
| 5926 | + free_count = 0; |
|---|
| 5927 | + |
|---|
| 5928 | + while (start <= max) { |
|---|
| 5929 | + start = mb_find_next_zero_bit(bitmap, max + 1, start); |
|---|
| 5930 | + if (start > max) |
|---|
| 5931 | + break; |
|---|
| 5932 | + next = mb_find_next_bit(bitmap, max + 1, start); |
|---|
| 5933 | + /* [start, next) is a run of zero bits, counted as free below; trim it only if it is long enough. */ |
|---|
| 5934 | + if ((next - start) >= minblocks) { |
|---|
| 5935 | + int ret = ext4_trim_extent(sb, start, next - start, e4b); |
|---|
| 5936 | + /* -EOPNOTSUPP is tolerated; any other error ends the scan without marking the group trimmed. */ |
|---|
| 5937 | + if (ret && ret != -EOPNOTSUPP) |
|---|
| 5938 | + return count; |
|---|
| 5939 | + count += next - start; |
|---|
| 5940 | + } |
|---|
| 5941 | + free_count += next - start; |
|---|
| 5942 | + start = next + 1; |
|---|
| 5943 | + /* Returning here also skips the trimmed marking after the loop. */ |
|---|
| 5944 | + if (ext4_trim_interrupted()) |
|---|
| 5945 | + return count; |
|---|
| 5946 | + /* Give up the CPU if needed, but release the group lock around cond_resched(). */ |
|---|
| 5947 | + if (need_resched()) { |
|---|
| 5948 | + ext4_unlock_group(sb, e4b->bd_group); |
|---|
| 5949 | + cond_resched(); |
|---|
| 5950 | + ext4_lock_group(sb, e4b->bd_group); |
|---|
| 5951 | + } |
|---|
| 5952 | + /* Stop once the free space not yet accounted for is smaller than @minblocks: no qualifying run can remain. */ |
|---|
| 5953 | + if ((e4b->bd_info->bb_free - free_count) < minblocks) |
|---|
| 5954 | + break; |
|---|
| 5955 | + } |
|---|
| 5956 | + /* Reached only when the loop ended through its condition or a break, never through the early returns. */ |
|---|
| 5957 | + if (set_trimmed) |
|---|
| 5958 | + EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); |
|---|
| 5959 | + |
|---|
| 5960 | + return count; |
|---|
| 5751 | 5961 | } |
|---|
| 5752 | 5962 | |
|---|
| 5753 | 5963 | /** |
|---|
| .. | .. |
|---|
| 5773 | 5983 | ext4_grpblk_t start, ext4_grpblk_t max, |
|---|
| 5774 | 5984 | ext4_grpblk_t minblocks) |
|---|
| 5775 | 5985 | { |
|---|
| 5776 | | - void *bitmap; |
|---|
| 5777 | | - ext4_grpblk_t next, count = 0, free_count = 0; |
|---|
| 5778 | 5986 | struct ext4_buddy e4b; |
|---|
| 5779 | | - int ret = 0; |
|---|
| 5987 | + int ret; |
|---|
| 5780 | 5988 | |
|---|
| 5781 | 5989 | trace_ext4_trim_all_free(sb, group, start, max); |
|---|
| 5782 | 5990 | |
|---|
| .. | .. |
|---|
| 5786 | 5994 | ret, group); |
|---|
| 5787 | 5995 | return ret; |
|---|
| 5788 | 5996 | } |
|---|
| 5789 | | - bitmap = e4b.bd_bitmap; |
|---|
| 5790 | 5997 | |
|---|
| 5791 | 5998 | ext4_lock_group(sb, group); |
|---|
| 5792 | | - if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && |
|---|
| 5793 | | - minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) |
|---|
| 5794 | | - goto out; |
|---|
| 5795 | 5999 | |
|---|
| 5796 | | - start = (e4b.bd_info->bb_first_free > start) ? |
|---|
| 5797 | | - e4b.bd_info->bb_first_free : start; |
|---|
| 6000 | + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || |
|---|
| 6001 | + minblocks < EXT4_SB(sb)->s_last_trim_minblks) |
|---|
| 6002 | + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); |
|---|
| 6003 | + else |
|---|
| 6004 | + ret = 0; |
|---|
| 5798 | 6005 | |
|---|
| 5799 | | - while (start <= max) { |
|---|
| 5800 | | - start = mb_find_next_zero_bit(bitmap, max + 1, start); |
|---|
| 5801 | | - if (start > max) |
|---|
| 5802 | | - break; |
|---|
| 5803 | | - next = mb_find_next_bit(bitmap, max + 1, start); |
|---|
| 5804 | | - |
|---|
| 5805 | | - if ((next - start) >= minblocks) { |
|---|
| 5806 | | - ret = ext4_trim_extent(sb, start, |
|---|
| 5807 | | - next - start, group, &e4b); |
|---|
| 5808 | | - if (ret && ret != -EOPNOTSUPP) |
|---|
| 5809 | | - break; |
|---|
| 5810 | | - ret = 0; |
|---|
| 5811 | | - count += next - start; |
|---|
| 5812 | | - } |
|---|
| 5813 | | - free_count += next - start; |
|---|
| 5814 | | - start = next + 1; |
|---|
| 5815 | | - |
|---|
| 5816 | | - if (fatal_signal_pending(current)) { |
|---|
| 5817 | | - count = -ERESTARTSYS; |
|---|
| 5818 | | - break; |
|---|
| 5819 | | - } |
|---|
| 5820 | | - |
|---|
| 5821 | | - if (need_resched()) { |
|---|
| 5822 | | - ext4_unlock_group(sb, group); |
|---|
| 5823 | | - cond_resched(); |
|---|
| 5824 | | - ext4_lock_group(sb, group); |
|---|
| 5825 | | - } |
|---|
| 5826 | | - |
|---|
| 5827 | | - if ((e4b.bd_info->bb_free - free_count) < minblocks) |
|---|
| 5828 | | - break; |
|---|
| 5829 | | - } |
|---|
| 5830 | | - |
|---|
| 5831 | | - if (!ret) { |
|---|
| 5832 | | - ret = count; |
|---|
| 5833 | | - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); |
|---|
| 5834 | | - } |
|---|
| 5835 | | -out: |
|---|
| 5836 | 6006 | ext4_unlock_group(sb, group); |
|---|
| 5837 | 6007 | ext4_mb_unload_buddy(&e4b); |
|---|
| 5838 | 6008 | |
|---|
| 5839 | 6009 | ext4_debug("trimmed %d blocks in the group %d\n", |
|---|
| 5840 | | - count, group); |
|---|
| 6010 | + ret, group); |
|---|
| 5841 | 6011 | |
|---|
| 5842 | 6012 | return ret; |
|---|
| 5843 | 6013 | } |
|---|
| .. | .. |
|---|
| 5882 | 6052 | if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) |
|---|
| 5883 | 6053 | goto out; |
|---|
| 5884 | 6054 | } |
|---|
| 5885 | | - if (end >= max_blks) |
|---|
| 6055 | + if (end >= max_blks - 1) |
|---|
| 5886 | 6056 | end = max_blks - 1; |
|---|
| 5887 | 6057 | if (end <= first_data_blk) |
|---|
| 5888 | 6058 | goto out; |
|---|
| .. | .. |
|---|
| 5899 | 6069 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
|---|
| 5900 | 6070 | |
|---|
| 5901 | 6071 | for (group = first_group; group <= last_group; group++) { |
|---|
| 6072 | + if (ext4_trim_interrupted()) |
|---|
| 6073 | + break; |
|---|
| 5902 | 6074 | grp = ext4_get_group_info(sb, group); |
|---|
| 6075 | + if (!grp) |
|---|
| 6076 | + continue; |
|---|
| 5903 | 6077 | /* We only do this if the grp has never been initialized */ |
|---|
| 5904 | 6078 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
|---|
| 5905 | 6079 | ret = ext4_mb_init_group(sb, group, GFP_NOFS); |
|---|
| .. | .. |
|---|
| 5915 | 6089 | */ |
|---|
| 5916 | 6090 | if (group == last_group) |
|---|
| 5917 | 6091 | end = last_cluster; |
|---|
| 5918 | | - |
|---|
| 5919 | 6092 | if (grp->bb_free >= minlen) { |
|---|
| 5920 | 6093 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
|---|
| 5921 | | - end, minlen); |
|---|
| 6094 | + end, minlen); |
|---|
| 5922 | 6095 | if (cnt < 0) { |
|---|
| 5923 | 6096 | ret = cnt; |
|---|
| 5924 | 6097 | break; |
|---|
| .. | .. |
|---|
| 5934 | 6107 | } |
|---|
| 5935 | 6108 | |
|---|
| 5936 | 6109 | if (!ret) |
|---|
| 5937 | | - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
|---|
| 6110 | + EXT4_SB(sb)->s_last_trim_minblks = minlen; |
|---|
| 5938 | 6111 | |
|---|
| 5939 | 6112 | out: |
|---|
| 5940 | 6113 | range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; |
|---|
| .. | .. |
|---|
| 5963 | 6136 | |
|---|
| 5964 | 6137 | ext4_lock_group(sb, group); |
|---|
| 5965 | 6138 | |
|---|
| 5966 | | - start = (e4b.bd_info->bb_first_free > start) ? |
|---|
| 5967 | | - e4b.bd_info->bb_first_free : start; |
|---|
| 6139 | + start = max(e4b.bd_info->bb_first_free, start); |
|---|
| 5968 | 6140 | if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) |
|---|
| 5969 | 6141 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
|---|
| 5970 | 6142 | |
|---|