| .. | .. |
|---|
| 11 | 11 | #include <linux/slab.h> |
|---|
| 12 | 12 | #include <linux/workqueue.h> |
|---|
| 13 | 13 | #include <linux/btrfs.h> |
|---|
| 14 | | -#include <linux/sizes.h> |
|---|
| 14 | +#include <linux/sched/mm.h> |
|---|
| 15 | 15 | |
|---|
| 16 | 16 | #include "ctree.h" |
|---|
| 17 | 17 | #include "transaction.h" |
|---|
| .. | .. |
|---|
| 21 | 21 | #include "backref.h" |
|---|
| 22 | 22 | #include "extent_io.h" |
|---|
| 23 | 23 | #include "qgroup.h" |
|---|
| 24 | | - |
|---|
| 24 | +#include "block-group.h" |
|---|
| 25 | +#include "sysfs.h" |
|---|
| 25 | 26 | |
|---|
| 26 | 27 | /* TODO XXX FIXME |
|---|
| 27 | 28 | * - subvol delete -> delete when ref goes to 0? delete limits also? |
|---|
| .. | .. |
|---|
| 30 | 31 | * - sync |
|---|
| 31 | 32 | * - copy also limits on subvol creation |
|---|
| 32 | 33 | * - limit |
|---|
| 33 | | - * - caches fuer ulists |
|---|
| 34 | + * - caches for ulists |
|---|
| 34 | 35 | * - performance benchmarks |
|---|
| 35 | 36 | * - check all ioctl parameters |
|---|
| 36 | 37 | */ |
|---|
| .. | .. |
|---|
| 220 | 221 | return qgroup; |
|---|
| 221 | 222 | } |
|---|
| 222 | 223 | |
|---|
| 223 | | -static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) |
|---|
| 224 | +static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, |
|---|
| 225 | + struct btrfs_qgroup *qgroup) |
|---|
| 224 | 226 | { |
|---|
| 225 | 227 | struct btrfs_qgroup_list *list; |
|---|
| 226 | 228 | |
|---|
| .. | .. |
|---|
| 240 | 242 | list_del(&list->next_member); |
|---|
| 241 | 243 | kfree(list); |
|---|
| 242 | 244 | } |
|---|
| 243 | | - kfree(qgroup); |
|---|
| 244 | 245 | } |
|---|
| 245 | 246 | |
|---|
| 246 | 247 | /* must be called with qgroup_lock held */ |
|---|
| .. | .. |
|---|
| 252 | 253 | return -ENOENT; |
|---|
| 253 | 254 | |
|---|
| 254 | 255 | rb_erase(&qgroup->node, &fs_info->qgroup_tree); |
|---|
| 255 | | - __del_qgroup_rb(qgroup); |
|---|
| 256 | + __del_qgroup_rb(fs_info, qgroup); |
|---|
| 256 | 257 | return 0; |
|---|
| 257 | 258 | } |
|---|
| 258 | 259 | |
|---|
| .. | .. |
|---|
| 351 | 352 | goto out; |
|---|
| 352 | 353 | } |
|---|
| 353 | 354 | |
|---|
| 355 | + ret = btrfs_sysfs_add_qgroups(fs_info); |
|---|
| 356 | + if (ret < 0) |
|---|
| 357 | + goto out; |
|---|
| 354 | 358 | /* default this to quota off, in case no status key is found */ |
|---|
| 355 | 359 | fs_info->qgroup_flags = 0; |
|---|
| 356 | 360 | |
|---|
| .. | .. |
|---|
| 412 | 416 | goto out; |
|---|
| 413 | 417 | } |
|---|
| 414 | 418 | } |
|---|
| 419 | + ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); |
|---|
| 420 | + if (ret < 0) |
|---|
| 421 | + goto out; |
|---|
| 422 | + |
|---|
| 415 | 423 | switch (found_key.type) { |
|---|
| 416 | 424 | case BTRFS_QGROUP_INFO_KEY: { |
|---|
| 417 | 425 | struct btrfs_qgroup_info_item *ptr; |
|---|
| .. | .. |
|---|
| 500 | 508 | ulist_free(fs_info->qgroup_ulist); |
|---|
| 501 | 509 | fs_info->qgroup_ulist = NULL; |
|---|
| 502 | 510 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
|---|
| 511 | + btrfs_sysfs_del_qgroups(fs_info); |
|---|
| 503 | 512 | } |
|---|
| 504 | 513 | |
|---|
| 505 | 514 | return ret < 0 ? ret : 0; |
|---|
| 515 | +} |
|---|
| 516 | + |
|---|
| 517 | +/* |
|---|
| 518 | + * Called in close_ctree() when quota is still enabled. This verifies we don't |
|---|
| 519 | + * leak some reserved space. |
|---|
| 520 | + * |
|---|
| 521 | + * Return false if no reserved space is left. |
|---|
| 522 | + * Return true if some reserved space is leaked. |
|---|
| 523 | + */ |
|---|
| 524 | +bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info) |
|---|
| 525 | +{ |
|---|
| 526 | + struct rb_node *node; |
|---|
| 527 | + bool ret = false; |
|---|
| 528 | + |
|---|
| 529 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
|---|
| 530 | + return ret; |
|---|
| 531 | + /* |
|---|
| 532 | + * Since we're unmounting, there is no race and no need to grab qgroup |
|---|
| 533 | + * lock. And here we don't go post-order to provide a more user |
|---|
| 534 | + * friendly sorted result. |
|---|
| 535 | + */ |
|---|
| 536 | + for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) { |
|---|
| 537 | + struct btrfs_qgroup *qgroup; |
|---|
| 538 | + int i; |
|---|
| 539 | + |
|---|
| 540 | + qgroup = rb_entry(node, struct btrfs_qgroup, node); |
|---|
| 541 | + for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) { |
|---|
| 542 | + if (qgroup->rsv.values[i]) { |
|---|
| 543 | + ret = true; |
|---|
| 544 | + btrfs_warn(fs_info, |
|---|
| 545 | + "qgroup %hu/%llu has unreleased space, type %d rsv %llu", |
|---|
| 546 | + btrfs_qgroup_level(qgroup->qgroupid), |
|---|
| 547 | + btrfs_qgroup_subvolid(qgroup->qgroupid), |
|---|
| 548 | + i, qgroup->rsv.values[i]); |
|---|
| 549 | + } |
|---|
| 550 | + } |
|---|
| 551 | + } |
|---|
| 552 | + return ret; |
|---|
| 506 | 553 | } |
|---|
| 507 | 554 | |
|---|
| 508 | 555 | /* |
|---|
| .. | .. |
|---|
| 519 | 566 | while ((n = rb_first(&fs_info->qgroup_tree))) { |
|---|
| 520 | 567 | qgroup = rb_entry(n, struct btrfs_qgroup, node); |
|---|
| 521 | 568 | rb_erase(n, &fs_info->qgroup_tree); |
|---|
| 522 | | - __del_qgroup_rb(qgroup); |
|---|
| 569 | + __del_qgroup_rb(fs_info, qgroup); |
|---|
| 570 | + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); |
|---|
| 571 | + kfree(qgroup); |
|---|
| 523 | 572 | } |
|---|
| 524 | 573 | /* |
|---|
| 525 | | - * we call btrfs_free_qgroup_config() when umounting |
|---|
| 574 | + * We call btrfs_free_qgroup_config() when unmounting |
|---|
| 526 | 575 | * filesystem and disabling quota, so we set qgroup_ulist |
|---|
| 527 | 576 | * to be null here to avoid double free. |
|---|
| 528 | 577 | */ |
|---|
| 529 | 578 | ulist_free(fs_info->qgroup_ulist); |
|---|
| 530 | 579 | fs_info->qgroup_ulist = NULL; |
|---|
| 580 | + btrfs_sysfs_del_qgroups(fs_info); |
|---|
| 531 | 581 | } |
|---|
| 532 | 582 | |
|---|
| 533 | 583 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, |
|---|
| .. | .. |
|---|
| 887 | 937 | struct btrfs_key found_key; |
|---|
| 888 | 938 | struct btrfs_qgroup *qgroup = NULL; |
|---|
| 889 | 939 | struct btrfs_trans_handle *trans = NULL; |
|---|
| 940 | + struct ulist *ulist = NULL; |
|---|
| 890 | 941 | int ret = 0; |
|---|
| 891 | 942 | int slot; |
|---|
| 943 | + |
|---|
| 944 | + /* |
|---|
| 945 | + * We need to have subvol_sem write locked, to prevent races between |
|---|
| 946 | + * concurrent tasks trying to enable quotas, because we will unlock |
|---|
| 947 | + * and relock qgroup_ioctl_lock before setting fs_info->quota_root |
|---|
| 948 | + * and before setting BTRFS_FS_QUOTA_ENABLED. |
|---|
| 949 | + */ |
|---|
| 950 | + lockdep_assert_held_write(&fs_info->subvol_sem); |
|---|
| 892 | 951 | |
|---|
| 893 | 952 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 894 | 953 | if (fs_info->quota_root) |
|---|
| 895 | 954 | goto out; |
|---|
| 955 | + |
|---|
| 956 | + ulist = ulist_alloc(GFP_KERNEL); |
|---|
| 957 | + if (!ulist) { |
|---|
| 958 | + ret = -ENOMEM; |
|---|
| 959 | + goto out; |
|---|
| 960 | + } |
|---|
| 961 | + |
|---|
| 962 | + ret = btrfs_sysfs_add_qgroups(fs_info); |
|---|
| 963 | + if (ret < 0) |
|---|
| 964 | + goto out; |
|---|
| 965 | + |
|---|
| 966 | + /* |
|---|
| 967 | + * Unlock qgroup_ioctl_lock before starting the transaction. This is to |
|---|
| 968 | + * avoid lock acquisition inversion problems (reported by lockdep) between |
|---|
| 969 | + * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we |
|---|
| 970 | + * start a transaction. |
|---|
| 971 | + * After we started the transaction lock qgroup_ioctl_lock again and |
|---|
| 972 | + * check if someone else created the quota root in the meanwhile. If so, |
|---|
| 973 | + * just return success and release the transaction handle. |
|---|
| 974 | + * |
|---|
| 975 | + * Also we don't need to worry about someone else calling |
|---|
| 976 | + * btrfs_sysfs_add_qgroups() after we unlock and getting an error because |
|---|
| 977 | + * that function returns 0 (success) when the sysfs entries already exist. |
|---|
| 978 | + */ |
|---|
| 979 | + mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 896 | 980 | |
|---|
| 897 | 981 | /* |
|---|
| 898 | 982 | * 1 for quota root item |
|---|
| .. | .. |
|---|
| 903 | 987 | * would be a lot of overkill. |
|---|
| 904 | 988 | */ |
|---|
| 905 | 989 | trans = btrfs_start_transaction(tree_root, 2); |
|---|
| 990 | + |
|---|
| 991 | + mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 906 | 992 | if (IS_ERR(trans)) { |
|---|
| 907 | 993 | ret = PTR_ERR(trans); |
|---|
| 908 | 994 | trans = NULL; |
|---|
| 909 | 995 | goto out; |
|---|
| 910 | 996 | } |
|---|
| 911 | 997 | |
|---|
| 912 | | - fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); |
|---|
| 913 | | - if (!fs_info->qgroup_ulist) { |
|---|
| 914 | | - ret = -ENOMEM; |
|---|
| 915 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 998 | + if (fs_info->quota_root) |
|---|
| 916 | 999 | goto out; |
|---|
| 917 | | - } |
|---|
| 1000 | + |
|---|
| 1001 | + fs_info->qgroup_ulist = ulist; |
|---|
| 1002 | + ulist = NULL; |
|---|
| 918 | 1003 | |
|---|
| 919 | 1004 | /* |
|---|
| 920 | 1005 | * initially create the quota tree |
|---|
| 921 | 1006 | */ |
|---|
| 922 | | - quota_root = btrfs_create_tree(trans, fs_info, |
|---|
| 923 | | - BTRFS_QUOTA_TREE_OBJECTID); |
|---|
| 1007 | + quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID); |
|---|
| 924 | 1008 | if (IS_ERR(quota_root)) { |
|---|
| 925 | 1009 | ret = PTR_ERR(quota_root); |
|---|
| 926 | 1010 | btrfs_abort_transaction(trans, ret); |
|---|
| .. | .. |
|---|
| 976 | 1060 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
|---|
| 977 | 1061 | |
|---|
| 978 | 1062 | if (found_key.type == BTRFS_ROOT_REF_KEY) { |
|---|
| 1063 | + |
|---|
| 1064 | + /* Release locks on tree_root before we access quota_root */ |
|---|
| 1065 | + btrfs_release_path(path); |
|---|
| 1066 | + |
|---|
| 979 | 1067 | ret = add_qgroup_item(trans, quota_root, |
|---|
| 980 | 1068 | found_key.offset); |
|---|
| 981 | 1069 | if (ret) { |
|---|
| .. | .. |
|---|
| 988 | 1076 | ret = PTR_ERR(qgroup); |
|---|
| 989 | 1077 | btrfs_abort_transaction(trans, ret); |
|---|
| 990 | 1078 | goto out_free_path; |
|---|
| 1079 | + } |
|---|
| 1080 | + ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); |
|---|
| 1081 | + if (ret < 0) { |
|---|
| 1082 | + btrfs_abort_transaction(trans, ret); |
|---|
| 1083 | + goto out_free_path; |
|---|
| 1084 | + } |
|---|
| 1085 | + ret = btrfs_search_slot_for_read(tree_root, &found_key, |
|---|
| 1086 | + path, 1, 0); |
|---|
| 1087 | + if (ret < 0) { |
|---|
| 1088 | + btrfs_abort_transaction(trans, ret); |
|---|
| 1089 | + goto out_free_path; |
|---|
| 1090 | + } |
|---|
| 1091 | + if (ret > 0) { |
|---|
| 1092 | + /* |
|---|
| 1093 | + * Shouldn't happen, but in case it does we |
|---|
| 1094 | + * don't need to do the btrfs_next_item, just |
|---|
| 1095 | + * continue. |
|---|
| 1096 | + */ |
|---|
| 1097 | + continue; |
|---|
| 991 | 1098 | } |
|---|
| 992 | 1099 | } |
|---|
| 993 | 1100 | ret = btrfs_next_item(tree_root, path); |
|---|
| .. | .. |
|---|
| 1013 | 1120 | btrfs_abort_transaction(trans, ret); |
|---|
| 1014 | 1121 | goto out_free_path; |
|---|
| 1015 | 1122 | } |
|---|
| 1123 | + ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); |
|---|
| 1124 | + if (ret < 0) { |
|---|
| 1125 | + btrfs_abort_transaction(trans, ret); |
|---|
| 1126 | + goto out_free_path; |
|---|
| 1127 | + } |
|---|
| 1016 | 1128 | |
|---|
| 1129 | + mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1130 | + /* |
|---|
| 1131 | + * Commit the transaction while not holding qgroup_ioctl_lock, to avoid |
|---|
| 1132 | + * a deadlock with tasks concurrently doing other qgroup operations, such as |
|---|
| 1133 | + * adding/removing qgroups or adding/deleting qgroup relations for example, |
|---|
| 1134 | + * because all qgroup operations first start or join a transaction and then |
|---|
| 1135 | + * lock the qgroup_ioctl_lock mutex. |
|---|
| 1136 | + * We are safe from a concurrent task trying to enable quotas, by calling |
|---|
| 1137 | + * this function, since we are serialized by fs_info->subvol_sem. |
|---|
| 1138 | + */ |
|---|
| 1017 | 1139 | ret = btrfs_commit_transaction(trans); |
|---|
| 1018 | 1140 | trans = NULL; |
|---|
| 1141 | + mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1019 | 1142 | if (ret) |
|---|
| 1020 | 1143 | goto out_free_path; |
|---|
| 1021 | 1144 | |
|---|
| .. | .. |
|---|
| 1035 | 1158 | fs_info->qgroup_rescan_running = true; |
|---|
| 1036 | 1159 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
|---|
| 1037 | 1160 | &fs_info->qgroup_rescan_work); |
|---|
| 1161 | + } else { |
|---|
| 1162 | + /* |
|---|
| 1163 | + * We have set both BTRFS_FS_QUOTA_ENABLED and |
|---|
| 1164 | + * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with |
|---|
| 1165 | + * -EINPROGRESS. That can happen because someone started the |
|---|
| 1166 | + * rescan worker by calling quota rescan ioctl before we |
|---|
| 1167 | + * attempted to initialize the rescan worker. Failure due to |
|---|
| 1168 | + * quotas disabled in the meanwhile is not possible, because |
|---|
| 1169 | + * we are holding a write lock on fs_info->subvol_sem, which |
|---|
| 1170 | + * is also acquired when disabling quotas. |
|---|
| 1171 | + * Ignore such error, and any other error would need to undo |
|---|
| 1172 | + * everything we did in the transaction we just committed. |
|---|
| 1173 | + */ |
|---|
| 1174 | + ASSERT(ret == -EINPROGRESS); |
|---|
| 1175 | + ret = 0; |
|---|
| 1038 | 1176 | } |
|---|
| 1039 | 1177 | |
|---|
| 1040 | 1178 | out_free_path: |
|---|
| 1041 | 1179 | btrfs_free_path(path); |
|---|
| 1042 | 1180 | out_free_root: |
|---|
| 1043 | | - if (ret) { |
|---|
| 1044 | | - free_extent_buffer(quota_root->node); |
|---|
| 1045 | | - free_extent_buffer(quota_root->commit_root); |
|---|
| 1046 | | - kfree(quota_root); |
|---|
| 1047 | | - } |
|---|
| 1181 | + if (ret) |
|---|
| 1182 | + btrfs_put_root(quota_root); |
|---|
| 1048 | 1183 | out: |
|---|
| 1049 | 1184 | if (ret) { |
|---|
| 1050 | 1185 | ulist_free(fs_info->qgroup_ulist); |
|---|
| 1051 | 1186 | fs_info->qgroup_ulist = NULL; |
|---|
| 1052 | | - if (trans) |
|---|
| 1053 | | - btrfs_end_transaction(trans); |
|---|
| 1187 | + btrfs_sysfs_del_qgroups(fs_info); |
|---|
| 1054 | 1188 | } |
|---|
| 1055 | 1189 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1190 | + if (ret && trans) |
|---|
| 1191 | + btrfs_end_transaction(trans); |
|---|
| 1192 | + else if (trans) |
|---|
| 1193 | + ret = btrfs_end_transaction(trans); |
|---|
| 1194 | + ulist_free(ulist); |
|---|
| 1056 | 1195 | return ret; |
|---|
| 1057 | 1196 | } |
|---|
| 1058 | 1197 | |
|---|
| .. | .. |
|---|
| 1062 | 1201 | struct btrfs_trans_handle *trans = NULL; |
|---|
| 1063 | 1202 | int ret = 0; |
|---|
| 1064 | 1203 | |
|---|
| 1204 | + /* |
|---|
| 1205 | + * We need to have subvol_sem write locked, to prevent races between |
|---|
| 1206 | + * concurrent tasks trying to disable quotas, because we will unlock |
|---|
| 1207 | + * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. |
|---|
| 1208 | + */ |
|---|
| 1209 | + lockdep_assert_held_write(&fs_info->subvol_sem); |
|---|
| 1210 | + |
|---|
| 1065 | 1211 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1066 | 1212 | if (!fs_info->quota_root) |
|---|
| 1067 | 1213 | goto out; |
|---|
| 1214 | + |
|---|
| 1215 | + /* |
|---|
| 1216 | + * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to |
|---|
| 1217 | + * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs |
|---|
| 1218 | + * to lock that mutex while holding a transaction handle and the rescan |
|---|
| 1219 | + * worker needs to commit a transaction. |
|---|
| 1220 | + */ |
|---|
| 1221 | + mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1222 | + |
|---|
| 1223 | + /* |
|---|
| 1224 | + * Request qgroup rescan worker to complete and wait for it. This wait |
|---|
| 1225 | + * must be done before transaction start for quota disable since it may |
|---|
| 1226 | + * deadlock with transaction by the qgroup rescan worker. |
|---|
| 1227 | + */ |
|---|
| 1228 | + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); |
|---|
| 1229 | + btrfs_qgroup_wait_for_completion(fs_info, false); |
|---|
| 1068 | 1230 | |
|---|
| 1069 | 1231 | /* |
|---|
| 1070 | 1232 | * 1 For the root item |
|---|
| 1071 | 1233 | * |
|---|
| 1072 | 1234 | * We should also reserve enough items for the quota tree deletion in |
|---|
| 1073 | 1235 | * btrfs_clean_quota_tree but this is not done. |
|---|
| 1236 | + * |
|---|
| 1237 | + * Also, we must always start a transaction without holding the mutex |
|---|
| 1238 | + * qgroup_ioctl_lock, see btrfs_quota_enable(). |
|---|
| 1074 | 1239 | */ |
|---|
| 1075 | 1240 | trans = btrfs_start_transaction(fs_info->tree_root, 1); |
|---|
| 1241 | + |
|---|
| 1242 | + mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1076 | 1243 | if (IS_ERR(trans)) { |
|---|
| 1077 | 1244 | ret = PTR_ERR(trans); |
|---|
| 1245 | + trans = NULL; |
|---|
| 1246 | + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); |
|---|
| 1078 | 1247 | goto out; |
|---|
| 1079 | 1248 | } |
|---|
| 1080 | 1249 | |
|---|
| 1081 | | - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); |
|---|
| 1082 | | - btrfs_qgroup_wait_for_completion(fs_info, false); |
|---|
| 1250 | + if (!fs_info->quota_root) |
|---|
| 1251 | + goto out; |
|---|
| 1252 | + |
|---|
| 1083 | 1253 | spin_lock(&fs_info->qgroup_lock); |
|---|
| 1084 | 1254 | quota_root = fs_info->quota_root; |
|---|
| 1085 | 1255 | fs_info->quota_root = NULL; |
|---|
| .. | .. |
|---|
| 1091 | 1261 | ret = btrfs_clean_quota_tree(trans, quota_root); |
|---|
| 1092 | 1262 | if (ret) { |
|---|
| 1093 | 1263 | btrfs_abort_transaction(trans, ret); |
|---|
| 1094 | | - goto end_trans; |
|---|
| 1264 | + goto out; |
|---|
| 1095 | 1265 | } |
|---|
| 1096 | 1266 | |
|---|
| 1097 | 1267 | ret = btrfs_del_root(trans, "a_root->root_key); |
|---|
| 1098 | 1268 | if (ret) { |
|---|
| 1099 | 1269 | btrfs_abort_transaction(trans, ret); |
|---|
| 1100 | | - goto end_trans; |
|---|
| 1270 | + goto out; |
|---|
| 1101 | 1271 | } |
|---|
| 1102 | 1272 | |
|---|
| 1103 | 1273 | list_del("a_root->dirty_list); |
|---|
| 1104 | 1274 | |
|---|
| 1105 | 1275 | btrfs_tree_lock(quota_root->node); |
|---|
| 1106 | | - clean_tree_block(fs_info, quota_root->node); |
|---|
| 1276 | + btrfs_clean_tree_block(quota_root->node); |
|---|
| 1107 | 1277 | btrfs_tree_unlock(quota_root->node); |
|---|
| 1108 | 1278 | btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); |
|---|
| 1109 | 1279 | |
|---|
| 1110 | | - free_extent_buffer(quota_root->node); |
|---|
| 1111 | | - free_extent_buffer(quota_root->commit_root); |
|---|
| 1112 | | - kfree(quota_root); |
|---|
| 1280 | + btrfs_put_root(quota_root); |
|---|
| 1113 | 1281 | |
|---|
| 1114 | | -end_trans: |
|---|
| 1115 | | - ret = btrfs_end_transaction(trans); |
|---|
| 1116 | 1282 | out: |
|---|
| 1117 | 1283 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1284 | + if (ret && trans) |
|---|
| 1285 | + btrfs_end_transaction(trans); |
|---|
| 1286 | + else if (trans) |
|---|
| 1287 | + ret = btrfs_end_transaction(trans); |
|---|
| 1288 | + |
|---|
| 1118 | 1289 | return ret; |
|---|
| 1119 | 1290 | } |
|---|
| 1120 | 1291 | |
|---|
| .. | .. |
|---|
| 1129 | 1300 | * The easy accounting, we're updating qgroup relationship whose child qgroup |
|---|
| 1130 | 1301 | * only has exclusive extents. |
|---|
| 1131 | 1302 | * |
|---|
| 1132 | | - * In this case, all exclsuive extents will also be exlusive for parent, so |
|---|
| 1303 | + * In this case, all exclusive extents will also be exclusive for parent, so |
|---|
| 1133 | 1304 | * excl/rfer just get added/removed. |
|---|
| 1134 | 1305 | * |
|---|
| 1135 | 1306 | * So is qgroup reservation space, which should also be added/removed to |
|---|
| .. | .. |
|---|
| 1246 | 1417 | u64 dst) |
|---|
| 1247 | 1418 | { |
|---|
| 1248 | 1419 | struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 1249 | | - struct btrfs_root *quota_root; |
|---|
| 1250 | 1420 | struct btrfs_qgroup *parent; |
|---|
| 1251 | 1421 | struct btrfs_qgroup *member; |
|---|
| 1252 | 1422 | struct btrfs_qgroup_list *list; |
|---|
| 1253 | 1423 | struct ulist *tmp; |
|---|
| 1424 | + unsigned int nofs_flag; |
|---|
| 1254 | 1425 | int ret = 0; |
|---|
| 1255 | 1426 | |
|---|
| 1256 | 1427 | /* Check the level of src and dst first */ |
|---|
| 1257 | 1428 | if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) |
|---|
| 1258 | 1429 | return -EINVAL; |
|---|
| 1259 | 1430 | |
|---|
| 1431 | + /* We hold a transaction handle open, must do a NOFS allocation. */ |
|---|
| 1432 | + nofs_flag = memalloc_nofs_save(); |
|---|
| 1260 | 1433 | tmp = ulist_alloc(GFP_KERNEL); |
|---|
| 1434 | + memalloc_nofs_restore(nofs_flag); |
|---|
| 1261 | 1435 | if (!tmp) |
|---|
| 1262 | 1436 | return -ENOMEM; |
|---|
| 1263 | 1437 | |
|---|
| 1264 | 1438 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1265 | | - quota_root = fs_info->quota_root; |
|---|
| 1266 | | - if (!quota_root) { |
|---|
| 1267 | | - ret = -EINVAL; |
|---|
| 1439 | + if (!fs_info->quota_root) { |
|---|
| 1440 | + ret = -ENOTCONN; |
|---|
| 1268 | 1441 | goto out; |
|---|
| 1269 | 1442 | } |
|---|
| 1270 | 1443 | member = find_qgroup_rb(fs_info, src); |
|---|
| .. | .. |
|---|
| 1310 | 1483 | u64 dst) |
|---|
| 1311 | 1484 | { |
|---|
| 1312 | 1485 | struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 1313 | | - struct btrfs_root *quota_root; |
|---|
| 1314 | 1486 | struct btrfs_qgroup *parent; |
|---|
| 1315 | 1487 | struct btrfs_qgroup *member; |
|---|
| 1316 | 1488 | struct btrfs_qgroup_list *list; |
|---|
| 1317 | 1489 | struct ulist *tmp; |
|---|
| 1490 | + bool found = false; |
|---|
| 1491 | + unsigned int nofs_flag; |
|---|
| 1318 | 1492 | int ret = 0; |
|---|
| 1319 | | - int err; |
|---|
| 1493 | + int ret2; |
|---|
| 1320 | 1494 | |
|---|
| 1495 | + /* We hold a transaction handle open, must do a NOFS allocation. */ |
|---|
| 1496 | + nofs_flag = memalloc_nofs_save(); |
|---|
| 1321 | 1497 | tmp = ulist_alloc(GFP_KERNEL); |
|---|
| 1498 | + memalloc_nofs_restore(nofs_flag); |
|---|
| 1322 | 1499 | if (!tmp) |
|---|
| 1323 | 1500 | return -ENOMEM; |
|---|
| 1324 | 1501 | |
|---|
| 1325 | | - quota_root = fs_info->quota_root; |
|---|
| 1326 | | - if (!quota_root) { |
|---|
| 1327 | | - ret = -EINVAL; |
|---|
| 1502 | + if (!fs_info->quota_root) { |
|---|
| 1503 | + ret = -ENOTCONN; |
|---|
| 1328 | 1504 | goto out; |
|---|
| 1329 | 1505 | } |
|---|
| 1330 | 1506 | |
|---|
| 1331 | 1507 | member = find_qgroup_rb(fs_info, src); |
|---|
| 1332 | 1508 | parent = find_qgroup_rb(fs_info, dst); |
|---|
| 1333 | | - if (!member || !parent) { |
|---|
| 1334 | | - ret = -EINVAL; |
|---|
| 1335 | | - goto out; |
|---|
| 1336 | | - } |
|---|
| 1509 | + /* |
|---|
| 1510 | + * The parent/member pair doesn't exist, then try to delete the dead |
|---|
| 1511 | + * relation items only. |
|---|
| 1512 | + */ |
|---|
| 1513 | + if (!member || !parent) |
|---|
| 1514 | + goto delete_item; |
|---|
| 1337 | 1515 | |
|---|
| 1338 | 1516 | /* check if such qgroup relation exist firstly */ |
|---|
| 1339 | 1517 | list_for_each_entry(list, &member->groups, next_group) { |
|---|
| 1340 | | - if (list->group == parent) |
|---|
| 1341 | | - goto exist; |
|---|
| 1518 | + if (list->group == parent) { |
|---|
| 1519 | + found = true; |
|---|
| 1520 | + break; |
|---|
| 1521 | + } |
|---|
| 1342 | 1522 | } |
|---|
| 1343 | | - ret = -ENOENT; |
|---|
| 1344 | | - goto out; |
|---|
| 1345 | | -exist: |
|---|
| 1346 | | - ret = del_qgroup_relation_item(trans, src, dst); |
|---|
| 1347 | | - err = del_qgroup_relation_item(trans, dst, src); |
|---|
| 1348 | | - if (err && !ret) |
|---|
| 1349 | | - ret = err; |
|---|
| 1350 | 1523 | |
|---|
| 1351 | | - spin_lock(&fs_info->qgroup_lock); |
|---|
| 1352 | | - del_relation_rb(fs_info, src, dst); |
|---|
| 1353 | | - ret = quick_update_accounting(fs_info, tmp, src, dst, -1); |
|---|
| 1354 | | - spin_unlock(&fs_info->qgroup_lock); |
|---|
| 1524 | +delete_item: |
|---|
| 1525 | + ret = del_qgroup_relation_item(trans, src, dst); |
|---|
| 1526 | + if (ret < 0 && ret != -ENOENT) |
|---|
| 1527 | + goto out; |
|---|
| 1528 | + ret2 = del_qgroup_relation_item(trans, dst, src); |
|---|
| 1529 | + if (ret2 < 0 && ret2 != -ENOENT) |
|---|
| 1530 | + goto out; |
|---|
| 1531 | + |
|---|
| 1532 | + /* At least one deletion succeeded, return 0 */ |
|---|
| 1533 | + if (!ret || !ret2) |
|---|
| 1534 | + ret = 0; |
|---|
| 1535 | + |
|---|
| 1536 | + if (found) { |
|---|
| 1537 | + spin_lock(&fs_info->qgroup_lock); |
|---|
| 1538 | + del_relation_rb(fs_info, src, dst); |
|---|
| 1539 | + ret = quick_update_accounting(fs_info, tmp, src, dst, -1); |
|---|
| 1540 | + spin_unlock(&fs_info->qgroup_lock); |
|---|
| 1541 | + } |
|---|
| 1355 | 1542 | out: |
|---|
| 1356 | 1543 | ulist_free(tmp); |
|---|
| 1357 | 1544 | return ret; |
|---|
| .. | .. |
|---|
| 1378 | 1565 | int ret = 0; |
|---|
| 1379 | 1566 | |
|---|
| 1380 | 1567 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1381 | | - quota_root = fs_info->quota_root; |
|---|
| 1382 | | - if (!quota_root) { |
|---|
| 1383 | | - ret = -EINVAL; |
|---|
| 1568 | + if (!fs_info->quota_root) { |
|---|
| 1569 | + ret = -ENOTCONN; |
|---|
| 1384 | 1570 | goto out; |
|---|
| 1385 | 1571 | } |
|---|
| 1572 | + quota_root = fs_info->quota_root; |
|---|
| 1386 | 1573 | qgroup = find_qgroup_rb(fs_info, qgroupid); |
|---|
| 1387 | 1574 | if (qgroup) { |
|---|
| 1388 | 1575 | ret = -EEXIST; |
|---|
| .. | .. |
|---|
| 1397 | 1584 | qgroup = add_qgroup_rb(fs_info, qgroupid); |
|---|
| 1398 | 1585 | spin_unlock(&fs_info->qgroup_lock); |
|---|
| 1399 | 1586 | |
|---|
| 1400 | | - if (IS_ERR(qgroup)) |
|---|
| 1587 | + if (IS_ERR(qgroup)) { |
|---|
| 1401 | 1588 | ret = PTR_ERR(qgroup); |
|---|
| 1589 | + goto out; |
|---|
| 1590 | + } |
|---|
| 1591 | + ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); |
|---|
| 1402 | 1592 | out: |
|---|
| 1403 | 1593 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1404 | 1594 | return ret; |
|---|
| .. | .. |
|---|
| 1407 | 1597 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) |
|---|
| 1408 | 1598 | { |
|---|
| 1409 | 1599 | struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 1410 | | - struct btrfs_root *quota_root; |
|---|
| 1411 | 1600 | struct btrfs_qgroup *qgroup; |
|---|
| 1412 | 1601 | struct btrfs_qgroup_list *list; |
|---|
| 1413 | 1602 | int ret = 0; |
|---|
| 1414 | 1603 | |
|---|
| 1415 | 1604 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1416 | | - quota_root = fs_info->quota_root; |
|---|
| 1417 | | - if (!quota_root) { |
|---|
| 1418 | | - ret = -EINVAL; |
|---|
| 1605 | + if (!fs_info->quota_root) { |
|---|
| 1606 | + ret = -ENOTCONN; |
|---|
| 1419 | 1607 | goto out; |
|---|
| 1420 | 1608 | } |
|---|
| 1421 | 1609 | |
|---|
| .. | .. |
|---|
| 1423 | 1611 | if (!qgroup) { |
|---|
| 1424 | 1612 | ret = -ENOENT; |
|---|
| 1425 | 1613 | goto out; |
|---|
| 1426 | | - } else { |
|---|
| 1427 | | - /* check if there are no children of this qgroup */ |
|---|
| 1428 | | - if (!list_empty(&qgroup->members)) { |
|---|
| 1429 | | - ret = -EBUSY; |
|---|
| 1430 | | - goto out; |
|---|
| 1431 | | - } |
|---|
| 1432 | 1614 | } |
|---|
| 1615 | + |
|---|
| 1616 | + /* Check if there are no children of this qgroup */ |
|---|
| 1617 | + if (!list_empty(&qgroup->members)) { |
|---|
| 1618 | + ret = -EBUSY; |
|---|
| 1619 | + goto out; |
|---|
| 1620 | + } |
|---|
| 1621 | + |
|---|
| 1433 | 1622 | ret = del_qgroup_item(trans, qgroupid); |
|---|
| 1434 | 1623 | if (ret && ret != -ENOENT) |
|---|
| 1435 | 1624 | goto out; |
|---|
| .. | .. |
|---|
| 1446 | 1635 | spin_lock(&fs_info->qgroup_lock); |
|---|
| 1447 | 1636 | del_qgroup_rb(fs_info, qgroupid); |
|---|
| 1448 | 1637 | spin_unlock(&fs_info->qgroup_lock); |
|---|
| 1638 | + |
|---|
| 1639 | + /* |
|---|
| 1640 | + * Remove the qgroup from sysfs now without holding the qgroup_lock |
|---|
| 1641 | + * spinlock, since the sysfs_remove_group() function needs to take |
|---|
| 1642 | + * the mutex kernfs_mutex through kernfs_remove_by_name_ns(). |
|---|
| 1643 | + */ |
|---|
| 1644 | + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); |
|---|
| 1645 | + kfree(qgroup); |
|---|
| 1449 | 1646 | out: |
|---|
| 1450 | 1647 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1451 | 1648 | return ret; |
|---|
| .. | .. |
|---|
| 1455 | 1652 | struct btrfs_qgroup_limit *limit) |
|---|
| 1456 | 1653 | { |
|---|
| 1457 | 1654 | struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 1458 | | - struct btrfs_root *quota_root; |
|---|
| 1459 | 1655 | struct btrfs_qgroup *qgroup; |
|---|
| 1460 | 1656 | int ret = 0; |
|---|
| 1461 | 1657 | /* Sometimes we would want to clear the limit on this qgroup. |
|---|
| .. | .. |
|---|
| 1465 | 1661 | const u64 CLEAR_VALUE = -1; |
|---|
| 1466 | 1662 | |
|---|
| 1467 | 1663 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
|---|
| 1468 | | - quota_root = fs_info->quota_root; |
|---|
| 1469 | | - if (!quota_root) { |
|---|
| 1470 | | - ret = -EINVAL; |
|---|
| 1664 | + if (!fs_info->quota_root) { |
|---|
| 1665 | + ret = -ENOTCONN; |
|---|
| 1471 | 1666 | goto out; |
|---|
| 1472 | 1667 | } |
|---|
| 1473 | 1668 | |
|---|
| .. | .. |
|---|
| 1546 | 1741 | parent_node = *p; |
|---|
| 1547 | 1742 | entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, |
|---|
| 1548 | 1743 | node); |
|---|
| 1549 | | - if (bytenr < entry->bytenr) |
|---|
| 1744 | + if (bytenr < entry->bytenr) { |
|---|
| 1550 | 1745 | p = &(*p)->rb_left; |
|---|
| 1551 | | - else if (bytenr > entry->bytenr) |
|---|
| 1746 | + } else if (bytenr > entry->bytenr) { |
|---|
| 1552 | 1747 | p = &(*p)->rb_right; |
|---|
| 1553 | | - else |
|---|
| 1748 | + } else { |
|---|
| 1749 | + if (record->data_rsv && !entry->data_rsv) { |
|---|
| 1750 | + entry->data_rsv = record->data_rsv; |
|---|
| 1751 | + entry->data_rsv_refroot = |
|---|
| 1752 | + record->data_rsv_refroot; |
|---|
| 1753 | + } |
|---|
| 1554 | 1754 | return 1; |
|---|
| 1755 | + } |
|---|
| 1555 | 1756 | } |
|---|
| 1556 | 1757 | |
|---|
| 1557 | 1758 | rb_link_node(&record->node, parent_node, p); |
|---|
| .. | .. |
|---|
| 1597 | 1798 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) |
|---|
| 1598 | 1799 | || bytenr == 0 || num_bytes == 0) |
|---|
| 1599 | 1800 | return 0; |
|---|
| 1600 | | - record = kmalloc(sizeof(*record), gfp_flag); |
|---|
| 1801 | + record = kzalloc(sizeof(*record), gfp_flag); |
|---|
| 1601 | 1802 | if (!record) |
|---|
| 1602 | 1803 | return -ENOMEM; |
|---|
| 1603 | 1804 | |
|---|
| .. | .. |
|---|
| 1719 | 1920 | return 0; |
|---|
| 1720 | 1921 | } |
|---|
| 1721 | 1922 | |
|---|
| 1923 | +/* |
|---|
| 1924 | + * Helper function to trace a subtree tree block swap. |
|---|
| 1925 | + * |
|---|
| 1926 | + * The swap will happen in highest tree block, but there may be a lot of |
|---|
| 1927 | + * tree blocks involved. |
|---|
| 1928 | + * |
|---|
| 1929 | + * For example: |
|---|
| 1930 | + * OO = Old tree blocks |
|---|
| 1931 | + * NN = New tree blocks allocated during balance |
|---|
| 1932 | + * |
|---|
| 1933 | + * File tree (257) Reloc tree for 257 |
|---|
| 1934 | + * L2 OO NN |
|---|
| 1935 | + * / \ / \ |
|---|
| 1936 | + * L1 OO OO (a) OO NN (a) |
|---|
| 1937 | + * / \ / \ / \ / \ |
|---|
| 1938 | + * L0 OO OO OO OO OO OO NN NN |
|---|
| 1939 | + * (b) (c) (b) (c) |
|---|
| 1940 | + * |
|---|
| 1941 | + * When calling qgroup_trace_extent_swap(), we will pass: |
|---|
| 1942 | + * @src_eb = OO(a) |
|---|
| 1943 | + * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ] |
|---|
| 1944 | + * @dst_level = 0 |
|---|
| 1945 | + * @root_level = 1 |
|---|
| 1946 | + * |
|---|
| 1947 | + * In that case, qgroup_trace_extent_swap() will search from OO(a) to |
|---|
| 1948 | + * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty. |
|---|
| 1949 | + * |
|---|
| 1950 | + * The main work of qgroup_trace_extent_swap() can be split into 3 parts: |
|---|
| 1951 | + * |
|---|
| 1952 | + * 1) Tree search from @src_eb |
|---|
| 1953 | + * It should act as a simplified btrfs_search_slot(). |
|---|
| 1954 | + * The key for search can be extracted from @dst_path->nodes[dst_level] |
|---|
| 1955 | + * (first key). |
|---|
| 1956 | + * |
|---|
| 1957 | + * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty |
|---|
| 1958 | + * NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty. |
|---|
| 1959 | + * They should be marked during previous (@dst_level = 1) iteration. |
|---|
| 1960 | + * |
|---|
| 1961 | + * 3) Mark file extents in leaves dirty |
|---|
| 1962 | + * We don't have a good way to pick out new file extents only. |
|---|
| 1963 | + * So we still follow the old method by scanning all file extents in |
|---|
| 1964 | + * the leaf. |
|---|
| 1965 | + * |
|---|
| 1966 | + * This function can free us from keeping two paths, thus later we only need |
|---|
| 1967 | + * to care about how to iterate all new tree blocks in reloc tree. |
|---|
| 1968 | + */ |
|---|
| 1969 | +static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, |
|---|
| 1970 | + struct extent_buffer *src_eb, |
|---|
| 1971 | + struct btrfs_path *dst_path, |
|---|
| 1972 | + int dst_level, int root_level, |
|---|
| 1973 | + bool trace_leaf) |
|---|
| 1974 | +{ |
|---|
| 1975 | + struct btrfs_key key; |
|---|
| 1976 | + struct btrfs_path *src_path; |
|---|
| 1977 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 1978 | + u32 nodesize = fs_info->nodesize; |
|---|
| 1979 | + int cur_level = root_level; |
|---|
| 1980 | + int ret; |
|---|
| 1981 | + |
|---|
| 1982 | + BUG_ON(dst_level > root_level); |
|---|
| 1983 | + /* Level mismatch */ |
|---|
| 1984 | + if (btrfs_header_level(src_eb) != root_level) |
|---|
| 1985 | + return -EINVAL; |
|---|
| 1986 | + |
|---|
| 1987 | + src_path = btrfs_alloc_path(); |
|---|
| 1988 | + if (!src_path) { |
|---|
| 1989 | + ret = -ENOMEM; |
|---|
| 1990 | + goto out; |
|---|
| 1991 | + } |
|---|
| 1992 | + |
|---|
| 1993 | + if (dst_level) |
|---|
| 1994 | + btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0); |
|---|
| 1995 | + else |
|---|
| 1996 | + btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0); |
|---|
| 1997 | + |
|---|
| 1998 | + /* For src_path */ |
|---|
| 1999 | + atomic_inc(&src_eb->refs); |
|---|
| 2000 | + src_path->nodes[root_level] = src_eb; |
|---|
| 2001 | + src_path->slots[root_level] = dst_path->slots[root_level]; |
|---|
| 2002 | + src_path->locks[root_level] = 0; |
|---|
| 2003 | + |
|---|
| 2004 | + /* A simplified version of btrfs_search_slot() */ |
|---|
| 2005 | + while (cur_level >= dst_level) { |
|---|
| 2006 | + struct btrfs_key src_key; |
|---|
| 2007 | + struct btrfs_key dst_key; |
|---|
| 2008 | + |
|---|
| 2009 | + if (src_path->nodes[cur_level] == NULL) { |
|---|
| 2010 | + struct btrfs_key first_key; |
|---|
| 2011 | + struct extent_buffer *eb; |
|---|
| 2012 | + int parent_slot; |
|---|
| 2013 | + u64 child_gen; |
|---|
| 2014 | + u64 child_bytenr; |
|---|
| 2015 | + |
|---|
| 2016 | + eb = src_path->nodes[cur_level + 1]; |
|---|
| 2017 | + parent_slot = src_path->slots[cur_level + 1]; |
|---|
| 2018 | + child_bytenr = btrfs_node_blockptr(eb, parent_slot); |
|---|
| 2019 | + child_gen = btrfs_node_ptr_generation(eb, parent_slot); |
|---|
| 2020 | + btrfs_node_key_to_cpu(eb, &first_key, parent_slot); |
|---|
| 2021 | + |
|---|
| 2022 | + eb = read_tree_block(fs_info, child_bytenr, child_gen, |
|---|
| 2023 | + cur_level, &first_key); |
|---|
| 2024 | + if (IS_ERR(eb)) { |
|---|
| 2025 | + ret = PTR_ERR(eb); |
|---|
| 2026 | + goto out; |
|---|
| 2027 | + } else if (!extent_buffer_uptodate(eb)) { |
|---|
| 2028 | + free_extent_buffer(eb); |
|---|
| 2029 | + ret = -EIO; |
|---|
| 2030 | + goto out; |
|---|
| 2031 | + } |
|---|
| 2032 | + |
|---|
| 2033 | + src_path->nodes[cur_level] = eb; |
|---|
| 2034 | + |
|---|
| 2035 | + btrfs_tree_read_lock(eb); |
|---|
| 2036 | + btrfs_set_lock_blocking_read(eb); |
|---|
| 2037 | + src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING; |
|---|
| 2038 | + } |
|---|
| 2039 | + |
|---|
| 2040 | + src_path->slots[cur_level] = dst_path->slots[cur_level]; |
|---|
| 2041 | + if (cur_level) { |
|---|
| 2042 | + btrfs_node_key_to_cpu(dst_path->nodes[cur_level], |
|---|
| 2043 | + &dst_key, dst_path->slots[cur_level]); |
|---|
| 2044 | + btrfs_node_key_to_cpu(src_path->nodes[cur_level], |
|---|
| 2045 | + &src_key, src_path->slots[cur_level]); |
|---|
| 2046 | + } else { |
|---|
| 2047 | + btrfs_item_key_to_cpu(dst_path->nodes[cur_level], |
|---|
| 2048 | + &dst_key, dst_path->slots[cur_level]); |
|---|
| 2049 | + btrfs_item_key_to_cpu(src_path->nodes[cur_level], |
|---|
| 2050 | + &src_key, src_path->slots[cur_level]); |
|---|
| 2051 | + } |
|---|
| 2052 | + /* Content mismatch, something went wrong */ |
|---|
| 2053 | + if (btrfs_comp_cpu_keys(&dst_key, &src_key)) { |
|---|
| 2054 | + ret = -ENOENT; |
|---|
| 2055 | + goto out; |
|---|
| 2056 | + } |
|---|
| 2057 | + cur_level--; |
|---|
| 2058 | + } |
|---|
| 2059 | + |
|---|
| 2060 | + /* |
|---|
| 2061 | + * Now both @dst_path and @src_path have been populated, record the tree |
|---|
| 2062 | + * blocks for qgroup accounting. |
|---|
| 2063 | + */ |
|---|
| 2064 | + ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start, |
|---|
| 2065 | + nodesize, GFP_NOFS); |
|---|
| 2066 | + if (ret < 0) |
|---|
| 2067 | + goto out; |
|---|
| 2068 | + ret = btrfs_qgroup_trace_extent(trans, |
|---|
| 2069 | + dst_path->nodes[dst_level]->start, |
|---|
| 2070 | + nodesize, GFP_NOFS); |
|---|
| 2071 | + if (ret < 0) |
|---|
| 2072 | + goto out; |
|---|
| 2073 | + |
|---|
| 2074 | + /* Record leaf file extents */ |
|---|
| 2075 | + if (dst_level == 0 && trace_leaf) { |
|---|
| 2076 | + ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]); |
|---|
| 2077 | + if (ret < 0) |
|---|
| 2078 | + goto out; |
|---|
| 2079 | + ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]); |
|---|
| 2080 | + } |
|---|
| 2081 | +out: |
|---|
| 2082 | + btrfs_free_path(src_path); |
|---|
| 2083 | + return ret; |
|---|
| 2084 | +} |
|---|
| 2085 | + |
|---|
| 2086 | +/* |
|---|
| 2087 | + * Helper function to do recursive generation-aware depth-first search, to |
|---|
| 2088 | + * locate all new tree blocks in a subtree of reloc tree. |
|---|
| 2089 | + * |
|---|
| 2090 | + * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot) |
|---|
| 2091 | + * reloc tree |
|---|
| 2092 | + * L2 NN (a) |
|---|
| 2093 | + * / \ |
|---|
| 2094 | + * L1 OO NN (b) |
|---|
| 2095 | + * / \ / \ |
|---|
| 2096 | + * L0 OO OO OO NN |
|---|
| 2097 | + * (c) (d) |
|---|
| 2098 | + * If we pass: |
|---|
| 2099 | + * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ], |
|---|
| 2100 | + * @cur_level = 1 |
|---|
| 2101 | + * @root_level = 1 |
|---|
| 2102 | + * |
|---|
| 2103 | + * We will iterate through tree blocks NN(b), NN(d) and inform qgroup to trace |
|---|
| 2104 | + * the above tree blocks along with their counterparts in the file tree. |
|---|
| 2105 | + * While during search, old tree blocks OO(c) will be skipped as tree block swap |
|---|
| 2106 | + * won't affect OO(c). |
|---|
| 2107 | + */ |
|---|
| 2108 | +static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, |
|---|
| 2109 | + struct extent_buffer *src_eb, |
|---|
| 2110 | + struct btrfs_path *dst_path, |
|---|
| 2111 | + int cur_level, int root_level, |
|---|
| 2112 | + u64 last_snapshot, bool trace_leaf) |
|---|
| 2113 | +{ |
|---|
| 2114 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 2115 | + struct extent_buffer *eb; |
|---|
| 2116 | + bool need_cleanup = false; |
|---|
| 2117 | + int ret = 0; |
|---|
| 2118 | + int i; |
|---|
| 2119 | + |
|---|
| 2120 | + /* Level sanity check */ |
|---|
| 2121 | + if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || |
|---|
| 2122 | + root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || |
|---|
| 2123 | + root_level < cur_level) { |
|---|
| 2124 | + btrfs_err_rl(fs_info, |
|---|
| 2125 | + "%s: bad levels, cur_level=%d root_level=%d", |
|---|
| 2126 | + __func__, cur_level, root_level); |
|---|
| 2127 | + return -EUCLEAN; |
|---|
| 2128 | + } |
|---|
| 2129 | + |
|---|
| 2130 | + /* Read the tree block if needed */ |
|---|
| 2131 | + if (dst_path->nodes[cur_level] == NULL) { |
|---|
| 2132 | + struct btrfs_key first_key; |
|---|
| 2133 | + int parent_slot; |
|---|
| 2134 | + u64 child_gen; |
|---|
| 2135 | + u64 child_bytenr; |
|---|
| 2136 | + |
|---|
| 2137 | + /* |
|---|
| 2138 | + * dst_path->nodes[root_level] must be initialized before |
|---|
| 2139 | + * calling this function. |
|---|
| 2140 | + */ |
|---|
| 2141 | + if (cur_level == root_level) { |
|---|
| 2142 | + btrfs_err_rl(fs_info, |
|---|
| 2143 | + "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d", |
|---|
| 2144 | + __func__, root_level, root_level, cur_level); |
|---|
| 2145 | + return -EUCLEAN; |
|---|
| 2146 | + } |
|---|
| 2147 | + |
|---|
| 2148 | + /* |
|---|
| 2149 | + * We need to get child blockptr/gen from parent before we can |
|---|
| 2150 | + * read it. |
|---|
| 2151 | + */ |
|---|
| 2152 | + eb = dst_path->nodes[cur_level + 1]; |
|---|
| 2153 | + parent_slot = dst_path->slots[cur_level + 1]; |
|---|
| 2154 | + child_bytenr = btrfs_node_blockptr(eb, parent_slot); |
|---|
| 2155 | + child_gen = btrfs_node_ptr_generation(eb, parent_slot); |
|---|
| 2156 | + btrfs_node_key_to_cpu(eb, &first_key, parent_slot); |
|---|
| 2157 | + |
|---|
| 2158 | + /* This node is old, no need to trace */ |
|---|
| 2159 | + if (child_gen < last_snapshot) |
|---|
| 2160 | + goto out; |
|---|
| 2161 | + |
|---|
| 2162 | + eb = read_tree_block(fs_info, child_bytenr, child_gen, |
|---|
| 2163 | + cur_level, &first_key); |
|---|
| 2164 | + if (IS_ERR(eb)) { |
|---|
| 2165 | + ret = PTR_ERR(eb); |
|---|
| 2166 | + goto out; |
|---|
| 2167 | + } else if (!extent_buffer_uptodate(eb)) { |
|---|
| 2168 | + free_extent_buffer(eb); |
|---|
| 2169 | + ret = -EIO; |
|---|
| 2170 | + goto out; |
|---|
| 2171 | + } |
|---|
| 2172 | + |
|---|
| 2173 | + dst_path->nodes[cur_level] = eb; |
|---|
| 2174 | + dst_path->slots[cur_level] = 0; |
|---|
| 2175 | + |
|---|
| 2176 | + btrfs_tree_read_lock(eb); |
|---|
| 2177 | + btrfs_set_lock_blocking_read(eb); |
|---|
| 2178 | + dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING; |
|---|
| 2179 | + need_cleanup = true; |
|---|
| 2180 | + } |
|---|
| 2181 | + |
|---|
| 2182 | + /* Now record this tree block and its counter part for qgroups */ |
|---|
| 2183 | + ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level, |
|---|
| 2184 | + root_level, trace_leaf); |
|---|
| 2185 | + if (ret < 0) |
|---|
| 2186 | + goto cleanup; |
|---|
| 2187 | + |
|---|
| 2188 | + eb = dst_path->nodes[cur_level]; |
|---|
| 2189 | + |
|---|
| 2190 | + if (cur_level > 0) { |
|---|
| 2191 | + /* Iterate all child tree blocks */ |
|---|
| 2192 | + for (i = 0; i < btrfs_header_nritems(eb); i++) { |
|---|
| 2193 | + /* Skip old tree blocks as they won't be swapped */ |
|---|
| 2194 | + if (btrfs_node_ptr_generation(eb, i) < last_snapshot) |
|---|
| 2195 | + continue; |
|---|
| 2196 | + dst_path->slots[cur_level] = i; |
|---|
| 2197 | + |
|---|
| 2198 | + /* Recursive call (at most 7 times) */ |
|---|
| 2199 | + ret = qgroup_trace_new_subtree_blocks(trans, src_eb, |
|---|
| 2200 | + dst_path, cur_level - 1, root_level, |
|---|
| 2201 | + last_snapshot, trace_leaf); |
|---|
| 2202 | + if (ret < 0) |
|---|
| 2203 | + goto cleanup; |
|---|
| 2204 | + } |
|---|
| 2205 | + } |
|---|
| 2206 | + |
|---|
| 2207 | +cleanup: |
|---|
| 2208 | + if (need_cleanup) { |
|---|
| 2209 | + /* Clean up */ |
|---|
| 2210 | + btrfs_tree_unlock_rw(dst_path->nodes[cur_level], |
|---|
| 2211 | + dst_path->locks[cur_level]); |
|---|
| 2212 | + free_extent_buffer(dst_path->nodes[cur_level]); |
|---|
| 2213 | + dst_path->nodes[cur_level] = NULL; |
|---|
| 2214 | + dst_path->slots[cur_level] = 0; |
|---|
| 2215 | + dst_path->locks[cur_level] = 0; |
|---|
| 2216 | + } |
|---|
| 2217 | +out: |
|---|
| 2218 | + return ret; |
|---|
| 2219 | +} |
|---|
| 2220 | + |
|---|
| 2221 | +static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, |
|---|
| 2222 | + struct extent_buffer *src_eb, |
|---|
| 2223 | + struct extent_buffer *dst_eb, |
|---|
| 2224 | + u64 last_snapshot, bool trace_leaf) |
|---|
| 2225 | +{ |
|---|
| 2226 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 2227 | + struct btrfs_path *dst_path = NULL; |
|---|
| 2228 | + int level; |
|---|
| 2229 | + int ret; |
|---|
| 2230 | + |
|---|
| 2231 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
|---|
| 2232 | + return 0; |
|---|
| 2233 | + |
|---|
| 2234 | + /* Wrong parameter order */ |
|---|
| 2235 | + if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) { |
|---|
| 2236 | + btrfs_err_rl(fs_info, |
|---|
| 2237 | + "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__, |
|---|
| 2238 | + btrfs_header_generation(src_eb), |
|---|
| 2239 | + btrfs_header_generation(dst_eb)); |
|---|
| 2240 | + return -EUCLEAN; |
|---|
| 2241 | + } |
|---|
| 2242 | + |
|---|
| 2243 | + if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) { |
|---|
| 2244 | + ret = -EIO; |
|---|
| 2245 | + goto out; |
|---|
| 2246 | + } |
|---|
| 2247 | + |
|---|
| 2248 | + level = btrfs_header_level(dst_eb); |
|---|
| 2249 | + dst_path = btrfs_alloc_path(); |
|---|
| 2250 | + if (!dst_path) { |
|---|
| 2251 | + ret = -ENOMEM; |
|---|
| 2252 | + goto out; |
|---|
| 2253 | + } |
|---|
| 2254 | + /* For dst_path */ |
|---|
| 2255 | + atomic_inc(&dst_eb->refs); |
|---|
| 2256 | + dst_path->nodes[level] = dst_eb; |
|---|
| 2257 | + dst_path->slots[level] = 0; |
|---|
| 2258 | + dst_path->locks[level] = 0; |
|---|
| 2259 | + |
|---|
| 2260 | + /* Do the generation-aware depth-first search */ |
|---|
| 2261 | + ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level, |
|---|
| 2262 | + level, last_snapshot, trace_leaf); |
|---|
| 2263 | + if (ret < 0) |
|---|
| 2264 | + goto out; |
|---|
| 2265 | + ret = 0; |
|---|
| 2266 | + |
|---|
| 2267 | +out: |
|---|
| 2268 | + btrfs_free_path(dst_path); |
|---|
| 2269 | + if (ret < 0) |
|---|
| 2270 | + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
|---|
| 2271 | + return ret; |
|---|
| 2272 | +} |
|---|
| 2273 | + |
|---|
| 1722 | 2274 | int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, |
|---|
| 1723 | 2275 | struct extent_buffer *root_eb, |
|---|
| 1724 | 2276 | u64 root_gen, int root_level) |
|---|
| .. | .. |
|---|
| 1759 | 2311 | * walk back up the tree (adjusting slot pointers as we go) |
|---|
| 1760 | 2312 | * and restart the search process. |
|---|
| 1761 | 2313 | */ |
|---|
| 1762 | | - extent_buffer_get(root_eb); /* For path */ |
|---|
| 2314 | + atomic_inc(&root_eb->refs); /* For path */ |
|---|
| 1763 | 2315 | path->nodes[root_level] = root_eb; |
|---|
| 1764 | 2316 | path->slots[root_level] = 0; |
|---|
| 1765 | 2317 | path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ |
|---|
| .. | .. |
|---|
| 1797 | 2349 | path->slots[level] = 0; |
|---|
| 1798 | 2350 | |
|---|
| 1799 | 2351 | btrfs_tree_read_lock(eb); |
|---|
| 1800 | | - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
|---|
| 2352 | + btrfs_set_lock_blocking_read(eb); |
|---|
| 1801 | 2353 | path->locks[level] = BTRFS_READ_LOCK_BLOCKING; |
|---|
| 1802 | 2354 | |
|---|
| 1803 | 2355 | ret = btrfs_qgroup_trace_extent(trans, child_bytenr, |
|---|
| .. | .. |
|---|
| 1894 | 2446 | * Update qgroup rfer/excl counters. |
|---|
| 1895 | 2447 | * Rfer update is easy, codes can explain themselves. |
|---|
| 1896 | 2448 | * |
|---|
| 1897 | | - * Excl update is tricky, the update is split into 2 part. |
|---|
| 2449 | + * Excl update is tricky, the update is split into 2 parts. |
|---|
| 1898 | 2450 | * Part 1: Possible exclusive <-> sharing detect: |
|---|
| 1899 | 2451 | * | A | !A | |
|---|
| 1900 | 2452 | * ------------------------------------- |
|---|
| .. | .. |
|---|
| 2143 | 2695 | struct btrfs_delayed_ref_root *delayed_refs; |
|---|
| 2144 | 2696 | struct ulist *new_roots = NULL; |
|---|
| 2145 | 2697 | struct rb_node *node; |
|---|
| 2698 | + u64 num_dirty_extents = 0; |
|---|
| 2146 | 2699 | u64 qgroup_to_skip; |
|---|
| 2147 | 2700 | int ret = 0; |
|---|
| 2148 | 2701 | |
|---|
| .. | .. |
|---|
| 2152 | 2705 | record = rb_entry(node, struct btrfs_qgroup_extent_record, |
|---|
| 2153 | 2706 | node); |
|---|
| 2154 | 2707 | |
|---|
| 2708 | + num_dirty_extents++; |
|---|
| 2155 | 2709 | trace_btrfs_qgroup_account_extents(fs_info, record); |
|---|
| 2156 | 2710 | |
|---|
| 2157 | 2711 | if (!ret) { |
|---|
| .. | .. |
|---|
| 2168 | 2722 | goto cleanup; |
|---|
| 2169 | 2723 | } |
|---|
| 2170 | 2724 | |
|---|
| 2725 | + /* Free the reserved data space */ |
|---|
| 2726 | + btrfs_qgroup_free_refroot(fs_info, |
|---|
| 2727 | + record->data_rsv_refroot, |
|---|
| 2728 | + record->data_rsv, |
|---|
| 2729 | + BTRFS_QGROUP_RSV_DATA); |
|---|
| 2171 | 2730 | /* |
|---|
| 2172 | 2731 | * Use SEQ_LAST as time_seq to do special search, which |
|---|
| 2173 | 2732 | * doesn't lock tree or delayed_refs and search current |
|---|
| .. | .. |
|---|
| 2197 | 2756 | kfree(record); |
|---|
| 2198 | 2757 | |
|---|
| 2199 | 2758 | } |
|---|
| 2759 | + trace_qgroup_num_dirty_extents(fs_info, trans->transid, |
|---|
| 2760 | + num_dirty_extents); |
|---|
| 2200 | 2761 | return ret; |
|---|
| 2201 | 2762 | } |
|---|
| 2202 | 2763 | |
|---|
| .. | .. |
|---|
| 2206 | 2767 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans) |
|---|
| 2207 | 2768 | { |
|---|
| 2208 | 2769 | struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 2209 | | - struct btrfs_root *quota_root = fs_info->quota_root; |
|---|
| 2210 | 2770 | int ret = 0; |
|---|
| 2211 | 2771 | |
|---|
| 2212 | | - if (!quota_root) |
|---|
| 2772 | + if (!fs_info->quota_root) |
|---|
| 2213 | 2773 | return ret; |
|---|
| 2214 | 2774 | |
|---|
| 2215 | 2775 | spin_lock(&fs_info->qgroup_lock); |
|---|
| .. | .. |
|---|
| 2353 | 2913 | dstgroup->rsv_rfer = inherit->lim.rsv_rfer; |
|---|
| 2354 | 2914 | dstgroup->rsv_excl = inherit->lim.rsv_excl; |
|---|
| 2355 | 2915 | |
|---|
| 2356 | | - ret = update_qgroup_limit_item(trans, dstgroup); |
|---|
| 2357 | | - if (ret) { |
|---|
| 2358 | | - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
|---|
| 2359 | | - btrfs_info(fs_info, |
|---|
| 2360 | | - "unable to update quota limit for %llu", |
|---|
| 2361 | | - dstgroup->qgroupid); |
|---|
| 2362 | | - goto unlock; |
|---|
| 2363 | | - } |
|---|
| 2916 | + qgroup_dirty(fs_info, dstgroup); |
|---|
| 2364 | 2917 | } |
|---|
| 2365 | 2918 | |
|---|
| 2366 | 2919 | if (srcid) { |
|---|
| .. | .. |
|---|
| 2455 | 3008 | |
|---|
| 2456 | 3009 | unlock: |
|---|
| 2457 | 3010 | spin_unlock(&fs_info->qgroup_lock); |
|---|
| 3011 | + if (!ret) |
|---|
| 3012 | + ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup); |
|---|
| 2458 | 3013 | out: |
|---|
| 2459 | 3014 | if (!committing) |
|---|
| 2460 | 3015 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
|---|
| .. | .. |
|---|
| 2463 | 3018 | return ret; |
|---|
| 2464 | 3019 | } |
|---|
| 2465 | 3020 | |
|---|
| 2466 | | -/* |
|---|
| 2467 | | - * Two limits to commit transaction in advance. |
|---|
| 2468 | | - * |
|---|
| 2469 | | - * For RATIO, it will be 1/RATIO of the remaining limit as threshold. |
|---|
| 2470 | | - * For SIZE, it will be in byte unit as threshold. |
|---|
| 2471 | | - */ |
|---|
| 2472 | | -#define QGROUP_FREE_RATIO 32 |
|---|
| 2473 | | -#define QGROUP_FREE_SIZE SZ_32M |
|---|
| 2474 | | -static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, |
|---|
| 2475 | | - const struct btrfs_qgroup *qg, u64 num_bytes) |
|---|
| 3021 | +static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) |
|---|
| 2476 | 3022 | { |
|---|
| 2477 | | - u64 free; |
|---|
| 2478 | | - u64 threshold; |
|---|
| 2479 | | - |
|---|
| 2480 | 3023 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
|---|
| 2481 | 3024 | qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) |
|---|
| 2482 | 3025 | return false; |
|---|
| .. | .. |
|---|
| 2485 | 3028 | qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) |
|---|
| 2486 | 3029 | return false; |
|---|
| 2487 | 3030 | |
|---|
| 2488 | | - /* |
|---|
| 2489 | | - * Even if we passed the check, it's better to check if reservation |
|---|
| 2490 | | - * for meta_pertrans is pushing us near limit. |
|---|
| 2491 | | - * If there is too much pertrans reservation or it's near the limit, |
|---|
| 2492 | | - * let's try commit transaction to free some, using transaction_kthread |
|---|
| 2493 | | - */ |
|---|
| 2494 | | - if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER | |
|---|
| 2495 | | - BTRFS_QGROUP_LIMIT_MAX_EXCL))) { |
|---|
| 2496 | | - if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { |
|---|
| 2497 | | - free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl; |
|---|
| 2498 | | - threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO, |
|---|
| 2499 | | - QGROUP_FREE_SIZE); |
|---|
| 2500 | | - } else { |
|---|
| 2501 | | - free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer; |
|---|
| 2502 | | - threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO, |
|---|
| 2503 | | - QGROUP_FREE_SIZE); |
|---|
| 2504 | | - } |
|---|
| 2505 | | - |
|---|
| 2506 | | - /* |
|---|
| 2507 | | - * Use transaction_kthread to commit transaction, so we no |
|---|
| 2508 | | - * longer need to bother nested transaction nor lock context. |
|---|
| 2509 | | - */ |
|---|
| 2510 | | - if (free < threshold) |
|---|
| 2511 | | - btrfs_commit_transaction_locksafe(fs_info); |
|---|
| 2512 | | - } |
|---|
| 2513 | | - |
|---|
| 2514 | 3031 | return true; |
|---|
| 2515 | 3032 | } |
|---|
| 2516 | 3033 | |
|---|
| 2517 | 3034 | static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, |
|---|
| 2518 | 3035 | enum btrfs_qgroup_rsv_type type) |
|---|
| 2519 | 3036 | { |
|---|
| 2520 | | - struct btrfs_root *quota_root; |
|---|
| 2521 | 3037 | struct btrfs_qgroup *qgroup; |
|---|
| 2522 | 3038 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 2523 | 3039 | u64 ref_root = root->root_key.objectid; |
|---|
| .. | .. |
|---|
| 2536 | 3052 | enforce = false; |
|---|
| 2537 | 3053 | |
|---|
| 2538 | 3054 | spin_lock(&fs_info->qgroup_lock); |
|---|
| 2539 | | - quota_root = fs_info->quota_root; |
|---|
| 2540 | | - if (!quota_root) |
|---|
| 3055 | + if (!fs_info->quota_root) |
|---|
| 2541 | 3056 | goto out; |
|---|
| 2542 | 3057 | |
|---|
| 2543 | 3058 | qgroup = find_qgroup_rb(fs_info, ref_root); |
|---|
| .. | .. |
|---|
| 2560 | 3075 | |
|---|
| 2561 | 3076 | qg = unode_aux_to_qgroup(unode); |
|---|
| 2562 | 3077 | |
|---|
| 2563 | | - if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) { |
|---|
| 3078 | + if (enforce && !qgroup_check_limits(qg, num_bytes)) { |
|---|
| 2564 | 3079 | ret = -EDQUOT; |
|---|
| 2565 | 3080 | goto out; |
|---|
| 2566 | 3081 | } |
|---|
| .. | .. |
|---|
| 2583 | 3098 | |
|---|
| 2584 | 3099 | qg = unode_aux_to_qgroup(unode); |
|---|
| 2585 | 3100 | |
|---|
| 2586 | | - trace_qgroup_update_reserve(fs_info, qg, num_bytes, type); |
|---|
| 2587 | 3101 | qgroup_rsv_add(fs_info, qg, num_bytes, type); |
|---|
| 2588 | 3102 | } |
|---|
| 2589 | 3103 | |
|---|
| .. | .. |
|---|
| 2605 | 3119 | u64 ref_root, u64 num_bytes, |
|---|
| 2606 | 3120 | enum btrfs_qgroup_rsv_type type) |
|---|
| 2607 | 3121 | { |
|---|
| 2608 | | - struct btrfs_root *quota_root; |
|---|
| 2609 | 3122 | struct btrfs_qgroup *qgroup; |
|---|
| 2610 | 3123 | struct ulist_node *unode; |
|---|
| 2611 | 3124 | struct ulist_iterator uiter; |
|---|
| .. | .. |
|---|
| 2623 | 3136 | } |
|---|
| 2624 | 3137 | spin_lock(&fs_info->qgroup_lock); |
|---|
| 2625 | 3138 | |
|---|
| 2626 | | - quota_root = fs_info->quota_root; |
|---|
| 2627 | | - if (!quota_root) |
|---|
| 3139 | + if (!fs_info->quota_root) |
|---|
| 2628 | 3140 | goto out; |
|---|
| 2629 | 3141 | |
|---|
| 2630 | 3142 | qgroup = find_qgroup_rb(fs_info, ref_root); |
|---|
| .. | .. |
|---|
| 2650 | 3162 | |
|---|
| 2651 | 3163 | qg = unode_aux_to_qgroup(unode); |
|---|
| 2652 | 3164 | |
|---|
| 2653 | | - trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type); |
|---|
| 2654 | 3165 | qgroup_rsv_release(fs_info, qg, num_bytes, type); |
|---|
| 2655 | 3166 | |
|---|
| 2656 | 3167 | list_for_each_entry(glist, &qg->groups, next_group) { |
|---|
| .. | .. |
|---|
| 2734 | 3245 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
|---|
| 2735 | 3246 | goto out; |
|---|
| 2736 | 3247 | } |
|---|
| 2737 | | - extent_buffer_get(scratch_leaf); |
|---|
| 2738 | | - btrfs_tree_read_lock(scratch_leaf); |
|---|
| 2739 | | - btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK); |
|---|
| 2740 | 3248 | slot = path->slots[0]; |
|---|
| 2741 | 3249 | btrfs_release_path(path); |
|---|
| 2742 | 3250 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
|---|
| .. | .. |
|---|
| 2762 | 3270 | goto out; |
|---|
| 2763 | 3271 | } |
|---|
| 2764 | 3272 | out: |
|---|
| 2765 | | - if (scratch_leaf) { |
|---|
| 2766 | | - btrfs_tree_read_unlock_blocking(scratch_leaf); |
|---|
| 3273 | + if (scratch_leaf) |
|---|
| 2767 | 3274 | free_extent_buffer(scratch_leaf); |
|---|
| 2768 | | - } |
|---|
| 2769 | 3275 | |
|---|
| 2770 | 3276 | if (done && !ret) { |
|---|
| 2771 | 3277 | ret = 1; |
|---|
| .. | .. |
|---|
| 2777 | 3283 | static bool rescan_should_stop(struct btrfs_fs_info *fs_info) |
|---|
| 2778 | 3284 | { |
|---|
| 2779 | 3285 | return btrfs_fs_closing(fs_info) || |
|---|
| 2780 | | - test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); |
|---|
| 3286 | + test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) || |
|---|
| 3287 | + !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); |
|---|
| 2781 | 3288 | } |
|---|
| 2782 | 3289 | |
|---|
| 2783 | 3290 | static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) |
|---|
| .. | .. |
|---|
| 2807 | 3314 | err = PTR_ERR(trans); |
|---|
| 2808 | 3315 | break; |
|---|
| 2809 | 3316 | } |
|---|
| 2810 | | - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { |
|---|
| 2811 | | - err = -EINTR; |
|---|
| 2812 | | - } else { |
|---|
| 2813 | | - err = qgroup_rescan_leaf(trans, path); |
|---|
| 2814 | | - } |
|---|
| 3317 | + |
|---|
| 3318 | + err = qgroup_rescan_leaf(trans, path); |
|---|
| 3319 | + |
|---|
| 2815 | 3320 | if (err > 0) |
|---|
| 2816 | 3321 | btrfs_commit_transaction(trans); |
|---|
| 2817 | 3322 | else |
|---|
| .. | .. |
|---|
| 2825 | 3330 | if (err > 0 && |
|---|
| 2826 | 3331 | fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { |
|---|
| 2827 | 3332 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
|---|
| 2828 | | - } else if (err < 0) { |
|---|
| 3333 | + } else if (err < 0 || stopped) { |
|---|
| 2829 | 3334 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
|---|
| 2830 | 3335 | } |
|---|
| 2831 | 3336 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
|---|
| .. | .. |
|---|
| 2902 | 3407 | } |
|---|
| 2903 | 3408 | |
|---|
| 2904 | 3409 | mutex_lock(&fs_info->qgroup_rescan_lock); |
|---|
| 2905 | | - spin_lock(&fs_info->qgroup_lock); |
|---|
| 2906 | 3410 | |
|---|
| 2907 | 3411 | if (init_flags) { |
|---|
| 2908 | 3412 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { |
|---|
| .. | .. |
|---|
| 2914 | 3418 | btrfs_warn(fs_info, |
|---|
| 2915 | 3419 | "qgroup rescan init failed, qgroup is not enabled"); |
|---|
| 2916 | 3420 | ret = -EINVAL; |
|---|
| 3421 | + } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { |
|---|
| 3422 | + /* Quota disable is in progress */ |
|---|
| 3423 | + ret = -EBUSY; |
|---|
| 2917 | 3424 | } |
|---|
| 2918 | 3425 | |
|---|
| 2919 | 3426 | if (ret) { |
|---|
| 2920 | | - spin_unlock(&fs_info->qgroup_lock); |
|---|
| 2921 | 3427 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
|---|
| 2922 | 3428 | return ret; |
|---|
| 2923 | 3429 | } |
|---|
| .. | .. |
|---|
| 2928 | 3434 | sizeof(fs_info->qgroup_rescan_progress)); |
|---|
| 2929 | 3435 | fs_info->qgroup_rescan_progress.objectid = progress_objectid; |
|---|
| 2930 | 3436 | init_completion(&fs_info->qgroup_rescan_completion); |
|---|
| 2931 | | - |
|---|
| 2932 | | - spin_unlock(&fs_info->qgroup_lock); |
|---|
| 2933 | 3437 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
|---|
| 2934 | 3438 | |
|---|
| 2935 | | - memset(&fs_info->qgroup_rescan_work, 0, |
|---|
| 2936 | | - sizeof(fs_info->qgroup_rescan_work)); |
|---|
| 2937 | 3439 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
|---|
| 2938 | | - btrfs_qgroup_rescan_helper, |
|---|
| 2939 | 3440 | btrfs_qgroup_rescan_worker, NULL, NULL); |
|---|
| 2940 | 3441 | return 0; |
|---|
| 2941 | 3442 | } |
|---|
| .. | .. |
|---|
| 3009 | 3510 | int ret = 0; |
|---|
| 3010 | 3511 | |
|---|
| 3011 | 3512 | mutex_lock(&fs_info->qgroup_rescan_lock); |
|---|
| 3012 | | - spin_lock(&fs_info->qgroup_lock); |
|---|
| 3013 | 3513 | running = fs_info->qgroup_rescan_running; |
|---|
| 3014 | | - spin_unlock(&fs_info->qgroup_lock); |
|---|
| 3015 | 3514 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
|---|
| 3016 | 3515 | |
|---|
| 3017 | 3516 | if (!running) |
|---|
| .. | .. |
|---|
| 3042 | 3541 | } |
|---|
| 3043 | 3542 | } |
|---|
| 3044 | 3543 | |
|---|
| 3544 | +#define rbtree_iterate_from_safe(node, next, start) \ |
|---|
| 3545 | + for (node = start; node && ({ next = rb_next(node); 1;}); node = next) |
|---|
| 3546 | + |
|---|
| 3547 | +static int qgroup_unreserve_range(struct btrfs_inode *inode, |
|---|
| 3548 | + struct extent_changeset *reserved, u64 start, |
|---|
| 3549 | + u64 len) |
|---|
| 3550 | +{ |
|---|
| 3551 | + struct rb_node *node; |
|---|
| 3552 | + struct rb_node *next; |
|---|
| 3553 | + struct ulist_node *entry; |
|---|
| 3554 | + int ret = 0; |
|---|
| 3555 | + |
|---|
| 3556 | + node = reserved->range_changed.root.rb_node; |
|---|
| 3557 | + if (!node) |
|---|
| 3558 | + return 0; |
|---|
| 3559 | + while (node) { |
|---|
| 3560 | + entry = rb_entry(node, struct ulist_node, rb_node); |
|---|
| 3561 | + if (entry->val < start) |
|---|
| 3562 | + node = node->rb_right; |
|---|
| 3563 | + else |
|---|
| 3564 | + node = node->rb_left; |
|---|
| 3565 | + } |
|---|
| 3566 | + |
|---|
| 3567 | + if (entry->val > start && rb_prev(&entry->rb_node)) |
|---|
| 3568 | + entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, |
|---|
| 3569 | + rb_node); |
|---|
| 3570 | + |
|---|
| 3571 | + rbtree_iterate_from_safe(node, next, &entry->rb_node) { |
|---|
| 3572 | + u64 entry_start; |
|---|
| 3573 | + u64 entry_end; |
|---|
| 3574 | + u64 entry_len; |
|---|
| 3575 | + int clear_ret; |
|---|
| 3576 | + |
|---|
| 3577 | + entry = rb_entry(node, struct ulist_node, rb_node); |
|---|
| 3578 | + entry_start = entry->val; |
|---|
| 3579 | + entry_end = entry->aux; |
|---|
| 3580 | + entry_len = entry_end - entry_start + 1; |
|---|
| 3581 | + |
|---|
| 3582 | + if (entry_start >= start + len) |
|---|
| 3583 | + break; |
|---|
| 3584 | + if (entry_start + entry_len <= start) |
|---|
| 3585 | + continue; |
|---|
| 3586 | + /* |
|---|
| 3587 | + * Now the entry is in [start, start + len), revert the |
|---|
| 3588 | + * EXTENT_QGROUP_RESERVED bit. |
|---|
| 3589 | + */ |
|---|
| 3590 | + clear_ret = clear_extent_bits(&inode->io_tree, entry_start, |
|---|
| 3591 | + entry_end, EXTENT_QGROUP_RESERVED); |
|---|
| 3592 | + if (!ret && clear_ret < 0) |
|---|
| 3593 | + ret = clear_ret; |
|---|
| 3594 | + |
|---|
| 3595 | + ulist_del(&reserved->range_changed, entry->val, entry->aux); |
|---|
| 3596 | + if (likely(reserved->bytes_changed >= entry_len)) { |
|---|
| 3597 | + reserved->bytes_changed -= entry_len; |
|---|
| 3598 | + } else { |
|---|
| 3599 | + WARN_ON(1); |
|---|
| 3600 | + reserved->bytes_changed = 0; |
|---|
| 3601 | + } |
|---|
| 3602 | + } |
|---|
| 3603 | + |
|---|
| 3604 | + return ret; |
|---|
| 3605 | +} |
|---|
| 3606 | + |
|---|
| 3045 | 3607 | /* |
|---|
| 3046 | | - * Reserve qgroup space for range [start, start + len). |
|---|
| 3608 | + * Try to free some space for qgroup. |
|---|
| 3047 | 3609 | * |
|---|
| 3048 | | - * This function will either reserve space from related qgroups or doing |
|---|
| 3049 | | - * nothing if the range is already reserved. |
|---|
| 3610 | + * For qgroup, there are only 3 ways to free qgroup space: |
|---|
| 3611 | + * - Flush nodatacow write |
|---|
| 3612 | + * Any nodatacow write will free its reserved data space at run_delalloc_range(). |
|---|
| 3613 | + * In theory, we should only flush nodatacow inodes, but it's not yet |
|---|
| 3614 | + * possible, so we need to flush the whole root. |
|---|
| 3050 | 3615 | * |
|---|
| 3051 | | - * Return 0 for successful reserve |
|---|
| 3052 | | - * Return <0 for error (including -EQUOT) |
|---|
| 3616 | + * - Wait for ordered extents |
|---|
| 3617 | + * When ordered extents are finished, their reserved metadata is finally |
|---|
| 3618 | + * converted to per_trans status, which can be freed by later commit |
|---|
| 3619 | + * transaction. |
|---|
| 3053 | 3620 | * |
|---|
| 3054 | | - * NOTE: this function may sleep for memory allocation. |
|---|
| 3055 | | - * if btrfs_qgroup_reserve_data() is called multiple times with |
|---|
| 3056 | | - * same @reserved, caller must ensure when error happens it's OK |
|---|
| 3057 | | - * to free *ALL* reserved space. |
|---|
| 3621 | + * - Commit transaction |
|---|
| 3622 | + * This would free the meta_per_trans space. |
|---|
| 3623 | + * In theory this shouldn't provide much space, but any extra qgroup
|---|
| 3624 | + * space freed helps when we are near the limit.
|---|
| 3058 | 3625 | */ |
|---|
| 3059 | | -int btrfs_qgroup_reserve_data(struct inode *inode, |
|---|
| 3626 | +static int try_flush_qgroup(struct btrfs_root *root) |
|---|
| 3627 | +{ |
|---|
| 3628 | + struct btrfs_trans_handle *trans; |
|---|
| 3629 | + int ret; |
|---|
| 3630 | + bool can_commit = true; |
|---|
| 3631 | + |
|---|
| 3632 | + /* |
|---|
| 3633 | + * If current process holds a transaction, we shouldn't flush, as we |
|---|
| 3634 | + * assume all space reservation happens before a transaction handle is |
|---|
| 3635 | + * held. |
|---|
| 3636 | + * |
|---|
| 3637 | + * But there are cases like btrfs_delayed_item_reserve_metadata() where |
|---|
| 3638 | + * we try to reserve space with one transaction handle already held.
|---|
| 3639 | + * In that case we can't commit transaction, but at least try to end it |
|---|
| 3640 | + * and hope the started data writes can free some space. |
|---|
| 3641 | + */ |
|---|
| 3642 | + if (current->journal_info && |
|---|
| 3643 | + current->journal_info != BTRFS_SEND_TRANS_STUB) |
|---|
| 3644 | + can_commit = false; |
|---|
| 3645 | + |
|---|
| 3646 | + /* |
|---|
| 3647 | + * We don't want to run flush again and again, so if there is a running |
|---|
| 3648 | + * one, we won't try to start a new flush, but exit directly. |
|---|
| 3649 | + */ |
|---|
| 3650 | + if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { |
|---|
| 3651 | + /* |
|---|
| 3652 | + * We are already holding a transaction, thus we can block other |
|---|
| 3653 | + * threads from flushing. So exit right now. This increases |
|---|
| 3654 | + * the chance of EDQUOT for heavy load and near limit cases. |
|---|
| 3655 | + * But we can argue that if we're already near limit, EDQUOT is |
|---|
| 3656 | + * unavoidable anyway. |
|---|
| 3657 | + */ |
|---|
| 3658 | + if (!can_commit) |
|---|
| 3659 | + return 0; |
|---|
| 3660 | + |
|---|
| 3661 | + wait_event(root->qgroup_flush_wait, |
|---|
| 3662 | + !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); |
|---|
| 3663 | + return 0; |
|---|
| 3664 | + } |
|---|
| 3665 | + |
|---|
| 3666 | + ret = btrfs_start_delalloc_snapshot(root); |
|---|
| 3667 | + if (ret < 0) |
|---|
| 3668 | + goto out; |
|---|
| 3669 | + btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); |
|---|
| 3670 | + |
|---|
| 3671 | + trans = btrfs_join_transaction(root); |
|---|
| 3672 | + if (IS_ERR(trans)) { |
|---|
| 3673 | + ret = PTR_ERR(trans); |
|---|
| 3674 | + goto out; |
|---|
| 3675 | + } |
|---|
| 3676 | + |
|---|
| 3677 | + if (can_commit) |
|---|
| 3678 | + ret = btrfs_commit_transaction(trans); |
|---|
| 3679 | + else |
|---|
| 3680 | + ret = btrfs_end_transaction(trans); |
|---|
| 3681 | +out: |
|---|
| 3682 | + clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); |
|---|
| 3683 | + wake_up(&root->qgroup_flush_wait); |
|---|
| 3684 | + return ret; |
|---|
| 3685 | +} |
|---|
| 3686 | + |
|---|
| 3687 | +static int qgroup_reserve_data(struct btrfs_inode *inode, |
|---|
| 3060 | 3688 | struct extent_changeset **reserved_ret, u64 start, |
|---|
| 3061 | 3689 | u64 len) |
|---|
| 3062 | 3690 | { |
|---|
| 3063 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 3064 | | - struct ulist_node *unode; |
|---|
| 3065 | | - struct ulist_iterator uiter; |
|---|
| 3691 | + struct btrfs_root *root = inode->root; |
|---|
| 3066 | 3692 | struct extent_changeset *reserved; |
|---|
| 3693 | + bool new_reserved = false; |
|---|
| 3067 | 3694 | u64 orig_reserved; |
|---|
| 3068 | 3695 | u64 to_reserve; |
|---|
| 3069 | 3696 | int ret; |
|---|
| 3070 | 3697 | |
|---|
| 3071 | 3698 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || |
|---|
| 3072 | | - !is_fstree(root->objectid) || len == 0) |
|---|
| 3699 | + !is_fstree(root->root_key.objectid) || len == 0) |
|---|
| 3073 | 3700 | return 0; |
|---|
| 3074 | 3701 | |
|---|
| 3075 | 3702 | /* @reserved parameter is mandatory for qgroup */ |
|---|
| 3076 | 3703 | if (WARN_ON(!reserved_ret)) |
|---|
| 3077 | 3704 | return -EINVAL; |
|---|
| 3078 | 3705 | if (!*reserved_ret) { |
|---|
| 3706 | + new_reserved = true; |
|---|
| 3079 | 3707 | *reserved_ret = extent_changeset_alloc(); |
|---|
| 3080 | 3708 | if (!*reserved_ret) |
|---|
| 3081 | 3709 | return -ENOMEM; |
|---|
| .. | .. |
|---|
| 3083 | 3711 | reserved = *reserved_ret; |
|---|
| 3084 | 3712 | /* Record already reserved space */ |
|---|
| 3085 | 3713 | orig_reserved = reserved->bytes_changed; |
|---|
| 3086 | | - ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, |
|---|
| 3714 | + ret = set_record_extent_bits(&inode->io_tree, start, |
|---|
| 3087 | 3715 | start + len -1, EXTENT_QGROUP_RESERVED, reserved); |
|---|
| 3088 | 3716 | |
|---|
| 3089 | 3717 | /* Newly reserved space */ |
|---|
| 3090 | 3718 | to_reserve = reserved->bytes_changed - orig_reserved; |
|---|
| 3091 | | - trace_btrfs_qgroup_reserve_data(inode, start, len, |
|---|
| 3719 | + trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, |
|---|
| 3092 | 3720 | to_reserve, QGROUP_RESERVE); |
|---|
| 3093 | 3721 | if (ret < 0) |
|---|
| 3094 | | - goto cleanup; |
|---|
| 3722 | + goto out; |
|---|
| 3095 | 3723 | ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA); |
|---|
| 3096 | 3724 | if (ret < 0) |
|---|
| 3097 | 3725 | goto cleanup; |
|---|
| .. | .. |
|---|
| 3099 | 3727 | return ret; |
|---|
| 3100 | 3728 | |
|---|
| 3101 | 3729 | cleanup: |
|---|
| 3102 | | - /* cleanup *ALL* already reserved ranges */ |
|---|
| 3103 | | - ULIST_ITER_INIT(&uiter); |
|---|
| 3104 | | - while ((unode = ulist_next(&reserved->range_changed, &uiter))) |
|---|
| 3105 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, |
|---|
| 3106 | | - unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); |
|---|
| 3107 | | - /* Also free data bytes of already reserved one */ |
|---|
| 3108 | | - btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, |
|---|
| 3109 | | - orig_reserved, BTRFS_QGROUP_RSV_DATA); |
|---|
| 3110 | | - extent_changeset_release(reserved); |
|---|
| 3730 | + qgroup_unreserve_range(inode, reserved, start, len); |
|---|
| 3731 | +out: |
|---|
| 3732 | + if (new_reserved) { |
|---|
| 3733 | + extent_changeset_release(reserved); |
|---|
| 3734 | + kfree(reserved); |
|---|
| 3735 | + *reserved_ret = NULL; |
|---|
| 3736 | + } |
|---|
| 3111 | 3737 | return ret; |
|---|
| 3112 | 3738 | } |
|---|
| 3113 | 3739 | |
|---|
| 3740 | +/* |
|---|
| 3741 | + * Reserve qgroup space for range [start, start + len). |
|---|
| 3742 | + * |
|---|
| 3743 | + * This function will either reserve space from related qgroups or do nothing |
|---|
| 3744 | + * if the range is already reserved. |
|---|
| 3745 | + * |
|---|
| 3746 | + * Return 0 for successful reservation |
|---|
| 3747 | + * Return <0 for error (including -EQUOT) |
|---|
| 3748 | + * |
|---|
| 3749 | + * NOTE: This function may sleep for memory allocation, dirty page flushing and |
|---|
| 3750 | + * commit transaction. So caller should not hold any dirty page locked. |
|---|
| 3751 | + */ |
|---|
| 3752 | +int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, |
|---|
| 3753 | + struct extent_changeset **reserved_ret, u64 start, |
|---|
| 3754 | + u64 len) |
|---|
| 3755 | +{ |
|---|
| 3756 | + int ret; |
|---|
| 3757 | + |
|---|
| 3758 | + ret = qgroup_reserve_data(inode, reserved_ret, start, len); |
|---|
| 3759 | + if (ret <= 0 && ret != -EDQUOT) |
|---|
| 3760 | + return ret; |
|---|
| 3761 | + |
|---|
| 3762 | + ret = try_flush_qgroup(inode->root); |
|---|
| 3763 | + if (ret < 0) |
|---|
| 3764 | + return ret; |
|---|
| 3765 | + return qgroup_reserve_data(inode, reserved_ret, start, len); |
|---|
| 3766 | +} |
|---|
| 3767 | + |
|---|
| 3114 | 3768 | /* Free ranges specified by @reserved, normally in error path */ |
|---|
| 3115 | | -static int qgroup_free_reserved_data(struct inode *inode, |
|---|
| 3769 | +static int qgroup_free_reserved_data(struct btrfs_inode *inode, |
|---|
| 3116 | 3770 | struct extent_changeset *reserved, u64 start, u64 len) |
|---|
| 3117 | 3771 | { |
|---|
| 3118 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 3772 | + struct btrfs_root *root = inode->root; |
|---|
| 3119 | 3773 | struct ulist_node *unode; |
|---|
| 3120 | 3774 | struct ulist_iterator uiter; |
|---|
| 3121 | 3775 | struct extent_changeset changeset; |
|---|
| .. | .. |
|---|
| 3151 | 3805 | * EXTENT_QGROUP_RESERVED, we won't double free. |
|---|
| 3152 | 3806 | * So no need to rush.
|---|
| 3153 | 3807 | */ |
|---|
| 3154 | | - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, |
|---|
| 3155 | | - free_start, free_start + free_len - 1, |
|---|
| 3808 | + ret = clear_record_extent_bits(&inode->io_tree, free_start, |
|---|
| 3809 | + free_start + free_len - 1, |
|---|
| 3156 | 3810 | EXTENT_QGROUP_RESERVED, &changeset); |
|---|
| 3157 | 3811 | if (ret < 0) |
|---|
| 3158 | 3812 | goto out; |
|---|
| 3159 | 3813 | freed += changeset.bytes_changed; |
|---|
| 3160 | 3814 | } |
|---|
| 3161 | | - btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed, |
|---|
| 3815 | + btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, |
|---|
| 3162 | 3816 | BTRFS_QGROUP_RSV_DATA); |
|---|
| 3163 | 3817 | ret = freed; |
|---|
| 3164 | 3818 | out: |
|---|
| .. | .. |
|---|
| 3166 | 3820 | return ret; |
|---|
| 3167 | 3821 | } |
|---|
| 3168 | 3822 | |
|---|
| 3169 | | -static int __btrfs_qgroup_release_data(struct inode *inode, |
|---|
| 3823 | +static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, |
|---|
| 3170 | 3824 | struct extent_changeset *reserved, u64 start, u64 len, |
|---|
| 3171 | 3825 | int free) |
|---|
| 3172 | 3826 | { |
|---|
| .. | .. |
|---|
| 3174 | 3828 | int trace_op = QGROUP_RELEASE; |
|---|
| 3175 | 3829 | int ret; |
|---|
| 3176 | 3830 | |
|---|
| 3177 | | - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, |
|---|
| 3178 | | - &BTRFS_I(inode)->root->fs_info->flags)) |
|---|
| 3831 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags)) |
|---|
| 3179 | 3832 | return 0; |
|---|
| 3180 | 3833 | |
|---|
| 3181 | 3834 | /* In release case, we shouldn't have @reserved */ |
|---|
| .. | .. |
|---|
| 3183 | 3836 | if (free && reserved) |
|---|
| 3184 | 3837 | return qgroup_free_reserved_data(inode, reserved, start, len); |
|---|
| 3185 | 3838 | extent_changeset_init(&changeset); |
|---|
| 3186 | | - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, |
|---|
| 3187 | | - start + len -1, EXTENT_QGROUP_RESERVED, &changeset); |
|---|
| 3839 | + ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, |
|---|
| 3840 | + EXTENT_QGROUP_RESERVED, &changeset); |
|---|
| 3188 | 3841 | if (ret < 0) |
|---|
| 3189 | 3842 | goto out; |
|---|
| 3190 | 3843 | |
|---|
| 3191 | 3844 | if (free) |
|---|
| 3192 | 3845 | trace_op = QGROUP_FREE; |
|---|
| 3193 | | - trace_btrfs_qgroup_release_data(inode, start, len, |
|---|
| 3846 | + trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len, |
|---|
| 3194 | 3847 | changeset.bytes_changed, trace_op); |
|---|
| 3195 | 3848 | if (free) |
|---|
| 3196 | | - btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, |
|---|
| 3197 | | - BTRFS_I(inode)->root->objectid, |
|---|
| 3849 | + btrfs_qgroup_free_refroot(inode->root->fs_info, |
|---|
| 3850 | + inode->root->root_key.objectid, |
|---|
| 3198 | 3851 | changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); |
|---|
| 3199 | 3852 | ret = changeset.bytes_changed; |
|---|
| 3200 | 3853 | out: |
|---|
| .. | .. |
|---|
| 3214 | 3867 | * |
|---|
| 3215 | 3868 | * NOTE: This function may sleep for memory allocation. |
|---|
| 3216 | 3869 | */ |
|---|
| 3217 | | -int btrfs_qgroup_free_data(struct inode *inode, |
|---|
| 3870 | +int btrfs_qgroup_free_data(struct btrfs_inode *inode, |
|---|
| 3218 | 3871 | struct extent_changeset *reserved, u64 start, u64 len) |
|---|
| 3219 | 3872 | { |
|---|
| 3220 | 3873 | return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); |
|---|
| .. | .. |
|---|
| 3235 | 3888 | * |
|---|
| 3236 | 3889 | * NOTE: This function may sleep for memory allocation. |
|---|
| 3237 | 3890 | */ |
|---|
| 3238 | | -int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) |
|---|
| 3891 | +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) |
|---|
| 3239 | 3892 | { |
|---|
| 3240 | 3893 | return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); |
|---|
| 3241 | 3894 | } |
|---|
| .. | .. |
|---|
| 3280 | 3933 | return num_bytes; |
|---|
| 3281 | 3934 | } |
|---|
| 3282 | 3935 | |
|---|
| 3283 | | -int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
|---|
| 3284 | | - enum btrfs_qgroup_rsv_type type, bool enforce) |
|---|
| 3936 | +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
|---|
| 3937 | + enum btrfs_qgroup_rsv_type type, bool enforce) |
|---|
| 3285 | 3938 | { |
|---|
| 3286 | 3939 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 3287 | 3940 | int ret; |
|---|
| 3288 | 3941 | |
|---|
| 3289 | 3942 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
|---|
| 3290 | | - !is_fstree(root->objectid) || num_bytes == 0) |
|---|
| 3943 | + !is_fstree(root->root_key.objectid) || num_bytes == 0) |
|---|
| 3291 | 3944 | return 0; |
|---|
| 3292 | 3945 | |
|---|
| 3293 | 3946 | BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); |
|---|
| .. | .. |
|---|
| 3307 | 3960 | return ret; |
|---|
| 3308 | 3961 | } |
|---|
| 3309 | 3962 | |
|---|
| 3963 | +int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
|---|
| 3964 | + enum btrfs_qgroup_rsv_type type, bool enforce) |
|---|
| 3965 | +{ |
|---|
| 3966 | + int ret; |
|---|
| 3967 | + |
|---|
| 3968 | + ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); |
|---|
| 3969 | + if (ret <= 0 && ret != -EDQUOT) |
|---|
| 3970 | + return ret; |
|---|
| 3971 | + |
|---|
| 3972 | + ret = try_flush_qgroup(root); |
|---|
| 3973 | + if (ret < 0) |
|---|
| 3974 | + return ret; |
|---|
| 3975 | + return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); |
|---|
| 3976 | +} |
|---|
| 3977 | + |
|---|
| 3310 | 3978 | void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) |
|---|
| 3311 | 3979 | { |
|---|
| 3312 | 3980 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 3313 | 3981 | |
|---|
| 3314 | 3982 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
|---|
| 3315 | | - !is_fstree(root->objectid)) |
|---|
| 3983 | + !is_fstree(root->root_key.objectid)) |
|---|
| 3316 | 3984 | return; |
|---|
| 3317 | 3985 | |
|---|
| 3318 | 3986 | /* TODO: Update trace point to handle such free */ |
|---|
| 3319 | 3987 | trace_qgroup_meta_free_all_pertrans(root); |
|---|
| 3320 | 3988 | /* Special value -1 means to free all reserved space */ |
|---|
| 3321 | | - btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1, |
|---|
| 3989 | + btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1, |
|---|
| 3322 | 3990 | BTRFS_QGROUP_RSV_META_PERTRANS); |
|---|
| 3323 | 3991 | } |
|---|
| 3324 | 3992 | |
|---|
| .. | .. |
|---|
| 3328 | 3996 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 3329 | 3997 | |
|---|
| 3330 | 3998 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
|---|
| 3331 | | - !is_fstree(root->objectid)) |
|---|
| 3999 | + !is_fstree(root->root_key.objectid)) |
|---|
| 3332 | 4000 | return; |
|---|
| 3333 | 4001 | |
|---|
| 3334 | 4002 | /* |
|---|
| .. | .. |
|---|
| 3339 | 4007 | num_bytes = sub_root_meta_rsv(root, num_bytes, type); |
|---|
| 3340 | 4008 | BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); |
|---|
| 3341 | 4009 | trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); |
|---|
| 3342 | | - btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type); |
|---|
| 4010 | + btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, |
|---|
| 4011 | + num_bytes, type); |
|---|
| 3343 | 4012 | } |
|---|
| 3344 | 4013 | |
|---|
| 3345 | 4014 | static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, |
|---|
| 3346 | 4015 | int num_bytes) |
|---|
| 3347 | 4016 | { |
|---|
| 3348 | | - struct btrfs_root *quota_root = fs_info->quota_root; |
|---|
| 3349 | 4017 | struct btrfs_qgroup *qgroup; |
|---|
| 3350 | 4018 | struct ulist_node *unode; |
|---|
| 3351 | 4019 | struct ulist_iterator uiter; |
|---|
| .. | .. |
|---|
| 3353 | 4021 | |
|---|
| 3354 | 4022 | if (num_bytes == 0) |
|---|
| 3355 | 4023 | return; |
|---|
| 3356 | | - if (!quota_root) |
|---|
| 4024 | + if (!fs_info->quota_root) |
|---|
| 3357 | 4025 | return; |
|---|
| 3358 | 4026 | |
|---|
| 3359 | 4027 | spin_lock(&fs_info->qgroup_lock); |
|---|
| .. | .. |
|---|
| 3393 | 4061 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 3394 | 4062 | |
|---|
| 3395 | 4063 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
|---|
| 3396 | | - !is_fstree(root->objectid)) |
|---|
| 4064 | + !is_fstree(root->root_key.objectid)) |
|---|
| 3397 | 4065 | return; |
|---|
| 3398 | 4066 | /* Same as btrfs_qgroup_free_meta_prealloc() */ |
|---|
| 3399 | 4067 | num_bytes = sub_root_meta_rsv(root, num_bytes, |
|---|
| 3400 | 4068 | BTRFS_QGROUP_RSV_META_PREALLOC); |
|---|
| 3401 | 4069 | trace_qgroup_meta_convert(root, num_bytes); |
|---|
| 3402 | | - qgroup_convert_meta(fs_info, root->objectid, num_bytes); |
|---|
| 4070 | + qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); |
|---|
| 3403 | 4071 | } |
|---|
| 3404 | 4072 | |
|---|
| 3405 | 4073 | /* |
|---|
| 3406 | 4074 | * Check qgroup reserved space leaking, normally at destroy inode |
|---|
| 3407 | 4075 | * time |
|---|
| 3408 | 4076 | */ |
|---|
| 3409 | | -void btrfs_qgroup_check_reserved_leak(struct inode *inode) |
|---|
| 4077 | +void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode) |
|---|
| 3410 | 4078 | { |
|---|
| 3411 | 4079 | struct extent_changeset changeset; |
|---|
| 3412 | 4080 | struct ulist_node *unode; |
|---|
| .. | .. |
|---|
| 3414 | 4082 | int ret; |
|---|
| 3415 | 4083 | |
|---|
| 3416 | 4084 | extent_changeset_init(&changeset); |
|---|
| 3417 | | - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
|---|
| 4085 | + ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1, |
|---|
| 3418 | 4086 | EXTENT_QGROUP_RESERVED, &changeset); |
|---|
| 3419 | 4087 | |
|---|
| 3420 | 4088 | WARN_ON(ret < 0); |
|---|
| 3421 | 4089 | if (WARN_ON(changeset.bytes_changed)) { |
|---|
| 3422 | 4090 | ULIST_ITER_INIT(&iter); |
|---|
| 3423 | 4091 | while ((unode = ulist_next(&changeset.range_changed, &iter))) { |
|---|
| 3424 | | - btrfs_warn(BTRFS_I(inode)->root->fs_info, |
|---|
| 3425 | | - "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", |
|---|
| 3426 | | - inode->i_ino, unode->val, unode->aux); |
|---|
| 4092 | + btrfs_warn(inode->root->fs_info, |
|---|
| 4093 | + "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu", |
|---|
| 4094 | + btrfs_ino(inode), unode->val, unode->aux); |
|---|
| 3427 | 4095 | } |
|---|
| 3428 | | - btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, |
|---|
| 3429 | | - BTRFS_I(inode)->root->objectid, |
|---|
| 4096 | + btrfs_qgroup_free_refroot(inode->root->fs_info, |
|---|
| 4097 | + inode->root->root_key.objectid, |
|---|
| 3430 | 4098 | changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); |
|---|
| 3431 | 4099 | |
|---|
| 3432 | 4100 | } |
|---|
| 3433 | 4101 | extent_changeset_release(&changeset); |
|---|
| 3434 | 4102 | } |
|---|
| 4103 | + |
|---|
| 4104 | +void btrfs_qgroup_init_swapped_blocks( |
|---|
| 4105 | + struct btrfs_qgroup_swapped_blocks *swapped_blocks) |
|---|
| 4106 | +{ |
|---|
| 4107 | + int i; |
|---|
| 4108 | + |
|---|
| 4109 | + spin_lock_init(&swapped_blocks->lock); |
|---|
| 4110 | + for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
|---|
| 4111 | + swapped_blocks->blocks[i] = RB_ROOT; |
|---|
| 4112 | + swapped_blocks->swapped = false; |
|---|
| 4113 | +} |
|---|
| 4114 | + |
|---|
| 4115 | +/* |
|---|
| 4116 | + * Delete all swapped blocks record of @root. |
|---|
| 4117 | + * Every record here means we skipped a full subtree scan for qgroup. |
|---|
| 4118 | + * |
|---|
| 4119 | + * Gets called when committing one transaction. |
|---|
| 4120 | + */ |
|---|
| 4121 | +void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root) |
|---|
| 4122 | +{ |
|---|
| 4123 | + struct btrfs_qgroup_swapped_blocks *swapped_blocks; |
|---|
| 4124 | + int i; |
|---|
| 4125 | + |
|---|
| 4126 | + swapped_blocks = &root->swapped_blocks; |
|---|
| 4127 | + |
|---|
| 4128 | + spin_lock(&swapped_blocks->lock); |
|---|
| 4129 | + if (!swapped_blocks->swapped) |
|---|
| 4130 | + goto out; |
|---|
| 4131 | + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { |
|---|
| 4132 | + struct rb_root *cur_root = &swapped_blocks->blocks[i]; |
|---|
| 4133 | + struct btrfs_qgroup_swapped_block *entry; |
|---|
| 4134 | + struct btrfs_qgroup_swapped_block *next; |
|---|
| 4135 | + |
|---|
| 4136 | + rbtree_postorder_for_each_entry_safe(entry, next, cur_root, |
|---|
| 4137 | + node) |
|---|
| 4138 | + kfree(entry); |
|---|
| 4139 | + swapped_blocks->blocks[i] = RB_ROOT; |
|---|
| 4140 | + } |
|---|
| 4141 | + swapped_blocks->swapped = false; |
|---|
| 4142 | +out: |
|---|
| 4143 | + spin_unlock(&swapped_blocks->lock); |
|---|
| 4144 | +} |
|---|
| 4145 | + |
|---|
| 4146 | +/* |
|---|
| 4147 | + * Add subtree roots record into @subvol_root. |
|---|
| 4148 | + * |
|---|
| 4149 | + * @subvol_root: tree root of the subvolume tree get swapped |
|---|
| 4150 | + * @bg: block group under balance |
|---|
| 4151 | + * @subvol_parent/slot: pointer to the subtree root in subvolume tree |
|---|
| 4152 | + * @reloc_parent/slot: pointer to the subtree root in reloc tree |
|---|
| 4153 | + * BOTH POINTERS ARE BEFORE TREE SWAP |
|---|
| 4154 | + * @last_snapshot: last snapshot generation of the subvolume tree |
|---|
| 4155 | + */ |
|---|
| 4156 | +int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, |
|---|
| 4157 | + struct btrfs_root *subvol_root, |
|---|
| 4158 | + struct btrfs_block_group *bg, |
|---|
| 4159 | + struct extent_buffer *subvol_parent, int subvol_slot, |
|---|
| 4160 | + struct extent_buffer *reloc_parent, int reloc_slot, |
|---|
| 4161 | + u64 last_snapshot) |
|---|
| 4162 | +{ |
|---|
| 4163 | + struct btrfs_fs_info *fs_info = subvol_root->fs_info; |
|---|
| 4164 | + struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks; |
|---|
| 4165 | + struct btrfs_qgroup_swapped_block *block; |
|---|
| 4166 | + struct rb_node **cur; |
|---|
| 4167 | + struct rb_node *parent = NULL; |
|---|
| 4168 | + int level = btrfs_header_level(subvol_parent) - 1; |
|---|
| 4169 | + int ret = 0; |
|---|
| 4170 | + |
|---|
| 4171 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
|---|
| 4172 | + return 0; |
|---|
| 4173 | + |
|---|
| 4174 | + if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) > |
|---|
| 4175 | + btrfs_node_ptr_generation(reloc_parent, reloc_slot)) { |
|---|
| 4176 | + btrfs_err_rl(fs_info, |
|---|
| 4177 | + "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu", |
|---|
| 4178 | + __func__, |
|---|
| 4179 | + btrfs_node_ptr_generation(subvol_parent, subvol_slot), |
|---|
| 4180 | + btrfs_node_ptr_generation(reloc_parent, reloc_slot)); |
|---|
| 4181 | + return -EUCLEAN; |
|---|
| 4182 | + } |
|---|
| 4183 | + |
|---|
| 4184 | + block = kmalloc(sizeof(*block), GFP_NOFS); |
|---|
| 4185 | + if (!block) { |
|---|
| 4186 | + ret = -ENOMEM; |
|---|
| 4187 | + goto out; |
|---|
| 4188 | + } |
|---|
| 4189 | + |
|---|
| 4190 | + /* |
|---|
| 4191 | + * @reloc_parent/slot is still before swap, while @block is going to |
|---|
| 4192 | + * record the bytenr after swap, so we do the swap here. |
|---|
| 4193 | + */ |
|---|
| 4194 | + block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot); |
|---|
| 4195 | + block->subvol_generation = btrfs_node_ptr_generation(reloc_parent, |
|---|
| 4196 | + reloc_slot); |
|---|
| 4197 | + block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot); |
|---|
| 4198 | + block->reloc_generation = btrfs_node_ptr_generation(subvol_parent, |
|---|
| 4199 | + subvol_slot); |
|---|
| 4200 | + block->last_snapshot = last_snapshot; |
|---|
| 4201 | + block->level = level; |
|---|
| 4202 | + |
|---|
| 4203 | + /* |
|---|
| 4204 | + * If we have bg == NULL, we're called from btrfs_recover_relocation(), |
|---|
| 4205 | + * no one else can modify tree blocks, thus the qgroup numbers will not change
|---|
| 4206 | + * no matter the value of trace_leaf. |
|---|
| 4207 | + */ |
|---|
| 4208 | + if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA) |
|---|
| 4209 | + block->trace_leaf = true; |
|---|
| 4210 | + else |
|---|
| 4211 | + block->trace_leaf = false; |
|---|
| 4212 | + btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot); |
|---|
| 4213 | + |
|---|
| 4214 | + /* Insert @block into @blocks */ |
|---|
| 4215 | + spin_lock(&blocks->lock); |
|---|
| 4216 | + cur = &blocks->blocks[level].rb_node; |
|---|
| 4217 | + while (*cur) { |
|---|
| 4218 | + struct btrfs_qgroup_swapped_block *entry; |
|---|
| 4219 | + |
|---|
| 4220 | + parent = *cur; |
|---|
| 4221 | + entry = rb_entry(parent, struct btrfs_qgroup_swapped_block, |
|---|
| 4222 | + node); |
|---|
| 4223 | + |
|---|
| 4224 | + if (entry->subvol_bytenr < block->subvol_bytenr) { |
|---|
| 4225 | + cur = &(*cur)->rb_left; |
|---|
| 4226 | + } else if (entry->subvol_bytenr > block->subvol_bytenr) { |
|---|
| 4227 | + cur = &(*cur)->rb_right; |
|---|
| 4228 | + } else { |
|---|
| 4229 | + if (entry->subvol_generation != |
|---|
| 4230 | + block->subvol_generation || |
|---|
| 4231 | + entry->reloc_bytenr != block->reloc_bytenr || |
|---|
| 4232 | + entry->reloc_generation != |
|---|
| 4233 | + block->reloc_generation) { |
|---|
| 4234 | + /* |
|---|
| 4235 | + * Duplicated but mismatch entry found. |
|---|
| 4236 | + * Shouldn't happen. |
|---|
| 4237 | + * |
|---|
| 4238 | + * Marking qgroup inconsistent should be enough |
|---|
| 4239 | + * for end users. |
|---|
| 4240 | + */ |
|---|
| 4241 | + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); |
|---|
| 4242 | + ret = -EEXIST; |
|---|
| 4243 | + } |
|---|
| 4244 | + kfree(block); |
|---|
| 4245 | + goto out_unlock; |
|---|
| 4246 | + } |
|---|
| 4247 | + } |
|---|
| 4248 | + rb_link_node(&block->node, parent, cur); |
|---|
| 4249 | + rb_insert_color(&block->node, &blocks->blocks[level]); |
|---|
| 4250 | + blocks->swapped = true; |
|---|
| 4251 | +out_unlock: |
|---|
| 4252 | + spin_unlock(&blocks->lock); |
|---|
| 4253 | +out: |
|---|
| 4254 | + if (ret < 0) |
|---|
| 4255 | + fs_info->qgroup_flags |= |
|---|
| 4256 | + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
|---|
| 4257 | + return ret; |
|---|
| 4258 | +} |
|---|
| 4259 | + |
|---|
| 4260 | +/* |
|---|
| 4261 | + * Check if the tree block is a subtree root, and if so do the needed |
|---|
| 4262 | + * delayed subtree trace for qgroup. |
|---|
| 4263 | + * |
|---|
| 4264 | + * This is called during btrfs_cow_block(). |
|---|
| 4265 | + */ |
|---|
| 4266 | +int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, |
|---|
| 4267 | + struct btrfs_root *root, |
|---|
| 4268 | + struct extent_buffer *subvol_eb) |
|---|
| 4269 | +{ |
|---|
| 4270 | + struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 4271 | + struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; |
|---|
| 4272 | + struct btrfs_qgroup_swapped_block *block; |
|---|
| 4273 | + struct extent_buffer *reloc_eb = NULL; |
|---|
| 4274 | + struct rb_node *node; |
|---|
| 4275 | + bool found = false; |
|---|
| 4276 | + bool swapped = false; |
|---|
| 4277 | + int level = btrfs_header_level(subvol_eb); |
|---|
| 4278 | + int ret = 0; |
|---|
| 4279 | + int i; |
|---|
| 4280 | + |
|---|
| 4281 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
|---|
| 4282 | + return 0; |
|---|
| 4283 | + if (!is_fstree(root->root_key.objectid) || !root->reloc_root) |
|---|
| 4284 | + return 0; |
|---|
| 4285 | + |
|---|
| 4286 | + spin_lock(&blocks->lock); |
|---|
| 4287 | + if (!blocks->swapped) { |
|---|
| 4288 | + spin_unlock(&blocks->lock); |
|---|
| 4289 | + return 0; |
|---|
| 4290 | + } |
|---|
| 4291 | + node = blocks->blocks[level].rb_node; |
|---|
| 4292 | + |
|---|
| 4293 | + while (node) { |
|---|
| 4294 | + block = rb_entry(node, struct btrfs_qgroup_swapped_block, node); |
|---|
| 4295 | + if (block->subvol_bytenr < subvol_eb->start) { |
|---|
| 4296 | + node = node->rb_left; |
|---|
| 4297 | + } else if (block->subvol_bytenr > subvol_eb->start) { |
|---|
| 4298 | + node = node->rb_right; |
|---|
| 4299 | + } else { |
|---|
| 4300 | + found = true; |
|---|
| 4301 | + break; |
|---|
| 4302 | + } |
|---|
| 4303 | + } |
|---|
| 4304 | + if (!found) { |
|---|
| 4305 | + spin_unlock(&blocks->lock); |
|---|
| 4306 | + goto out; |
|---|
| 4307 | + } |
|---|
| 4308 | + /* Found one, remove it from @blocks first and update blocks->swapped */ |
|---|
| 4309 | + rb_erase(&block->node, &blocks->blocks[level]); |
|---|
| 4310 | + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { |
|---|
| 4311 | + if (RB_EMPTY_ROOT(&blocks->blocks[i])) { |
|---|
| 4312 | + swapped = true; |
|---|
| 4313 | + break; |
|---|
| 4314 | + } |
|---|
| 4315 | + } |
|---|
| 4316 | + blocks->swapped = swapped; |
|---|
| 4317 | + spin_unlock(&blocks->lock); |
|---|
| 4318 | + |
|---|
| 4319 | + /* Read out reloc subtree root */ |
|---|
| 4320 | + reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, |
|---|
| 4321 | + block->reloc_generation, block->level, |
|---|
| 4322 | + &block->first_key); |
|---|
| 4323 | + if (IS_ERR(reloc_eb)) { |
|---|
| 4324 | + ret = PTR_ERR(reloc_eb); |
|---|
| 4325 | + reloc_eb = NULL; |
|---|
| 4326 | + goto free_out; |
|---|
| 4327 | + } |
|---|
| 4328 | + if (!extent_buffer_uptodate(reloc_eb)) { |
|---|
| 4329 | + ret = -EIO; |
|---|
| 4330 | + goto free_out; |
|---|
| 4331 | + } |
|---|
| 4332 | + |
|---|
| 4333 | + ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb, |
|---|
| 4334 | + block->last_snapshot, block->trace_leaf); |
|---|
| 4335 | +free_out: |
|---|
| 4336 | + kfree(block); |
|---|
| 4337 | + free_extent_buffer(reloc_eb); |
|---|
| 4338 | +out: |
|---|
| 4339 | + if (ret < 0) { |
|---|
| 4340 | + btrfs_err_rl(fs_info, |
|---|
| 4341 | + "failed to account subtree at bytenr %llu: %d", |
|---|
| 4342 | + subvol_eb->start, ret); |
|---|
| 4343 | + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
|---|
| 4344 | + } |
|---|
| 4345 | + return ret; |
|---|
| 4346 | +} |
|---|
| 4347 | + |
|---|
| 4348 | +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) |
|---|
| 4349 | +{ |
|---|
| 4350 | + struct btrfs_qgroup_extent_record *entry; |
|---|
| 4351 | + struct btrfs_qgroup_extent_record *next; |
|---|
| 4352 | + struct rb_root *root; |
|---|
| 4353 | + |
|---|
| 4354 | + root = &trans->delayed_refs.dirty_extent_root; |
|---|
| 4355 | + rbtree_postorder_for_each_entry_safe(entry, next, root, node) { |
|---|
| 4356 | + ulist_free(entry->old_roots); |
|---|
| 4357 | + kfree(entry); |
|---|
| 4358 | + } |
|---|
| 4359 | +} |
|---|