@@ -11 +11 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
-#include <linux/sizes.h>
+#include <linux/sched/mm.h>
 
 #include "ctree.h"
 #include "transaction.h"
@@ -21 +21 @@
 #include "backref.h"
 #include "extent_io.h"
 #include "qgroup.h"
-
+#include "block-group.h"
+#include "sysfs.h"
 
 /* TODO XXX FIXME
  * - subvol delete -> delete when ref goes to 0? delete limits also?
@@ -30 +31 @@
  * - sync
  * - copy also limits on subvol creation
  * - limit
- * - caches fuer ulists
+ * - caches for ulists
  * - performance benchmarks
  * - check all ioctl parameters
  */
@@ -220 +221 @@
 	return qgroup;
 }
 
-static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
+static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
+			    struct btrfs_qgroup *qgroup)
 {
 	struct btrfs_qgroup_list *list;
 
@@ -240 +242 @@
 		list_del(&list->next_member);
 		kfree(list);
 	}
-	kfree(qgroup);
 }
 
 /* must be called with qgroup_lock held */
@@ -252 +253 @@
 		return -ENOENT;
 
 	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
-	__del_qgroup_rb(qgroup);
+	__del_qgroup_rb(fs_info, qgroup);
 	return 0;
 }
 
@@ -351 +352 @@
 		goto out;
 	}
 
+	ret = btrfs_sysfs_add_qgroups(fs_info);
+	if (ret < 0)
+		goto out;
 	/* default this to quota off, in case no status key is found */
 	fs_info->qgroup_flags = 0;
 
@@ -412 +416 @@
 				goto out;
 			}
 		}
+		ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+		if (ret < 0)
+			goto out;
+
 		switch (found_key.type) {
 		case BTRFS_QGROUP_INFO_KEY: {
 			struct btrfs_qgroup_info_item *ptr;
@@ -500 +508 @@
 		ulist_free(fs_info->qgroup_ulist);
 		fs_info->qgroup_ulist = NULL;
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+		btrfs_sysfs_del_qgroups(fs_info);
 	}
 
 	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Called in close_ctree() when quota is still enabled. This verifies we don't
+ * leak any reserved space.
+ *
+ * Return false if no reserved space is left.
+ * Return true if some reserved space is leaked.
+ */
+bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
+{
+	struct rb_node *node;
+	bool ret = false;
+
+	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+		return ret;
+	/*
+	 * Since we're unmounting, there is no race and no need to grab the
+	 * qgroup lock. And here we don't go post-order, to provide a more
+	 * user-friendly sorted result.
+	 */
+	for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) {
+		struct btrfs_qgroup *qgroup;
+		int i;
+
+		qgroup = rb_entry(node, struct btrfs_qgroup, node);
+		for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) {
+			if (qgroup->rsv.values[i]) {
+				ret = true;
+				btrfs_warn(fs_info,
+		"qgroup %hu/%llu has unreleased space, type %d rsv %llu",
+					   btrfs_qgroup_level(qgroup->qgroupid),
+					   btrfs_qgroup_subvolid(qgroup->qgroupid),
+					   i, qgroup->rsv.values[i]);
+			}
+		}
+	}
+	return ret;
 }
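For orientation, the unmount-time caller of btrfs_check_quota_leak() might look like the sketch below. The placement in close_ctree() and the exact WARN_ON/btrfs_err wording are assumptions, not part of this hunk:

```c
/* Hypothetical caller sketch (close_ctree() in disk-io.c): warn loudly
 * on debug builds if any qgroup still holds reserved space at unmount. */
if (btrfs_check_quota_leak(fs_info)) {
	WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
	btrfs_err(fs_info, "qgroup reserved space leaked");
}
```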
@@ -507 +554 @@
 
 /*
@@ -519 +566 @@
 	while ((n = rb_first(&fs_info->qgroup_tree))) {
 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
 		rb_erase(n, &fs_info->qgroup_tree);
-		__del_qgroup_rb(qgroup);
+		__del_qgroup_rb(fs_info, qgroup);
+		btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+		kfree(qgroup);
 	}
 	/*
-	 * we call btrfs_free_qgroup_config() when umounting
+	 * We call btrfs_free_qgroup_config() when unmounting
 	 * filesystem and disabling quota, so we set qgroup_ulist
 	 * to be null here to avoid double free.
 	 */
 	ulist_free(fs_info->qgroup_ulist);
 	fs_info->qgroup_ulist = NULL;
+	btrfs_sysfs_del_qgroups(fs_info);
 }
 
 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
@@ -887 +937 @@
 	struct btrfs_key found_key;
 	struct btrfs_qgroup *qgroup = NULL;
 	struct btrfs_trans_handle *trans = NULL;
+	struct ulist *ulist = NULL;
 	int ret = 0;
 	int slot;
+
+	/*
+	 * We need to have subvol_sem write locked to prevent races between
+	 * concurrent tasks trying to enable quotas, because we will unlock
+	 * and relock qgroup_ioctl_lock before setting fs_info->quota_root
+	 * and before setting BTRFS_FS_QUOTA_ENABLED.
+	 */
+	lockdep_assert_held_write(&fs_info->subvol_sem);
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (fs_info->quota_root)
 		goto out;
+
+	ulist = ulist_alloc(GFP_KERNEL);
+	if (!ulist) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = btrfs_sysfs_add_qgroups(fs_info);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Unlock qgroup_ioctl_lock before starting the transaction. This is
+	 * to avoid lock acquisition inversion problems (reported by lockdep)
+	 * between qgroup_ioctl_lock and the vfs freeze semaphores, acquired
+	 * when we start a transaction.
+	 * After we have started the transaction, lock qgroup_ioctl_lock again
+	 * and check if someone else created the quota root in the meanwhile.
+	 * If so, just return success and release the transaction handle.
+	 *
+	 * Also, we don't need to worry about someone else calling
+	 * btrfs_sysfs_add_qgroups() after we unlock and getting an error,
+	 * because that function returns 0 (success) when the sysfs entries
+	 * already exist.
+	 */
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
 	/*
 	 * 1 for quota root item
@@ -903 +987 @@
 	 * would be a lot of overkill.
 	 */
 	trans = btrfs_start_transaction(tree_root, 2);
+
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		trans = NULL;
 		goto out;
 	}
 
-	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
-	if (!fs_info->qgroup_ulist) {
-		ret = -ENOMEM;
-		btrfs_abort_transaction(trans, ret);
+	if (fs_info->quota_root)
 		goto out;
-	}
+
+	fs_info->qgroup_ulist = ulist;
+	ulist = NULL;
 
 	/*
 	 * initially create the quota tree
 	 */
-	quota_root = btrfs_create_tree(trans, fs_info,
-				       BTRFS_QUOTA_TREE_OBJECTID);
+	quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
 	if (IS_ERR(quota_root)) {
 		ret = PTR_ERR(quota_root);
 		btrfs_abort_transaction(trans, ret);
@@ -976 +1060 @@
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 		if (found_key.type == BTRFS_ROOT_REF_KEY) {
+
+			/* Release locks on tree_root before we access quota_root */
+			btrfs_release_path(path);
+
 			ret = add_qgroup_item(trans, quota_root,
 					      found_key.offset);
 			if (ret) {
@@ -988 +1076 @@
 				ret = PTR_ERR(qgroup);
 				btrfs_abort_transaction(trans, ret);
 				goto out_free_path;
+			}
+			ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+			if (ret < 0) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_free_path;
+			}
+			ret = btrfs_search_slot_for_read(tree_root, &found_key,
+							 path, 1, 0);
+			if (ret < 0) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_free_path;
+			}
+			if (ret > 0) {
+				/*
+				 * Shouldn't happen, but in case it does we
+				 * don't need to do the btrfs_next_item, just
+				 * continue.
+				 */
+				continue;
 			}
 		}
 		ret = btrfs_next_item(tree_root, path);
@@ -1013 +1120 @@
 		btrfs_abort_transaction(trans, ret);
 		goto out_free_path;
 	}
+	ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+	if (ret < 0) {
+		btrfs_abort_transaction(trans, ret);
+		goto out_free_path;
+	}
 
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+	/*
+	 * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
+	 * a deadlock with tasks concurrently doing other qgroup operations,
+	 * such as adding/removing qgroups or adding/deleting qgroup relations,
+	 * because all qgroup operations first start or join a transaction and
+	 * then lock the qgroup_ioctl_lock mutex.
+	 * We are safe from a concurrent task trying to enable quotas by
+	 * calling this same function, since we are serialized by
+	 * fs_info->subvol_sem.
+	 */
 	ret = btrfs_commit_transaction(trans);
 	trans = NULL;
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (ret)
 		goto out_free_path;
 
@@ -1035 +1158 @@
 		fs_info->qgroup_rescan_running = true;
 		btrfs_queue_work(fs_info->qgroup_rescan_workers,
 				 &fs_info->qgroup_rescan_work);
+	} else {
+		/*
+		 * We have set both BTRFS_FS_QUOTA_ENABLED and
+		 * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
+		 * -EINPROGRESS. That can happen because someone started the
+		 * rescan worker by calling the quota rescan ioctl before we
+		 * attempted to initialize the rescan worker. Failure due to
+		 * quotas disabled in the meanwhile is not possible, because
+		 * we are holding a write lock on fs_info->subvol_sem, which
+		 * is also acquired when disabling quotas.
+		 * Ignore such an error; any other error would require undoing
+		 * everything we did in the transaction we just committed.
+		 */
+		ASSERT(ret == -EINPROGRESS);
+		ret = 0;
 	}
 
 out_free_path:
 	btrfs_free_path(path);
 out_free_root:
-	if (ret) {
-		free_extent_buffer(quota_root->node);
-		free_extent_buffer(quota_root->commit_root);
-		kfree(quota_root);
-	}
+	if (ret)
+		btrfs_put_root(quota_root);
 out:
 	if (ret) {
 		ulist_free(fs_info->qgroup_ulist);
 		fs_info->qgroup_ulist = NULL;
-		if (trans)
-			btrfs_end_transaction(trans);
+		btrfs_sysfs_del_qgroups(fs_info);
 	}
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+	if (ret && trans)
+		btrfs_end_transaction(trans);
+	else if (trans)
+		ret = btrfs_end_transaction(trans);
+	ulist_free(ulist);
 	return ret;
 }
 
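The unlock/start-transaction/relock dance is the core of the btrfs_quota_enable() changes above. Distilled to a minimal sketch (a hypothetical helper, not an actual function in this patch), the pattern is:

```c
/*
 * Minimal sketch of the ordering used by btrfs_quota_enable() above:
 * qgroup_ioctl_lock must not be held across btrfs_start_transaction(),
 * which acquires the VFS freeze semaphores, so drop it first and
 * revalidate the guarded state after reacquiring it.
 */
static int start_trans_relock(struct btrfs_fs_info *fs_info,
			      struct btrfs_trans_handle **trans)
{
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	*trans = btrfs_start_transaction(fs_info->tree_root, 2);
	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (IS_ERR(*trans))
		return PTR_ERR(*trans);
	/* Revalidate: another task may have created the quota root. */
	if (fs_info->quota_root)
		return 1;	/* caller ends the transaction and returns 0 */
	return 0;
}
```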
@@ -1062 +1201 @@
 	struct btrfs_trans_handle *trans = NULL;
 	int ret = 0;
 
+	/*
+	 * We need to have subvol_sem write locked to prevent races with
+	 * snapshot creation.
+	 */
+	lockdep_assert_held_write(&fs_info->subvol_sem);
+
+	/*
+	 * Lock the cleaner mutex to prevent races with concurrent relocation,
+	 * because relocation may be building backrefs for blocks of the quota
+	 * root while we are deleting the root. This is like dropping fs roots
+	 * of deleted snapshots/subvolumes; we need the same protection.
+	 *
+	 * This also prevents races between concurrent tasks trying to disable
+	 * quotas, because we will unlock and relock qgroup_ioctl_lock across
+	 * BTRFS_FS_QUOTA_ENABLED changes.
+	 */
+	mutex_lock(&fs_info->cleaner_mutex);
+
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (!fs_info->quota_root)
 		goto out;
+
+	/*
+	 * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan
+	 * worker to complete. Otherwise we can deadlock, because
+	 * btrfs_remove_qgroup() needs to lock that mutex while holding a
+	 * transaction handle, and the rescan worker needs to commit a
+	 * transaction.
+	 */
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
+	/*
+	 * Request the qgroup rescan worker to complete and wait for it. This
+	 * wait must be done before starting the transaction for quota disable,
+	 * since the qgroup rescan worker may otherwise deadlock with our
+	 * transaction.
+	 */
+	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+	btrfs_qgroup_wait_for_completion(fs_info, false);
 
 	/*
 	 * 1 For the root item
 	 *
 	 * We should also reserve enough items for the quota tree deletion in
 	 * btrfs_clean_quota_tree but this is not done.
+	 *
+	 * Also, we must always start a transaction without holding the mutex
+	 * qgroup_ioctl_lock, see btrfs_quota_enable().
 	 */
 	trans = btrfs_start_transaction(fs_info->tree_root, 1);
+
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
+		trans = NULL;
+		set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 		goto out;
 	}
 
-	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-	btrfs_qgroup_wait_for_completion(fs_info, false);
+	if (!fs_info->quota_root)
+		goto out;
+
 	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
 	fs_info->quota_root = NULL;
@@ -1091 +1272 @@
 	ret = btrfs_clean_quota_tree(trans, quota_root);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
-		goto end_trans;
+		goto out;
 	}
 
 	ret = btrfs_del_root(trans, &quota_root->root_key);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
-		goto end_trans;
+		goto out;
 	}
 
+	spin_lock(&fs_info->trans_lock);
 	list_del(&quota_root->dirty_list);
+	spin_unlock(&fs_info->trans_lock);
 
 	btrfs_tree_lock(quota_root->node);
-	clean_tree_block(fs_info, quota_root->node);
+	btrfs_clean_tree_block(quota_root->node);
 	btrfs_tree_unlock(quota_root->node);
 	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
 
-	free_extent_buffer(quota_root->node);
-	free_extent_buffer(quota_root->commit_root);
-	kfree(quota_root);
+	btrfs_put_root(quota_root);
 
-end_trans:
-	ret = btrfs_end_transaction(trans);
 out:
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+	if (ret && trans)
+		btrfs_end_transaction(trans);
+	else if (trans)
+		ret = btrfs_end_transaction(trans);
+	mutex_unlock(&fs_info->cleaner_mutex);
+
 	return ret;
 }
 
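The disable path above follows a strict ordering; the distilled sequence is shown below as an illustrative (hypothetical) helper, not an actual function in the patch:

```c
/*
 * Distilled ordering from btrfs_quota_disable() above: stop the rescan
 * worker before starting the transaction that deletes the quota tree,
 * because the worker itself joins and commits transactions.
 */
static void quota_disable_prep(struct btrfs_fs_info *fs_info)
{
	/* 1. New rescan iterations bail out once the bit is cleared. */
	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	/* 2. Wait with no mutex or transaction held, to avoid deadlocks. */
	btrfs_qgroup_wait_for_completion(fs_info, false);
	/* 3. Only now start the transaction and delete the quota root. */
}
```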
@@ -1129 +1314 @@
  * The easy accounting, we're updating qgroup relationship whose child qgroup
  * only has exclusive extents.
  *
- * In this case, all exclsuive extents will also be exlusive for parent, so
+ * In this case, all exclusive extents will also be exclusive for parent, so
  * excl/rfer just get added/removed.
  *
  * So is qgroup reservation space, which should also be added/removed to
@@ -1246 +1431 @@
 				u64 dst)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup *member;
 	struct btrfs_qgroup_list *list;
 	struct ulist *tmp;
+	unsigned int nofs_flag;
 	int ret = 0;
 
 	/* Check the level of src and dst first */
 	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
 		return -EINVAL;
 
+	/* We hold a transaction handle open, must do a NOFS allocation. */
+	nofs_flag = memalloc_nofs_save();
 	tmp = ulist_alloc(GFP_KERNEL);
+	memalloc_nofs_restore(nofs_flag);
 	if (!tmp)
 		return -ENOMEM;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
-	quota_root = fs_info->quota_root;
-	if (!quota_root) {
-		ret = -EINVAL;
+	if (!fs_info->quota_root) {
+		ret = -ENOTCONN;
 		goto out;
 	}
 	member = find_qgroup_rb(fs_info, src);
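The general form of the scoped-NOFS pattern introduced above, as a standalone fragment (the surrounding function is assumed to hold an open transaction handle):

```c
#include <linux/sched/mm.h>

/* While a transaction handle is open, a GFP_KERNEL allocation could
 * recurse into reclaim and wait on the very transaction we hold; the
 * scope turns it into an implicit GFP_NOFS allocation. */
unsigned int nofs_flag;
struct ulist *tmp;

nofs_flag = memalloc_nofs_save();
tmp = ulist_alloc(GFP_KERNEL);	/* behaves as GFP_NOFS inside the scope */
memalloc_nofs_restore(nofs_flag);
```

The scoped API is preferred over hardcoding GFP_NOFS because callers that do not hold a transaction keep the less restrictive GFP_KERNEL behavior.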
@@ -1310 +1497 @@
 			      u64 dst)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup *member;
 	struct btrfs_qgroup_list *list;
 	struct ulist *tmp;
+	bool found = false;
+	unsigned int nofs_flag;
 	int ret = 0;
-	int err;
+	int ret2;
 
+	/* We hold a transaction handle open, must do a NOFS allocation. */
+	nofs_flag = memalloc_nofs_save();
 	tmp = ulist_alloc(GFP_KERNEL);
+	memalloc_nofs_restore(nofs_flag);
 	if (!tmp)
 		return -ENOMEM;
 
-	quota_root = fs_info->quota_root;
-	if (!quota_root) {
-		ret = -EINVAL;
+	if (!fs_info->quota_root) {
+		ret = -ENOTCONN;
 		goto out;
 	}
 
 	member = find_qgroup_rb(fs_info, src);
 	parent = find_qgroup_rb(fs_info, dst);
-	if (!member || !parent) {
-		ret = -EINVAL;
-		goto out;
-	}
+	/*
+	 * If the parent/member pair doesn't exist, just try to delete the
+	 * dead relation items.
+	 */
+	if (!member || !parent)
+		goto delete_item;
 
 	/* check if such qgroup relation exist firstly */
 	list_for_each_entry(list, &member->groups, next_group) {
-		if (list->group == parent)
-			goto exist;
+		if (list->group == parent) {
+			found = true;
+			break;
+		}
 	}
-	ret = -ENOENT;
-	goto out;
-exist:
-	ret = del_qgroup_relation_item(trans, src, dst);
-	err = del_qgroup_relation_item(trans, dst, src);
-	if (err && !ret)
-		ret = err;
 
-	spin_lock(&fs_info->qgroup_lock);
-	del_relation_rb(fs_info, src, dst);
-	ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
-	spin_unlock(&fs_info->qgroup_lock);
+delete_item:
+	ret = del_qgroup_relation_item(trans, src, dst);
+	if (ret < 0 && ret != -ENOENT)
+		goto out;
+	ret2 = del_qgroup_relation_item(trans, dst, src);
+	if (ret2 < 0 && ret2 != -ENOENT)
+		goto out;
+
+	/* At least one deletion succeeded, return 0 */
+	if (!ret || !ret2)
+		ret = 0;
+
+	if (found) {
+		spin_lock(&fs_info->qgroup_lock);
+		del_relation_rb(fs_info, src, dst);
+		ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
+		spin_unlock(&fs_info->qgroup_lock);
+	}
 out:
 	ulist_free(tmp);
 	return ret;
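The error-combining rule implemented above, restated as a self-contained sketch (hypothetical helper name): both directions of the relation item are deleted, -ENOENT from either side is tolerated, and the call succeeds if at least one of the two items existed.

```c
/* Sketch of the ret/ret2 combining logic from the hunk above. */
static int del_relation_items(struct btrfs_trans_handle *trans, u64 src, u64 dst)
{
	int ret = del_qgroup_relation_item(trans, src, dst);
	int ret2;

	if (ret < 0 && ret != -ENOENT)
		return ret;			/* hard failure */
	ret2 = del_qgroup_relation_item(trans, dst, src);
	if (ret2 < 0 && ret2 != -ENOENT)
		return ret2;			/* hard failure */
	return (!ret || !ret2) ? 0 : -ENOENT;	/* 0 if either item existed */
}
```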
@@ -1378 +1579 @@
 	int ret = 0;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
-	quota_root = fs_info->quota_root;
-	if (!quota_root) {
-		ret = -EINVAL;
+	if (!fs_info->quota_root) {
+		ret = -ENOTCONN;
 		goto out;
 	}
+	quota_root = fs_info->quota_root;
 	qgroup = find_qgroup_rb(fs_info, qgroupid);
 	if (qgroup) {
 		ret = -EEXIST;
@@ -1397 +1598 @@
 	qgroup = add_qgroup_rb(fs_info, qgroupid);
 	spin_unlock(&fs_info->qgroup_lock);
 
-	if (IS_ERR(qgroup))
+	if (IS_ERR(qgroup)) {
 		ret = PTR_ERR(qgroup);
+		goto out;
+	}
+	ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
 out:
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
@@ -1407 +1611 @@
 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	struct btrfs_qgroup_list *list;
 	int ret = 0;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
-	quota_root = fs_info->quota_root;
-	if (!quota_root) {
-		ret = -EINVAL;
+	if (!fs_info->quota_root) {
+		ret = -ENOTCONN;
 		goto out;
 	}
 
@@ -1423 +1625 @@
 	if (!qgroup) {
 		ret = -ENOENT;
 		goto out;
-	} else {
-		/* check if there are no children of this qgroup */
-		if (!list_empty(&qgroup->members)) {
-			ret = -EBUSY;
-			goto out;
-		}
 	}
+
+	/* Check if there are no children of this qgroup */
+	if (!list_empty(&qgroup->members)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
 	ret = del_qgroup_item(trans, qgroupid);
 	if (ret && ret != -ENOENT)
 		goto out;
@@ -1446 +1649 @@
 	spin_lock(&fs_info->qgroup_lock);
 	del_qgroup_rb(fs_info, qgroupid);
 	spin_unlock(&fs_info->qgroup_lock);
+
+	/*
+	 * Remove the qgroup from sysfs now without holding the qgroup_lock
+	 * spinlock, since the sysfs_remove_group() function needs to take
+	 * kernfs_mutex, through kernfs_remove_by_name_ns().
+	 */
+	btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+	kfree(qgroup);
 out:
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
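The split above follows a general lock-context rule, shown schematically below; the ordering is from this hunk, the commentary is an assumption about the rationale:

```c
/* Anything that can sleep (sysfs/kernfs operations) must run after the
 * spinlock is dropped; only list/rbtree unlinking happens atomically. */
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(fs_info, qgroupid);		/* atomic context */
spin_unlock(&fs_info->qgroup_lock);
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);	/* may sleep: after unlock */
kfree(qgroup);					/* kfree() never sleeps */
```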
@@ -1455 +1666 @@
 			struct btrfs_qgroup_limit *limit)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
 	/* Sometimes we would want to clear the limit on this qgroup.
@@ -1465 +1675 @@
 	const u64 CLEAR_VALUE = -1;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
-	quota_root = fs_info->quota_root;
-	if (!quota_root) {
-		ret = -EINVAL;
+	if (!fs_info->quota_root) {
+		ret = -ENOTCONN;
 		goto out;
 	}
 
@@ -1546 +1755 @@
 		parent_node = *p;
 		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
 				 node);
-		if (bytenr < entry->bytenr)
+		if (bytenr < entry->bytenr) {
 			p = &(*p)->rb_left;
-		else if (bytenr > entry->bytenr)
+		} else if (bytenr > entry->bytenr) {
 			p = &(*p)->rb_right;
-		else
+		} else {
+			if (record->data_rsv && !entry->data_rsv) {
+				entry->data_rsv = record->data_rsv;
+				entry->data_rsv_refroot =
+					record->data_rsv_refroot;
+			}
 			return 1;
+		}
 	}
 
 	rb_link_node(&record->node, parent_node, p);
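The collision branch above is an instance of a generic rbtree insert-or-merge pattern, sketched below with illustrative types (not the btrfs ones): on a key collision, fold the new node's payload into the existing node instead of inserting a duplicate.

```c
#include <linux/rbtree.h>

struct rec { struct rb_node node; u64 key; u64 rsv; };

static int insert_or_merge(struct rb_root *root, struct rec *new)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct rec *cur = rb_entry(*p, struct rec, node);

		parent = *p;
		if (new->key < cur->key) {
			p = &(*p)->rb_left;
		} else if (new->key > cur->key) {
			p = &(*p)->rb_right;
		} else {
			if (new->rsv && !cur->rsv)
				cur->rsv = new->rsv;	/* merge payload */
			return 1;			/* caller frees 'new' */
		}
	}
	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return 0;
}
```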
@@ -1597 +1812 @@
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
 	    || bytenr == 0 || num_bytes == 0)
 		return 0;
-	record = kmalloc(sizeof(*record), gfp_flag);
+	record = kzalloc(sizeof(*record), gfp_flag);
 	if (!record)
 		return -ENOMEM;
 
@@ -1719 +1934 @@
 	return 0;
 }
 
+/*
+ * Helper function to trace a subtree tree block swap.
+ *
+ * The swap will happen at the highest tree block, but there may be a lot of
+ * tree blocks involved.
+ *
+ * For example:
+ *  OO = Old tree blocks
+ *  NN = New tree blocks allocated during balance
+ *
+ *           File tree (257)                  Reloc tree for 257
+ * L2              OO                                NN
+ *               /    \                            /    \
+ * L1          OO      OO (a)                    OO      NN (a)
+ *            / \     / \                       / \     / \
+ * L0       OO   OO OO   OO                   OO   OO NN   NN
+ *                  (b)  (c)                          (b)  (c)
+ *
+ * When calling qgroup_trace_extent_swap(), we will pass:
+ * @src_eb = OO(a)
+ * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
+ * @dst_level = 0
+ * @root_level = 1
+ *
+ * In that case, qgroup_trace_extent_swap() will search from OO(a) to
+ * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
+ *
+ * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
+ *
+ * 1) Tree search from @src_eb
+ *    It should act as a simplified btrfs_search_slot().
+ *    The key for search can be extracted from @dst_path->nodes[dst_level]
+ *    (first key).
+ *
+ * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
+ *    NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
+ *    They should be marked during the previous (@dst_level = 1) iteration.
+ *
+ * 3) Mark file extents in leaves dirty
+ *    We don't have a good way to pick out only the new file extents, so
+ *    we still follow the old method and scan all file extents in the leaf.
+ *
+ * This function frees us from keeping two paths, thus later we only need
+ * to care about how to iterate all new tree blocks in the reloc tree.
+ */
+static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
+				    struct extent_buffer *src_eb,
+				    struct btrfs_path *dst_path,
+				    int dst_level, int root_level,
+				    bool trace_leaf)
+{
+	struct btrfs_key key;
+	struct btrfs_path *src_path;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	u32 nodesize = fs_info->nodesize;
+	int cur_level = root_level;
+	int ret;
+
+	BUG_ON(dst_level > root_level);
+	/* Level mismatch */
+	if (btrfs_header_level(src_eb) != root_level)
+		return -EINVAL;
+
+	src_path = btrfs_alloc_path();
+	if (!src_path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (dst_level)
+		btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
+	else
+		btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
+
+	/* For src_path */
+	atomic_inc(&src_eb->refs);
+	src_path->nodes[root_level] = src_eb;
+	src_path->slots[root_level] = dst_path->slots[root_level];
+	src_path->locks[root_level] = 0;
+
+	/* A simplified version of btrfs_search_slot() */
+	while (cur_level >= dst_level) {
+		struct btrfs_key src_key;
+		struct btrfs_key dst_key;
+
+		if (src_path->nodes[cur_level] == NULL) {
+			struct btrfs_key first_key;
+			struct extent_buffer *eb;
+			int parent_slot;
+			u64 child_gen;
+			u64 child_bytenr;
+
+			eb = src_path->nodes[cur_level + 1];
+			parent_slot = src_path->slots[cur_level + 1];
+			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
+
+			eb = read_tree_block(fs_info, child_bytenr, child_gen,
+					     cur_level, &first_key);
+			if (IS_ERR(eb)) {
+				ret = PTR_ERR(eb);
+				goto out;
+			} else if (!extent_buffer_uptodate(eb)) {
+				free_extent_buffer(eb);
+				ret = -EIO;
+				goto out;
+			}
+
+			src_path->nodes[cur_level] = eb;
+
+			btrfs_tree_read_lock(eb);
+			btrfs_set_lock_blocking_read(eb);
+			src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
+		}
+
+		src_path->slots[cur_level] = dst_path->slots[cur_level];
+		if (cur_level) {
+			btrfs_node_key_to_cpu(dst_path->nodes[cur_level],
+					&dst_key, dst_path->slots[cur_level]);
+			btrfs_node_key_to_cpu(src_path->nodes[cur_level],
+					&src_key, src_path->slots[cur_level]);
+		} else {
+			btrfs_item_key_to_cpu(dst_path->nodes[cur_level],
+					&dst_key, dst_path->slots[cur_level]);
+			btrfs_item_key_to_cpu(src_path->nodes[cur_level],
+					&src_key, src_path->slots[cur_level]);
+		}
+		/* Content mismatch, something went wrong */
+		if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
+			ret = -ENOENT;
+			goto out;
+		}
+		cur_level--;
+	}
+
+	/*
+	 * Now both @dst_path and @src_path have been populated, record the tree
+	 * blocks for qgroup accounting.
+	 */
+	ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
+					nodesize, GFP_NOFS);
+	if (ret < 0)
+		goto out;
+	ret = btrfs_qgroup_trace_extent(trans,
+					dst_path->nodes[dst_level]->start,
+					nodesize, GFP_NOFS);
+	if (ret < 0)
+		goto out;
+
+	/* Record leaf file extents */
+	if (dst_level == 0 && trace_leaf) {
+		ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
+		if (ret < 0)
+			goto out;
+		ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]);
+	}
+out:
+	btrfs_free_path(src_path);
+	return ret;
+}
+
+/*
+ * Helper function to do recursive generation-aware depth-first search, to
+ * locate all new tree blocks in a subtree of a reloc tree.
+ *
+ * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
+ *
+ *       reloc tree
+ * L2        NN (a)
+ *          /    \
+ * L1     OO      NN (b)
+ *       /  \    /  \
+ * L0  OO   OO OO   NN
+ *             (c)  (d)
+ *
+ * If we pass:
+ * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
+ * @cur_level = 1
+ * @root_level = 1
+ *
+ * We will iterate through tree blocks NN(b), NN(d) and inform qgroup to trace
+ * the above tree blocks along with their counterparts in the file tree.
+ * During the search, old tree blocks OO(c) will be skipped, as the tree block
+ * swap won't affect OO(c).
+ */
+static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
+					   struct extent_buffer *src_eb,
+					   struct btrfs_path *dst_path,
+					   int cur_level, int root_level,
+					   u64 last_snapshot, bool trace_leaf)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct extent_buffer *eb;
+	bool need_cleanup = false;
+	int ret = 0;
+	int i;
+
+	/* Level sanity check */
+	if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
+	    root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
+	    root_level < cur_level) {
+		btrfs_err_rl(fs_info,
+			"%s: bad levels, cur_level=%d root_level=%d",
+			__func__, cur_level, root_level);
+		return -EUCLEAN;
+	}
+
+	/* Read the tree block if needed */
+	if (dst_path->nodes[cur_level] == NULL) {
+		struct btrfs_key first_key;
+		int parent_slot;
+		u64 child_gen;
+		u64 child_bytenr;
+
+		/*
+		 * dst_path->nodes[root_level] must be initialized before
+		 * calling this function.
+		 */
+		if (cur_level == root_level) {
+			btrfs_err_rl(fs_info,
+	"%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
+				__func__, root_level, root_level, cur_level);
+			return -EUCLEAN;
+		}
+
+		/*
+		 * We need to get child blockptr/gen from parent before we can
+		 * read it.
+		 */
+		eb = dst_path->nodes[cur_level + 1];
+		parent_slot = dst_path->slots[cur_level + 1];
+		child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+		child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+		btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
+
+		/* This node is old, no need to trace */
+		if (child_gen < last_snapshot)
+			goto out;
+
+		eb = read_tree_block(fs_info, child_bytenr, child_gen,
+				     cur_level, &first_key);
+		if (IS_ERR(eb)) {
+			ret = PTR_ERR(eb);
+			goto out;
+		} else if (!extent_buffer_uptodate(eb)) {
+			free_extent_buffer(eb);
+			ret = -EIO;
+			goto out;
+		}
+
+		dst_path->nodes[cur_level] = eb;
+		dst_path->slots[cur_level] = 0;
+
+		btrfs_tree_read_lock(eb);
+		btrfs_set_lock_blocking_read(eb);
+		dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
+		need_cleanup = true;
+	}
+
+	/* Now record this tree block and its counterpart for qgroups */
+	ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
+				       root_level, trace_leaf);
+	if (ret < 0)
+		goto cleanup;
+
+	eb = dst_path->nodes[cur_level];
+
+	if (cur_level > 0) {
+		/* Iterate all child tree blocks */
+		for (i = 0; i < btrfs_header_nritems(eb); i++) {
+			/* Skip old tree blocks as they won't be swapped */
+			if (btrfs_node_ptr_generation(eb, i) < last_snapshot)
+				continue;
+			dst_path->slots[cur_level] = i;
+
+			/* Recursive call (at most 7 times) */
+			ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
+					dst_path, cur_level - 1, root_level,
+					last_snapshot, trace_leaf);
+			if (ret < 0)
+				goto cleanup;
+		}
+	}
+
+cleanup:
+	if (need_cleanup) {
+		/* Clean up */
+		btrfs_tree_unlock_rw(dst_path->nodes[cur_level],
+				     dst_path->locks[cur_level]);
+		free_extent_buffer(dst_path->nodes[cur_level]);
+		dst_path->nodes[cur_level] = NULL;
+		dst_path->slots[cur_level] = 0;
+		dst_path->locks[cur_level] = 0;
+	}
+out:
+	return ret;
+}
+
+static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+				     struct extent_buffer *src_eb,
+				     struct extent_buffer *dst_eb,
+				     u64 last_snapshot, bool trace_leaf)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_path *dst_path = NULL;
+	int level;
+	int ret;
+
+	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+		return 0;
+
+	/* Wrong parameter order */
+	if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
+		btrfs_err_rl(fs_info,
+		"%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
+			     btrfs_header_generation(src_eb),
+			     btrfs_header_generation(dst_eb));
+		return -EUCLEAN;
+	}
+
+	if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	level = btrfs_header_level(dst_eb);
+	dst_path = btrfs_alloc_path();
+	if (!dst_path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	/* For dst_path */
+	atomic_inc(&dst_eb->refs);
+	dst_path->nodes[level] = dst_eb;
+	dst_path->slots[level] = 0;
+	dst_path->locks[level] = 0;
+
+	/* Do the generation-aware depth-first search */
+	ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
+					      level, last_snapshot, trace_leaf);
+	if (ret < 0)
+		goto out;
+	ret = 0;
+
+out:
+	btrfs_free_path(dst_path);
+	if (ret < 0)
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	return ret;
+}
+
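For orientation, a call site for the new tracer might look roughly like the sketch below. The surrounding variable names and the exact location (relocation's subtree-swap path in relocation.c) are assumptions, not shown in this hunk:

```c
/* Hypothetical caller sketch: after relocation has COWed a subtree in
 * the reloc tree, hand both subtree roots to the tracer. Blocks newer
 * than last_snapshot are the ones the swap can affect. */
ret = qgroup_trace_subtree_swap(trans, subvol_parent_eb, reloc_parent_eb,
				btrfs_root_last_snapshot(&src_root->root_item),
				true /* trace file extents in leaves */);
if (ret < 0)
	goto out;	/* qgroup status already flagged inconsistent */
```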
@@ -1722 +2288 @@
 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
 			       struct extent_buffer *root_eb,
 			       u64 root_gen, int root_level)
@@ -1759 +2325 @@
 	 * walk back up the tree (adjusting slot pointers as we go)
 	 * and restart the search process.
 	 */
-	extent_buffer_get(root_eb); /* For path */
+	atomic_inc(&root_eb->refs); /* For path */
 	path->nodes[root_level] = root_eb;
 	path->slots[root_level] = 0;
 	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
@@ -1797 +2363 @@
 			path->slots[level] = 0;
 
 			btrfs_tree_read_lock(eb);
-			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			btrfs_set_lock_blocking_read(eb);
 			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 
 			ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
@@ -1894 +2460 @@
  * Update qgroup rfer/excl counters.
  * Rfer update is easy, codes can explain themselves.
  *
- * Excl update is tricky, the update is split into 2 part.
+ * Excl update is tricky, the update is split into 2 parts.
  * Part 1: Possible exclusive <-> sharing detect:
  *        |       A       |      !A      |
  * -------------------------------------
@@ -2143 +2709 @@
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct ulist *new_roots = NULL;
 	struct rb_node *node;
+	u64 num_dirty_extents = 0;
 	u64 qgroup_to_skip;
 	int ret = 0;
 
@@ -2152 +2719 @@
 		record = rb_entry(node, struct btrfs_qgroup_extent_record,
 				  node);
 
+		num_dirty_extents++;
 		trace_btrfs_qgroup_account_extents(fs_info, record);
 
 		if (!ret) {
@@ -2168 +2736 @@
 					goto cleanup;
 			}
 
+			/* Free the reserved data space */
+			btrfs_qgroup_free_refroot(fs_info,
+					record->data_rsv_refroot,
+					record->data_rsv,
+					BTRFS_QGROUP_RSV_DATA);
 			/*
 			 * Use SEQ_LAST as time_seq to do special search, which
 			 * doesn't lock tree or delayed_refs and search current
@@ -2197 +2770 @@
 		kfree(record);
 
 	}
+	trace_qgroup_num_dirty_extents(fs_info, trans->transid,
+				       num_dirty_extents);
 	return ret;
 }
 
 /*
- * called from commit_transaction. Writes all changed qgroups to disk.
+ * Writes all changed qgroups to disk.
+ * Called by the transaction commit path and the qgroup assign ioctl.
  */
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *quota_root = fs_info->quota_root;
 	int ret = 0;
 
-	if (!quota_root)
+	/*
+	 * In case we are called from the qgroup assign ioctl, assert that we
+	 * are holding the qgroup_ioctl_lock, otherwise we can race with a
+	 * quota disable operation (ioctl) and access a freed quota root.
+	 */
+	if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
+		lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
+
+	if (!fs_info->quota_root)
 		return ret;
 
 	spin_lock(&fs_info->qgroup_lock);
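The conditional-assertion idiom introduced above is worth pulling out. lockdep_assert_held() compiles away on non-lockdep builds and splats if the current task does not hold the lock; the sketch below is an illustrative helper, not code from the patch:

```c
/* Every caller except the transaction commit path (which is serialized
 * by the transaction state itself) must hold qgroup_ioctl_lock. */
static void assert_qgroup_config_locked(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
		lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
}
```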
@@ -2353 +2936 @@
 		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
 		dstgroup->rsv_excl = inherit->lim.rsv_excl;
 
-		ret = update_qgroup_limit_item(trans, dstgroup);
-		if (ret) {
-			fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-			btrfs_info(fs_info,
-				   "unable to update quota limit for %llu",
-				   dstgroup->qgroupid);
-			goto unlock;
-		}
+		qgroup_dirty(fs_info, dstgroup);
 	}
 
 	if (srcid) {
@@ -2455 +3031 @@
 
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
+	if (!ret)
+		ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
 out:
 	if (!committing)
 		mutex_unlock(&fs_info->qgroup_ioctl_lock);
@@ -2463 +3041 @@
 	return ret;
 }
 
-/*
- * Two limits to commit transaction in advance.
- *
- * For RATIO, it will be 1/RATIO of the remaining limit as threshold.
- * For SIZE, it will be in byte unit as threshold.
- */
-#define QGROUP_FREE_RATIO		32
-#define QGROUP_FREE_SIZE		SZ_32M
-static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
-				const struct btrfs_qgroup *qg, u64 num_bytes)
+static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
 {
-	u64 free;
-	u64 threshold;
-
 	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
 	    qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
 		return false;
@@ -2485 +3051 @@
 	    qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
 		return false;
 
-	/*
-	 * Even if we passed the check, it's better to check if reservation
-	 * for meta_pertrans is pushing us near limit.
-	 * If there is too much pertrans reservation or it's near the limit,
-	 * let's try commit transaction to free some, using transaction_kthread
-	 */
-	if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
-			      BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
-		if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
-			free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl;
-			threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO,
-					  QGROUP_FREE_SIZE);
-		} else {
-			free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer;
-			threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO,
-					  QGROUP_FREE_SIZE);
-		}
-
-		/*
-		 * Use transaction_kthread to commit transaction, so we no
-		 * longer need to bother nested transaction nor lock context.
-		 */
-		if (free < threshold)
-			btrfs_commit_transaction_locksafe(fs_info);
-	}
-
 	return true;
 }
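A worked example of the enforcement check that remains, with made-up numbers (standalone fragment, not patch code): limit 1 GiB referenced, 900 MiB already referenced, 64 MiB still reserved, new request 128 MiB.

```c
u64 max_rfer = 1024ULL << 20;	/* qg->max_rfer          */
u64 rfer     =  900ULL << 20;	/* qg->rfer              */
u64 reserved =   64ULL << 20;	/* qgroup_rsv_total(qg)  */
u64 request  =  128ULL << 20;	/* num_bytes             */

/* 64M + 900M + 128M = 1092M > 1024M, so the check fails -> -EDQUOT. */
bool over = (reserved + rfer + request > max_rfer);
```

With the preemptive-commit heuristic removed, the check is a plain capacity test; freeing up reserved space is left to the point where -EDQUOT is actually hit.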
@@ -2516 +3056 @@
 
 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
 			  enum btrfs_qgroup_rsv_type type)
 {
-	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 ref_root = root->root_key.objectid;
@@ -2536 +3075 @@
 		enforce = false;
 
 	spin_lock(&fs_info->qgroup_lock);
-	quota_root = fs_info->quota_root;
-	if (!quota_root)
+	if (!fs_info->quota_root)
 		goto out;
 
 	qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -2560 +3098 @@
 
 		qg = unode_aux_to_qgroup(unode);
 
-		if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
+		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
 			ret = -EDQUOT;
 			goto out;
 		}
@@ -2583 +3121 @@
 
 		qg = unode_aux_to_qgroup(unode);
 
-		trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
 		qgroup_rsv_add(fs_info, qg, num_bytes, type);
 	}
 
@@ -2605 +3142 @@
 			       u64 ref_root, u64 num_bytes,
 			       enum btrfs_qgroup_rsv_type type)
 {
-	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
@@ -2623 +3159 @@
 	}
 	spin_lock(&fs_info->qgroup_lock);
 
-	quota_root = fs_info->quota_root;
-	if (!quota_root)
+	if (!fs_info->quota_root)
 		goto out;
 
 	qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -2650 +3185 @@
 
 		qg = unode_aux_to_qgroup(unode);
 
-		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
 		qgroup_rsv_release(fs_info, qg, num_bytes, type);
 
 		list_for_each_entry(glist, &qg->groups, next_group) {
@@ -2734 +3268 @@
 		mutex_unlock(&fs_info->qgroup_rescan_lock);
 		goto out;
 	}
-	extent_buffer_get(scratch_leaf);
-	btrfs_tree_read_lock(scratch_leaf);
-	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
 	slot = path->slots[0];
 	btrfs_release_path(path);
 	mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -2762 +3293 @@
 			goto out;
 	}
 out:
-	if (scratch_leaf) {
-		btrfs_tree_read_unlock_blocking(scratch_leaf);
+	if (scratch_leaf)
 		free_extent_buffer(scratch_leaf);
-	}
 
 	if (done && !ret) {
 		ret = 1;
@@ -2777 +3306 @@
 static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
 {
 	return btrfs_fs_closing(fs_info) ||
-		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
+		!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 }
 
 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
.. | .. |
---|
2789 | 3319 | int err = -ENOMEM; |
---|
2790 | 3320 | int ret = 0; |
---|
2791 | 3321 | bool stopped = false; |
---|
| 3322 | + bool did_leaf_rescans = false; |
---|
2792 | 3323 | |
---|
2793 | 3324 | path = btrfs_alloc_path(); |
---|
2794 | 3325 | if (!path) |
---|
.. | .. |
---|
2807 | 3338 | err = PTR_ERR(trans); |
---|
2808 | 3339 | break; |
---|
2809 | 3340 | } |
---|
2810 | | - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { |
---|
2811 | | - err = -EINTR; |
---|
2812 | | - } else { |
---|
2813 | | - err = qgroup_rescan_leaf(trans, path); |
---|
2814 | | - } |
---|
| 3341 | + |
---|
| 3342 | + err = qgroup_rescan_leaf(trans, path); |
---|
| 3343 | + did_leaf_rescans = true; |
---|
| 3344 | + |
---|
2815 | 3345 | if (err > 0) |
---|
2816 | 3346 | btrfs_commit_transaction(trans); |
---|
2817 | 3347 | else |
---|
.. | .. |
---|
2825 | 3355 | if (err > 0 && |
---|
2826 | 3356 | fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { |
---|
2827 | 3357 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
---|
2828 | | - } else if (err < 0) { |
---|
| 3358 | + } else if (err < 0 || stopped) { |
---|
2829 | 3359 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
---|
2830 | 3360 | } |
---|
2831 | 3361 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
---|
2832 | 3362 | |
---|
2833 | 3363 | /* |
---|
2834 | | - * only update status, since the previous part has already updated the |
---|
2835 | | - * qgroup info. |
---|
| 3364 | + * Only update status, since the previous part has already updated the |
---|
| 3365 | + * qgroup info, and only if we did any actual work. This also prevents a 
---|
| 3366 | + * race with a concurrent quota disable, which has already set 
---|
| 3367 | + * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at |
---|
| 3368 | + * btrfs_quota_disable(). |
---|
2836 | 3369 | */ |
---|
2837 | | - trans = btrfs_start_transaction(fs_info->quota_root, 1); |
---|
2838 | | - if (IS_ERR(trans)) { |
---|
2839 | | - err = PTR_ERR(trans); |
---|
| 3370 | + if (did_leaf_rescans) { |
---|
| 3371 | + trans = btrfs_start_transaction(fs_info->quota_root, 1); |
---|
| 3372 | + if (IS_ERR(trans)) { |
---|
| 3373 | + err = PTR_ERR(trans); |
---|
| 3374 | + trans = NULL; |
---|
| 3375 | + btrfs_err(fs_info, |
---|
| 3376 | + "fail to start transaction for status update: %d", |
---|
| 3377 | + err); |
---|
| 3378 | + } |
---|
| 3379 | + } else { |
---|
2840 | 3380 | trans = NULL; |
---|
2841 | | - btrfs_err(fs_info, |
---|
2842 | | - "fail to start transaction for status update: %d", |
---|
2843 | | - err); |
---|
2844 | 3381 | } |
---|
2845 | 3382 | |
---|
2846 | 3383 | mutex_lock(&fs_info->qgroup_rescan_lock); |
---|
.. | .. |
---|
2902 | 3439 | } |
---|
2903 | 3440 | |
---|
2904 | 3441 | mutex_lock(&fs_info->qgroup_rescan_lock); |
---|
2905 | | - spin_lock(&fs_info->qgroup_lock); |
---|
2906 | 3442 | |
---|
2907 | 3443 | if (init_flags) { |
---|
2908 | 3444 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { |
---|
.. | .. |
---|
2914 | 3450 | btrfs_warn(fs_info, |
---|
2915 | 3451 | "qgroup rescan init failed, qgroup is not enabled"); |
---|
2916 | 3452 | ret = -EINVAL; |
---|
| 3453 | + } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { |
---|
| 3454 | + /* Quota disable is in progress */ |
---|
| 3455 | + ret = -EBUSY; |
---|
2917 | 3456 | } |
---|
2918 | 3457 | |
---|
2919 | 3458 | if (ret) { |
---|
2920 | | - spin_unlock(&fs_info->qgroup_lock); |
---|
2921 | 3459 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
---|
2922 | 3460 | return ret; |
---|
2923 | 3461 | } |
---|
.. | .. |
---|
2928 | 3466 | sizeof(fs_info->qgroup_rescan_progress)); |
---|
2929 | 3467 | fs_info->qgroup_rescan_progress.objectid = progress_objectid; |
---|
2930 | 3468 | init_completion(&fs_info->qgroup_rescan_completion); |
---|
2931 | | - |
---|
2932 | | - spin_unlock(&fs_info->qgroup_lock); |
---|
2933 | 3469 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
---|
2934 | 3470 | |
---|
2935 | | - memset(&fs_info->qgroup_rescan_work, 0, |
---|
2936 | | - sizeof(fs_info->qgroup_rescan_work)); |
---|
2937 | 3471 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
---|
2938 | | - btrfs_qgroup_rescan_helper, |
---|
2939 | 3472 | btrfs_qgroup_rescan_worker, NULL, NULL); |
---|
2940 | 3473 | return 0; |
---|
2941 | 3474 | } |
---|
.. | .. |
---|
3009 | 3542 | int ret = 0; |
---|
3010 | 3543 | |
---|
3011 | 3544 | mutex_lock(&fs_info->qgroup_rescan_lock); |
---|
3012 | | - spin_lock(&fs_info->qgroup_lock); |
---|
3013 | 3545 | running = fs_info->qgroup_rescan_running; |
---|
3014 | | - spin_unlock(&fs_info->qgroup_lock); |
---|
3015 | 3546 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
---|
3016 | 3547 | |
---|
3017 | 3548 | if (!running) |
---|
.. | .. |
---|
3042 | 3573 | } |
---|
3043 | 3574 | } |
---|
3044 | 3575 | |
---|
| 3576 | +#define rbtree_iterate_from_safe(node, next, start) \ |
---|
| 3577 | + for (node = start; node && ({ next = rb_next(node); 1;}); node = next) |
---|
| 3578 | + |
---|
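The rbtree_iterate_from_safe() helper above leans on a GCC statement expression: the clause ({ next = rb_next(node); 1; }) caches the successor before the loop body runs, so the body may erase and free the current node without breaking the walk. A minimal standalone sketch of the same idiom, using a plain singly linked list instead of the kernel rbtree so it compiles in userspace (all names below are hypothetical, and the statement expression is a GCC/Clang extension):

#include <stdio.h>
#include <stdlib.h>

struct item { int val; struct item *next; };

/*
 * Same shape as rbtree_iterate_from_safe(): the statement expression
 * caches the successor before the body runs, so the body may free @n.
 */
#define list_iterate_from_safe(n, nxt, start) \
	for (n = (start); n && ({ nxt = n->next; 1; }); n = nxt)

int main(void)
{
	struct item *head = NULL, *n, *next;
	int i;

	for (i = 0; i < 4; i++) {	/* build list 3 -> 2 -> 1 -> 0 */
		n = malloc(sizeof(*n));
		n->val = i;
		n->next = head;
		head = n;
	}
	list_iterate_from_safe(n, next, head) {
		printf("%d\n", n->val);
		free(n);		/* safe: successor already cached */
	}
	return 0;
}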
| 3579 | +static int qgroup_unreserve_range(struct btrfs_inode *inode, |
---|
| 3580 | + struct extent_changeset *reserved, u64 start, |
---|
| 3581 | + u64 len) |
---|
| 3582 | +{ |
---|
| 3583 | + struct rb_node *node; |
---|
| 3584 | + struct rb_node *next; |
---|
| 3585 | + struct ulist_node *entry; |
---|
| 3586 | + int ret = 0; |
---|
| 3587 | + |
---|
| 3588 | + node = reserved->range_changed.root.rb_node; |
---|
| 3589 | + if (!node) |
---|
| 3590 | + return 0; |
---|
| 3591 | + while (node) { |
---|
| 3592 | + entry = rb_entry(node, struct ulist_node, rb_node); |
---|
| 3593 | + if (entry->val < start) |
---|
| 3594 | + node = node->rb_right; |
---|
| 3595 | + else |
---|
| 3596 | + node = node->rb_left; |
---|
| 3597 | + } |
---|
| 3598 | + |
---|
| 3599 | + if (entry->val > start && rb_prev(&entry->rb_node)) |
---|
| 3600 | + entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, |
---|
| 3601 | + rb_node); |
---|
| 3602 | + |
---|
| 3603 | + rbtree_iterate_from_safe(node, next, &entry->rb_node) { |
---|
| 3604 | + u64 entry_start; |
---|
| 3605 | + u64 entry_end; |
---|
| 3606 | + u64 entry_len; |
---|
| 3607 | + int clear_ret; |
---|
| 3608 | + |
---|
| 3609 | + entry = rb_entry(node, struct ulist_node, rb_node); |
---|
| 3610 | + entry_start = entry->val; |
---|
| 3611 | + entry_end = entry->aux; |
---|
| 3612 | + entry_len = entry_end - entry_start + 1; |
---|
| 3613 | + |
---|
| 3614 | + if (entry_start >= start + len) |
---|
| 3615 | + break; |
---|
| 3616 | + if (entry_start + entry_len <= start) |
---|
| 3617 | + continue; |
---|
| 3618 | + /* |
---|
| 3619 | + * Now the entry overlaps with [start, start + len), revert the 
---|
| 3620 | + * EXTENT_QGROUP_RESERVED bit. |
---|
| 3621 | + */ |
---|
| 3622 | + clear_ret = clear_extent_bits(&inode->io_tree, entry_start, |
---|
| 3623 | + entry_end, EXTENT_QGROUP_RESERVED); |
---|
| 3624 | + if (!ret && clear_ret < 0) |
---|
| 3625 | + ret = clear_ret; |
---|
| 3626 | + |
---|
| 3627 | + ulist_del(&reserved->range_changed, entry->val, entry->aux); |
---|
| 3628 | + if (likely(reserved->bytes_changed >= entry_len)) { |
---|
| 3629 | + reserved->bytes_changed -= entry_len; |
---|
| 3630 | + } else { |
---|
| 3631 | + WARN_ON(1); |
---|
| 3632 | + reserved->bytes_changed = 0; |
---|
| 3633 | + } |
---|
| 3634 | + } |
---|
| 3635 | + |
---|
| 3636 | + return ret; |
---|
| 3637 | +} |
---|
| 3638 | + |
---|
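To make the bookkeeping in qgroup_unreserve_range() concrete: each ulist node stores an inclusive byte range as (val = start, aux = end), so its length is aux - val + 1. A small standalone check of the overlap predicates used in the loop above, with made-up offsets (an illustration only, not code from the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Recorded reservation: inclusive range [4096, 8191] -> 4096 bytes */
	uint64_t entry_start = 4096, entry_end = 8191;
	uint64_t entry_len = entry_end - entry_start + 1;

	/* Unreserve request covering [0, 12288) */
	uint64_t start = 0, len = 12288;

	assert(entry_len == 4096);
	/* Same predicates as the loop above: the entry is neither past
	 * the end of the request ... */
	assert(!(entry_start >= start + len));
	/* ... nor entirely before its start, so the whole entry is
	 * dropped and bytes_changed shrinks by entry_len. */
	assert(!(entry_start + entry_len <= start));
	return 0;
}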
3045 | 3639 | /* |
---|
3046 | | - * Reserve qgroup space for range [start, start + len). |
---|
| 3640 | + * Try to free some space for qgroup. |
---|
3047 | 3641 | * |
---|
3048 | | - * This function will either reserve space from related qgroups or doing |
---|
3049 | | - * nothing if the range is already reserved. |
---|
| 3642 | + * For qgroup, there are only 3 ways to free qgroup space: |
---|
| 3643 | + * - Flush nodatacow write |
---|
| 3644 | + * Any nodatacow write will free its reserved data space at run_delalloc_range(). |
---|
| 3645 | + * In theory, we should only flush nodatacow inodes, but it's not yet |
---|
| 3646 | + * possible, so we need to flush the whole root. |
---|
3050 | 3647 | * |
---|
3051 | | - * Return 0 for successful reserve |
---|
3052 | | - * Return <0 for error (including -EQUOT) |
---|
| 3648 | + * - Wait for ordered extents |
---|
| 3649 | + * When ordered extents are finished, their reserved metadata is finally |
---|
| 3650 | + * converted to per_trans status, which can be freed by a later 
---|
| 3651 | + * transaction commit. 
---|
3053 | 3652 | * |
---|
3054 | | - * NOTE: this function may sleep for memory allocation. |
---|
3055 | | - * if btrfs_qgroup_reserve_data() is called multiple times with |
---|
3056 | | - * same @reserved, caller must ensure when error happens it's OK |
---|
3057 | | - * to free *ALL* reserved space. |
---|
| 3653 | + * - Commit transaction |
---|
| 3654 | + * This would free the meta_per_trans space. |
---|
| 3655 | + * In theory this shouldn't provide much space, but any more qgroup space |
---|
| 3656 | + * is better than none. 
---|
3058 | 3657 | */ |
---|
3059 | | -int btrfs_qgroup_reserve_data(struct inode *inode, |
---|
| 3658 | +static int try_flush_qgroup(struct btrfs_root *root) |
---|
| 3659 | +{ |
---|
| 3660 | + struct btrfs_trans_handle *trans; |
---|
| 3661 | + int ret; |
---|
| 3662 | + bool can_commit = true; |
---|
| 3663 | + |
---|
| 3664 | + /* |
---|
| 3665 | + * If current process holds a transaction, we shouldn't flush, as we |
---|
| 3666 | + * assume all space reservation happens before a transaction handle is |
---|
| 3667 | + * held. |
---|
| 3668 | + * |
---|
| 3669 | + * But there are cases like btrfs_delayed_item_reserve_metadata() where |
---|
| 3670 | + * we try to reserve space with one transaction handle already held. 
---|
| 3671 | + * In that case we can't commit transaction, but at least try to end it |
---|
| 3672 | + * and hope the started data writes can free some space. |
---|
| 3673 | + */ |
---|
| 3674 | + if (current->journal_info && |
---|
| 3675 | + current->journal_info != BTRFS_SEND_TRANS_STUB) |
---|
| 3676 | + can_commit = false; |
---|
| 3677 | + |
---|
| 3678 | + /* |
---|
| 3679 | + * We don't want to run flush again and again, so if there is a running |
---|
| 3680 | + * one, we won't try to start a new flush, but exit directly. |
---|
| 3681 | + */ |
---|
| 3682 | + if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { |
---|
| 3683 | + /* |
---|
| 3684 | + * We already hold a transaction handle, thus waiting here could block 
---|
| 3685 | + * the thread doing the flush. Exit right now instead. This increases 
---|
| 3686 | + * the chance of EDQUOT for heavy load and near limit cases. |
---|
| 3687 | + * But we can argue that if we're already near limit, EDQUOT is |
---|
| 3688 | + * unavoidable anyway. |
---|
| 3689 | + */ |
---|
| 3690 | + if (!can_commit) |
---|
| 3691 | + return 0; |
---|
| 3692 | + |
---|
| 3693 | + wait_event(root->qgroup_flush_wait, |
---|
| 3694 | + !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); |
---|
| 3695 | + return 0; |
---|
| 3696 | + } |
---|
| 3697 | + |
---|
| 3698 | + ret = btrfs_start_delalloc_snapshot(root); |
---|
| 3699 | + if (ret < 0) |
---|
| 3700 | + goto out; |
---|
| 3701 | + btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); |
---|
| 3702 | + |
---|
| 3703 | + trans = btrfs_join_transaction(root); |
---|
| 3704 | + if (IS_ERR(trans)) { |
---|
| 3705 | + ret = PTR_ERR(trans); |
---|
| 3706 | + goto out; |
---|
| 3707 | + } |
---|
| 3708 | + |
---|
| 3709 | + if (can_commit) |
---|
| 3710 | + ret = btrfs_commit_transaction(trans); |
---|
| 3711 | + else |
---|
| 3712 | + ret = btrfs_end_transaction(trans); |
---|
| 3713 | +out: |
---|
| 3714 | + clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); |
---|
| 3715 | + wake_up(&root->qgroup_flush_wait); |
---|
| 3716 | + return ret; |
---|
| 3717 | +} |
---|
| 3718 | + |
---|
| 3719 | +static int qgroup_reserve_data(struct btrfs_inode *inode, |
---|
3060 | 3720 | struct extent_changeset **reserved_ret, u64 start, |
---|
3061 | 3721 | u64 len) |
---|
3062 | 3722 | { |
---|
3063 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
3064 | | - struct ulist_node *unode; |
---|
3065 | | - struct ulist_iterator uiter; |
---|
| 3723 | + struct btrfs_root *root = inode->root; |
---|
3066 | 3724 | struct extent_changeset *reserved; |
---|
| 3725 | + bool new_reserved = false; |
---|
3067 | 3726 | u64 orig_reserved; |
---|
3068 | 3727 | u64 to_reserve; |
---|
3069 | 3728 | int ret; |
---|
3070 | 3729 | |
---|
3071 | 3730 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || |
---|
3072 | | - !is_fstree(root->objectid) || len == 0) |
---|
| 3731 | + !is_fstree(root->root_key.objectid) || len == 0) |
---|
3073 | 3732 | return 0; |
---|
3074 | 3733 | |
---|
3075 | 3734 | /* @reserved parameter is mandatory for qgroup */ |
---|
3076 | 3735 | if (WARN_ON(!reserved_ret)) |
---|
3077 | 3736 | return -EINVAL; |
---|
3078 | 3737 | if (!*reserved_ret) { |
---|
| 3738 | + new_reserved = true; |
---|
3079 | 3739 | *reserved_ret = extent_changeset_alloc(); |
---|
3080 | 3740 | if (!*reserved_ret) |
---|
3081 | 3741 | return -ENOMEM; |
---|
.. | .. |
---|
3083 | 3743 | reserved = *reserved_ret; |
---|
3084 | 3744 | /* Record already reserved space */ |
---|
3085 | 3745 | orig_reserved = reserved->bytes_changed; |
---|
3086 | | - ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, |
---|
| 3746 | + ret = set_record_extent_bits(&inode->io_tree, start, |
---|
3087 | 3747 | start + len -1, EXTENT_QGROUP_RESERVED, reserved); |
---|
3088 | 3748 | |
---|
3089 | 3749 | /* Newly reserved space */ |
---|
3090 | 3750 | to_reserve = reserved->bytes_changed - orig_reserved; |
---|
3091 | | - trace_btrfs_qgroup_reserve_data(inode, start, len, |
---|
| 3751 | + trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, |
---|
3092 | 3752 | to_reserve, QGROUP_RESERVE); |
---|
3093 | 3753 | if (ret < 0) |
---|
3094 | | - goto cleanup; |
---|
| 3754 | + goto out; |
---|
3095 | 3755 | ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA); |
---|
3096 | 3756 | if (ret < 0) |
---|
3097 | 3757 | goto cleanup; |
---|
.. | .. |
---|
3099 | 3759 | return ret; |
---|
3100 | 3760 | |
---|
3101 | 3761 | cleanup: |
---|
3102 | | - /* cleanup *ALL* already reserved ranges */ |
---|
3103 | | - ULIST_ITER_INIT(&uiter); |
---|
3104 | | - while ((unode = ulist_next(&reserved->range_changed, &uiter))) |
---|
3105 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, |
---|
3106 | | - unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); |
---|
3107 | | - /* Also free data bytes of already reserved one */ |
---|
3108 | | - btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, |
---|
3109 | | - orig_reserved, BTRFS_QGROUP_RSV_DATA); |
---|
3110 | | - extent_changeset_release(reserved); |
---|
| 3762 | + qgroup_unreserve_range(inode, reserved, start, len); |
---|
| 3763 | +out: |
---|
| 3764 | + if (new_reserved) { |
---|
| 3765 | + extent_changeset_release(reserved); |
---|
| 3766 | + kfree(reserved); |
---|
| 3767 | + *reserved_ret = NULL; |
---|
| 3768 | + } |
---|
3111 | 3769 | return ret; |
---|
3112 | 3770 | } |
---|
3113 | 3771 | |
---|
| 3772 | +/* |
---|
| 3773 | + * Reserve qgroup space for range [start, start + len). |
---|
| 3774 | + * |
---|
| 3775 | + * This function will either reserve space from related qgroups or do nothing |
---|
| 3776 | + * if the range is already reserved. |
---|
| 3777 | + * |
---|
| 3778 | + * Return 0 for successful reservation |
---|
| 3779 | + * Return <0 for error (including -EDQUOT) 
---|
| 3780 | + * |
---|
| 3781 | + * NOTE: This function may sleep for memory allocation, dirty page flushing 
---|
| 3782 | + * and transaction commit, so the caller should not hold any dirty page locked. 
---|
| 3783 | + */ |
---|
| 3784 | +int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, |
---|
| 3785 | + struct extent_changeset **reserved_ret, u64 start, |
---|
| 3786 | + u64 len) |
---|
| 3787 | +{ |
---|
| 3788 | + int ret; |
---|
| 3789 | + |
---|
| 3790 | + ret = qgroup_reserve_data(inode, reserved_ret, start, len); |
---|
| 3791 | + if (ret <= 0 && ret != -EDQUOT) |
---|
| 3792 | + return ret; |
---|
| 3793 | + |
---|
| 3794 | + ret = try_flush_qgroup(inode->root); |
---|
| 3795 | + if (ret < 0) |
---|
| 3796 | + return ret; |
---|
| 3797 | + return qgroup_reserve_data(inode, reserved_ret, start, len); |
---|
| 3798 | +} |
---|
| 3799 | + |
---|
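For context, here is the shape of a write-path caller under the new semantics (a hedged sketch only; example_buffered_write() and do_the_write() are hypothetical, not taken from this patch). Since the flush-and-retry now lives inside btrfs_qgroup_reserve_data(), callers see -EDQUOT only after one full flush cycle, and a failed reservation unreserves only the range that was passed in:

/*
 * Hypothetical caller sketch (kernel context assumed); 'inode', 'start'
 * and 'len' stand in for real write-path state.
 */
static int example_buffered_write(struct btrfs_inode *inode, u64 start, u64 len)
{
	struct extent_changeset *reserved = NULL;
	int ret;

	ret = btrfs_qgroup_reserve_data(inode, &reserved, start, len);
	if (ret < 0)		/* -EDQUOT only after one flush-and-retry */
		return ret;

	ret = do_the_write(inode, start, len);	/* hypothetical helper */
	if (ret < 0)
		/* Error path: drop this range's reservation. */
		btrfs_qgroup_free_data(inode, reserved, start, len);
	else
		/* Success: hand the accounting over without freeing bytes. */
		ret = btrfs_qgroup_release_data(inode, start, len);

	extent_changeset_free(reserved);
	return ret < 0 ? ret : 0;
}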
3114 | 3800 | /* Free ranges specified by @reserved, normally in error path */ |
---|
3115 | | -static int qgroup_free_reserved_data(struct inode *inode, |
---|
| 3801 | +static int qgroup_free_reserved_data(struct btrfs_inode *inode, |
---|
3116 | 3802 | struct extent_changeset *reserved, u64 start, u64 len) |
---|
3117 | 3803 | { |
---|
3118 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 3804 | + struct btrfs_root *root = inode->root; |
---|
3119 | 3805 | struct ulist_node *unode; |
---|
3120 | 3806 | struct ulist_iterator uiter; |
---|
3121 | 3807 | struct extent_changeset changeset; |
---|
.. | .. |
---|
3151 | 3837 | * EXTENT_QGROUP_RESERVED, we won't double free. |
---|
3152 | 3838 | * So no need to rush. 
---|
3153 | 3839 | */ |
---|
3154 | | - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, |
---|
3155 | | - free_start, free_start + free_len - 1, |
---|
| 3840 | + ret = clear_record_extent_bits(&inode->io_tree, free_start, |
---|
| 3841 | + free_start + free_len - 1, |
---|
3156 | 3842 | EXTENT_QGROUP_RESERVED, &changeset); |
---|
3157 | 3843 | if (ret < 0) |
---|
3158 | 3844 | goto out; |
---|
3159 | 3845 | freed += changeset.bytes_changed; |
---|
3160 | 3846 | } |
---|
3161 | | - btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed, |
---|
| 3847 | + btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, |
---|
3162 | 3848 | BTRFS_QGROUP_RSV_DATA); |
---|
3163 | 3849 | ret = freed; |
---|
3164 | 3850 | out: |
---|
.. | .. |
---|
3166 | 3852 | return ret; |
---|
3167 | 3853 | } |
---|
3168 | 3854 | |
---|
3169 | | -static int __btrfs_qgroup_release_data(struct inode *inode, |
---|
| 3855 | +static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, |
---|
3170 | 3856 | struct extent_changeset *reserved, u64 start, u64 len, |
---|
3171 | 3857 | int free) |
---|
3172 | 3858 | { |
---|
.. | .. |
---|
3174 | 3860 | int trace_op = QGROUP_RELEASE; |
---|
3175 | 3861 | int ret; |
---|
3176 | 3862 | |
---|
3177 | | - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, |
---|
3178 | | - &BTRFS_I(inode)->root->fs_info->flags)) |
---|
| 3863 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags)) |
---|
3179 | 3864 | return 0; |
---|
3180 | 3865 | |
---|
3181 | 3866 | /* In release case, we shouldn't have @reserved */ |
---|
.. | .. |
---|
3183 | 3868 | if (free && reserved) |
---|
3184 | 3869 | return qgroup_free_reserved_data(inode, reserved, start, len); |
---|
3185 | 3870 | extent_changeset_init(&changeset); |
---|
3186 | | - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, |
---|
3187 | | - start + len -1, EXTENT_QGROUP_RESERVED, &changeset); |
---|
| 3871 | + ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, |
---|
| 3872 | + EXTENT_QGROUP_RESERVED, &changeset); |
---|
3188 | 3873 | if (ret < 0) |
---|
3189 | 3874 | goto out; |
---|
3190 | 3875 | |
---|
3191 | 3876 | if (free) |
---|
3192 | 3877 | trace_op = QGROUP_FREE; |
---|
3193 | | - trace_btrfs_qgroup_release_data(inode, start, len, |
---|
| 3878 | + trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len, |
---|
3194 | 3879 | changeset.bytes_changed, trace_op); |
---|
3195 | 3880 | if (free) |
---|
3196 | | - btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, |
---|
3197 | | - BTRFS_I(inode)->root->objectid, |
---|
| 3881 | + btrfs_qgroup_free_refroot(inode->root->fs_info, |
---|
| 3882 | + inode->root->root_key.objectid, |
---|
3198 | 3883 | changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); |
---|
3199 | 3884 | ret = changeset.bytes_changed; |
---|
3200 | 3885 | out: |
---|
.. | .. |
---|
3214 | 3899 | * |
---|
3215 | 3900 | * NOTE: This function may sleep for memory allocation. |
---|
3216 | 3901 | */ |
---|
3217 | | -int btrfs_qgroup_free_data(struct inode *inode, |
---|
| 3902 | +int btrfs_qgroup_free_data(struct btrfs_inode *inode, |
---|
3218 | 3903 | struct extent_changeset *reserved, u64 start, u64 len) |
---|
3219 | 3904 | { |
---|
3220 | 3905 | return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); |
---|
.. | .. |
---|
3235 | 3920 | * |
---|
3236 | 3921 | * NOTE: This function may sleep for memory allocation. |
---|
3237 | 3922 | */ |
---|
3238 | | -int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) |
---|
| 3923 | +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) |
---|
3239 | 3924 | { |
---|
3240 | 3925 | return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); |
---|
3241 | 3926 | } |
---|
.. | .. |
---|
3280 | 3965 | return num_bytes; |
---|
3281 | 3966 | } |
---|
3282 | 3967 | |
---|
3283 | | -int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
---|
3284 | | - enum btrfs_qgroup_rsv_type type, bool enforce) |
---|
| 3968 | +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
---|
| 3969 | + enum btrfs_qgroup_rsv_type type, bool enforce) |
---|
3285 | 3970 | { |
---|
3286 | 3971 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3287 | 3972 | int ret; |
---|
3288 | 3973 | |
---|
3289 | 3974 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
---|
3290 | | - !is_fstree(root->objectid) || num_bytes == 0) |
---|
| 3975 | + !is_fstree(root->root_key.objectid) || num_bytes == 0) |
---|
3291 | 3976 | return 0; |
---|
3292 | 3977 | |
---|
3293 | 3978 | BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); |
---|
.. | .. |
---|
3307 | 3992 | return ret; |
---|
3308 | 3993 | } |
---|
3309 | 3994 | |
---|
| 3995 | +int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
---|
| 3996 | + enum btrfs_qgroup_rsv_type type, bool enforce) |
---|
| 3997 | +{ |
---|
| 3998 | + int ret; |
---|
| 3999 | + |
---|
| 4000 | + ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); |
---|
| 4001 | + if (ret <= 0 && ret != -EDQUOT) |
---|
| 4002 | + return ret; |
---|
| 4003 | + |
---|
| 4004 | + ret = try_flush_qgroup(root); |
---|
| 4005 | + if (ret < 0) |
---|
| 4006 | + return ret; |
---|
| 4007 | + return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); |
---|
| 4008 | +} |
---|
| 4009 | + |
---|
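The metadata side mirrors the data side: btrfs_qgroup_reserve_meta() is now the plain, non-flushing reservation, and __btrfs_qgroup_reserve_meta() wraps it with the same try_flush_qgroup() retry. A hedged sketch of the intended call-site split ('root', 'num_bytes' and 'enforce' stand in for real caller state; the deadlock rationale is an assumption based on contexts that already hold a transaction handle):

/* Context where flushing could deadlock (e.g. a reservation made with a
 * transaction handle already held): plain reservation, the caller deals
 * with -EDQUOT itself. */
ret = btrfs_qgroup_reserve_meta(root, num_bytes,
				BTRFS_QGROUP_RSV_META_PREALLOC, enforce);

/* Ordinary context: flushing variant, which retries once after
 * try_flush_qgroup() has flushed delalloc and committed. */
ret = __btrfs_qgroup_reserve_meta(root, num_bytes,
				  BTRFS_QGROUP_RSV_META_PREALLOC, enforce);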
3310 | 4010 | void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) |
---|
3311 | 4011 | { |
---|
3312 | 4012 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3313 | 4013 | |
---|
3314 | 4014 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
---|
3315 | | - !is_fstree(root->objectid)) |
---|
| 4015 | + !is_fstree(root->root_key.objectid)) |
---|
3316 | 4016 | return; |
---|
3317 | 4017 | |
---|
3318 | 4018 | /* TODO: Update trace point to handle such free */ |
---|
3319 | 4019 | trace_qgroup_meta_free_all_pertrans(root); |
---|
3320 | 4020 | /* Special value -1 means to free all reserved space */ |
---|
3321 | | - btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1, |
---|
| 4021 | + btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1, |
---|
3322 | 4022 | BTRFS_QGROUP_RSV_META_PERTRANS); |
---|
3323 | 4023 | } |
---|
3324 | 4024 | |
---|
.. | .. |
---|
3328 | 4028 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3329 | 4029 | |
---|
3330 | 4030 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
---|
3331 | | - !is_fstree(root->objectid)) |
---|
| 4031 | + !is_fstree(root->root_key.objectid)) |
---|
3332 | 4032 | return; |
---|
3333 | 4033 | |
---|
3334 | 4034 | /* |
---|
.. | .. |
---|
3339 | 4039 | num_bytes = sub_root_meta_rsv(root, num_bytes, type); |
---|
3340 | 4040 | BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); |
---|
3341 | 4041 | trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); |
---|
3342 | | - btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type); |
---|
| 4042 | + btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, |
---|
| 4043 | + num_bytes, type); |
---|
3343 | 4044 | } |
---|
3344 | 4045 | |
---|
3345 | 4046 | static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, |
---|
3346 | 4047 | int num_bytes) |
---|
3347 | 4048 | { |
---|
3348 | | - struct btrfs_root *quota_root = fs_info->quota_root; |
---|
3349 | 4049 | struct btrfs_qgroup *qgroup; |
---|
3350 | 4050 | struct ulist_node *unode; |
---|
3351 | 4051 | struct ulist_iterator uiter; |
---|
.. | .. |
---|
3353 | 4053 | |
---|
3354 | 4054 | if (num_bytes == 0) |
---|
3355 | 4055 | return; |
---|
3356 | | - if (!quota_root) |
---|
| 4056 | + if (!fs_info->quota_root) |
---|
3357 | 4057 | return; |
---|
3358 | 4058 | |
---|
3359 | 4059 | spin_lock(&fs_info->qgroup_lock); |
---|
.. | .. |
---|
3393 | 4093 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3394 | 4094 | |
---|
3395 | 4095 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || |
---|
3396 | | - !is_fstree(root->objectid)) |
---|
| 4096 | + !is_fstree(root->root_key.objectid)) |
---|
3397 | 4097 | return; |
---|
3398 | 4098 | /* Same as btrfs_qgroup_free_meta_prealloc() */ |
---|
3399 | 4099 | num_bytes = sub_root_meta_rsv(root, num_bytes, |
---|
3400 | 4100 | BTRFS_QGROUP_RSV_META_PREALLOC); |
---|
3401 | 4101 | trace_qgroup_meta_convert(root, num_bytes); |
---|
3402 | | - qgroup_convert_meta(fs_info, root->objectid, num_bytes); |
---|
| 4102 | + qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); |
---|
3403 | 4103 | } |
---|
3404 | 4104 | |
---|
3405 | 4105 | /* |
---|
3406 | 4106 | * Check for leaked qgroup reserved space, normally at inode destroy 
---|
3407 | 4107 | * time 
---|
3408 | 4108 | */ |
---|
3409 | | -void btrfs_qgroup_check_reserved_leak(struct inode *inode) |
---|
| 4109 | +void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode) |
---|
3410 | 4110 | { |
---|
3411 | 4111 | struct extent_changeset changeset; |
---|
3412 | 4112 | struct ulist_node *unode; |
---|
.. | .. |
---|
3414 | 4114 | int ret; |
---|
3415 | 4115 | |
---|
3416 | 4116 | extent_changeset_init(&changeset); |
---|
3417 | | - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
---|
| 4117 | + ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1, |
---|
3418 | 4118 | EXTENT_QGROUP_RESERVED, &changeset); |
---|
3419 | 4119 | |
---|
3420 | 4120 | WARN_ON(ret < 0); |
---|
3421 | 4121 | if (WARN_ON(changeset.bytes_changed)) { |
---|
3422 | 4122 | ULIST_ITER_INIT(&iter); |
---|
3423 | 4123 | while ((unode = ulist_next(&changeset.range_changed, &iter))) { |
---|
3424 | | - btrfs_warn(BTRFS_I(inode)->root->fs_info, |
---|
3425 | | - "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", |
---|
3426 | | - inode->i_ino, unode->val, unode->aux); |
---|
| 4124 | + btrfs_warn(inode->root->fs_info, |
---|
| 4125 | + "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu", |
---|
| 4126 | + btrfs_ino(inode), unode->val, unode->aux); |
---|
3427 | 4127 | } |
---|
3428 | | - btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, |
---|
3429 | | - BTRFS_I(inode)->root->objectid, |
---|
| 4128 | + btrfs_qgroup_free_refroot(inode->root->fs_info, |
---|
| 4129 | + inode->root->root_key.objectid, |
---|
3430 | 4130 | changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); |
---|
3431 | 4131 | |
---|
3432 | 4132 | } |
---|
3433 | 4133 | extent_changeset_release(&changeset); |
---|
3434 | 4134 | } |
---|
| 4135 | + |
---|
| 4136 | +void btrfs_qgroup_init_swapped_blocks( |
---|
| 4137 | + struct btrfs_qgroup_swapped_blocks *swapped_blocks) |
---|
| 4138 | +{ |
---|
| 4139 | + int i; |
---|
| 4140 | + |
---|
| 4141 | + spin_lock_init(&swapped_blocks->lock); |
---|
| 4142 | + for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
---|
| 4143 | + swapped_blocks->blocks[i] = RB_ROOT; |
---|
| 4144 | + swapped_blocks->swapped = false; |
---|
| 4145 | +} |
---|
| 4146 | + |
---|
| 4147 | +/* |
---|
| 4148 | + * Delete all swapped block records of @root. 
---|
| 4149 | + * Every record here means we skipped a full subtree scan for qgroup. 
---|
| 4150 | + * 
---|
| 4151 | + * Called when committing a transaction. 
---|
| 4152 | + */ |
---|
| 4153 | +void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root) |
---|
| 4154 | +{ |
---|
| 4155 | + struct btrfs_qgroup_swapped_blocks *swapped_blocks; |
---|
| 4156 | + int i; |
---|
| 4157 | + |
---|
| 4158 | + swapped_blocks = &root->swapped_blocks; |
---|
| 4159 | + |
---|
| 4160 | + spin_lock(&swapped_blocks->lock); |
---|
| 4161 | + if (!swapped_blocks->swapped) |
---|
| 4162 | + goto out; |
---|
| 4163 | + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { |
---|
| 4164 | + struct rb_root *cur_root = &swapped_blocks->blocks[i]; |
---|
| 4165 | + struct btrfs_qgroup_swapped_block *entry; |
---|
| 4166 | + struct btrfs_qgroup_swapped_block *next; |
---|
| 4167 | + |
---|
| 4168 | + rbtree_postorder_for_each_entry_safe(entry, next, cur_root, |
---|
| 4169 | + node) |
---|
| 4170 | + kfree(entry); |
---|
| 4171 | + swapped_blocks->blocks[i] = RB_ROOT; |
---|
| 4172 | + } |
---|
| 4173 | + swapped_blocks->swapped = false; |
---|
| 4174 | +out: |
---|
| 4175 | + spin_unlock(&swapped_blocks->lock); |
---|
| 4176 | +} |
---|
| 4177 | + |
---|
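Both btrfs_qgroup_clean_swapped_blocks() above and btrfs_qgroup_destroy_extent_records() at the end of this patch tear their trees down with rbtree_postorder_for_each_entry_safe(), which visits children before parents so every node can be freed without per-node rb_erase() rebalancing. The macro does not unlink anything, so the root must be reset to RB_ROOT by hand, as both functions do. A minimal kernel-context sketch with a hypothetical entry type:

#include <linux/rbtree.h>
#include <linux/slab.h>

struct demo_entry {
	struct rb_node node;
	u64 key;
};

/* Free every entry without per-node rb_erase()/rebalance work. */
static void demo_destroy_tree(struct rb_root *root)
{
	struct demo_entry *entry, *next;

	rbtree_postorder_for_each_entry_safe(entry, next, root, node)
		kfree(entry);
	*root = RB_ROOT;	/* the macro leaves stale pointers behind */
}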
| 4178 | +/* |
---|
| 4179 | + * Add a subtree root record into @subvol_root. 
---|
| 4180 | + * 
---|
| 4181 | + * @subvol_root: tree root of the subvolume tree that gets swapped 
---|
| 4182 | + * @bg: block group under balance |
---|
| 4183 | + * @subvol_parent/slot: pointer to the subtree root in subvolume tree |
---|
| 4184 | + * @reloc_parent/slot: pointer to the subtree root in reloc tree |
---|
| 4185 | + * BOTH POINTERS ARE BEFORE TREE SWAP |
---|
| 4186 | + * @last_snapshot: last snapshot generation of the subvolume tree |
---|
| 4187 | + */ |
---|
| 4188 | +int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, |
---|
| 4189 | + struct btrfs_root *subvol_root, |
---|
| 4190 | + struct btrfs_block_group *bg, |
---|
| 4191 | + struct extent_buffer *subvol_parent, int subvol_slot, |
---|
| 4192 | + struct extent_buffer *reloc_parent, int reloc_slot, |
---|
| 4193 | + u64 last_snapshot) |
---|
| 4194 | +{ |
---|
| 4195 | + struct btrfs_fs_info *fs_info = subvol_root->fs_info; |
---|
| 4196 | + struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks; |
---|
| 4197 | + struct btrfs_qgroup_swapped_block *block; |
---|
| 4198 | + struct rb_node **cur; |
---|
| 4199 | + struct rb_node *parent = NULL; |
---|
| 4200 | + int level = btrfs_header_level(subvol_parent) - 1; |
---|
| 4201 | + int ret = 0; |
---|
| 4202 | + |
---|
| 4203 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
---|
| 4204 | + return 0; |
---|
| 4205 | + |
---|
| 4206 | + if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) > |
---|
| 4207 | + btrfs_node_ptr_generation(reloc_parent, reloc_slot)) { |
---|
| 4208 | + btrfs_err_rl(fs_info, |
---|
| 4209 | + "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu", |
---|
| 4210 | + __func__, |
---|
| 4211 | + btrfs_node_ptr_generation(subvol_parent, subvol_slot), |
---|
| 4212 | + btrfs_node_ptr_generation(reloc_parent, reloc_slot)); |
---|
| 4213 | + return -EUCLEAN; |
---|
| 4214 | + } |
---|
| 4215 | + |
---|
| 4216 | + block = kmalloc(sizeof(*block), GFP_NOFS); |
---|
| 4217 | + if (!block) { |
---|
| 4218 | + ret = -ENOMEM; |
---|
| 4219 | + goto out; |
---|
| 4220 | + } |
---|
| 4221 | + |
---|
| 4222 | + /* |
---|
| 4223 | + * @reloc_parent/slot is still before swap, while @block is going to |
---|
| 4224 | + * record the bytenr after swap, so we do the swap here. |
---|
| 4225 | + */ |
---|
| 4226 | + block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot); |
---|
| 4227 | + block->subvol_generation = btrfs_node_ptr_generation(reloc_parent, |
---|
| 4228 | + reloc_slot); |
---|
| 4229 | + block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot); |
---|
| 4230 | + block->reloc_generation = btrfs_node_ptr_generation(subvol_parent, |
---|
| 4231 | + subvol_slot); |
---|
| 4232 | + block->last_snapshot = last_snapshot; |
---|
| 4233 | + block->level = level; |
---|
| 4234 | + |
---|
| 4235 | + /* |
---|
| 4236 | + * If we have bg == NULL, we're called from btrfs_recover_relocation(), |
---|
| 4237 | + * no one else can modify tree blocks thus we qgroup will not change |
---|
| 4238 | + * no matter the value of trace_leaf. |
---|
| 4239 | + */ |
---|
| 4240 | + if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA) |
---|
| 4241 | + block->trace_leaf = true; |
---|
| 4242 | + else |
---|
| 4243 | + block->trace_leaf = false; |
---|
| 4244 | + btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot); |
---|
| 4245 | + |
---|
| 4246 | + /* Insert @block into @blocks */ |
---|
| 4247 | + spin_lock(&blocks->lock); |
---|
| 4248 | + cur = &blocks->blocks[level].rb_node; |
---|
| 4249 | + while (*cur) { |
---|
| 4250 | + struct btrfs_qgroup_swapped_block *entry; |
---|
| 4251 | + |
---|
| 4252 | + parent = *cur; |
---|
| 4253 | + entry = rb_entry(parent, struct btrfs_qgroup_swapped_block, |
---|
| 4254 | + node); |
---|
| 4255 | + |
---|
| 4256 | + if (entry->subvol_bytenr < block->subvol_bytenr) { |
---|
| 4257 | + cur = &(*cur)->rb_left; |
---|
| 4258 | + } else if (entry->subvol_bytenr > block->subvol_bytenr) { |
---|
| 4259 | + cur = &(*cur)->rb_right; |
---|
| 4260 | + } else { |
---|
| 4261 | + if (entry->subvol_generation != |
---|
| 4262 | + block->subvol_generation || |
---|
| 4263 | + entry->reloc_bytenr != block->reloc_bytenr || |
---|
| 4264 | + entry->reloc_generation != |
---|
| 4265 | + block->reloc_generation) { |
---|
| 4266 | + /* |
---|
| 4267 | + * Duplicated but mismatch entry found. |
---|
| 4268 | + * Shouldn't happen. |
---|
| 4269 | + * |
---|
| 4270 | + * Marking qgroup inconsistent should be enough |
---|
| 4271 | + * for end users. |
---|
| 4272 | + */ |
---|
| 4273 | + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); |
---|
| 4274 | + ret = -EEXIST; |
---|
| 4275 | + } |
---|
| 4276 | + kfree(block); |
---|
| 4277 | + goto out_unlock; |
---|
| 4278 | + } |
---|
| 4279 | + } |
---|
| 4280 | + rb_link_node(&block->node, parent, cur); |
---|
| 4281 | + rb_insert_color(&block->node, &blocks->blocks[level]); |
---|
| 4282 | + blocks->swapped = true; |
---|
| 4283 | +out_unlock: |
---|
| 4284 | + spin_unlock(&blocks->lock); |
---|
| 4285 | +out: |
---|
| 4286 | + if (ret < 0) |
---|
| 4287 | + fs_info->qgroup_flags |= |
---|
| 4288 | + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
---|
| 4289 | + return ret; |
---|
| 4290 | +} |
---|
| 4291 | + |
---|
| 4292 | +/* |
---|
| 4293 | + * Check if the tree block is a subtree root, and if so do the needed |
---|
| 4294 | + * delayed subtree trace for qgroup. |
---|
| 4295 | + * |
---|
| 4296 | + * This is called during btrfs_cow_block(). |
---|
| 4297 | + */ |
---|
| 4298 | +int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, |
---|
| 4299 | + struct btrfs_root *root, |
---|
| 4300 | + struct extent_buffer *subvol_eb) |
---|
| 4301 | +{ |
---|
| 4302 | + struct btrfs_fs_info *fs_info = root->fs_info; |
---|
| 4303 | + struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; |
---|
| 4304 | + struct btrfs_qgroup_swapped_block *block; |
---|
| 4305 | + struct extent_buffer *reloc_eb = NULL; |
---|
| 4306 | + struct rb_node *node; |
---|
| 4307 | + bool found = false; |
---|
| 4308 | + bool swapped = false; |
---|
| 4309 | + int level = btrfs_header_level(subvol_eb); |
---|
| 4310 | + int ret = 0; |
---|
| 4311 | + int i; |
---|
| 4312 | + |
---|
| 4313 | + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
---|
| 4314 | + return 0; |
---|
| 4315 | + if (!is_fstree(root->root_key.objectid) || !root->reloc_root) |
---|
| 4316 | + return 0; |
---|
| 4317 | + |
---|
| 4318 | + spin_lock(&blocks->lock); |
---|
| 4319 | + if (!blocks->swapped) { |
---|
| 4320 | + spin_unlock(&blocks->lock); |
---|
| 4321 | + return 0; |
---|
| 4322 | + } |
---|
| 4323 | + node = blocks->blocks[level].rb_node; |
---|
| 4324 | + |
---|
| 4325 | + while (node) { |
---|
| 4326 | + block = rb_entry(node, struct btrfs_qgroup_swapped_block, node); |
---|
| 4327 | + if (block->subvol_bytenr < subvol_eb->start) { |
---|
| 4328 | + node = node->rb_left; |
---|
| 4329 | + } else if (block->subvol_bytenr > subvol_eb->start) { |
---|
| 4330 | + node = node->rb_right; |
---|
| 4331 | + } else { |
---|
| 4332 | + found = true; |
---|
| 4333 | + break; |
---|
| 4334 | + } |
---|
| 4335 | + } |
---|
| 4336 | + if (!found) { |
---|
| 4337 | + spin_unlock(&blocks->lock); |
---|
| 4338 | + goto out; |
---|
| 4339 | + } |
---|
| 4340 | + /* Found one, remove it from @blocks first and update blocks->swapped */ |
---|
| 4341 | + rb_erase(&block->node, &blocks->blocks[level]); |
---|
| 4342 | + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { |
---|
| 4343 | + if (RB_EMPTY_ROOT(&blocks->blocks[i])) { |
---|
| 4344 | + swapped = true; |
---|
| 4345 | + break; |
---|
| 4346 | + } |
---|
| 4347 | + } |
---|
| 4348 | + blocks->swapped = swapped; |
---|
| 4349 | + spin_unlock(&blocks->lock); |
---|
| 4350 | + |
---|
| 4351 | + /* Read out reloc subtree root */ |
---|
| 4352 | + reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, |
---|
| 4353 | + block->reloc_generation, block->level, |
---|
| 4354 | + &block->first_key); |
---|
| 4355 | + if (IS_ERR(reloc_eb)) { |
---|
| 4356 | + ret = PTR_ERR(reloc_eb); |
---|
| 4357 | + reloc_eb = NULL; |
---|
| 4358 | + goto free_out; |
---|
| 4359 | + } |
---|
| 4360 | + if (!extent_buffer_uptodate(reloc_eb)) { |
---|
| 4361 | + ret = -EIO; |
---|
| 4362 | + goto free_out; |
---|
| 4363 | + } |
---|
| 4364 | + |
---|
| 4365 | + ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb, |
---|
| 4366 | + block->last_snapshot, block->trace_leaf); |
---|
| 4367 | +free_out: |
---|
| 4368 | + kfree(block); |
---|
| 4369 | + free_extent_buffer(reloc_eb); |
---|
| 4370 | +out: |
---|
| 4371 | + if (ret < 0) { |
---|
| 4372 | + btrfs_err_rl(fs_info, |
---|
| 4373 | + "failed to account subtree at bytenr %llu: %d", |
---|
| 4374 | + subvol_eb->start, ret); |
---|
| 4375 | + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
---|
| 4376 | + } |
---|
| 4377 | + return ret; |
---|
| 4378 | +} |
---|
| 4379 | + |
---|
| 4380 | +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) |
---|
| 4381 | +{ |
---|
| 4382 | + struct btrfs_qgroup_extent_record *entry; |
---|
| 4383 | + struct btrfs_qgroup_extent_record *next; |
---|
| 4384 | + struct rb_root *root; |
---|
| 4385 | + |
---|
| 4386 | + root = &trans->delayed_refs.dirty_extent_root; |
---|
| 4387 | + rbtree_postorder_for_each_entry_safe(entry, next, root, node) { |
---|
| 4388 | + ulist_free(entry->old_roots); |
---|
| 4389 | + kfree(entry); |
---|
| 4390 | + } |
---|
| 4391 | + *root = RB_ROOT; |
---|
| 4392 | +} |
---|