.. | ..
6 | 6 | #ifndef BTRFS_QGROUP_H
7 | 7 | #define BTRFS_QGROUP_H
8 | 8 |
| 9 | +#include <linux/spinlock.h>
| 10 | +#include <linux/rbtree.h>
| 11 | +#include <linux/kobject.h>
9 | 12 | #include "ulist.h"
10 | 13 | #include "delayed-ref.h"
11 | 14 |
.. | ..
38 | 41 |  */
39 | 42 |
40 | 43 | /*
| 44 | + * Special performance optimization for balance.
| 45 | + *
| 46 | + * For balance, we need to swap the subtrees of the subvolume and reloc trees.
| 47 | + * In theory, we need to trace all subtree blocks of both the subvolume and
| 48 | + * reloc trees, since their owner has changed during such a swap.
| 49 | + *
| 50 | + * However, since balance has ensured that both subtrees contain the same
| 51 | + * contents and have the same tree structure, such a swap won't cause any
| 52 | + * qgroup number change.
| 53 | + *
| 54 | + * But there is a race window between the subtree swap and transaction commit;
| 55 | + * during that window, if we increase/decrease the tree level or merge/split
| 56 | + * tree blocks, we still need to trace the original subtrees.
| 57 | + *
| 58 | + * So for balance, we use delayed subtree tracing, whose workflow is:
| 59 | + *
| 60 | + * 1) Record the subtree root block that gets swapped.
| 61 | + *
| 62 | + *    During subtree swap:
| 63 | + *    O = Old tree blocks
| 64 | + *    N = New tree blocks
| 65 | + *          reloc tree                     subvolume tree X
| 66 | + *             Root                               Root
| 67 | + *            /    \                             /    \
| 68 | + *          NA     OB                          OA    OB
| 69 | + *        /  |     |  \                      /  |    |  \
| 70 | + *      NC  ND     OE  OF                   OC  OD   OE OF
| 71 | + *
| 72 | + *    In this case, NA and OA are going to be swapped, so record (NA, OA)
| 73 | + *    into subvolume tree X.
| 74 | + *
| 75 | + * 2) After subtree swap.
| 76 | + *          reloc tree                     subvolume tree X
| 77 | + *             Root                               Root
| 78 | + *            /    \                             /    \
| 79 | + *          OA     OB                          NA    OB
| 80 | + *        /  |     |  \                      /  |    |  \
| 81 | + *      OC  OD     OE  OF                   NC  ND   OE OF
| 82 | + *
| 83 | + * 3a) COW happens for OB
| 84 | + *     If we are going to COW tree block OB, we check OB's bytenr against
| 85 | + *     tree X's swapped_blocks structure.
| 86 | + *     If it doesn't match any record, nothing will happen.
| 87 | + *
| 88 | + * 3b) COW happens for NA
| 89 | + *     Checking NA's bytenr against tree X's swapped_blocks gives a hit.
| 90 | + *     Then we do a subtree scan on both subtrees OA and NA, resulting in
| 91 | + *     6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND).
| 92 | + *
| 93 | + *     After that, no matter what we do to subvolume tree X, the qgroup
| 94 | + *     numbers will still be correct.
| 95 | + *     NA's record is then removed from X's swapped_blocks.
| 96 | + *
| 97 | + * 4) Transaction commit
| 98 | + *    Any record left in X's swapped_blocks gets removed, since there was no
| 99 | + *    modification to the swapped subtrees and thus no need to trigger a
| 100 | + *    heavy qgroup subtree rescan for them.
| 101 | + */
| 102 | +
| 103 | +/*
41 | 104 |  * Record a dirty extent, and info qgroup to update quota on it
42 | 105 |  * TODO: Use kmem cache to alloc it.
43 | 106 |  */
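To make step 3 of the comment concrete, here is a minimal sketch of the COW-time check, assuming a simplified per-subvolume container (a made-up `demo_swapped_blocks` with a single spinlock-protected rbtree keyed by `subvol_bytenr`); the real container and its locking live elsewhere in this series, so treat this purely as an illustration of the lookup described in 3a/3b.

```c
/*
 * Illustrative only: a simplified swapped-blocks container.  The real
 * structure used by this patch series is more elaborate; names here are
 * made up for the sketch.
 */
struct demo_swapped_blocks {
	spinlock_t lock;
	struct rb_root blocks;	/* btrfs_qgroup_swapped_block, by subvol_bytenr */
};

/*
 * Step 3: when tree block @bytenr is about to be COWed, see whether it is
 * one of the recorded swapped subtree roots.  On a hit the record is
 * detached and returned so the caller can trace both subtrees and free it;
 * on a miss NULL is returned and COW proceeds as usual.
 */
static struct btrfs_qgroup_swapped_block *
demo_find_swapped_block(struct demo_swapped_blocks *sb, u64 bytenr)
{
	struct btrfs_qgroup_swapped_block *found = NULL;
	struct rb_node *n;

	spin_lock(&sb->lock);
	n = sb->blocks.rb_node;
	while (n) {
		struct btrfs_qgroup_swapped_block *cur;

		cur = rb_entry(n, struct btrfs_qgroup_swapped_block, node);
		if (bytenr < cur->subvol_bytenr) {
			n = n->rb_left;
		} else if (bytenr > cur->subvol_bytenr) {
			n = n->rb_right;
		} else {
			/* Hit: this block was a swapped subtree root */
			rb_erase(&cur->node, &sb->blocks);
			found = cur;
			break;
		}
	}
	spin_unlock(&sb->lock);
	return found;
}
```

On a hit, the caller would go on to trace both the reloc and subvolume subtrees (step 3b) and then free the detached record; on a miss, the COW is unaffected (step 3a).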
.. | ..
45 | 108 |         struct rb_node node;
46 | 109 |         u64 bytenr;
47 | 110 |         u64 num_bytes;
| 111 | +
| 112 | +        /*
| 113 | +         * For freeing qgroup-reserved data space.
| 114 | +         *
| 115 | +         * @data_rsv_refroot and @data_rsv will be recorded after
| 116 | +         * BTRFS_ADD_DELAYED_EXTENT is called,
| 117 | +         * and will be used to free reserved qgroup space at
| 118 | +         * transaction commit time.
| 119 | +         */
| 120 | +        u32 data_rsv;           /* reserved data space needs to be freed */
| 121 | +        u64 data_rsv_refroot;   /* which root the reserved data belongs to */
48 | 122 |         struct ulist *old_roots;
| 123 | +};
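As a quick illustration of how these two new fields are meant to be consumed, here is a tiny sketch (with a made-up helper name) of the transaction-commit step: the recorded byte count is handed back to the owning root's qgroups through btrfs_qgroup_free_refroot(), which is declared further down in this header.

```c
/*
 * Hypothetical helper, one call per dirty extent record at transaction
 * commit time, after qgroup accounting for the extent has been done.
 */
static void demo_free_record_data_rsv(struct btrfs_fs_info *fs_info,
				      u64 data_rsv_refroot, u32 data_rsv)
{
	if (!data_rsv)
		return;

	/* Give the reserved bytes back to the qgroups of the owning root */
	btrfs_qgroup_free_refroot(fs_info, data_rsv_refroot, data_rsv,
				  BTRFS_QGROUP_RSV_DATA);
}
```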
| 124 | +
| 125 | +struct btrfs_qgroup_swapped_block {
| 126 | +        struct rb_node node;
| 127 | +
| 128 | +        int level;
| 129 | +        bool trace_leaf;
| 130 | +
| 131 | +        /* bytenr/generation of the tree block in subvolume tree after swap */
| 132 | +        u64 subvol_bytenr;
| 133 | +        u64 subvol_generation;
| 134 | +
| 135 | +        /* bytenr/generation of the tree block in reloc tree after swap */
| 136 | +        u64 reloc_bytenr;
| 137 | +        u64 reloc_generation;
| 138 | +
| 139 | +        u64 last_snapshot;
| 140 | +        struct btrfs_key first_key;
49 | 141 | };
50 | 142 |
51 | 143 | /*
.. | ..
70 | 162 |  * be converted into META_PERTRANS.
71 | 163 |  */
72 | 164 | enum btrfs_qgroup_rsv_type {
73 | | -        BTRFS_QGROUP_RSV_DATA = 0,
| 165 | +        BTRFS_QGROUP_RSV_DATA,
74 | 166 |         BTRFS_QGROUP_RSV_META_PERTRANS,
75 | 167 |         BTRFS_QGROUP_RSV_META_PREALLOC,
76 | 168 |         BTRFS_QGROUP_RSV_LAST,
.. | ..
81 | 173 |  *
82 | 174 |  * Each type should have different reservation behavior.
83 | 175 |  * E.g, data follows its io_tree flag modification, while
84 | | - * *currently* meta is just reserve-and-clear during transcation.
| 176 | + * *currently* meta is just reserve-and-clear during transaction.
85 | 177 |  *
86 | 178 |  * TODO: Add new type for reservation which can survive transaction commit.
87 | | - * Currect metadata reservation behavior is not suitable for such case.
| 179 | + * Current metadata reservation behavior is not suitable for such case.
88 | 180 |  */
89 | 181 | struct btrfs_qgroup_rsv {
90 | 182 |         u64 values[BTRFS_QGROUP_RSV_LAST];
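Because each reservation type gets its own slot in values[], adding and releasing reserved space is per-type arithmetic on this array. A minimal sketch with made-up helper names follows; the kernel's real helpers in qgroup.c do more (e.g. tracing), so this is only meant to show the data layout in use.

```c
/* Illustrative helpers only; names are made up for this sketch. */
static void demo_rsv_add(struct btrfs_qgroup_rsv *rsv, u64 num_bytes,
			 enum btrfs_qgroup_rsv_type type)
{
	rsv->values[type] += num_bytes;
}

static void demo_rsv_release(struct btrfs_qgroup_rsv *rsv, u64 num_bytes,
			     enum btrfs_qgroup_rsv_type type)
{
	/* Clamp instead of underflowing if a caller over-releases. */
	if (rsv->values[type] < num_bytes)
		num_bytes = rsv->values[type];
	rsv->values[type] -= num_bytes;
}
```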
.. | ..
132 | 224 |  */
133 | 225 |         u64 old_refcnt;
134 | 226 |         u64 new_refcnt;
| 227 | +
| 228 | +        /*
| 229 | +         * Sysfs kobjectid
| 230 | +         */
| 231 | +        struct kobject kobj;
135 | 232 | };
| 233 | +
| 234 | +static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
| 235 | +{
| 236 | +        return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1));
| 237 | +}
136 | 238 |
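A small worked example of the helper above: a qgroupid packs the level into the top bits (BTRFS_QGROUP_LEVEL_SHIFT is 48 in the uapi headers) and the subvolume id into the remaining bits, so masking off the level bits recovers the subvolume id. The demo_make_qgroupid() helper below is made up for illustration.

```c
/* Worked example: qgroup "1/257" has level 1 and subvolume id 257. */
static inline u64 demo_make_qgroupid(u64 level, u64 subvolid)
{
	return (level << BTRFS_QGROUP_LEVEL_SHIFT) | subvolid;
}

/*
 * demo_make_qgroupid(1, 257) == (1ULL << 48) | 257
 * btrfs_qgroup_subvolid(demo_make_qgroupid(1, 257)) == 257
 * btrfs_qgroup_subvolid(257) == 257  (a level-0 qgroupid is the subvolume id)
 */
```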
137 | 239 | /*
138 | 240 |  * For qgroup event trace points only
.. | ..
246 | 348 | void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
247 | 349 |                                u64 ref_root, u64 num_bytes,
248 | 350 |                                enum btrfs_qgroup_rsv_type type);
249 | | -static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
250 | | -                                                 u64 ref_root, u64 num_bytes)
251 | | -{
252 | | -        if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
253 | | -                return;
254 | | -        trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
255 | | -        btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
256 | | -                                  BTRFS_QGROUP_RSV_DATA);
257 | | -}
258 | 351 |
259 | 352 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
260 | 353 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
.. | ..
262 | 355 | #endif
263 | 356 |
264 | 357 | /* New io_tree based accurate qgroup reserve API */
265 | | -int btrfs_qgroup_reserve_data(struct inode *inode,
| 358 | +int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
266 | 359 |                         struct extent_changeset **reserved, u64 start, u64 len);
267 | | -int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
268 | | -int btrfs_qgroup_free_data(struct inode *inode,
269 | | -                        struct extent_changeset *reserved, u64 start, u64 len);
270 | | -
| 360 | +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
| 361 | +int btrfs_qgroup_free_data(struct btrfs_inode *inode,
| 362 | +                        struct extent_changeset *reserved, u64 start,
| 363 | +                        u64 len);
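To show how the three data-space functions above are intended to fit together, here is a hedged outline of a write path using only the signatures declared in this header. The demo_* helpers are made up, error handling is minimal, and the real kernel paths route these calls through the delalloc and ordered-extent machinery rather than calling them back to back.

```c
/* Stand-in for the real write path; made up for this sketch. */
static int demo_do_the_actual_write(struct btrfs_inode *inode, u64 start, u64 len)
{
	return 0;
}

/*
 * Sketch: reserve qgroup data space before dirtying a range, free it if the
 * write fails, or release it once the data is on its way to disk.
 */
static int demo_buffered_write_qgroup_flow(struct btrfs_inode *inode,
					   u64 start, u64 len)
{
	struct extent_changeset *reserved = NULL;
	int ret;

	ret = btrfs_qgroup_reserve_data(inode, &reserved, start, len);
	if (ret < 0)
		return ret;

	ret = demo_do_the_actual_write(inode, start, len);
	if (ret < 0) {
		/* Error path: give the reserved space back immediately. */
		btrfs_qgroup_free_data(inode, reserved, start, len);
		goto out;
	}

	/*
	 * Success path: the reservation is handed over to the written extent
	 * and will be accounted and freed at transaction commit.
	 */
	btrfs_qgroup_release_data(inode, start, len);
out:
	extent_changeset_free(reserved);	/* helper assumed from btrfs headers */
	return ret;
}
```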
| 364 | +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
| 365 | +                              enum btrfs_qgroup_rsv_type type, bool enforce);
271 | 366 | int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
272 | 367 |                                 enum btrfs_qgroup_rsv_type type, bool enforce);
273 | 368 | /* Reserve metadata space for pertrans and prealloc type */
.. | ..
317 | 412 |  */
318 | 413 | void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
319 | 414 |
320 | | -void btrfs_qgroup_check_reserved_leak(struct inode *inode);
| 415 | +void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode);
| 416 | +
| 417 | +/* btrfs_qgroup_swapped_blocks related functions */
| 418 | +void btrfs_qgroup_init_swapped_blocks(
| 419 | +        struct btrfs_qgroup_swapped_blocks *swapped_blocks);
| 420 | +
| 421 | +void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
| 422 | +int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
| 423 | +                struct btrfs_root *subvol_root,
| 424 | +                struct btrfs_block_group *bg,
| 425 | +                struct extent_buffer *subvol_parent, int subvol_slot,
| 426 | +                struct extent_buffer *reloc_parent, int reloc_slot,
| 427 | +                u64 last_snapshot);
| 428 | +int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
| 429 | +                struct btrfs_root *root, struct extent_buffer *eb);
| 430 | +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
| 431 | +bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
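Finally, a sketch that ties the swapped-blocks declarations together in the order the balance workflow at the top of this header uses them. Everything here is assumed to be handed in by the relocation code; it is an outline of call order, not the real relocation logic.

```c
/* Hypothetical outline; parameters are assumed to come from relocation. */
static int demo_balance_qgroup_flow(struct btrfs_trans_handle *trans,
				    struct btrfs_root *subvol_root,
				    struct btrfs_block_group *bg,
				    struct extent_buffer *subvol_parent, int subvol_slot,
				    struct extent_buffer *reloc_parent, int reloc_slot,
				    u64 last_snapshot,
				    struct extent_buffer *cow_eb)
{
	int ret;

	/* 1) At subtree swap time: remember the swapped root blocks. */
	ret = btrfs_qgroup_add_swapped_blocks(trans, subvol_root, bg,
					      subvol_parent, subvol_slot,
					      reloc_parent, reloc_slot,
					      last_snapshot);
	if (ret < 0)
		return ret;

	/*
	 * 2) Later, when a subvolume tree block is COWed before the
	 *    transaction commits: check it against the recorded swaps and,
	 *    on a hit, trace both old and new subtrees (step 3 above).
	 */
	ret = btrfs_qgroup_trace_subtree_after_cow(trans, subvol_root, cow_eb);
	if (ret < 0)
		return ret;

	/*
	 * 3) At transaction commit, any remaining records are dropped;
	 *    nothing touched them, so no rescan is needed (step 4 above).
	 */
	btrfs_qgroup_clean_swapped_blocks(subvol_root);
	return 0;
}
```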
321 | 432 |
322 | 433 | #endif