@@ … @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/sched/mm.h>
+#include "misc.h"
 #include "ctree.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "extent_io.h"
 #include "disk-io.h"
 #include "compression.h"
+#include "delalloc-space.h"
+#include "qgroup.h"
 
 static struct kmem_cache *btrfs_ordered_extent_cache;
 
 static u64 entry_end(struct btrfs_ordered_extent *entry)
 {
-        if (entry->file_offset + entry->len < entry->file_offset)
+        if (entry->file_offset + entry->num_bytes < entry->file_offset)
                 return (u64)-1;
-        return entry->file_offset + entry->len;
+        return entry->file_offset + entry->num_bytes;
 }
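
The entry_end() change keeps the pre-existing overflow guard: if file_offset
plus the (renamed) num_bytes wraps around a u64, the end is clamped to
(u64)-1. A standalone sketch of the same pattern, with userspace names that
are not from the patch:

    #include <stdint.h>

    /* End of the half-open range [offset, offset + len), clamped to
     * UINT64_MAX when the addition wraps around. */
    static uint64_t range_end(uint64_t offset, uint64_t len)
    {
            if (offset + len < offset)      /* unsigned wraparound */
                    return UINT64_MAX;
            return offset + len;
    }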
 
 /* returns NULL if the insertion worked, or it returns the node it did find
@@ … @@
         rb_link_node(node, parent, p);
         rb_insert_color(node, root);
         return NULL;
-}
-
-static void ordered_data_tree_panic(struct inode *inode, int errno,
-                                    u64 offset)
-{
-        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-        btrfs_panic(fs_info, errno,
-                    "Inconsistency in ordered tree at offset %llu", offset);
 }
 
 /*
@@ … @@
 static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
 {
         if (file_offset < entry->file_offset ||
-            entry->file_offset + entry->len <= file_offset)
+            entry->file_offset + entry->num_bytes <= file_offset)
                 return 0;
         return 1;
 }
@@ … @@
                           u64 len)
 {
         if (file_offset + len <= entry->file_offset ||
-            entry->file_offset + entry->len <= file_offset)
+            entry->file_offset + entry->num_bytes <= file_offset)
                 return 0;
         return 1;
 }
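
offset_in_entry() and range_overlaps() both treat an ordered extent as the
half-open byte range [file_offset, file_offset + num_bytes). A self-contained
sketch of the two predicates over plain integer ranges (illustrative, not
from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Is 'pos' inside the half-open range [start, start + len)? */
    static bool pos_in_range(uint64_t start, uint64_t len, uint64_t pos)
    {
            return pos >= start && pos - start < len;
    }

    /* Do [a, a + alen) and [b, b + blen) share at least one byte? */
    static bool ranges_overlap(uint64_t a, uint64_t alen,
                               uint64_t b, uint64_t blen)
    {
            return a < b + blen && b < a + alen;
    }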
@@ … @@
         return ret;
 }
 
-/* allocate and add a new ordered_extent into the per-inode tree.
- * file_offset is the logical offset in the file
- *
- * start is the disk block number of an extent already reserved in the
- * extent allocation tree
- *
- * len is the length of the extent
+/*
+ * Allocate and add a new ordered_extent into the per-inode tree.
  *
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
-static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                                      u64 start, u64 len, u64 disk_len,
-                                      int type, int dio, int compress_type)
+static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+                                      u64 disk_bytenr, u64 num_bytes,
+                                      u64 disk_num_bytes, int type, int dio,
+                                      int compress_type)
 {
-        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-        struct btrfs_root *root = BTRFS_I(inode)->root;
-        struct btrfs_ordered_inode_tree *tree;
+        struct btrfs_root *root = inode->root;
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry;
+        int ret;
 
-        tree = &BTRFS_I(inode)->ordered_tree;
+        if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
+                /* For nocow write, we can release the qgroup rsv right now */
+                ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
+                if (ret < 0)
+                        return ret;
+                ret = 0;
+        } else {
+                /*
+                 * The ordered extent has reserved qgroup space, release now
+                 * and pass the reserved number for qgroup_record to free.
+                 */
+                ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
+                if (ret < 0)
+                        return ret;
+        }
         entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
         if (!entry)
                 return -ENOMEM;
 
         entry->file_offset = file_offset;
-        entry->start = start;
-        entry->len = len;
-        entry->disk_len = disk_len;
-        entry->bytes_left = len;
-        entry->inode = igrab(inode);
+        entry->disk_bytenr = disk_bytenr;
+        entry->num_bytes = num_bytes;
+        entry->disk_num_bytes = disk_num_bytes;
+        entry->bytes_left = num_bytes;
+        entry->inode = igrab(&inode->vfs_inode);
         entry->compress_type = compress_type;
         entry->truncated_len = (u64)-1;
+        entry->qgroup_rsv = ret;
         if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                 set_bit(type, &entry->flags);
 
-        if (dio)
+        if (dio) {
+                percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes,
+                                         fs_info->delalloc_batch);
                 set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
+        }
 
         /* one ref for the tree */
         refcount_set(&entry->refs, 1);
         init_waitqueue_head(&entry->wait);
         INIT_LIST_HEAD(&entry->list);
+        INIT_LIST_HEAD(&entry->log_list);
         INIT_LIST_HEAD(&entry->root_extent_list);
         INIT_LIST_HEAD(&entry->work_list);
         init_completion(&entry->completion);
-        INIT_LIST_HEAD(&entry->log_list);
-        INIT_LIST_HEAD(&entry->trans_list);
 
         trace_btrfs_ordered_extent_add(inode, entry);
 
@@ … @@
         node = tree_insert(&tree->tree, file_offset,
                            &entry->rb_node);
         if (node)
-                ordered_data_tree_panic(inode, -EEXIST, file_offset);
+                btrfs_panic(fs_info, -EEXIST,
+                            "inconsistency in ordered tree at offset %llu",
+                            file_offset);
         spin_unlock_irq(&tree->lock);
 
         spin_lock(&root->ordered_extent_lock);
@@ … @@
          * that work has been done at higher layers, so this is truly the
          * smallest the extent is going to get.
          */
-        spin_lock(&BTRFS_I(inode)->lock);
-        btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
-        spin_unlock(&BTRFS_I(inode)->lock);
+        spin_lock(&inode->lock);
+        btrfs_mod_outstanding_extents(inode, 1);
+        spin_unlock(&inode->lock);
 
         return 0;
 }
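
The qgroup branch at the top of __btrfs_add_ordered_extent() encodes a
two-path rule: NOCOW and PREALLOC writes allocate no new data extent, so
their qgroup data reservation can be freed immediately (entry->qgroup_rsv
stays 0); COW writes only release the reservation, and the released byte
count is stashed in entry->qgroup_rsv to be freed when the ordered extent
completes. A hedged userspace sketch of that decision; free_reservation()
and convert_reservation() are stand-ins for btrfs_qgroup_free_data() and
btrfs_qgroup_release_data(), not real APIs:

    #include <stdint.h>

    enum write_type { WRITE_COW, WRITE_NOCOW, WRITE_PREALLOC };

    /* Both stand-ins return bytes handled, or a negative error. */
    extern int64_t free_reservation(uint64_t off, uint64_t len);
    extern int64_t convert_reservation(uint64_t off, uint64_t len);

    static int settle_qgroup_rsv(enum write_type type, uint64_t off,
                                 uint64_t len, uint64_t *rsv_out)
    {
            int64_t ret;

            if (type == WRITE_NOCOW || type == WRITE_PREALLOC) {
                    /* No new data extent: return the reservation now. */
                    ret = free_reservation(off, len);
                    *rsv_out = 0;
            } else {
                    /* COW: keep the bytes accounted until the ordered
                     * extent finishes; remember how much to free later. */
                    ret = convert_reservation(off, len);
                    *rsv_out = ret > 0 ? (uint64_t)ret : 0;
            }
            return ret < 0 ? (int)ret : 0;
    }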
 
-int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                             u64 start, u64 len, u64 disk_len, int type)
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+                             u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
+                             int type)
 {
-        return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-                                          disk_len, type, 0,
+        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
+                                          num_bytes, disk_num_bytes, type, 0,
                                           BTRFS_COMPRESS_NONE);
 }
 
-int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
-                                 u64 start, u64 len, u64 disk_len, int type)
+int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
+                                 u64 disk_bytenr, u64 num_bytes,
+                                 u64 disk_num_bytes, int type)
 {
-        return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-                                          disk_len, type, 1,
+        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
+                                          num_bytes, disk_num_bytes, type, 1,
                                           BTRFS_COMPRESS_NONE);
 }
 
-int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
-                                      u64 start, u64 len, u64 disk_len,
-                                      int type, int compress_type)
+int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
+                                      u64 disk_bytenr, u64 num_bytes,
+                                      u64 disk_num_bytes, int type,
+                                      int compress_type)
 {
-        return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-                                          disk_len, type, 0,
+        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
+                                          num_bytes, disk_num_bytes, type, 0,
                                           compress_type);
 }
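
All three exported wrappers funnel into __btrfs_add_ordered_extent() and
differ only in the dio and compress_type arguments they pass. A hypothetical
call from a COW writeback path might look like the following; the variable
names and error label are illustrative, not taken from this patch:

    /* disk_bytenr/num_bytes would come from the allocator's reservation */
    ret = btrfs_add_ordered_extent(BTRFS_I(inode), start,
                                   ins.objectid,    /* disk_bytenr */
                                   ram_size,        /* num_bytes */
                                   ins.offset,      /* disk_num_bytes */
                                   0 /* regular COW write: no flag set */);
    if (ret)
            goto out_free_reserve;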
 
@@ … @@
  * when an ordered extent is finished.  If the list covers more than one
  * ordered extent, it is split across multiples.
  */
-void btrfs_add_ordered_sum(struct inode *inode,
-                           struct btrfs_ordered_extent *entry,
+void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                            struct btrfs_ordered_sum *sum)
 {
         struct btrfs_ordered_inode_tree *tree;
 
-        tree = &BTRFS_I(inode)->ordered_tree;
+        tree = &BTRFS_I(entry->inode)->ordered_tree;
         spin_lock_irq(&tree->lock);
         list_add_tail(&sum->list, &entry->list);
         spin_unlock_irq(&tree->lock);
@@ … @@
  * file_offset is updated to one byte past the range that is recorded as
  * complete.  This allows you to walk forward in the file.
  */
-int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
                                          struct btrfs_ordered_extent **cached,
                                          u64 *file_offset, u64 io_size, int uptodate)
 {
-        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-        struct btrfs_ordered_inode_tree *tree;
+        struct btrfs_fs_info *fs_info = inode->root->fs_info;
+        struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry = NULL;
         int ret;
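
Per the comment above, *file_offset is advanced one byte past the range that
was accounted, which lets an endio handler walk a multi-extent range front to
back. A rough caller sketch; the loop shape is hypothetical (the function's
full return semantics are elided in this hunk), hence the no-progress guard:

    u64 cursor = start;
    u64 range_end = start + bytes;

    while (cursor < range_end) {
            struct btrfs_ordered_extent *oe = NULL;
            u64 before = cursor;

            if (btrfs_dec_test_first_ordered_pending(inode, &oe, &cursor,
                                                     range_end - cursor, 1))
                    /* 'oe' is fully written; after finishing it, drop the
                     * reference the lookup took on it. */
                    btrfs_put_ordered_extent(oe);
            if (cursor == before)
                    break;  /* no ordered extent at 'cursor': stop */
    }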
@@ … @@
         u64 dec_start;
         u64 to_dec;
 
-        tree = &BTRFS_I(inode)->ordered_tree;
         spin_lock_irqsave(&tree->lock, flags);
         node = tree_search(tree, *file_offset);
         if (!node) {
@@ … @@
         }
 
         dec_start = max(*file_offset, entry->file_offset);
-        dec_end = min(*file_offset + io_size, entry->file_offset +
-                      entry->len);
+        dec_end = min(*file_offset + io_size,
+                      entry->file_offset + entry->num_bytes);
         *file_offset = dec_end;
         if (dec_start > dec_end) {
                 btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu",
@@ … @@
  * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
  * to make sure this function only returns 1 once for a given ordered extent.
  */
-int btrfs_dec_test_ordered_pending(struct inode *inode,
+int btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
                                    struct btrfs_ordered_extent **cached,
                                    u64 file_offset, u64 io_size, int uptodate)
 {
-        struct btrfs_ordered_inode_tree *tree;
+        struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry = NULL;
         unsigned long flags;
         int ret;
 
-        tree = &BTRFS_I(inode)->ordered_tree;
         spin_lock_irqsave(&tree->lock, flags);
         if (cached && *cached) {
                 entry = *cached;
@@ … @@
         }
 
         if (io_size > entry->bytes_left) {
-                btrfs_crit(BTRFS_I(inode)->root->fs_info,
+                btrfs_crit(inode->root->fs_info,
                            "bad ordered accounting left %llu size %llu",
                            entry->bytes_left, io_size);
         }
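
The once-only return described in the comment relies on test_and_set_bit()
being atomic: the first caller to complete the extent observes the bit clear
and sets it; every later caller sees it already set. The function body is
elided in this hunk, but the pattern the comment describes is essentially:

    if (entry->bytes_left == 0)
            /* test_and_set_bit() returns 0 exactly once per extent */
            ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
    else
            ret = 1;
    /* ... */
    return ret == 0;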
@@ … @@
         struct list_head *cur;
         struct btrfs_ordered_sum *sum;
 
-        trace_btrfs_ordered_extent_put(entry->inode, entry);
+        trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry);
 
         if (refcount_dec_and_test(&entry->refs)) {
-                ASSERT(list_empty(&entry->log_list));
-                ASSERT(list_empty(&entry->trans_list));
                 ASSERT(list_empty(&entry->root_extent_list));
+                ASSERT(list_empty(&entry->log_list));
                 ASSERT(RB_EMPTY_NODE(&entry->rb_node));
                 if (entry->inode)
                         btrfs_add_delayed_iput(entry->inode);
@@ … @@
                 cur = entry->list.next;
                 sum = list_entry(cur, struct btrfs_ordered_sum, list);
                 list_del(&sum->list);
-                kfree(sum);
+                kvfree(sum);
         }
         kmem_cache_free(btrfs_ordered_extent_cache, entry);
 }
@@ … @@
  * remove an ordered extent from the tree.  No references are dropped
  * and waiters are woken up.
  */
-void btrfs_remove_ordered_extent(struct inode *inode,
+void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
                                  struct btrfs_ordered_extent *entry)
 {
-        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
         struct btrfs_ordered_inode_tree *tree;
-        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
         struct btrfs_root *root = btrfs_inode->root;
+        struct btrfs_fs_info *fs_info = root->fs_info;
         struct rb_node *node;
-        bool dec_pending_ordered = false;
+        bool pending;
 
         /* This is paired with btrfs_add_ordered_extent. */
         spin_lock(&btrfs_inode->lock);
         btrfs_mod_outstanding_extents(btrfs_inode, -1);
         spin_unlock(&btrfs_inode->lock);
         if (root != fs_info->tree_root)
-                btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
+                btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
+                                                false);
+
+        if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
+                percpu_counter_add_batch(&fs_info->dio_bytes, -entry->num_bytes,
+                                         fs_info->delalloc_batch);
 
         tree = &btrfs_inode->ordered_tree;
         spin_lock_irq(&tree->lock);
@@ … @@
         if (tree->last == node)
                 tree->last = NULL;
         set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
-        if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
-                dec_pending_ordered = true;
+        pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags);
         spin_unlock_irq(&tree->lock);
 
         /*
          * The current running transaction is waiting on us, we need to let it
          * know that we're complete and wake it up.
          */
-        if (dec_pending_ordered) {
+        if (pending) {
                 struct btrfs_transaction *trans;
 
                 /*
@@ … @@
         list_del_init(&entry->root_extent_list);
         root->nr_ordered_extents--;
 
-        trace_btrfs_ordered_extent_remove(inode, entry);
+        trace_btrfs_ordered_extent_remove(btrfs_inode, entry);
 
         if (!root->nr_ordered_extents) {
                 spin_lock(&fs_info->ordered_root_lock);
@@ … @@
         struct btrfs_ordered_extent *ordered;
 
         ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
-        btrfs_start_ordered_extent(ordered->inode, ordered, 1);
+        btrfs_start_ordered_extent(ordered, 1);
         complete(&ordered->completion);
 }
 
@@ … @@
                 ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
                                            root_extent_list);
 
-                if (range_end <= ordered->start ||
-                    ordered->start + ordered->disk_len <= range_start) {
+                if (range_end <= ordered->disk_bytenr ||
+                    ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
                         list_move_tail(&ordered->root_extent_list, &skipped);
                         cond_resched_lock(&root->ordered_extent_lock);
                         continue;
@@ … @@
                 spin_unlock(&root->ordered_extent_lock);
 
                 btrfs_init_work(&ordered->flush_work,
-                                btrfs_flush_delalloc_helper,
                                 btrfs_run_ordered_extent_work, NULL, NULL);
                 list_add_tail(&ordered->work_list, &works);
                 btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
@@ … @@
         return count;
 }
 
-u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
                              const u64 range_start, const u64 range_len)
 {
         struct btrfs_root *root;
         struct list_head splice;
-        u64 total_done = 0;
         u64 done;
 
         INIT_LIST_HEAD(&splice);
@@ … @@
         while (!list_empty(&splice) && nr) {
                 root = list_first_entry(&splice, struct btrfs_root,
                                         ordered_root);
-                root = btrfs_grab_fs_root(root);
+                root = btrfs_grab_root(root);
                 BUG_ON(!root);
                 list_move_tail(&root->ordered_root,
                                &fs_info->ordered_roots);
@@ … @@
 
                 done = btrfs_wait_ordered_extents(root, nr,
                                                   range_start, range_len);
-                btrfs_put_fs_root(root);
-                total_done += done;
+                btrfs_put_root(root);
 
                 spin_lock(&fs_info->ordered_root_lock);
                 if (nr != U64_MAX) {
@@ … @@
         list_splice_tail(&splice, &fs_info->ordered_roots);
         spin_unlock(&fs_info->ordered_root_lock);
         mutex_unlock(&fs_info->ordered_operations_mutex);
-
-        return total_done;
 }
 
 /*
@@ … @@
  * in the extent, and it waits on the io completion code to insert
  * metadata into the btree corresponding to the extent
  */
-void btrfs_start_ordered_extent(struct inode *inode,
-                                struct btrfs_ordered_extent *entry,
-                                int wait)
+void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
 {
         u64 start = entry->file_offset;
-        u64 end = start + entry->len - 1;
+        u64 end = start + entry->num_bytes - 1;
+        struct btrfs_inode *inode = BTRFS_I(entry->inode);
 
         trace_btrfs_ordered_extent_start(inode, entry);
 
@@ … @@
          * for the flusher thread to find them
          */
         if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
-                filemap_fdatawrite_range(inode->i_mapping, start, end);
+                filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
         if (wait) {
                 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
                                                  &entry->flags));
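
The wait side above blocks on entry->wait until BTRFS_ORDERED_COMPLETE is
set. The matching waker is btrfs_remove_ordered_extent(), which sets the bit
under the tree lock (visible in an earlier hunk) and then, in a part of that
function elided here, wakes the queue:

    /* waker side, on ordered-extent completion */
    set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
    /* ... */
    wake_up(&entry->wait);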
@@ … @@
 
         end = orig_end;
         while (1) {
-                ordered = btrfs_lookup_first_ordered_extent(inode, end);
+                ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end);
                 if (!ordered)
                         break;
                 if (ordered->file_offset > orig_end) {
                         btrfs_put_ordered_extent(ordered);
                         break;
                 }
-                if (ordered->file_offset + ordered->len <= start) {
+                if (ordered->file_offset + ordered->num_bytes <= start) {
                         btrfs_put_ordered_extent(ordered);
                         break;
                 }
-                btrfs_start_ordered_extent(inode, ordered, 1);
+                btrfs_start_ordered_extent(ordered, 1);
                 end = ordered->file_offset;
                 /*
                  * If the ordered extent had an error save the error but don't
@@ … @@
  * find an ordered extent corresponding to file_offset.  return NULL if
  * nothing is found, otherwise take a reference on the extent and return it
  */
-struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
                                                          u64 file_offset)
 {
         struct btrfs_ordered_inode_tree *tree;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry = NULL;
 
-        tree = &BTRFS_I(inode)->ordered_tree;
+        tree = &inode->ordered_tree;
         spin_lock_irq(&tree->lock);
         node = tree_search(tree, file_offset);
         if (!node)
@@ … @@
 }
 
 /*
+ * Adds all ordered extents to the given list. The list ends up sorted by the
+ * file_offset of the ordered extents.
+ */
+void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
+                                           struct list_head *list)
+{
+        struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+        struct rb_node *n;
+
+        ASSERT(inode_is_locked(&inode->vfs_inode));
+
+        spin_lock_irq(&tree->lock);
+        for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
+                struct btrfs_ordered_extent *ordered;
+
+                ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
+
+                if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
+                        continue;
+
+                ASSERT(list_empty(&ordered->log_list));
+                list_add_tail(&ordered->log_list, list);
+                refcount_inc(&ordered->refs);
+        }
+        spin_unlock_irq(&tree->lock);
+}
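
Each extent placed on the list gets an extra reference (the refcount_inc()
above), so the consumer owns one reference per entry and must drop it when
done. A hypothetical consumer, not taken from this patch:

    LIST_HEAD(ordered_list);
    struct btrfs_ordered_extent *oe, *tmp;

    btrfs_get_ordered_extents_for_logging(inode, &ordered_list);
    list_for_each_entry_safe(oe, tmp, &ordered_list, log_list) {
            /* ... record the extent in the log ... */
            list_del_init(&oe->log_list);
            btrfs_put_ordered_extent(oe);   /* drop the walk's reference */
    }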
+
+/*
  * lookup and return any extent before 'file_offset'.  NULL is returned
  * if none is found
  */
 struct btrfs_ordered_extent *
-btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
+btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
 {
         struct btrfs_ordered_inode_tree *tree;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry = NULL;
 
-        tree = &BTRFS_I(inode)->ordered_tree;
+        tree = &inode->ordered_tree;
         spin_lock_irq(&tree->lock);
         node = tree_search(tree, file_offset);
         if (!node)
@@ … @@
 }
 
 /*
- * After an extent is done, call this to conditionally update the on disk
- * i_size. i_size is updated to cover any fully written part of the file.
- */
-int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
-                                struct btrfs_ordered_extent *ordered)
-{
-        struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
-        u64 disk_i_size;
-        u64 new_i_size;
-        u64 i_size = i_size_read(inode);
-        struct rb_node *node;
-        struct rb_node *prev = NULL;
-        struct btrfs_ordered_extent *test;
-        int ret = 1;
-        u64 orig_offset = offset;
-
-        spin_lock_irq(&tree->lock);
-        if (ordered) {
-                offset = entry_end(ordered);
-                if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
-                        offset = min(offset,
-                                     ordered->file_offset +
-                                     ordered->truncated_len);
-        } else {
-                offset = ALIGN(offset, btrfs_inode_sectorsize(inode));
-        }
-        disk_i_size = BTRFS_I(inode)->disk_i_size;
-
-        /*
-         * truncate file.
-         * If ordered is not NULL, then this is called from endio and
-         * disk_i_size will be updated by either truncate itself or any
-         * in-flight IOs which are inside the disk_i_size.
-         *
-         * Because btrfs_setsize() may set i_size with disk_i_size if truncate
-         * fails somehow, we need to make sure we have a precise disk_i_size by
-         * updating it as usual.
-         *
-         */
-        if (!ordered && disk_i_size > i_size) {
-                BTRFS_I(inode)->disk_i_size = orig_offset;
-                ret = 0;
-                goto out;
-        }
-
-        /*
-         * if the disk i_size is already at the inode->i_size, or
-         * this ordered extent is inside the disk i_size, we're done
-         */
-        if (disk_i_size == i_size)
-                goto out;
-
-        /*
-         * We still need to update disk_i_size if outstanding_isize is greater
-         * than disk_i_size.
-         */
-        if (offset <= disk_i_size &&
-            (!ordered || ordered->outstanding_isize <= disk_i_size))
-                goto out;
-
-        /*
-         * walk backward from this ordered extent to disk_i_size.
-         * if we find an ordered extent then we can't update disk i_size
-         * yet
-         */
-        if (ordered) {
-                node = rb_prev(&ordered->rb_node);
-        } else {
-                prev = tree_search(tree, offset);
-                /*
-                 * we insert file extents without involving ordered struct,
-                 * so there should be no ordered struct cover this offset
-                 */
-                if (prev) {
-                        test = rb_entry(prev, struct btrfs_ordered_extent,
-                                        rb_node);
-                        BUG_ON(offset_in_entry(test, offset));
-                }
-                node = prev;
-        }
-        for (; node; node = rb_prev(node)) {
-                test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
-
-                /* We treat this entry as if it doesn't exist */
-                if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
-                        continue;
-
-                if (entry_end(test) <= disk_i_size)
-                        break;
-                if (test->file_offset >= i_size)
-                        break;
-
-                /*
-                 * We don't update disk_i_size now, so record this undealt
-                 * i_size. Or we will not know the real i_size.
-                 */
-                if (test->outstanding_isize < offset)
-                        test->outstanding_isize = offset;
-                if (ordered &&
-                    ordered->outstanding_isize > test->outstanding_isize)
-                        test->outstanding_isize = ordered->outstanding_isize;
-                goto out;
-        }
-        new_i_size = min_t(u64, offset, i_size);
-
-        /*
-         * Some ordered extents may completed before the current one, and
-         * we hold the real i_size in ->outstanding_isize.
-         */
-        if (ordered && ordered->outstanding_isize > new_i_size)
-                new_i_size = min_t(u64, ordered->outstanding_isize, i_size);
-        BTRFS_I(inode)->disk_i_size = new_i_size;
-        ret = 0;
-out:
-        /*
-         * We need to do this because we can't remove ordered extents until
-         * after the i_disk_size has been updated and then the inode has been
-         * updated to reflect the change, so we need to tell anybody who finds
-         * this ordered extent that we've already done all the real work, we
-         * just haven't completed all the other work.
-         */
-        if (ordered)
-                set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
-        spin_unlock_irq(&tree->lock);
-        return ret;
-}
-
-/*
  * search the ordered extents for one corresponding to 'offset' and
  * try to find a checksum.  This is used because we allow pages to
  * be reclaimed before their checksum is actually put into the btree
  */
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
-                           u32 *sum, int len)
+int btrfs_find_ordered_sum(struct btrfs_inode *inode, u64 offset,
+                           u64 disk_bytenr, u8 *sum, int len)
 {
+        struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct btrfs_ordered_sum *ordered_sum;
         struct btrfs_ordered_extent *ordered;
-        struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
+        struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
         unsigned long num_sectors;
         unsigned long i;
         u32 sectorsize = btrfs_inode_sectorsize(inode);
+        const u8 blocksize_bits = inode->vfs_inode.i_sb->s_blocksize_bits;
+        const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
         int index = 0;
 
         ordered = btrfs_lookup_ordered_extent(inode, offset);
@@ … @@
         list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
                 if (disk_bytenr >= ordered_sum->bytenr &&
                     disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
-                        i = (disk_bytenr - ordered_sum->bytenr) >>
-                            inode->i_sb->s_blocksize_bits;
-                        num_sectors = ordered_sum->len >>
-                                      inode->i_sb->s_blocksize_bits;
+                        i = (disk_bytenr - ordered_sum->bytenr) >> blocksize_bits;
+                        num_sectors = ordered_sum->len >> blocksize_bits;
                         num_sectors = min_t(int, len - index, num_sectors - i);
-                        memcpy(sum + index, ordered_sum->sums + i,
-                               num_sectors);
+                        memcpy(sum + index, ordered_sum->sums + i * csum_size,
+                               num_sectors * csum_size);
 
-                        index += (int)num_sectors;
+                        index += (int)num_sectors * csum_size;
                         if (index == len)
                                 goto out;
                         disk_bytenr += num_sectors * sectorsize;
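
With checksums now copied as raw bytes (u8 *sum) instead of u32s, every
source offset and copy length scales by csum_size, the per-sector checksum
width reported by the superblock. The indexing, reduced to its core:

    /* sums is a packed array: one csum_size-byte checksum per sector */
    memcpy(sum + index,
           ordered_sum->sums + i * csum_size,
           num_sectors * csum_size);
    index += num_sectors * csum_size;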
@@ … @@
         return index;
 }
 
+/*
+ * btrfs_lock_and_flush_ordered_range - Lock the passed range and ensure all
+ * pending ordered extents in it are run to completion.
+ *
+ * @inode:        Inode whose ordered tree is to be searched
+ * @start:        Beginning of range to flush
+ * @end:          Last byte of range to lock
+ * @cached_state: If passed, will return the extent state responsible for the
+ *                locked range. It's the caller's responsibility to free the
+ *                cached state.
+ *
+ * This function always returns with the given range locked, ensuring that
+ * after it's called no ordered extent can be pending.
+ */
+void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
+                                        u64 end,
+                                        struct extent_state **cached_state)
+{
+        struct btrfs_ordered_extent *ordered;
+        struct extent_state *cache = NULL;
+        struct extent_state **cachedp = &cache;
+
+        if (cached_state)
+                cachedp = cached_state;
+
+        while (1) {
+                lock_extent_bits(&inode->io_tree, start, end, cachedp);
+                ordered = btrfs_lookup_ordered_range(inode, start,
+                                                     end - start + 1);
+                if (!ordered) {
+                        /*
+                         * If no external cached_state has been passed then
+                         * decrement the extra ref taken for cachedp since we
+                         * aren't exposing it outside of this function
+                         */
+                        if (!cached_state)
+                                refcount_dec(&cache->refs);
+                        break;
+                }
+                unlock_extent_cached(&inode->io_tree, start, end, cachedp);
+                btrfs_start_ordered_extent(ordered, 1);
+                btrfs_put_ordered_extent(ordered);
+        }
+}
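
btrfs_lock_and_flush_ordered_range() is an instance of the classic
lock-probe-unlock-wait-retry loop: the caller cannot sleep on an ordered
extent while holding the extent lock, so the lock is dropped before waiting
and retaken on the next iteration. A self-contained sketch of the shape;
lock_range() and friends are hypothetical stand-ins, not btrfs APIs:

    #include <stdbool.h>
    #include <stdint.h>

    extern void lock_range(uint64_t start, uint64_t end);
    extern void unlock_range(uint64_t start, uint64_t end);
    extern bool pending_work_in(uint64_t start, uint64_t end);
    extern void wait_for_pending_work(uint64_t start, uint64_t end);

    static void lock_and_flush(uint64_t start, uint64_t end)
    {
            for (;;) {
                    lock_range(start, end);
                    if (!pending_work_in(start, end))
                            return;         /* locked, nothing pending */
                    unlock_range(start, end);
                    wait_for_pending_work(start, end);      /* may sleep */
            }
    }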
+
 int __init ordered_data_init(void)
 {
         btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
---|