.. | ..
7 | 7 | #include <linux/slab.h>
8 | 8 | #include <linux/iversion.h>
9 | 9 | #include <linux/sched/mm.h>
| 10 | +#include "misc.h"
10 | 11 | #include "delayed-inode.h"
11 | 12 | #include "disk-io.h"
12 | 13 | #include "transaction.h"
13 | 14 | #include "ctree.h"
14 | 15 | #include "qgroup.h"
| 16 | +#include "locking.h"
15 | 17 |
16 | 18 | #define BTRFS_DELAYED_WRITEBACK 512
17 | 19 | #define BTRFS_DELAYED_BACKGROUND 128
.. | ..
43 | 45 | delayed_node->root = root;
44 | 46 | delayed_node->inode_id = inode_id;
45 | 47 | refcount_set(&delayed_node->refs, 0);
46 | | - delayed_node->ins_root = RB_ROOT;
47 | | - delayed_node->del_root = RB_ROOT;
| 48 | + delayed_node->ins_root = RB_ROOT_CACHED;
| 49 | + delayed_node->del_root = RB_ROOT_CACHED;
48 | 50 | mutex_init(&delayed_node->mutex);
49 | 51 | INIT_LIST_HEAD(&delayed_node->n_list);
50 | 52 | INIT_LIST_HEAD(&delayed_node->p_list);
.. | ..
391 | 393 | struct btrfs_delayed_node *delayed_node,
392 | 394 | struct btrfs_key *key)
393 | 395 | {
394 | | - return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
| 396 | + return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
395 | 397 | NULL, NULL);
396 | 398 | }
397 | 399 |
.. | ..
401 | 403 | {
402 | 404 | struct rb_node **p, *node;
403 | 405 | struct rb_node *parent_node = NULL;
404 | | - struct rb_root *root;
| 406 | + struct rb_root_cached *root;
405 | 407 | struct btrfs_delayed_item *item;
406 | 408 | int cmp;
| 409 | + bool leftmost = true;
407 | 410 |
408 | 411 | if (action == BTRFS_DELAYED_INSERTION_ITEM)
409 | 412 | root = &delayed_node->ins_root;
.. | ..
411 | 414 | root = &delayed_node->del_root;
412 | 415 | else
413 | 416 | BUG();
414 | | - p = &root->rb_node;
| 417 | + p = &root->rb_root.rb_node;
415 | 418 | node = &ins->rb_node;
416 | 419 |
417 | 420 | while (*p) {
.. | ..
420 | 423 | rb_node);
421 | 424 |
422 | 425 | cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
423 | | - if (cmp < 0)
| 426 | + if (cmp < 0) {
424 | 427 | p = &(*p)->rb_right;
425 | | - else if (cmp > 0)
| 428 | + leftmost = false;
| 429 | + } else if (cmp > 0) {
426 | 430 | p = &(*p)->rb_left;
427 | | - else
| 431 | + } else {
428 | 432 | return -EEXIST;
| 433 | + }
429 | 434 | }
430 | 435 |
431 | 436 | rb_link_node(node, parent_node, p);
432 | | - rb_insert_color(node, root);
| 437 | + rb_insert_color_cached(node, root, leftmost);
433 | 438 | ins->delayed_node = delayed_node;
434 | 439 | ins->ins_or_del = action;
435 | 440 |
.. | ..
469 | 474 |
470 | 475 | static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
471 | 476 | {
472 | | - struct rb_root *root;
| 477 | + struct rb_root_cached *root;
473 | 478 | struct btrfs_delayed_root *delayed_root;
474 | 479 |
| 480 | + /* Not associated with any delayed_node */
| 481 | + if (!delayed_item->delayed_node)
| 482 | + return;
475 | 483 | delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
476 | 484 |
477 | 485 | BUG_ON(!delayed_root);
.. | ..
483 | 491 | else
484 | 492 | root = &delayed_item->delayed_node->del_root;
485 | 493 |
486 | | - rb_erase(&delayed_item->rb_node, root);
| 494 | + rb_erase_cached(&delayed_item->rb_node, root);
487 | 495 | delayed_item->delayed_node->count--;
488 | 496 |
489 | 497 | finish_one_item(delayed_root);
.. | ..
504 | 512 | struct rb_node *p;
505 | 513 | struct btrfs_delayed_item *item = NULL;
506 | 514 |
507 | | - p = rb_first(&delayed_node->ins_root);
| 515 | + p = rb_first_cached(&delayed_node->ins_root);
508 | 516 | if (p)
509 | 517 | item = rb_entry(p, struct btrfs_delayed_item, rb_node);
510 | 518 |
.. | ..
517 | 525 | struct rb_node *p;
518 | 526 | struct btrfs_delayed_item *item = NULL;
519 | 527 |
520 | | - p = rb_first(&delayed_node->del_root);
| 528 | + p = rb_first_cached(&delayed_node->del_root);
521 | 529 | if (p)
522 | 530 | item = rb_entry(p, struct btrfs_delayed_item, rb_node);
523 | 531 |
.. | ..
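Side note on the rb_root_cached conversion in the hunks above: a cached rbtree keeps an extra pointer to its leftmost node, so rb_first_cached() becomes a constant-time read, which is why the insertion loop now has to track whether the new node ever descended to the right. The sketch below is not part of the patch; it is a minimal, self-contained illustration of the same pattern using a hypothetical demo_item type.

#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>

struct demo_item {
	u64 key;
	struct rb_node rb_node;
};

static struct rb_root_cached demo_root = RB_ROOT_CACHED;

/* Insert @ins keyed by ->key, tracking whether it becomes the leftmost node. */
static int demo_insert(struct demo_item *ins)
{
	struct rb_node **p = &demo_root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*p) {
		struct demo_item *item;

		parent = *p;
		item = rb_entry(parent, struct demo_item, rb_node);
		if (item->key < ins->key) {
			p = &(*p)->rb_right;
			leftmost = false;	/* went right at least once */
		} else if (item->key > ins->key) {
			p = &(*p)->rb_left;
		} else {
			return -EEXIST;
		}
	}
	rb_link_node(&ins->rb_node, parent, p);
	rb_insert_color_cached(&ins->rb_node, &demo_root, leftmost);
	return 0;
}

/* rb_first_cached() is a constant-time read of the cached leftmost node. */
static struct demo_item *demo_first(void)
{
	struct rb_node *node = rb_first_cached(&demo_root);

	return node ? rb_entry(node, struct demo_item, rb_node) : NULL;
}

The extra cost is the leftmost bookkeeping on every insert and erase; the gain is O(1) access to the smallest key, which this file reads each time it picks the next delayed item to process.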
553 | 561 | src_rsv = trans->block_rsv;
554 | 562 | dst_rsv = &fs_info->delayed_block_rsv;
555 | 563 |
556 | | - num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
| 564 | + num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
557 | 565 |
558 | 566 | /*
559 | 567 | * Here we migrate space rsv from transaction rsv, since have already
560 | 568 | * reserved space when starting a transaction. So no need to reserve
561 | 569 | * qgroup space here.
562 | 570 | */
563 | | - ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
| 571 | + ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
564 | 572 | if (!ret) {
565 | 573 | trace_btrfs_space_reservation(fs_info, "delayed_item",
566 | 574 | item->key.objectid,
.. | ..
588 | 596 | trace_btrfs_space_reservation(fs_info, "delayed_item",
589 | 597 | item->key.objectid, item->bytes_reserved,
590 | 598 | 0);
591 | | - btrfs_block_rsv_release(fs_info, rsv,
592 | | - item->bytes_reserved);
| 599 | + btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
593 | 600 | }
594 | 601 |
595 | 602 | static int btrfs_delayed_inode_reserve_metadata(
.. | ..
607 | 614 | src_rsv = trans->block_rsv;
608 | 615 | dst_rsv = &fs_info->delayed_block_rsv;
609 | 616 |
610 | | - num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
| 617 | + num_bytes = btrfs_calc_metadata_size(fs_info, 1);
611 | 618 |
612 | 619 | /*
613 | 620 | * btrfs_dirty_inode will update the inode under btrfs_join_transaction
.. | ..
620 | 627 | */
621 | 628 | if (!src_rsv || (!trans->bytes_reserved &&
622 | 629 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
623 | | - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
| 630 | + ret = btrfs_qgroup_reserve_meta(root, num_bytes,
| 631 | + BTRFS_QGROUP_RSV_META_PREALLOC, true);
624 | 632 | if (ret < 0)
625 | 633 | return ret;
626 | 634 | ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
.. | ..
647 | 655 | return ret;
648 | 656 | }
649 | 657 |
650 | | - ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
| 658 | + ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
651 | 659 | if (!ret) {
652 | 660 | trace_btrfs_space_reservation(fs_info, "delayed_inode",
653 | 661 | btrfs_ino(inode), num_bytes, 1);
.. | ..
669 | 677 | rsv = &fs_info->delayed_block_rsv;
670 | 678 | trace_btrfs_space_reservation(fs_info, "delayed_inode",
671 | 679 | node->inode_id, node->bytes_reserved, 0);
672 | | - btrfs_block_rsv_release(fs_info, rsv,
673 | | - node->bytes_reserved);
| 680 | + btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
674 | 681 | if (qgroup_free)
675 | 682 | btrfs_qgroup_free_meta_prealloc(node->root,
676 | 683 | node->bytes_reserved);
.. | ..
688 | 695 | struct btrfs_path *path,
689 | 696 | struct btrfs_delayed_item *item)
690 | 697 | {
691 | | - struct btrfs_fs_info *fs_info = root->fs_info;
692 | 698 | struct btrfs_delayed_item *curr, *next;
693 | 699 | int free_space;
694 | 700 | int total_data_size = 0, total_size = 0;
.. | ..
705 | 711 | BUG_ON(!path->nodes[0]);
706 | 712 |
707 | 713 | leaf = path->nodes[0];
708 | | - free_space = btrfs_leaf_free_space(fs_info, leaf);
| 714 | + free_space = btrfs_leaf_free_space(leaf);
709 | 715 | INIT_LIST_HEAD(&head);
710 | 716 |
711 | 717 | next = item;
.. | ..
762 | 768 | i++;
763 | 769 | }
764 | 770 |
765 | | - /* reset all the locked nodes in the patch to spinning locks. */
766 | | - btrfs_clear_path_blocking(path, NULL, 0);
767 | | -
768 | 771 | /* insert the keys of the items */
769 | | - setup_items_for_insert(root, path, keys, data_size,
770 | | - total_data_size, total_size, nitems);
| 772 | + setup_items_for_insert(root, path, keys, data_size, nitems);
771 | 773 |
772 | 774 | /* insert the dir index items */
773 | 775 | slot = path->slots[0];
.. | ..
1153 | 1155 | int ret = 0;
1154 | 1156 | bool count = (nr > 0);
1155 | 1157 |
1156 | | - if (trans->aborted)
| 1158 | + if (TRANS_ABORTED(trans))
1157 | 1159 | return -EIO;
1158 | 1160 |
1159 | 1161 | path = btrfs_alloc_path();
.. | ..
1171 | 1173 | ret = __btrfs_commit_inode_delayed_items(trans, path,
1172 | 1174 | curr_node);
1173 | 1175 | if (ret) {
1174 | | - btrfs_release_delayed_node(curr_node);
1175 | | - curr_node = NULL;
1176 | 1176 | btrfs_abort_transaction(trans, ret);
1177 | 1177 | break;
1178 | 1178 | }
1179 | 1179 |
1180 | 1180 | prev_node = curr_node;
1181 | 1181 | curr_node = btrfs_next_delayed_node(curr_node);
| 1182 | + /*
| 1183 | + * See the comment below about releasing path before releasing
| 1184 | + * node. If the commit of delayed items was successful the path
| 1185 | + * should always be released, but in case of an error, it may
| 1186 | + * point to locked extent buffers (a leaf at the very least).
| 1187 | + */
| 1188 | + ASSERT(path->nodes[0] == NULL);
1182 | 1189 | btrfs_release_delayed_node(prev_node);
1183 | 1190 | }
1184 | 1191 |
| 1192 | + /*
| 1193 | + * Release the path to avoid a potential deadlock and lockdep splat when
| 1194 | + * releasing the delayed node, as that requires taking the delayed node's
| 1195 | + * mutex. If another task starts running delayed items before we take
| 1196 | + * the mutex, it will first lock the mutex and then it may try to lock
| 1197 | + * the same btree path (leaf).
| 1198 | + */
| 1199 | + btrfs_free_path(path);
| 1200 | +
1185 | 1201 | if (curr_node)
1186 | 1202 | btrfs_release_delayed_node(curr_node);
1187 | | - btrfs_free_path(path);
1188 | 1203 | trans->block_rsv = block_rsv;
1189 | 1204 |
1190 | 1205 | return ret;
.. | ..
1382 | 1397 | return -ENOMEM;
1383 | 1398 |
1384 | 1399 | async_work->delayed_root = delayed_root;
1385 | | - btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1386 | | - btrfs_async_run_delayed_root, NULL, NULL);
| 1400 | + btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
| 1401 | + NULL);
1387 | 1402 | async_work->nr = nr;
1388 | 1403 |
1389 | 1404 | btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
.. | ..
1480 | 1495 | if (unlikely(ret)) {
1481 | 1496 | btrfs_err(trans->fs_info,
1482 | 1497 | "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1483 | | - name_len, name, delayed_node->root->objectid,
| 1498 | + name_len, name, delayed_node->root->root_key.objectid,
1484 | 1499 | delayed_node->inode_id, ret);
1485 | 1500 | BUG();
1486 | 1501 | }
.. | ..
1544 | 1559 | * we have reserved enough space when we start a new transaction,
1545 | 1560 | * so reserving metadata failure is impossible.
1546 | 1561 | */
1547 | | - BUG_ON(ret);
| 1562 | + if (ret < 0) {
| 1563 | + btrfs_err(trans->fs_info,
| 1564 | +"metadata reservation failed for delayed dir item deletion, should have been reserved");
| 1565 | + btrfs_release_delayed_item(item);
| 1566 | + goto end;
| 1567 | + }
1548 | 1568 |
1549 | 1569 | mutex_lock(&node->mutex);
1550 | 1570 | ret = __btrfs_add_delayed_deletion_item(node, item);
1551 | 1571 | if (unlikely(ret)) {
1552 | 1572 | btrfs_err(trans->fs_info,
1553 | 1573 | "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1554 | | - index, node->root->objectid, node->inode_id, ret);
1555 | | - BUG();
| 1574 | + index, node->root->root_key.objectid,
| 1575 | + node->inode_id, ret);
| 1576 | + btrfs_delayed_item_release_metadata(dir->root, item);
| 1577 | + btrfs_release_delayed_item(item);
1556 | 1578 | }
1557 | 1579 | mutex_unlock(&node->mutex);
1558 | 1580 | end:
.. | ..
1709 | 1731 | name = (char *)(di + 1);
1710 | 1732 | name_len = btrfs_stack_dir_name_len(di);
1711 | 1733 |
1712 | | - d_type = btrfs_filetype_table[di->type];
| 1734 | + d_type = fs_ftype_to_dtype(di->type);
1713 | 1735 | btrfs_disk_key_to_cpu(&location, &di->location);
1714 | 1736 |
1715 | 1737 | over = !dir_emit(ctx, name, name_len,
.. | ..
1767 | 1789 |
1768 | 1790 | int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1769 | 1791 | {
| 1792 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1770 | 1793 | struct btrfs_delayed_node *delayed_node;
1771 | 1794 | struct btrfs_inode_item *inode_item;
1772 | 1795 |
.. | ..
1786 | 1809 | i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1787 | 1810 | i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1788 | 1811 | btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
| 1812 | + btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
| 1813 | + round_up(i_size_read(inode), fs_info->sectorsize));
1789 | 1814 | inode->i_mode = btrfs_stack_inode_mode(inode_item);
1790 | 1815 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1791 | 1816 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));