| .. | .. |
|---|
| 13 | 13 | #include <linux/posix_acl.h> |
|---|
| 14 | 14 | #include <linux/random.h> |
|---|
| 15 | 15 | #include <linux/sort.h> |
|---|
| 16 | +#include <linux/iversion.h> |
|---|
| 16 | 17 | |
|---|
| 17 | 18 | #include "super.h" |
|---|
| 18 | 19 | #include "mds_client.h" |
|---|
| .. | .. |
|---|
| 33 | 34 | |
|---|
| 34 | 35 | static const struct inode_operations ceph_symlink_iops; |
|---|
| 35 | 36 | |
|---|
| 36 | | -static void ceph_invalidate_work(struct work_struct *work); |
|---|
| 37 | | -static void ceph_writeback_work(struct work_struct *work); |
|---|
| 38 | | -static void ceph_vmtruncate_work(struct work_struct *work); |
|---|
| 37 | +static void ceph_inode_work(struct work_struct *work); |
|---|
| 39 | 38 | |
|---|
| 40 | 39 | /* |
|---|
| 41 | 40 | * find or create an inode, given the ceph ino number |
|---|
| 42 | 41 | */ |
|---|
| 43 | 42 | static int ceph_set_ino_cb(struct inode *inode, void *data) |
|---|
| 44 | 43 | { |
|---|
| 45 | | - ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; |
|---|
| 46 | | - inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); |
|---|
| 44 | + struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 45 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); |
|---|
| 46 | + |
|---|
| 47 | + ci->i_vino = *(struct ceph_vino *)data; |
|---|
| 48 | + inode->i_ino = ceph_vino_to_ino_t(ci->i_vino); |
|---|
| 49 | + inode_set_iversion_raw(inode, 0); |
|---|
| 50 | + percpu_counter_inc(&mdsc->metric.total_inodes); |
|---|
| 51 | + |
|---|
| 47 | 52 | return 0; |
|---|
| 48 | 53 | } |
|---|
| 49 | 54 | |
|---|
| 50 | 55 | struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) |
|---|
| 51 | 56 | { |
|---|
| 52 | 57 | struct inode *inode; |
|---|
| 53 | | - ino_t t = ceph_vino_to_ino(vino); |
|---|
| 54 | 58 | |
|---|
| 55 | | - inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); |
|---|
| 59 | + if (ceph_vino_is_reserved(vino)) |
|---|
| 60 | + return ERR_PTR(-EREMOTEIO); |
|---|
| 61 | + |
|---|
| 62 | + inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare, |
|---|
| 63 | + ceph_set_ino_cb, &vino); |
|---|
| 56 | 64 | if (!inode) |
|---|
| 57 | 65 | return ERR_PTR(-ENOMEM); |
|---|
| 58 | | - if (inode->i_state & I_NEW) { |
|---|
| 59 | | - dout("get_inode created new inode %p %llx.%llx ino %llx\n", |
|---|
| 60 | | - inode, ceph_vinop(inode), (u64)inode->i_ino); |
|---|
| 61 | | - unlock_new_inode(inode); |
|---|
| 62 | | - } |
|---|
| 63 | 66 | |
|---|
| 64 | | - dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, |
|---|
| 65 | | - vino.snap, inode); |
|---|
| 67 | + dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode), |
|---|
| 68 | + ceph_vinop(inode), inode, !!(inode->i_state & I_NEW)); |
|---|
| 66 | 69 | return inode; |
|---|
| 67 | 70 | } |
|---|
| 68 | 71 | |
|---|
| .. | .. |
|---|
| 84 | 87 | inode->i_mode = parent->i_mode; |
|---|
| 85 | 88 | inode->i_uid = parent->i_uid; |
|---|
| 86 | 89 | inode->i_gid = parent->i_gid; |
|---|
| 87 | | - inode->i_op = &ceph_snapdir_iops; |
|---|
| 88 | | - inode->i_fop = &ceph_snapdir_fops; |
|---|
| 89 | | - ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ |
|---|
| 90 | + inode->i_mtime = parent->i_mtime; |
|---|
| 91 | + inode->i_ctime = parent->i_ctime; |
|---|
| 92 | + inode->i_atime = parent->i_atime; |
|---|
| 90 | 93 | ci->i_rbytes = 0; |
|---|
| 94 | + ci->i_btime = ceph_inode(parent)->i_btime; |
|---|
| 95 | + |
|---|
| 96 | + if (inode->i_state & I_NEW) { |
|---|
| 97 | + inode->i_op = &ceph_snapdir_iops; |
|---|
| 98 | + inode->i_fop = &ceph_snapdir_fops; |
|---|
| 99 | + ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ |
|---|
| 100 | + unlock_new_inode(inode); |
|---|
| 101 | + } |
|---|
| 102 | + |
|---|
| 91 | 103 | return inode; |
|---|
| 92 | 104 | } |
|---|
| 93 | 105 | |
|---|
| .. | .. |
|---|
| 445 | 457 | ci->i_max_files = 0; |
|---|
| 446 | 458 | |
|---|
| 447 | 459 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); |
|---|
| 460 | + memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); |
|---|
| 448 | 461 | RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); |
|---|
| 449 | 462 | |
|---|
| 450 | 463 | ci->i_fragtree = RB_ROOT; |
|---|
| .. | .. |
|---|
| 469 | 482 | ci->i_prealloc_cap_flush = NULL; |
|---|
| 470 | 483 | INIT_LIST_HEAD(&ci->i_cap_flush_list); |
|---|
| 471 | 484 | init_waitqueue_head(&ci->i_cap_wq); |
|---|
| 472 | | - ci->i_hold_caps_min = 0; |
|---|
| 473 | 485 | ci->i_hold_caps_max = 0; |
|---|
| 474 | 486 | INIT_LIST_HEAD(&ci->i_cap_delay_list); |
|---|
| 475 | 487 | INIT_LIST_HEAD(&ci->i_cap_snaps); |
|---|
| 476 | 488 | ci->i_head_snapc = NULL; |
|---|
| 477 | 489 | ci->i_snap_caps = 0; |
|---|
| 478 | 490 | |
|---|
| 491 | + ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ; |
|---|
| 479 | 492 | for (i = 0; i < CEPH_FILE_MODE_BITS; i++) |
|---|
| 480 | 493 | ci->i_nr_by_mode[i] = 0; |
|---|
| 481 | 494 | |
|---|
| .. | .. |
|---|
| 494 | 507 | ci->i_rdcache_ref = 0; |
|---|
| 495 | 508 | ci->i_wr_ref = 0; |
|---|
| 496 | 509 | ci->i_wb_ref = 0; |
|---|
| 510 | + ci->i_fx_ref = 0; |
|---|
| 497 | 511 | ci->i_wrbuffer_ref = 0; |
|---|
| 498 | 512 | ci->i_wrbuffer_ref_head = 0; |
|---|
| 499 | 513 | atomic_set(&ci->i_filelock_ref, 0); |
|---|
| 500 | | - atomic_set(&ci->i_shared_gen, 0); |
|---|
| 514 | + atomic_set(&ci->i_shared_gen, 1); |
|---|
| 501 | 515 | ci->i_rdcache_gen = 0; |
|---|
| 502 | 516 | ci->i_rdcache_revoking = 0; |
|---|
| 503 | 517 | |
|---|
| .. | .. |
|---|
| 509 | 523 | INIT_LIST_HEAD(&ci->i_snap_realm_item); |
|---|
| 510 | 524 | INIT_LIST_HEAD(&ci->i_snap_flush_item); |
|---|
| 511 | 525 | |
|---|
| 512 | | - INIT_WORK(&ci->i_wb_work, ceph_writeback_work); |
|---|
| 513 | | - INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work); |
|---|
| 514 | | - |
|---|
| 515 | | - INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work); |
|---|
| 526 | + INIT_WORK(&ci->i_work, ceph_inode_work); |
|---|
| 527 | + ci->i_work_mask = 0; |
|---|
| 528 | + memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); |
|---|
| 516 | 529 | |
|---|
| 517 | 530 | ceph_fscache_inode_init(ci); |
|---|
| 518 | 531 | |
|---|
| 519 | 532 | return &ci->vfs_inode; |
|---|
| 520 | 533 | } |
|---|
| 521 | 534 | |
|---|
| 522 | | -static void ceph_i_callback(struct rcu_head *head) |
|---|
| 535 | +void ceph_free_inode(struct inode *inode) |
|---|
| 523 | 536 | { |
|---|
| 524 | | - struct inode *inode = container_of(head, struct inode, i_rcu); |
|---|
| 525 | 537 | struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 526 | 538 | |
|---|
| 527 | 539 | kfree(ci->i_symlink); |
|---|
| .. | .. |
|---|
| 531 | 543 | void ceph_evict_inode(struct inode *inode) |
|---|
| 532 | 544 | { |
|---|
| 533 | 545 | struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 546 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); |
|---|
| 534 | 547 | struct ceph_inode_frag *frag; |
|---|
| 535 | 548 | struct rb_node *n; |
|---|
| 536 | 549 | |
|---|
| 537 | 550 | dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); |
|---|
| 551 | + |
|---|
| 552 | + percpu_counter_dec(&mdsc->metric.total_inodes); |
|---|
| 538 | 553 | |
|---|
| 539 | 554 | truncate_inode_pages_final(&inode->i_data); |
|---|
| 540 | 555 | clear_inode(inode); |
|---|
| 541 | 556 | |
|---|
| 542 | 557 | ceph_fscache_unregister_inode_cookie(ci); |
|---|
| 543 | 558 | |
|---|
| 544 | | - ceph_queue_caps_release(inode); |
|---|
| 559 | + __ceph_remove_caps(ci); |
|---|
| 545 | 560 | |
|---|
| 546 | 561 | if (__ceph_has_any_quota(ci)) |
|---|
| 547 | 562 | ceph_adjust_quota_realms_count(inode, false); |
|---|
| .. | .. |
|---|
| 551 | 566 | * caps in i_snap_caps. |
|---|
| 552 | 567 | */ |
|---|
| 553 | 568 | if (ci->i_snap_realm) { |
|---|
| 554 | | - struct ceph_mds_client *mdsc = |
|---|
| 555 | | - ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
|---|
| 556 | | - struct ceph_snap_realm *realm = ci->i_snap_realm; |
|---|
| 557 | | - |
|---|
| 558 | | - dout(" dropping residual ref to snap realm %p\n", realm); |
|---|
| 559 | | - spin_lock(&realm->inodes_with_caps_lock); |
|---|
| 560 | | - list_del_init(&ci->i_snap_realm_item); |
|---|
| 561 | | - ci->i_snap_realm = NULL; |
|---|
| 562 | | - if (realm->ino == ci->i_vino.ino) |
|---|
| 563 | | - realm->inode = NULL; |
|---|
| 564 | | - spin_unlock(&realm->inodes_with_caps_lock); |
|---|
| 565 | | - ceph_put_snap_realm(mdsc, realm); |
|---|
| 569 | + if (ceph_snap(inode) == CEPH_NOSNAP) { |
|---|
| 570 | + struct ceph_snap_realm *realm = ci->i_snap_realm; |
|---|
| 571 | + dout(" dropping residual ref to snap realm %p\n", |
|---|
| 572 | + realm); |
|---|
| 573 | + spin_lock(&realm->inodes_with_caps_lock); |
|---|
| 574 | + list_del_init(&ci->i_snap_realm_item); |
|---|
| 575 | + ci->i_snap_realm = NULL; |
|---|
| 576 | + if (realm->ino == ci->i_vino.ino) |
|---|
| 577 | + realm->inode = NULL; |
|---|
| 578 | + spin_unlock(&realm->inodes_with_caps_lock); |
|---|
| 579 | + ceph_put_snap_realm(mdsc, realm); |
|---|
| 580 | + } else { |
|---|
| 581 | + ceph_put_snapid_map(mdsc, ci->i_snapid_map); |
|---|
| 582 | + ci->i_snap_realm = NULL; |
|---|
| 583 | + } |
|---|
| 566 | 584 | } |
|---|
| 567 | 585 | |
|---|
| 568 | 586 | while ((n = rb_first(&ci->i_fragtree)) != NULL) { |
|---|
| .. | .. |
|---|
| 579 | 597 | ceph_buffer_put(ci->i_xattrs.prealloc_blob); |
|---|
| 580 | 598 | |
|---|
| 581 | 599 | ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns)); |
|---|
| 582 | | -} |
|---|
| 583 | | - |
|---|
| 584 | | -void ceph_destroy_inode(struct inode *inode) |
|---|
| 585 | | -{ |
|---|
| 586 | | - call_rcu(&inode->i_rcu, ceph_i_callback); |
|---|
| 587 | | -} |
|---|
| 588 | | - |
|---|
| 589 | | -int ceph_drop_inode(struct inode *inode) |
|---|
| 590 | | -{ |
|---|
| 591 | | - /* |
|---|
| 592 | | - * Positve dentry and corresponding inode are always accompanied |
|---|
| 593 | | - * in MDS reply. So no need to keep inode in the cache after |
|---|
| 594 | | - * dropping all its aliases. |
|---|
| 595 | | - */ |
|---|
| 596 | | - return 1; |
|---|
| 600 | + ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns)); |
|---|
| 597 | 601 | } |
|---|
| 598 | 602 | |
|---|
| 599 | 603 | static inline blkcnt_t calc_inode_blocks(u64 size) |
|---|
| .. | .. |
|---|
| 644 | 648 | if ((issued & (CEPH_CAP_FILE_CACHE| |
|---|
| 645 | 649 | CEPH_CAP_FILE_BUFFER)) || |
|---|
| 646 | 650 | mapping_mapped(inode->i_mapping) || |
|---|
| 647 | | - __ceph_caps_file_wanted(ci)) { |
|---|
| 651 | + __ceph_is_file_opened(ci)) { |
|---|
| 648 | 652 | ci->i_truncate_pending++; |
|---|
| 649 | 653 | queue_trunc = 1; |
|---|
| 650 | 654 | } |
|---|
| .. | .. |
|---|
| 735 | 739 | * Populate an inode based on info from mds. May be called on new or |
|---|
| 736 | 740 | * existing inodes. |
|---|
| 737 | 741 | */ |
|---|
| 738 | | -static int fill_inode(struct inode *inode, struct page *locked_page, |
|---|
| 739 | | - struct ceph_mds_reply_info_in *iinfo, |
|---|
| 740 | | - struct ceph_mds_reply_dirfrag *dirinfo, |
|---|
| 741 | | - struct ceph_mds_session *session, |
|---|
| 742 | | - unsigned long ttl_from, int cap_fmode, |
|---|
| 743 | | - struct ceph_cap_reservation *caps_reservation) |
|---|
| 742 | +int ceph_fill_inode(struct inode *inode, struct page *locked_page, |
|---|
| 743 | + struct ceph_mds_reply_info_in *iinfo, |
|---|
| 744 | + struct ceph_mds_reply_dirfrag *dirinfo, |
|---|
| 745 | + struct ceph_mds_session *session, int cap_fmode, |
|---|
| 746 | + struct ceph_cap_reservation *caps_reservation) |
|---|
| 744 | 747 | { |
|---|
| 745 | | - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
|---|
| 748 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); |
|---|
| 746 | 749 | struct ceph_mds_reply_inode *info = iinfo->in; |
|---|
| 747 | 750 | struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 748 | 751 | int issued, new_issued, info_caps; |
|---|
| .. | .. |
|---|
| 757 | 760 | bool new_version = false; |
|---|
| 758 | 761 | bool fill_inline = false; |
|---|
| 759 | 762 | |
|---|
| 760 | | - dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", |
|---|
| 763 | + lockdep_assert_held(&mdsc->snap_rwsem); |
|---|
| 764 | + |
|---|
| 765 | + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, |
|---|
| 761 | 766 | inode, ceph_vinop(inode), le64_to_cpu(info->version), |
|---|
| 762 | 767 | ci->i_version); |
|---|
| 763 | 768 | |
|---|
| .. | .. |
|---|
| 778 | 783 | if (iinfo->xattr_len > 4) { |
|---|
| 779 | 784 | xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); |
|---|
| 780 | 785 | if (!xattr_blob) |
|---|
| 781 | | - pr_err("fill_inode ENOMEM xattr blob %d bytes\n", |
|---|
| 786 | + pr_err("%s ENOMEM xattr blob %d bytes\n", __func__, |
|---|
| 782 | 787 | iinfo->xattr_len); |
|---|
| 783 | 788 | } |
|---|
| 784 | 789 | |
|---|
| 785 | 790 | if (iinfo->pool_ns_len > 0) |
|---|
| 786 | 791 | pool_ns = ceph_find_or_create_string(iinfo->pool_ns_data, |
|---|
| 787 | 792 | iinfo->pool_ns_len); |
|---|
| 793 | + |
|---|
| 794 | + if (ceph_snap(inode) != CEPH_NOSNAP && !ci->i_snapid_map) |
|---|
| 795 | + ci->i_snapid_map = ceph_get_snapid_map(mdsc, ceph_snap(inode)); |
|---|
| 788 | 796 | |
|---|
| 789 | 797 | spin_lock(&ci->i_ceph_lock); |
|---|
| 790 | 798 | |
|---|
| .. | .. |
|---|
| 803 | 811 | ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && |
|---|
| 804 | 812 | le64_to_cpu(info->version) > (ci->i_version & ~1))) |
|---|
| 805 | 813 | new_version = true; |
|---|
| 814 | + |
|---|
| 815 | + /* Update change_attribute */ |
|---|
| 816 | + inode_set_max_iversion_raw(inode, iinfo->change_attr); |
|---|
| 806 | 817 | |
|---|
| 807 | 818 | __ceph_caps_issued(ci, &issued); |
|---|
| 808 | 819 | issued |= __ceph_caps_dirty(ci); |
|---|
| .. | .. |
|---|
| 827 | 838 | dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, |
|---|
| 828 | 839 | from_kuid(&init_user_ns, inode->i_uid), |
|---|
| 829 | 840 | from_kgid(&init_user_ns, inode->i_gid)); |
|---|
| 841 | + ceph_decode_timespec64(&ci->i_btime, &iinfo->btime); |
|---|
| 842 | + ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime); |
|---|
| 830 | 843 | } |
|---|
| 831 | 844 | |
|---|
| 832 | 845 | if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && |
|---|
| .. | .. |
|---|
| 884 | 897 | ci->i_rbytes = le64_to_cpu(info->rbytes); |
|---|
| 885 | 898 | ci->i_rfiles = le64_to_cpu(info->rfiles); |
|---|
| 886 | 899 | ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); |
|---|
| 900 | + ci->i_dir_pin = iinfo->dir_pin; |
|---|
| 887 | 901 | ceph_decode_timespec64(&ci->i_rctime, &info->rctime); |
|---|
| 888 | 902 | } |
|---|
| 889 | 903 | } |
|---|
| .. | .. |
|---|
| 900 | 914 | iinfo->xattr_data, iinfo->xattr_len); |
|---|
| 901 | 915 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); |
|---|
| 902 | 916 | ceph_forget_all_cached_acls(inode); |
|---|
| 917 | + ceph_security_invalidate_secctx(inode); |
|---|
| 903 | 918 | xattr_blob = NULL; |
|---|
| 904 | 919 | } |
|---|
| 905 | 920 | |
|---|
| .. | .. |
|---|
| 914 | 929 | case S_IFBLK: |
|---|
| 915 | 930 | case S_IFCHR: |
|---|
| 916 | 931 | case S_IFSOCK: |
|---|
| 932 | + inode->i_blkbits = PAGE_SHIFT; |
|---|
| 917 | 933 | init_special_inode(inode, inode->i_mode, inode->i_rdev); |
|---|
| 918 | 934 | inode->i_op = &ceph_file_iops; |
|---|
| 919 | 935 | break; |
|---|
| .. | .. |
|---|
| 930 | 946 | spin_unlock(&ci->i_ceph_lock); |
|---|
| 931 | 947 | |
|---|
| 932 | 948 | if (symlen != i_size_read(inode)) { |
|---|
| 933 | | - pr_err("fill_inode %llx.%llx BAD symlink " |
|---|
| 934 | | - "size %lld\n", ceph_vinop(inode), |
|---|
| 949 | + pr_err("%s %llx.%llx BAD symlink " |
|---|
| 950 | + "size %lld\n", __func__, |
|---|
| 951 | + ceph_vinop(inode), |
|---|
| 935 | 952 | i_size_read(inode)); |
|---|
| 936 | 953 | i_size_write(inode, symlen); |
|---|
| 937 | 954 | inode->i_blocks = calc_inode_blocks(symlen); |
|---|
| .. | .. |
|---|
| 955 | 972 | inode->i_fop = &ceph_dir_fops; |
|---|
| 956 | 973 | break; |
|---|
| 957 | 974 | default: |
|---|
| 958 | | - pr_err("fill_inode %llx.%llx BAD mode 0%o\n", |
|---|
| 975 | + pr_err("%s %llx.%llx BAD mode 0%o\n", __func__, |
|---|
| 959 | 976 | ceph_vinop(inode), inode->i_mode); |
|---|
| 960 | 977 | } |
|---|
| 961 | 978 | |
|---|
| .. | .. |
|---|
| 964 | 981 | if (ceph_snap(inode) == CEPH_NOSNAP) { |
|---|
| 965 | 982 | ceph_add_cap(inode, session, |
|---|
| 966 | 983 | le64_to_cpu(info->cap.cap_id), |
|---|
| 967 | | - cap_fmode, info_caps, |
|---|
| 984 | + info_caps, |
|---|
| 968 | 985 | le32_to_cpu(info->cap.wanted), |
|---|
| 969 | 986 | le32_to_cpu(info->cap.seq), |
|---|
| 970 | 987 | le32_to_cpu(info->cap.mseq), |
|---|
| .. | .. |
|---|
| 989 | 1006 | dout(" %p got snap_caps %s\n", inode, |
|---|
| 990 | 1007 | ceph_cap_string(info_caps)); |
|---|
| 991 | 1008 | ci->i_snap_caps |= info_caps; |
|---|
| 992 | | - if (cap_fmode >= 0) |
|---|
| 993 | | - __ceph_get_fmode(ci, cap_fmode); |
|---|
| 994 | 1009 | } |
|---|
| 995 | | - } else if (cap_fmode >= 0) { |
|---|
| 996 | | - pr_warn("mds issued no caps on %llx.%llx\n", |
|---|
| 997 | | - ceph_vinop(inode)); |
|---|
| 998 | | - __ceph_get_fmode(ci, cap_fmode); |
|---|
| 999 | 1010 | } |
|---|
| 1000 | 1011 | |
|---|
| 1001 | 1012 | if (iinfo->inline_version > 0 && |
|---|
| .. | .. |
|---|
| 1005 | 1016 | if (ci->i_inline_version != CEPH_INLINE_NONE && |
|---|
| 1006 | 1017 | (locked_page || (info_caps & cache_caps))) |
|---|
| 1007 | 1018 | fill_inline = true; |
|---|
| 1019 | + } |
|---|
| 1020 | + |
|---|
| 1021 | + if (cap_fmode >= 0) { |
|---|
| 1022 | + if (!info_caps) |
|---|
| 1023 | + pr_warn("mds issued no caps on %llx.%llx\n", |
|---|
| 1024 | + ceph_vinop(inode)); |
|---|
| 1025 | + __ceph_touch_fmode(ci, mdsc, cap_fmode); |
|---|
| 1008 | 1026 | } |
|---|
| 1009 | 1027 | |
|---|
| 1010 | 1028 | spin_unlock(&ci->i_ceph_lock); |
|---|
| .. | .. |
|---|
| 1039 | 1057 | } |
|---|
| 1040 | 1058 | |
|---|
| 1041 | 1059 | /* |
|---|
| 1042 | | - * caller should hold session s_mutex. |
|---|
| 1060 | + * caller should hold session s_mutex and dentry->d_lock. |
|---|
| 1043 | 1061 | */ |
|---|
| 1044 | | -static void update_dentry_lease(struct dentry *dentry, |
|---|
| 1045 | | - struct ceph_mds_reply_lease *lease, |
|---|
| 1046 | | - struct ceph_mds_session *session, |
|---|
| 1047 | | - unsigned long from_time, |
|---|
| 1048 | | - struct ceph_vino *tgt_vino, |
|---|
| 1049 | | - struct ceph_vino *dir_vino) |
|---|
| 1062 | +static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, |
|---|
| 1063 | + struct ceph_mds_reply_lease *lease, |
|---|
| 1064 | + struct ceph_mds_session *session, |
|---|
| 1065 | + unsigned long from_time, |
|---|
| 1066 | + struct ceph_mds_session **old_lease_session) |
|---|
| 1050 | 1067 | { |
|---|
| 1051 | 1068 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
|---|
| 1069 | + unsigned mask = le16_to_cpu(lease->mask); |
|---|
| 1052 | 1070 | long unsigned duration = le32_to_cpu(lease->duration_ms); |
|---|
| 1053 | 1071 | long unsigned ttl = from_time + (duration * HZ) / 1000; |
|---|
| 1054 | 1072 | long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; |
|---|
| 1055 | | - struct inode *dir; |
|---|
| 1056 | | - struct ceph_mds_session *old_lease_session = NULL; |
|---|
| 1057 | 1073 | |
|---|
| 1058 | | - /* |
|---|
| 1059 | | - * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that |
|---|
| 1060 | | - * we expect a negative dentry. |
|---|
| 1061 | | - */ |
|---|
| 1062 | | - if (!tgt_vino && d_really_is_positive(dentry)) |
|---|
| 1063 | | - return; |
|---|
| 1064 | | - |
|---|
| 1065 | | - if (tgt_vino && (d_really_is_negative(dentry) || |
|---|
| 1066 | | - !ceph_ino_compare(d_inode(dentry), tgt_vino))) |
|---|
| 1067 | | - return; |
|---|
| 1068 | | - |
|---|
| 1069 | | - spin_lock(&dentry->d_lock); |
|---|
| 1070 | 1074 | dout("update_dentry_lease %p duration %lu ms ttl %lu\n", |
|---|
| 1071 | 1075 | dentry, duration, ttl); |
|---|
| 1072 | 1076 | |
|---|
| 1073 | | - dir = d_inode(dentry->d_parent); |
|---|
| 1074 | | - |
|---|
| 1075 | | - /* make sure parent matches dir_vino */ |
|---|
| 1076 | | - if (!ceph_ino_compare(dir, dir_vino)) |
|---|
| 1077 | | - goto out_unlock; |
|---|
| 1078 | | - |
|---|
| 1079 | 1077 | /* only track leases on regular dentries */ |
|---|
| 1080 | 1078 | if (ceph_snap(dir) != CEPH_NOSNAP) |
|---|
| 1081 | | - goto out_unlock; |
|---|
| 1079 | + return; |
|---|
| 1080 | + |
|---|
| 1081 | + if (mask & CEPH_LEASE_PRIMARY_LINK) |
|---|
| 1082 | + di->flags |= CEPH_DENTRY_PRIMARY_LINK; |
|---|
| 1083 | + else |
|---|
| 1084 | + di->flags &= ~CEPH_DENTRY_PRIMARY_LINK; |
|---|
| 1082 | 1085 | |
|---|
| 1083 | 1086 | di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); |
|---|
| 1084 | | - |
|---|
| 1085 | | - if (duration == 0) |
|---|
| 1086 | | - goto out_unlock; |
|---|
| 1087 | + if (!(mask & CEPH_LEASE_VALID)) { |
|---|
| 1088 | + __ceph_dentry_dir_lease_touch(di); |
|---|
| 1089 | + return; |
|---|
| 1090 | + } |
|---|
| 1087 | 1091 | |
|---|
| 1088 | 1092 | if (di->lease_gen == session->s_cap_gen && |
|---|
| 1089 | 1093 | time_before(ttl, di->time)) |
|---|
| 1090 | | - goto out_unlock; /* we already have a newer lease. */ |
|---|
| 1094 | + return; /* we already have a newer lease. */ |
|---|
| 1091 | 1095 | |
|---|
| 1092 | 1096 | if (di->lease_session && di->lease_session != session) { |
|---|
| 1093 | | - old_lease_session = di->lease_session; |
|---|
| 1097 | + *old_lease_session = di->lease_session; |
|---|
| 1094 | 1098 | di->lease_session = NULL; |
|---|
| 1095 | 1099 | } |
|---|
| 1096 | | - |
|---|
| 1097 | | - ceph_dentry_lru_touch(dentry); |
|---|
| 1098 | 1100 | |
|---|
| 1099 | 1101 | if (!di->lease_session) |
|---|
| 1100 | 1102 | di->lease_session = ceph_get_mds_session(session); |
|---|
| .. | .. |
|---|
| 1103 | 1105 | di->lease_renew_after = half_ttl; |
|---|
| 1104 | 1106 | di->lease_renew_from = 0; |
|---|
| 1105 | 1107 | di->time = ttl; |
|---|
| 1108 | + |
|---|
| 1109 | + __ceph_dentry_lease_touch(di); |
|---|
| 1110 | +} |
|---|
| 1111 | + |
|---|
| 1112 | +static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, |
|---|
| 1113 | + struct ceph_mds_reply_lease *lease, |
|---|
| 1114 | + struct ceph_mds_session *session, |
|---|
| 1115 | + unsigned long from_time) |
|---|
| 1116 | +{ |
|---|
| 1117 | + struct ceph_mds_session *old_lease_session = NULL; |
|---|
| 1118 | + spin_lock(&dentry->d_lock); |
|---|
| 1119 | + __update_dentry_lease(dir, dentry, lease, session, from_time, |
|---|
| 1120 | + &old_lease_session); |
|---|
| 1121 | + spin_unlock(&dentry->d_lock); |
|---|
| 1122 | + ceph_put_mds_session(old_lease_session); |
|---|
| 1123 | +} |
|---|
| 1124 | + |
|---|
| 1125 | +/* |
|---|
| 1126 | + * update dentry lease without having parent inode locked |
|---|
| 1127 | + */ |
|---|
| 1128 | +static void update_dentry_lease_careful(struct dentry *dentry, |
|---|
| 1129 | + struct ceph_mds_reply_lease *lease, |
|---|
| 1130 | + struct ceph_mds_session *session, |
|---|
| 1131 | + unsigned long from_time, |
|---|
| 1132 | + char *dname, u32 dname_len, |
|---|
| 1133 | + struct ceph_vino *pdvino, |
|---|
| 1134 | + struct ceph_vino *ptvino) |
|---|
| 1135 | + |
|---|
| 1136 | +{ |
|---|
| 1137 | + struct inode *dir; |
|---|
| 1138 | + struct ceph_mds_session *old_lease_session = NULL; |
|---|
| 1139 | + |
|---|
| 1140 | + spin_lock(&dentry->d_lock); |
|---|
| 1141 | + /* make sure dentry's name matches target */ |
|---|
| 1142 | + if (dentry->d_name.len != dname_len || |
|---|
| 1143 | + memcmp(dentry->d_name.name, dname, dname_len)) |
|---|
| 1144 | + goto out_unlock; |
|---|
| 1145 | + |
|---|
| 1146 | + dir = d_inode(dentry->d_parent); |
|---|
| 1147 | + /* make sure parent matches dvino */ |
|---|
| 1148 | + if (!ceph_ino_compare(dir, pdvino)) |
|---|
| 1149 | + goto out_unlock; |
|---|
| 1150 | + |
|---|
| 1151 | + /* make sure dentry's inode matches target. NULL ptvino means that |
|---|
| 1152 | + * we expect a negative dentry */ |
|---|
| 1153 | + if (ptvino) { |
|---|
| 1154 | + if (d_really_is_negative(dentry)) |
|---|
| 1155 | + goto out_unlock; |
|---|
| 1156 | + if (!ceph_ino_compare(d_inode(dentry), ptvino)) |
|---|
| 1157 | + goto out_unlock; |
|---|
| 1158 | + } else { |
|---|
| 1159 | + if (d_really_is_positive(dentry)) |
|---|
| 1160 | + goto out_unlock; |
|---|
| 1161 | + } |
|---|
| 1162 | + |
|---|
| 1163 | + __update_dentry_lease(dir, dentry, lease, session, |
|---|
| 1164 | + from_time, &old_lease_session); |
|---|
| 1106 | 1165 | out_unlock: |
|---|
| 1107 | 1166 | spin_unlock(&dentry->d_lock); |
|---|
| 1108 | | - if (old_lease_session) |
|---|
| 1109 | | - ceph_put_mds_session(old_lease_session); |
|---|
| 1167 | + ceph_put_mds_session(old_lease_session); |
|---|
| 1110 | 1168 | } |
|---|
| 1111 | 1169 | |
|---|
| 1112 | 1170 | /* |
|---|
| 1113 | 1171 | * splice a dentry to an inode. |
|---|
| 1114 | 1172 | * caller must hold directory i_mutex for this to be safe. |
|---|
| 1115 | 1173 | */ |
|---|
| 1116 | | -static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) |
|---|
| 1174 | +static int splice_dentry(struct dentry **pdn, struct inode *in) |
|---|
| 1117 | 1175 | { |
|---|
| 1176 | + struct dentry *dn = *pdn; |
|---|
| 1118 | 1177 | struct dentry *realdn; |
|---|
| 1119 | 1178 | |
|---|
| 1120 | 1179 | BUG_ON(d_inode(dn)); |
|---|
| .. | .. |
|---|
| 1147 | 1206 | if (IS_ERR(realdn)) { |
|---|
| 1148 | 1207 | pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", |
|---|
| 1149 | 1208 | PTR_ERR(realdn), dn, in, ceph_vinop(in)); |
|---|
| 1150 | | - dn = realdn; |
|---|
| 1151 | | - /* |
|---|
| 1152 | | - * Caller should release 'dn' in the case of error. |
|---|
| 1153 | | - * If 'req->r_dentry' is passed to this function, |
|---|
| 1154 | | - * caller should leave 'req->r_dentry' untouched. |
|---|
| 1155 | | - */ |
|---|
| 1156 | | - goto out; |
|---|
| 1157 | | - } else if (realdn) { |
|---|
| 1209 | + return PTR_ERR(realdn); |
|---|
| 1210 | + } |
|---|
| 1211 | + |
|---|
| 1212 | + if (realdn) { |
|---|
| 1158 | 1213 | dout("dn %p (%d) spliced with %p (%d) " |
|---|
| 1159 | 1214 | "inode %p ino %llx.%llx\n", |
|---|
| 1160 | 1215 | dn, d_count(dn), |
|---|
| 1161 | 1216 | realdn, d_count(realdn), |
|---|
| 1162 | 1217 | d_inode(realdn), ceph_vinop(d_inode(realdn))); |
|---|
| 1163 | 1218 | dput(dn); |
|---|
| 1164 | | - dn = realdn; |
|---|
| 1219 | + *pdn = realdn; |
|---|
| 1165 | 1220 | } else { |
|---|
| 1166 | 1221 | BUG_ON(!ceph_dentry(dn)); |
|---|
| 1167 | 1222 | dout("dn %p attached to %p ino %llx.%llx\n", |
|---|
| 1168 | 1223 | dn, d_inode(dn), ceph_vinop(d_inode(dn))); |
|---|
| 1169 | 1224 | } |
|---|
| 1170 | | -out: |
|---|
| 1171 | | - return dn; |
|---|
| 1225 | + return 0; |
|---|
| 1172 | 1226 | } |
|---|
| 1173 | 1227 | |
|---|
| 1174 | 1228 | /* |
|---|
| .. | .. |
|---|
| 1205 | 1259 | struct inode *dir = req->r_parent; |
|---|
| 1206 | 1260 | |
|---|
| 1207 | 1261 | if (dir) { |
|---|
| 1208 | | - err = fill_inode(dir, NULL, |
|---|
| 1209 | | - &rinfo->diri, rinfo->dirfrag, |
|---|
| 1210 | | - session, req->r_request_started, -1, |
|---|
| 1211 | | - &req->r_caps_reservation); |
|---|
| 1262 | + err = ceph_fill_inode(dir, NULL, &rinfo->diri, |
|---|
| 1263 | + rinfo->dirfrag, session, -1, |
|---|
| 1264 | + &req->r_caps_reservation); |
|---|
| 1212 | 1265 | if (err < 0) |
|---|
| 1213 | 1266 | goto done; |
|---|
| 1214 | 1267 | } else { |
|---|
| 1215 | 1268 | WARN_ON_ONCE(1); |
|---|
| 1216 | 1269 | } |
|---|
| 1217 | 1270 | |
|---|
| 1218 | | - if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) { |
|---|
| 1271 | + if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && |
|---|
| 1272 | + test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && |
|---|
| 1273 | + !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { |
|---|
| 1219 | 1274 | struct qstr dname; |
|---|
| 1220 | 1275 | struct dentry *dn, *parent; |
|---|
| 1221 | 1276 | |
|---|
| .. | .. |
|---|
| 1270 | 1325 | err = PTR_ERR(in); |
|---|
| 1271 | 1326 | goto done; |
|---|
| 1272 | 1327 | } |
|---|
| 1273 | | - req->r_target_inode = in; |
|---|
| 1274 | 1328 | |
|---|
| 1275 | | - err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, |
|---|
| 1276 | | - session, req->r_request_started, |
|---|
| 1329 | + err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti, |
|---|
| 1330 | + NULL, session, |
|---|
| 1277 | 1331 | (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && |
|---|
| 1278 | | - rinfo->head->result == 0) ? req->r_fmode : -1, |
|---|
| 1332 | + !test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) && |
|---|
| 1333 | + rinfo->head->result == 0) ? req->r_fmode : -1, |
|---|
| 1279 | 1334 | &req->r_caps_reservation); |
|---|
| 1280 | 1335 | if (err < 0) { |
|---|
| 1281 | | - pr_err("fill_inode badness %p %llx.%llx\n", |
|---|
| 1336 | + pr_err("ceph_fill_inode badness %p %llx.%llx\n", |
|---|
| 1282 | 1337 | in, ceph_vinop(in)); |
|---|
| 1338 | + if (in->i_state & I_NEW) |
|---|
| 1339 | + discard_new_inode(in); |
|---|
| 1340 | + else |
|---|
| 1341 | + iput(in); |
|---|
| 1283 | 1342 | goto done; |
|---|
| 1284 | 1343 | } |
|---|
| 1344 | + req->r_target_inode = in; |
|---|
| 1345 | + if (in->i_state & I_NEW) |
|---|
| 1346 | + unlock_new_inode(in); |
|---|
| 1285 | 1347 | } |
|---|
| 1286 | 1348 | |
|---|
| 1287 | 1349 | /* |
|---|
| .. | .. |
|---|
| 1353 | 1415 | dout("dn %p gets new offset %lld\n", req->r_old_dentry, |
|---|
| 1354 | 1416 | ceph_dentry(req->r_old_dentry)->offset); |
|---|
| 1355 | 1417 | |
|---|
| 1356 | | - dn = req->r_old_dentry; /* use old_dentry */ |
|---|
| 1418 | + /* swap r_dentry and r_old_dentry in case that |
|---|
| 1419 | + * splice_dentry() gets called later. This is safe |
|---|
| 1420 | + * because no other place will use them */ |
|---|
| 1421 | + req->r_dentry = req->r_old_dentry; |
|---|
| 1422 | + req->r_old_dentry = dn; |
|---|
| 1423 | + dn = req->r_dentry; |
|---|
| 1357 | 1424 | } |
|---|
| 1358 | 1425 | |
|---|
| 1359 | 1426 | /* null dentry? */ |
|---|
| .. | .. |
|---|
| 1366 | 1433 | } else if (have_lease) { |
|---|
| 1367 | 1434 | if (d_unhashed(dn)) |
|---|
| 1368 | 1435 | d_add(dn, NULL); |
|---|
| 1369 | | - update_dentry_lease(dn, rinfo->dlease, |
|---|
| 1370 | | - session, |
|---|
| 1371 | | - req->r_request_started, |
|---|
| 1372 | | - NULL, &dvino); |
|---|
| 1436 | + update_dentry_lease(dir, dn, |
|---|
| 1437 | + rinfo->dlease, session, |
|---|
| 1438 | + req->r_request_started); |
|---|
| 1373 | 1439 | } |
|---|
| 1374 | 1440 | goto done; |
|---|
| 1375 | 1441 | } |
|---|
| .. | .. |
|---|
| 1378 | 1444 | if (d_really_is_negative(dn)) { |
|---|
| 1379 | 1445 | ceph_dir_clear_ordered(dir); |
|---|
| 1380 | 1446 | ihold(in); |
|---|
| 1381 | | - dn = splice_dentry(dn, in); |
|---|
| 1382 | | - if (IS_ERR(dn)) { |
|---|
| 1383 | | - err = PTR_ERR(dn); |
|---|
| 1447 | + err = splice_dentry(&req->r_dentry, in); |
|---|
| 1448 | + if (err < 0) |
|---|
| 1384 | 1449 | goto done; |
|---|
| 1385 | | - } |
|---|
| 1386 | | - req->r_dentry = dn; /* may have spliced */ |
|---|
| 1450 | + dn = req->r_dentry; /* may have spliced */ |
|---|
| 1387 | 1451 | } else if (d_really_is_positive(dn) && d_inode(dn) != in) { |
|---|
| 1388 | 1452 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", |
|---|
| 1389 | 1453 | dn, d_inode(dn), ceph_vinop(d_inode(dn)), |
|---|
| .. | .. |
|---|
| 1393 | 1457 | } |
|---|
| 1394 | 1458 | |
|---|
| 1395 | 1459 | if (have_lease) { |
|---|
| 1396 | | - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); |
|---|
| 1397 | | - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); |
|---|
| 1398 | | - update_dentry_lease(dn, rinfo->dlease, session, |
|---|
| 1399 | | - req->r_request_started, |
|---|
| 1400 | | - &tvino, &dvino); |
|---|
| 1460 | + update_dentry_lease(dir, dn, |
|---|
| 1461 | + rinfo->dlease, session, |
|---|
| 1462 | + req->r_request_started); |
|---|
| 1401 | 1463 | } |
|---|
| 1402 | 1464 | dout(" final dn %p\n", dn); |
|---|
| 1403 | 1465 | } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || |
|---|
| 1404 | 1466 | req->r_op == CEPH_MDS_OP_MKSNAP) && |
|---|
| 1405 | 1467 | test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && |
|---|
| 1406 | 1468 | !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { |
|---|
| 1407 | | - struct dentry *dn = req->r_dentry; |
|---|
| 1408 | 1469 | struct inode *dir = req->r_parent; |
|---|
| 1409 | 1470 | |
|---|
| 1410 | 1471 | /* fill out a snapdir LOOKUPSNAP dentry */ |
|---|
| 1411 | | - BUG_ON(!dn); |
|---|
| 1412 | 1472 | BUG_ON(!dir); |
|---|
| 1413 | 1473 | BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); |
|---|
| 1414 | | - dout(" linking snapped dir %p to dn %p\n", in, dn); |
|---|
| 1474 | + BUG_ON(!req->r_dentry); |
|---|
| 1475 | + dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry); |
|---|
| 1415 | 1476 | ceph_dir_clear_ordered(dir); |
|---|
| 1416 | 1477 | ihold(in); |
|---|
| 1417 | | - dn = splice_dentry(dn, in); |
|---|
| 1418 | | - if (IS_ERR(dn)) { |
|---|
| 1419 | | - err = PTR_ERR(dn); |
|---|
| 1478 | + err = splice_dentry(&req->r_dentry, in); |
|---|
| 1479 | + if (err < 0) |
|---|
| 1420 | 1480 | goto done; |
|---|
| 1421 | | - } |
|---|
| 1422 | | - req->r_dentry = dn; /* may have spliced */ |
|---|
| 1423 | | - } else if (rinfo->head->is_dentry) { |
|---|
| 1481 | + } else if (rinfo->head->is_dentry && req->r_dentry) { |
|---|
| 1482 | + /* parent inode is not locked, be careful */ |
|---|
| 1424 | 1483 | struct ceph_vino *ptvino = NULL; |
|---|
| 1425 | | - |
|---|
| 1426 | | - if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || |
|---|
| 1427 | | - le32_to_cpu(rinfo->dlease->duration_ms)) { |
|---|
| 1428 | | - dvino.ino = le64_to_cpu(rinfo->diri.in->ino); |
|---|
| 1429 | | - dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); |
|---|
| 1430 | | - |
|---|
| 1431 | | - if (rinfo->head->is_target) { |
|---|
| 1432 | | - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); |
|---|
| 1433 | | - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); |
|---|
| 1434 | | - ptvino = &tvino; |
|---|
| 1435 | | - } |
|---|
| 1436 | | - |
|---|
| 1437 | | - update_dentry_lease(req->r_dentry, rinfo->dlease, |
|---|
| 1438 | | - session, req->r_request_started, ptvino, |
|---|
| 1439 | | - &dvino); |
|---|
| 1440 | | - } else { |
|---|
| 1441 | | - dout("%s: no dentry lease or dir cap\n", __func__); |
|---|
| 1484 | + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); |
|---|
| 1485 | + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); |
|---|
| 1486 | + if (rinfo->head->is_target) { |
|---|
| 1487 | + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); |
|---|
| 1488 | + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); |
|---|
| 1489 | + ptvino = &tvino; |
|---|
| 1442 | 1490 | } |
|---|
| 1491 | + update_dentry_lease_careful(req->r_dentry, rinfo->dlease, |
|---|
| 1492 | + session, req->r_request_started, |
|---|
| 1493 | + rinfo->dname, rinfo->dname_len, |
|---|
| 1494 | + &dvino, ptvino); |
|---|
| 1443 | 1495 | } |
|---|
| 1444 | 1496 | done: |
|---|
| 1445 | 1497 | dout("fill_trace done err=%d\n", err); |
|---|
| .. | .. |
|---|
| 1470 | 1522 | dout("new_inode badness got %d\n", err); |
|---|
| 1471 | 1523 | continue; |
|---|
| 1472 | 1524 | } |
|---|
| 1473 | | - rc = fill_inode(in, NULL, &rde->inode, NULL, session, |
|---|
| 1474 | | - req->r_request_started, -1, |
|---|
| 1475 | | - &req->r_caps_reservation); |
|---|
| 1525 | + rc = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, |
|---|
| 1526 | + -1, &req->r_caps_reservation); |
|---|
| 1476 | 1527 | if (rc < 0) { |
|---|
| 1477 | | - pr_err("fill_inode badness on %p got %d\n", in, rc); |
|---|
| 1528 | + pr_err("ceph_fill_inode badness on %p got %d\n", |
|---|
| 1529 | + in, rc); |
|---|
| 1478 | 1530 | err = rc; |
|---|
| 1531 | + if (in->i_state & I_NEW) { |
|---|
| 1532 | + ihold(in); |
|---|
| 1533 | + discard_new_inode(in); |
|---|
| 1534 | + } |
|---|
| 1535 | + } else if (in->i_state & I_NEW) { |
|---|
| 1536 | + unlock_new_inode(in); |
|---|
| 1479 | 1537 | } |
|---|
| 1480 | | - iput(in); |
|---|
| 1538 | + |
|---|
| 1539 | + /* avoid calling iput_final() in mds dispatch threads */ |
|---|
| 1540 | + ceph_async_iput(in); |
|---|
| 1481 | 1541 | } |
|---|
| 1482 | 1542 | |
|---|
| 1483 | 1543 | return err; |
|---|
| .. | .. |
|---|
| 1600 | 1660 | /* FIXME: release caps/leases if error occurs */ |
|---|
| 1601 | 1661 | for (i = 0; i < rinfo->dir_nr; i++) { |
|---|
| 1602 | 1662 | struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; |
|---|
| 1603 | | - struct ceph_vino tvino, dvino; |
|---|
| 1663 | + struct ceph_vino tvino; |
|---|
| 1604 | 1664 | |
|---|
| 1605 | 1665 | dname.name = rde->name; |
|---|
| 1606 | 1666 | dname.len = rde->name_len; |
|---|
| .. | .. |
|---|
| 1670 | 1730 | } |
|---|
| 1671 | 1731 | } |
|---|
| 1672 | 1732 | |
|---|
| 1673 | | - ret = fill_inode(in, NULL, &rde->inode, NULL, session, |
|---|
| 1674 | | - req->r_request_started, -1, |
|---|
| 1675 | | - &req->r_caps_reservation); |
|---|
| 1733 | + ret = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, |
|---|
| 1734 | + -1, &req->r_caps_reservation); |
|---|
| 1676 | 1735 | if (ret < 0) { |
|---|
| 1677 | | - pr_err("fill_inode badness on %p\n", in); |
|---|
| 1678 | | - if (d_really_is_negative(dn)) |
|---|
| 1679 | | - iput(in); |
|---|
| 1736 | + pr_err("ceph_fill_inode badness on %p\n", in); |
|---|
| 1737 | + if (d_really_is_negative(dn)) { |
|---|
| 1738 | + /* avoid calling iput_final() in mds |
|---|
| 1739 | + * dispatch threads */ |
|---|
| 1740 | + if (in->i_state & I_NEW) { |
|---|
| 1741 | + ihold(in); |
|---|
| 1742 | + discard_new_inode(in); |
|---|
| 1743 | + } |
|---|
| 1744 | + ceph_async_iput(in); |
|---|
| 1745 | + } |
|---|
| 1680 | 1746 | d_drop(dn); |
|---|
| 1681 | 1747 | err = ret; |
|---|
| 1682 | 1748 | goto next_item; |
|---|
| 1683 | 1749 | } |
|---|
| 1750 | + if (in->i_state & I_NEW) |
|---|
| 1751 | + unlock_new_inode(in); |
|---|
| 1684 | 1752 | |
|---|
| 1685 | 1753 | if (d_really_is_negative(dn)) { |
|---|
| 1686 | | - struct dentry *realdn; |
|---|
| 1687 | | - |
|---|
| 1688 | 1754 | if (ceph_security_xattr_deadlock(in)) { |
|---|
| 1689 | 1755 | dout(" skip splicing dn %p to inode %p" |
|---|
| 1690 | 1756 | " (security xattr deadlock)\n", dn, in); |
|---|
| 1691 | | - iput(in); |
|---|
| 1757 | + ceph_async_iput(in); |
|---|
| 1692 | 1758 | skipped++; |
|---|
| 1693 | 1759 | goto next_item; |
|---|
| 1694 | 1760 | } |
|---|
| 1695 | 1761 | |
|---|
| 1696 | | - realdn = splice_dentry(dn, in); |
|---|
| 1697 | | - if (IS_ERR(realdn)) { |
|---|
| 1698 | | - err = PTR_ERR(realdn); |
|---|
| 1699 | | - d_drop(dn); |
|---|
| 1762 | + err = splice_dentry(&dn, in); |
|---|
| 1763 | + if (err < 0) |
|---|
| 1700 | 1764 | goto next_item; |
|---|
| 1701 | | - } |
|---|
| 1702 | | - dn = realdn; |
|---|
| 1703 | 1765 | } |
|---|
| 1704 | 1766 | |
|---|
| 1705 | 1767 | ceph_dentry(dn)->offset = rde->offset; |
|---|
| 1706 | 1768 | |
|---|
| 1707 | | - dvino = ceph_vino(d_inode(parent)); |
|---|
| 1708 | | - update_dentry_lease(dn, rde->lease, req->r_session, |
|---|
| 1709 | | - req->r_request_started, &tvino, &dvino); |
|---|
| 1769 | + update_dentry_lease(d_inode(parent), dn, |
|---|
| 1770 | + rde->lease, req->r_session, |
|---|
| 1771 | + req->r_request_started); |
|---|
| 1710 | 1772 | |
|---|
| 1711 | 1773 | if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { |
|---|
| 1712 | 1774 | ret = fill_readdir_cache(d_inode(parent), dn, |
|---|
| .. | .. |
|---|
| 1715 | 1777 | err = ret; |
|---|
| 1716 | 1778 | } |
|---|
| 1717 | 1779 | next_item: |
|---|
| 1718 | | - if (dn) |
|---|
| 1719 | | - dput(dn); |
|---|
| 1780 | + dput(dn); |
|---|
| 1720 | 1781 | } |
|---|
| 1721 | 1782 | out: |
|---|
| 1722 | 1783 | if (err == 0 && skipped == 0) { |
|---|
| .. | .. |
|---|
| 1745 | 1806 | } |
|---|
| 1746 | 1807 | |
|---|
| 1747 | 1808 | /* |
|---|
| 1809 | + * Put reference to inode, but avoid calling iput_final() in current thread. |
|---|
| 1810 | + * iput_final() may wait for readahead pages. The wait can cause deadlock in |
|---|
| 1811 | + * some contexts. |
|---|
| 1812 | + */ |
|---|
| 1813 | +void ceph_async_iput(struct inode *inode) |
|---|
| 1814 | +{ |
|---|
| 1815 | + if (!inode) |
|---|
| 1816 | + return; |
|---|
| 1817 | + for (;;) { |
|---|
| 1818 | + if (atomic_add_unless(&inode->i_count, -1, 1)) |
|---|
| 1819 | + break; |
|---|
| 1820 | + if (queue_work(ceph_inode_to_client(inode)->inode_wq, |
|---|
| 1821 | + &ceph_inode(inode)->i_work)) |
|---|
| 1822 | + break; |
|---|
| 1823 | + /* queue work failed, i_count must be at least 2 */ |
|---|
| 1824 | + } |
|---|
| 1825 | +} |
|---|
| 1826 | + |
|---|
| 1827 | +/* |
|---|
| 1748 | 1828 | * Write back inode data in a worker thread. (This can't be done |
|---|
| 1749 | 1829 | * in the message handler context.) |
|---|
| 1750 | 1830 | */ |
|---|
| 1751 | 1831 | void ceph_queue_writeback(struct inode *inode) |
|---|
| 1752 | 1832 | { |
|---|
| 1833 | + struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 1834 | + set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask); |
|---|
| 1835 | + |
|---|
| 1753 | 1836 | ihold(inode); |
|---|
| 1754 | | - if (queue_work(ceph_inode_to_client(inode)->wb_wq, |
|---|
| 1755 | | - &ceph_inode(inode)->i_wb_work)) { |
|---|
| 1837 | + if (queue_work(ceph_inode_to_client(inode)->inode_wq, |
|---|
| 1838 | + &ci->i_work)) { |
|---|
| 1756 | 1839 | dout("ceph_queue_writeback %p\n", inode); |
|---|
| 1757 | 1840 | } else { |
|---|
| 1758 | | - dout("ceph_queue_writeback %p failed\n", inode); |
|---|
| 1841 | + dout("ceph_queue_writeback %p already queued, mask=%lx\n", |
|---|
| 1842 | + inode, ci->i_work_mask); |
|---|
| 1759 | 1843 | iput(inode); |
|---|
| 1760 | 1844 | } |
|---|
| 1761 | | -} |
|---|
| 1762 | | - |
|---|
| 1763 | | -static void ceph_writeback_work(struct work_struct *work) |
|---|
| 1764 | | -{ |
|---|
| 1765 | | - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, |
|---|
| 1766 | | - i_wb_work); |
|---|
| 1767 | | - struct inode *inode = &ci->vfs_inode; |
|---|
| 1768 | | - |
|---|
| 1769 | | - dout("writeback %p\n", inode); |
|---|
| 1770 | | - filemap_fdatawrite(&inode->i_data); |
|---|
| 1771 | | - iput(inode); |
|---|
| 1772 | 1845 | } |
|---|
| 1773 | 1846 | |
|---|
| 1774 | 1847 | /* |
|---|
| .. | .. |
|---|
| 1776 | 1849 | */ |
|---|
| 1777 | 1850 | void ceph_queue_invalidate(struct inode *inode) |
|---|
| 1778 | 1851 | { |
|---|
| 1852 | + struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 1853 | + set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask); |
|---|
| 1854 | + |
|---|
| 1779 | 1855 | ihold(inode); |
|---|
| 1780 | | - if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, |
|---|
| 1781 | | - &ceph_inode(inode)->i_pg_inv_work)) { |
|---|
| 1856 | + if (queue_work(ceph_inode_to_client(inode)->inode_wq, |
|---|
| 1857 | + &ceph_inode(inode)->i_work)) { |
|---|
| 1782 | 1858 | dout("ceph_queue_invalidate %p\n", inode); |
|---|
| 1783 | 1859 | } else { |
|---|
| 1784 | | - dout("ceph_queue_invalidate %p failed\n", inode); |
|---|
| 1860 | + dout("ceph_queue_invalidate %p already queued, mask=%lx\n", |
|---|
| 1861 | + inode, ci->i_work_mask); |
|---|
| 1785 | 1862 | iput(inode); |
|---|
| 1786 | 1863 | } |
|---|
| 1787 | 1864 | } |
|---|
| 1788 | 1865 | |
|---|
| 1789 | 1866 | /* |
|---|
| 1790 | | - * Invalidate inode pages in a worker thread. (This can't be done |
|---|
| 1791 | | - * in the message handler context.) |
|---|
| 1867 | + * Queue an async vmtruncate. If we fail to queue work, we will handle |
|---|
| 1868 | + * the truncation the next time we call __ceph_do_pending_vmtruncate. |
|---|
| 1792 | 1869 | */ |
|---|
| 1793 | | -static void ceph_invalidate_work(struct work_struct *work) |
|---|
| 1870 | +void ceph_queue_vmtruncate(struct inode *inode) |
|---|
| 1794 | 1871 | { |
|---|
| 1795 | | - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, |
|---|
| 1796 | | - i_pg_inv_work); |
|---|
| 1797 | | - struct inode *inode = &ci->vfs_inode; |
|---|
| 1872 | + struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 1873 | + set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask); |
|---|
| 1874 | + |
|---|
| 1875 | + ihold(inode); |
|---|
| 1876 | + if (queue_work(ceph_inode_to_client(inode)->inode_wq, |
|---|
| 1877 | + &ci->i_work)) { |
|---|
| 1878 | + dout("ceph_queue_vmtruncate %p\n", inode); |
|---|
| 1879 | + } else { |
|---|
| 1880 | + dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n", |
|---|
| 1881 | + inode, ci->i_work_mask); |
|---|
| 1882 | + iput(inode); |
|---|
| 1883 | + } |
|---|
| 1884 | +} |
|---|
| 1885 | + |
|---|
| 1886 | +static void ceph_do_invalidate_pages(struct inode *inode) |
|---|
| 1887 | +{ |
|---|
| 1888 | + struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 1798 | 1889 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
|---|
| 1799 | 1890 | u32 orig_gen; |
|---|
| 1800 | 1891 | int check = 0; |
|---|
| .. | .. |
|---|
| 1847 | 1938 | out: |
|---|
| 1848 | 1939 | if (check) |
|---|
| 1849 | 1940 | ceph_check_caps(ci, 0, NULL); |
|---|
| 1850 | | - iput(inode); |
|---|
| 1851 | | -} |
|---|
| 1852 | | - |
|---|
| 1853 | | - |
|---|
| 1854 | | -/* |
|---|
| 1855 | | - * called by trunc_wq; |
|---|
| 1856 | | - * |
|---|
| 1857 | | - * We also truncate in a separate thread as well. |
|---|
| 1858 | | - */ |
|---|
| 1859 | | -static void ceph_vmtruncate_work(struct work_struct *work) |
|---|
| 1860 | | -{ |
|---|
| 1861 | | - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, |
|---|
| 1862 | | - i_vmtruncate_work); |
|---|
| 1863 | | - struct inode *inode = &ci->vfs_inode; |
|---|
| 1864 | | - |
|---|
| 1865 | | - dout("vmtruncate_work %p\n", inode); |
|---|
| 1866 | | - __ceph_do_pending_vmtruncate(inode); |
|---|
| 1867 | | - iput(inode); |
|---|
| 1868 | | -} |
|---|
| 1869 | | - |
|---|
| 1870 | | -/* |
|---|
| 1871 | | - * Queue an async vmtruncate. If we fail to queue work, we will handle |
|---|
| 1872 | | - * the truncation the next time we call __ceph_do_pending_vmtruncate. |
|---|
| 1873 | | - */ |
|---|
| 1874 | | -void ceph_queue_vmtruncate(struct inode *inode) |
|---|
| 1875 | | -{ |
|---|
| 1876 | | - struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 1877 | | - |
|---|
| 1878 | | - ihold(inode); |
|---|
| 1879 | | - |
|---|
| 1880 | | - if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, |
|---|
| 1881 | | - &ci->i_vmtruncate_work)) { |
|---|
| 1882 | | - dout("ceph_queue_vmtruncate %p\n", inode); |
|---|
| 1883 | | - } else { |
|---|
| 1884 | | - dout("ceph_queue_vmtruncate %p failed, pending=%d\n", |
|---|
| 1885 | | - inode, ci->i_truncate_pending); |
|---|
| 1886 | | - iput(inode); |
|---|
| 1887 | | - } |
|---|
| 1888 | 1941 | } |
|---|
| 1889 | 1942 | |
|---|
| 1890 | 1943 | /* |
|---|
| .. | .. |
|---|
| 1943 | 1996 | mutex_unlock(&ci->i_truncate_mutex); |
|---|
| 1944 | 1997 | |
|---|
| 1945 | 1998 | if (wrbuffer_refs == 0) |
|---|
| 1946 | | - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
|---|
| 1999 | + ceph_check_caps(ci, 0, NULL); |
|---|
| 1947 | 2000 | |
|---|
| 1948 | 2001 | wake_up_all(&ci->i_cap_wq); |
|---|
| 2002 | +} |
|---|
| 2003 | + |
|---|
| 2004 | +static void ceph_inode_work(struct work_struct *work) |
|---|
| 2005 | +{ |
|---|
| 2006 | + struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, |
|---|
| 2007 | + i_work); |
|---|
| 2008 | + struct inode *inode = &ci->vfs_inode; |
|---|
| 2009 | + |
|---|
| 2010 | + if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { |
|---|
| 2011 | + dout("writeback %p\n", inode); |
|---|
| 2012 | + filemap_fdatawrite(&inode->i_data); |
|---|
| 2013 | + } |
|---|
| 2014 | + if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) |
|---|
| 2015 | + ceph_do_invalidate_pages(inode); |
|---|
| 2016 | + |
|---|
| 2017 | + if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask)) |
|---|
| 2018 | + __ceph_do_pending_vmtruncate(inode); |
|---|
| 2019 | + |
|---|
| 2020 | + iput(inode); |
|---|
| 1949 | 2021 | } |
|---|
| 1950 | 2022 | |
|---|
| 1951 | 2023 | /* |
|---|
| .. | .. |
|---|
| 1961 | 2033 | int __ceph_setattr(struct inode *inode, struct iattr *attr) |
|---|
| 1962 | 2034 | { |
|---|
| 1963 | 2035 | struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 1964 | | - const unsigned int ia_valid = attr->ia_valid; |
|---|
| 2036 | + unsigned int ia_valid = attr->ia_valid; |
|---|
| 1965 | 2037 | struct ceph_mds_request *req; |
|---|
| 1966 | 2038 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
|---|
| 1967 | 2039 | struct ceph_cap_flush *prealloc_cf; |
|---|
| .. | .. |
|---|
| 2066 | 2138 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; |
|---|
| 2067 | 2139 | } |
|---|
| 2068 | 2140 | } |
|---|
| 2141 | + if (ia_valid & ATTR_SIZE) { |
|---|
| 2142 | + dout("setattr %p size %lld -> %lld\n", inode, |
|---|
| 2143 | + inode->i_size, attr->ia_size); |
|---|
| 2144 | + if ((issued & CEPH_CAP_FILE_EXCL) && |
|---|
| 2145 | + attr->ia_size > inode->i_size) { |
|---|
| 2146 | + i_size_write(inode, attr->ia_size); |
|---|
| 2147 | + inode->i_blocks = calc_inode_blocks(attr->ia_size); |
|---|
| 2148 | + ci->i_reported_size = attr->ia_size; |
|---|
| 2149 | + dirtied |= CEPH_CAP_FILE_EXCL; |
|---|
| 2150 | + ia_valid |= ATTR_MTIME; |
|---|
| 2151 | + } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || |
|---|
| 2152 | + attr->ia_size != inode->i_size) { |
|---|
| 2153 | + req->r_args.setattr.size = cpu_to_le64(attr->ia_size); |
|---|
| 2154 | + req->r_args.setattr.old_size = |
|---|
| 2155 | + cpu_to_le64(inode->i_size); |
|---|
| 2156 | + mask |= CEPH_SETATTR_SIZE; |
|---|
| 2157 | + release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | |
|---|
| 2158 | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; |
|---|
| 2159 | + } |
|---|
| 2160 | + } |
|---|
| 2069 | 2161 | if (ia_valid & ATTR_MTIME) { |
|---|
| 2070 | 2162 | dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, |
|---|
| 2071 | 2163 | inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, |
|---|
| .. | .. |
|---|
| 2085 | 2177 | &attr->ia_mtime); |
|---|
| 2086 | 2178 | mask |= CEPH_SETATTR_MTIME; |
|---|
| 2087 | 2179 | release |= CEPH_CAP_FILE_SHARED | |
|---|
| 2088 | | - CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; |
|---|
| 2089 | | - } |
|---|
| 2090 | | - } |
|---|
| 2091 | | - if (ia_valid & ATTR_SIZE) { |
|---|
| 2092 | | - dout("setattr %p size %lld -> %lld\n", inode, |
|---|
| 2093 | | - inode->i_size, attr->ia_size); |
|---|
| 2094 | | - if ((issued & CEPH_CAP_FILE_EXCL) && |
|---|
| 2095 | | - attr->ia_size > inode->i_size) { |
|---|
| 2096 | | - i_size_write(inode, attr->ia_size); |
|---|
| 2097 | | - inode->i_blocks = calc_inode_blocks(attr->ia_size); |
|---|
| 2098 | | - ci->i_reported_size = attr->ia_size; |
|---|
| 2099 | | - dirtied |= CEPH_CAP_FILE_EXCL; |
|---|
| 2100 | | - } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || |
|---|
| 2101 | | - attr->ia_size != inode->i_size) { |
|---|
| 2102 | | - req->r_args.setattr.size = cpu_to_le64(attr->ia_size); |
|---|
| 2103 | | - req->r_args.setattr.old_size = |
|---|
| 2104 | | - cpu_to_le64(inode->i_size); |
|---|
| 2105 | | - mask |= CEPH_SETATTR_SIZE; |
|---|
| 2106 | | - release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | |
|---|
| 2107 | 2180 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; |
|---|
| 2108 | 2181 | } |
|---|
| 2109 | 2182 | } |
|---|
| .. | .. |
|---|
| 2223 | 2296 | |
|---|
| 2224 | 2297 | dout("do_getattr inode %p mask %s mode 0%o\n", |
|---|
| 2225 | 2298 | inode, ceph_cap_string(mask), inode->i_mode); |
|---|
| 2226 | | - if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) |
|---|
| 2227 | | - return 0; |
|---|
| 2299 | + if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) |
|---|
| 2300 | + return 0; |
|---|
| 2228 | 2301 | |
|---|
| 2229 | 2302 | mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS; |
|---|
| 2230 | 2303 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); |
|---|
| .. | .. |
|---|
| 2271 | 2344 | return err; |
|---|
| 2272 | 2345 | } |
|---|
| 2273 | 2346 | |
|---|
| 2347 | +/* Craft a mask of needed caps given a set of requested statx attrs. */ |
|---|
| 2348 | +static int statx_to_caps(u32 want) |
|---|
| 2349 | +{ |
|---|
| 2350 | + int mask = 0; |
|---|
| 2351 | + |
|---|
| 2352 | + if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) |
|---|
| 2353 | + mask |= CEPH_CAP_AUTH_SHARED; |
|---|
| 2354 | + |
|---|
| 2355 | + if (want & (STATX_NLINK|STATX_CTIME)) |
|---|
| 2356 | + mask |= CEPH_CAP_LINK_SHARED; |
|---|
| 2357 | + |
|---|
| 2358 | + if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| |
|---|
| 2359 | + STATX_BLOCKS)) |
|---|
| 2360 | + mask |= CEPH_CAP_FILE_SHARED; |
|---|
| 2361 | + |
|---|
| 2362 | + if (want & (STATX_CTIME)) |
|---|
| 2363 | + mask |= CEPH_CAP_XATTR_SHARED; |
|---|
| 2364 | + |
|---|
| 2365 | + return mask; |
|---|
| 2366 | +} |
|---|
| 2367 | + |
|---|
| 2274 | 2368 | /* |
|---|
| 2275 | | - * Get all attributes. Hopefully somedata we'll have a statlite() |
|---|
| 2276 | | - * and can limit the fields we require to be accurate. |
|---|
| 2369 | + * Get all the attributes. If we have sufficient caps for the requested attrs, |
|---|
| 2370 | + * then we can avoid talking to the MDS at all. |
|---|
| 2277 | 2371 | */ |
|---|
| 2278 | 2372 | int ceph_getattr(const struct path *path, struct kstat *stat, |
|---|
| 2279 | 2373 | u32 request_mask, unsigned int flags) |
|---|
| 2280 | 2374 | { |
|---|
| 2281 | 2375 | struct inode *inode = d_inode(path->dentry); |
|---|
| 2282 | 2376 | struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 2283 | | - int err; |
|---|
| 2377 | + u32 valid_mask = STATX_BASIC_STATS; |
|---|
| 2378 | + int err = 0; |
|---|
| 2284 | 2379 | |
|---|
| 2285 | | - err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false); |
|---|
| 2286 | | - if (!err) { |
|---|
| 2287 | | - generic_fillattr(inode, stat); |
|---|
| 2288 | | - stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); |
|---|
| 2289 | | - if (ceph_snap(inode) != CEPH_NOSNAP) |
|---|
| 2290 | | - stat->dev = ceph_snap(inode); |
|---|
| 2291 | | - else |
|---|
| 2292 | | - stat->dev = 0; |
|---|
| 2293 | | - if (S_ISDIR(inode->i_mode)) { |
|---|
| 2294 | | - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), |
|---|
| 2295 | | - RBYTES)) |
|---|
| 2296 | | - stat->size = ci->i_rbytes; |
|---|
| 2297 | | - else |
|---|
| 2298 | | - stat->size = ci->i_files + ci->i_subdirs; |
|---|
| 2299 | | - stat->blocks = 0; |
|---|
| 2300 | | - stat->blksize = 65536; |
|---|
| 2301 | | - /* |
|---|
| 2302 | | - * Some applications rely on the number of st_nlink |
|---|
| 2303 | | - * value on directories to be either 0 (if unlinked) |
|---|
| 2304 | | - * or 2 + number of subdirectories. |
|---|
| 2305 | | - */ |
|---|
| 2306 | | - if (stat->nlink == 1) |
|---|
| 2307 | | - /* '.' + '..' + subdirs */ |
|---|
| 2308 | | - stat->nlink = 1 + 1 + ci->i_subdirs; |
|---|
| 2309 | | - } |
|---|
| 2380 | + /* Skip the getattr altogether if we're asked not to sync */ |
|---|
| 2381 | + if (!(flags & AT_STATX_DONT_SYNC)) { |
|---|
| 2382 | + err = ceph_do_getattr(inode, statx_to_caps(request_mask), |
|---|
| 2383 | + flags & AT_STATX_FORCE_SYNC); |
|---|
| 2384 | + if (err) |
|---|
| 2385 | + return err; |
|---|
| 2310 | 2386 | } |
|---|
| 2387 | + |
|---|
| 2388 | + generic_fillattr(inode, stat); |
|---|
| 2389 | + stat->ino = ceph_present_inode(inode); |
|---|
| 2390 | + |
|---|
| 2391 | + /* |
|---|
| 2392 | + * btime on newly-allocated inodes is 0, so if this is still set to |
|---|
| 2393 | + * that, then assume that it's not valid. |
|---|
| 2394 | + */ |
|---|
| 2395 | + if (ci->i_btime.tv_sec || ci->i_btime.tv_nsec) { |
|---|
| 2396 | + stat->btime = ci->i_btime; |
|---|
| 2397 | + valid_mask |= STATX_BTIME; |
|---|
| 2398 | + } |
|---|
| 2399 | + |
|---|
| 2400 | + if (ceph_snap(inode) == CEPH_NOSNAP) |
|---|
| 2401 | + stat->dev = inode->i_sb->s_dev; |
|---|
| 2402 | + else |
|---|
| 2403 | + stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; |
|---|
| 2404 | + |
|---|
| 2405 | + if (S_ISDIR(inode->i_mode)) { |
|---|
| 2406 | + if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), |
|---|
| 2407 | + RBYTES)) |
|---|
| 2408 | + stat->size = ci->i_rbytes; |
|---|
| 2409 | + else |
|---|
| 2410 | + stat->size = ci->i_files + ci->i_subdirs; |
|---|
| 2411 | + stat->blocks = 0; |
|---|
| 2412 | + stat->blksize = 65536; |
|---|
| 2413 | + /* |
|---|
| 2414 | + * Some applications rely on the number of st_nlink |
|---|
| 2415 | + * value on directories to be either 0 (if unlinked) |
|---|
| 2416 | + * or 2 + number of subdirectories. |
|---|
| 2417 | + */ |
|---|
| 2418 | + if (stat->nlink == 1) |
|---|
| 2419 | + /* '.' + '..' + subdirs */ |
|---|
| 2420 | + stat->nlink = 1 + 1 + ci->i_subdirs; |
|---|
| 2421 | + } |
|---|
| 2422 | + |
|---|
| 2423 | + stat->result_mask = request_mask & valid_mask; |
|---|
| 2311 | 2424 | return err; |
|---|
| 2312 | 2425 | } |
|---|