| .. | .. |
|---|
| 3 | 3 | |
|---|
| 4 | 4 | #include <linux/sort.h> |
|---|
| 5 | 5 | #include <linux/slab.h> |
|---|
| 6 | | - |
|---|
| 6 | +#include <linux/iversion.h> |
|---|
| 7 | 7 | #include "super.h" |
|---|
| 8 | 8 | #include "mds_client.h" |
|---|
| 9 | | - |
|---|
| 10 | 9 | #include <linux/ceph/decode.h> |
|---|
| 10 | + |
|---|
| 11 | +/* unused map expires after 5 minutes */ |
|---|
| 12 | +#define CEPH_SNAPID_MAP_TIMEOUT (5 * 60 * HZ) |
|---|
| 11 | 13 | |
|---|
| 12 | 14 | /* |
|---|
| 13 | 15 | * Snapshots in ceph are driven in large part by cooperation from the |
|---|
| .. | .. |
|---|
| 58 | 60 | /* |
|---|
| 59 | 61 | * increase ref count for the realm |
|---|
| 60 | 62 | * |
|---|
| 61 | | - * caller must hold snap_rwsem for write. |
|---|
| 63 | + * caller must hold snap_rwsem. |
|---|
| 62 | 64 | */ |
|---|
| 63 | 65 | void ceph_get_snap_realm(struct ceph_mds_client *mdsc, |
|---|
| 64 | 66 | struct ceph_snap_realm *realm) |
|---|
| 65 | 67 | { |
|---|
| 66 | | - dout("get_realm %p %d -> %d\n", realm, |
|---|
| 67 | | - atomic_read(&realm->nref), atomic_read(&realm->nref)+1); |
|---|
| 68 | + lockdep_assert_held(&mdsc->snap_rwsem); |
|---|
| 69 | + |
|---|
| 68 | 70 | /* |
|---|
| 69 | | - * since we _only_ increment realm refs or empty the empty |
|---|
| 70 | | - * list with snap_rwsem held, adjusting the empty list here is |
|---|
| 71 | | - * safe. we do need to protect against concurrent empty list |
|---|
| 72 | | - * additions, however. |
|---|
| 71 | + * The 0->1 and 1->0 transitions must take the snap_empty_lock |
|---|
| 72 | + * atomically with the refcount change. Go ahead and bump the |
|---|
| 73 | + * nref here, unless it's 0, in which case we take the spinlock |
|---|
| 74 | + * and then do the increment and remove it from the list. |
|---|
| 73 | 75 | */ |
|---|
| 74 | | - if (atomic_inc_return(&realm->nref) == 1) { |
|---|
| 75 | | - spin_lock(&mdsc->snap_empty_lock); |
|---|
| 76 | + if (atomic_inc_not_zero(&realm->nref)) |
|---|
| 77 | + return; |
|---|
| 78 | + |
|---|
| 79 | + spin_lock(&mdsc->snap_empty_lock); |
|---|
| 80 | + if (atomic_inc_return(&realm->nref) == 1) |
|---|
| 76 | 81 | list_del_init(&realm->empty_item); |
|---|
| 77 | | - spin_unlock(&mdsc->snap_empty_lock); |
|---|
| 78 | | - } |
|---|
| 82 | + spin_unlock(&mdsc->snap_empty_lock); |
|---|
| 79 | 83 | } |
|---|
| 80 | 84 | |
|---|
| 81 | 85 | static void __insert_snap_realm(struct rb_root *root, |
|---|
| .. | .. |
|---|
| 111 | 115 | { |
|---|
| 112 | 116 | struct ceph_snap_realm *realm; |
|---|
| 113 | 117 | |
|---|
| 118 | + lockdep_assert_held_write(&mdsc->snap_rwsem); |
|---|
| 119 | + |
|---|
| 114 | 120 | realm = kzalloc(sizeof(*realm), GFP_NOFS); |
|---|
| 115 | 121 | if (!realm) |
|---|
| 116 | 122 | return ERR_PTR(-ENOMEM); |
|---|
| .. | .. |
|---|
| 124 | 130 | INIT_LIST_HEAD(&realm->inodes_with_caps); |
|---|
| 125 | 131 | spin_lock_init(&realm->inodes_with_caps_lock); |
|---|
| 126 | 132 | __insert_snap_realm(&mdsc->snap_realms, realm); |
|---|
| 133 | + mdsc->num_snap_realms++; |
|---|
| 134 | + |
|---|
| 127 | 135 | dout("create_snap_realm %llx %p\n", realm->ino, realm); |
|---|
| 128 | 136 | return realm; |
|---|
| 129 | 137 | } |
|---|
| .. | .. |
|---|
| 131 | 139 | /* |
|---|
| 132 | 140 | * lookup the realm rooted at @ino. |
|---|
| 133 | 141 | * |
|---|
| 134 | | - * caller must hold snap_rwsem for write. |
|---|
| 142 | + * caller must hold snap_rwsem. |
|---|
| 135 | 143 | */ |
|---|
| 136 | 144 | static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, |
|---|
| 137 | 145 | u64 ino) |
|---|
| 138 | 146 | { |
|---|
| 139 | 147 | struct rb_node *n = mdsc->snap_realms.rb_node; |
|---|
| 140 | 148 | struct ceph_snap_realm *r; |
|---|
| 149 | + |
|---|
| 150 | + lockdep_assert_held(&mdsc->snap_rwsem); |
|---|
| 141 | 151 | |
|---|
| 142 | 152 | while (n) { |
|---|
| 143 | 153 | r = rb_entry(n, struct ceph_snap_realm, node); |
|---|
| .. | .. |
|---|
| 172 | 182 | static void __destroy_snap_realm(struct ceph_mds_client *mdsc, |
|---|
| 173 | 183 | struct ceph_snap_realm *realm) |
|---|
| 174 | 184 | { |
|---|
| 185 | + lockdep_assert_held_write(&mdsc->snap_rwsem); |
|---|
| 186 | + |
|---|
| 175 | 187 | dout("__destroy_snap_realm %p %llx\n", realm, realm->ino); |
|---|
| 176 | 188 | |
|---|
| 177 | 189 | rb_erase(&realm->node, &mdsc->snap_realms); |
|---|
| 190 | + mdsc->num_snap_realms--; |
|---|
| 178 | 191 | |
|---|
| 179 | 192 | if (realm->parent) { |
|---|
| 180 | 193 | list_del_init(&realm->child_item); |
|---|
| .. | .. |
|---|
| 193 | 206 | static void __put_snap_realm(struct ceph_mds_client *mdsc, |
|---|
| 194 | 207 | struct ceph_snap_realm *realm) |
|---|
| 195 | 208 | { |
|---|
| 196 | | - dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, |
|---|
| 197 | | - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); |
|---|
| 209 | + lockdep_assert_held_write(&mdsc->snap_rwsem); |
|---|
| 210 | + |
|---|
| 211 | + /* |
|---|
| 212 | + * We do not require the snap_empty_lock here, as any caller that |
|---|
| 213 | + * increments the value must hold the snap_rwsem. |
|---|
| 214 | + */ |
|---|
| 198 | 215 | if (atomic_dec_and_test(&realm->nref)) |
|---|
| 199 | 216 | __destroy_snap_realm(mdsc, realm); |
|---|
| 200 | 217 | } |
|---|
| 201 | 218 | |
|---|
| 202 | 219 | /* |
|---|
| 203 | | - * caller needn't hold any locks |
|---|
| 220 | + * See comments in ceph_get_snap_realm. Caller needn't hold any locks. |
|---|
| 204 | 221 | */ |
|---|
| 205 | 222 | void ceph_put_snap_realm(struct ceph_mds_client *mdsc, |
|---|
| 206 | 223 | struct ceph_snap_realm *realm) |
|---|
| 207 | 224 | { |
|---|
| 208 | | - dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, |
|---|
| 209 | | - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); |
|---|
| 210 | | - if (!atomic_dec_and_test(&realm->nref)) |
|---|
| 225 | + if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock)) |
|---|
| 211 | 226 | return; |
|---|
| 212 | 227 | |
|---|
| 213 | 228 | if (down_write_trylock(&mdsc->snap_rwsem)) { |
|---|
| 229 | + spin_unlock(&mdsc->snap_empty_lock); |
|---|
| 214 | 230 | __destroy_snap_realm(mdsc, realm); |
|---|
| 215 | 231 | up_write(&mdsc->snap_rwsem); |
|---|
| 216 | 232 | } else { |
|---|
| 217 | | - spin_lock(&mdsc->snap_empty_lock); |
|---|
| 218 | 233 | list_add(&realm->empty_item, &mdsc->snap_empty); |
|---|
| 219 | 234 | spin_unlock(&mdsc->snap_empty_lock); |
|---|
| 220 | 235 | } |
|---|
| .. | .. |
|---|
| 230 | 245 | static void __cleanup_empty_realms(struct ceph_mds_client *mdsc) |
|---|
| 231 | 246 | { |
|---|
| 232 | 247 | struct ceph_snap_realm *realm; |
|---|
| 248 | + |
|---|
| 249 | + lockdep_assert_held_write(&mdsc->snap_rwsem); |
|---|
| 233 | 250 | |
|---|
| 234 | 251 | spin_lock(&mdsc->snap_empty_lock); |
|---|
| 235 | 252 | while (!list_empty(&mdsc->snap_empty)) { |
|---|
| .. | .. |
|---|
| 263 | 280 | u64 parentino) |
|---|
| 264 | 281 | { |
|---|
| 265 | 282 | struct ceph_snap_realm *parent; |
|---|
| 283 | + |
|---|
| 284 | + lockdep_assert_held_write(&mdsc->snap_rwsem); |
|---|
| 266 | 285 | |
|---|
| 267 | 286 | if (realm->parent_ino == parentino) |
|---|
| 268 | 287 | return 0; |
|---|
| .. | .. |
|---|
| 468 | 487 | pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode); |
|---|
| 469 | 488 | return; |
|---|
| 470 | 489 | } |
|---|
| 490 | + capsnap->cap_flush.is_capsnap = true; |
|---|
| 491 | + INIT_LIST_HEAD(&capsnap->cap_flush.i_list); |
|---|
| 492 | + INIT_LIST_HEAD(&capsnap->cap_flush.g_list); |
|---|
| 471 | 493 | |
|---|
| 472 | 494 | spin_lock(&ci->i_ceph_lock); |
|---|
| 473 | 495 | used = __ceph_caps_used(ci); |
|---|
| .. | .. |
|---|
| 597 | 619 | struct ceph_cap_snap *capsnap) |
|---|
| 598 | 620 | { |
|---|
| 599 | 621 | struct inode *inode = &ci->vfs_inode; |
|---|
| 600 | | - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
|---|
| 622 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); |
|---|
| 601 | 623 | |
|---|
| 602 | 624 | BUG_ON(capsnap->writing); |
|---|
| 603 | 625 | capsnap->size = inode->i_size; |
|---|
| 604 | 626 | capsnap->mtime = inode->i_mtime; |
|---|
| 605 | 627 | capsnap->atime = inode->i_atime; |
|---|
| 606 | 628 | capsnap->ctime = inode->i_ctime; |
|---|
| 629 | + capsnap->btime = ci->i_btime; |
|---|
| 630 | + capsnap->change_attr = inode_peek_iversion_raw(inode); |
|---|
| 607 | 631 | capsnap->time_warp_seq = ci->i_time_warp_seq; |
|---|
| 608 | 632 | capsnap->truncate_size = ci->i_truncate_size; |
|---|
| 609 | 633 | capsnap->truncate_seq = ci->i_truncate_seq; |
|---|
| .. | .. |
|---|
| 623 | 647 | capsnap->size); |
|---|
| 624 | 648 | |
|---|
| 625 | 649 | spin_lock(&mdsc->snap_flush_lock); |
|---|
| 626 | | - if (list_empty(&ci->i_snap_flush_item)) |
|---|
| 650 | + if (list_empty(&ci->i_snap_flush_item)) { |
|---|
| 651 | + ihold(inode); |
|---|
| 627 | 652 | list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); |
|---|
| 653 | + } |
|---|
| 628 | 654 | spin_unlock(&mdsc->snap_flush_lock); |
|---|
| 629 | 655 | return 1; /* caller may want to ceph_flush_snaps */ |
|---|
| 630 | 656 | } |
|---|
| .. | .. |
|---|
| 646 | 672 | if (!inode) |
|---|
| 647 | 673 | continue; |
|---|
| 648 | 674 | spin_unlock(&realm->inodes_with_caps_lock); |
|---|
| 649 | | - iput(lastinode); |
|---|
| 675 | + /* avoid calling iput_final() while holding |
|---|
| 676 | + * mdsc->snap_rwsem or in mds dispatch threads */ |
|---|
| 677 | + ceph_async_iput(lastinode); |
|---|
| 650 | 678 | lastinode = inode; |
|---|
| 651 | 679 | ceph_queue_cap_snap(ci); |
|---|
| 652 | 680 | spin_lock(&realm->inodes_with_caps_lock); |
|---|
| 653 | 681 | } |
|---|
| 654 | 682 | spin_unlock(&realm->inodes_with_caps_lock); |
|---|
| 655 | | - iput(lastinode); |
|---|
| 683 | + ceph_async_iput(lastinode); |
|---|
| 656 | 684 | |
|---|
| 657 | 685 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); |
|---|
| 658 | 686 | } |
|---|
| .. | .. |
|---|
| 671 | 699 | struct ceph_mds_snap_realm *ri; /* encoded */ |
|---|
| 672 | 700 | __le64 *snaps; /* encoded */ |
|---|
| 673 | 701 | __le64 *prior_parent_snaps; /* encoded */ |
|---|
| 674 | | - struct ceph_snap_realm *realm = NULL; |
|---|
| 702 | + struct ceph_snap_realm *realm; |
|---|
| 675 | 703 | struct ceph_snap_realm *first_realm = NULL; |
|---|
| 676 | | - int invalidate = 0; |
|---|
| 704 | + struct ceph_snap_realm *realm_to_rebuild = NULL; |
|---|
| 705 | + int rebuild_snapcs; |
|---|
| 677 | 706 | int err = -ENOMEM; |
|---|
| 678 | 707 | LIST_HEAD(dirty_realms); |
|---|
| 679 | 708 | |
|---|
| 709 | + lockdep_assert_held_write(&mdsc->snap_rwsem); |
|---|
| 710 | + |
|---|
| 680 | 711 | dout("update_snap_trace deletion=%d\n", deletion); |
|---|
| 681 | 712 | more: |
|---|
| 713 | + realm = NULL; |
|---|
| 714 | + rebuild_snapcs = 0; |
|---|
| 682 | 715 | ceph_decode_need(&p, e, sizeof(*ri), bad); |
|---|
| 683 | 716 | ri = p; |
|---|
| 684 | 717 | p += sizeof(*ri); |
|---|
| .. | .. |
|---|
| 702 | 735 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); |
|---|
| 703 | 736 | if (err < 0) |
|---|
| 704 | 737 | goto fail; |
|---|
| 705 | | - invalidate += err; |
|---|
| 738 | + rebuild_snapcs += err; |
|---|
| 706 | 739 | |
|---|
| 707 | 740 | if (le64_to_cpu(ri->seq) > realm->seq) { |
|---|
| 708 | 741 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", |
|---|
| .. | .. |
|---|
| 727 | 760 | if (realm->seq > mdsc->last_snap_seq) |
|---|
| 728 | 761 | mdsc->last_snap_seq = realm->seq; |
|---|
| 729 | 762 | |
|---|
| 730 | | - invalidate = 1; |
|---|
| 763 | + rebuild_snapcs = 1; |
|---|
| 731 | 764 | } else if (!realm->cached_context) { |
|---|
| 732 | 765 | dout("update_snap_trace %llx %p seq %lld new\n", |
|---|
| 733 | 766 | realm->ino, realm, realm->seq); |
|---|
| 734 | | - invalidate = 1; |
|---|
| 767 | + rebuild_snapcs = 1; |
|---|
| 735 | 768 | } else { |
|---|
| 736 | 769 | dout("update_snap_trace %llx %p seq %lld unchanged\n", |
|---|
| 737 | 770 | realm->ino, realm, realm->seq); |
|---|
| 738 | 771 | } |
|---|
| 739 | 772 | |
|---|
| 740 | | - dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
|---|
| 741 | | - realm, invalidate, p, e); |
|---|
| 773 | + dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, |
|---|
| 774 | + realm, rebuild_snapcs, p, e); |
|---|
| 742 | 775 | |
|---|
| 743 | | - /* invalidate when we reach the _end_ (root) of the trace */ |
|---|
| 744 | | - if (invalidate && p >= e) |
|---|
| 745 | | - rebuild_snap_realms(realm, &dirty_realms); |
|---|
| 776 | + /* |
|---|
| 777 | + * this will always track the highest parent realm from which |
|---|
| 778 | + * we need to rebuild the snapshot contexts _downward_ in |
|---|
| 779 | + * hierarchy. |
|---|
| 780 | + */ |
|---|
| 781 | + if (rebuild_snapcs) |
|---|
| 782 | + realm_to_rebuild = realm; |
|---|
| 783 | + |
|---|
| 784 | + /* rebuild_snapcs when we reach the _end_ (root) of the trace */ |
|---|
| 785 | + if (realm_to_rebuild && p >= e) |
|---|
| 786 | + rebuild_snap_realms(realm_to_rebuild, &dirty_realms); |
|---|
| 746 | 787 | |
|---|
| 747 | 788 | if (!first_realm) |
|---|
| 748 | 789 | first_realm = realm; |
|---|
| .. | .. |
|---|
| 804 | 845 | ihold(inode); |
|---|
| 805 | 846 | spin_unlock(&mdsc->snap_flush_lock); |
|---|
| 806 | 847 | ceph_flush_snaps(ci, &session); |
|---|
| 807 | | - iput(inode); |
|---|
| 848 | + /* avoid calling iput_final() while holding |
|---|
| 849 | + * session->s_mutex or in mds dispatch threads */ |
|---|
| 850 | + ceph_async_iput(inode); |
|---|
| 808 | 851 | spin_lock(&mdsc->snap_flush_lock); |
|---|
| 809 | 852 | } |
|---|
| 810 | 853 | spin_unlock(&mdsc->snap_flush_lock); |
|---|
| .. | .. |
|---|
| 862 | 905 | ceph_snap_op_name(op), split, trace_len); |
|---|
| 863 | 906 | |
|---|
| 864 | 907 | mutex_lock(&session->s_mutex); |
|---|
| 865 | | - session->s_seq++; |
|---|
| 908 | + inc_session_sequence(session); |
|---|
| 866 | 909 | mutex_unlock(&session->s_mutex); |
|---|
| 867 | 910 | |
|---|
| 868 | 911 | down_write(&mdsc->snap_rwsem); |
|---|
| .. | .. |
|---|
| 948 | 991 | ceph_get_snap_realm(mdsc, realm); |
|---|
| 949 | 992 | ceph_put_snap_realm(mdsc, oldrealm); |
|---|
| 950 | 993 | |
|---|
| 951 | | - iput(inode); |
|---|
| 994 | + /* avoid calling iput_final() while holding |
|---|
| 995 | + * mdsc->snap_rwsem or in mds dispatch threads */ |
|---|
| 996 | + ceph_async_iput(inode); |
|---|
| 952 | 997 | continue; |
|---|
| 953 | 998 | |
|---|
| 954 | 999 | skip_inode: |
|---|
| 955 | 1000 | spin_unlock(&ci->i_ceph_lock); |
|---|
| 956 | | - iput(inode); |
|---|
| 1001 | + ceph_async_iput(inode); |
|---|
| 957 | 1002 | } |
|---|
| 958 | 1003 | |
|---|
| 959 | 1004 | /* we may have taken some of the old realm's children. */ |
|---|
| .. | .. |
|---|
| 965 | 1010 | continue; |
|---|
| 966 | 1011 | adjust_snap_realm_parent(mdsc, child, realm->ino); |
|---|
| 967 | 1012 | } |
|---|
| 1013 | + } else { |
|---|
| 1014 | + /* |
|---|
| 1015 | + * In the non-split case both 'num_split_inos' and |
|---|
| 1016 | + * 'num_split_realms' should be 0, making this a no-op. |
|---|
| 1017 | + * However the MDS happens to populate 'split_realms' list |
|---|
| 1018 | + * in one of the UPDATE op cases by mistake. |
|---|
| 1019 | + * |
|---|
| 1020 | + * Skip both lists just in case to ensure that 'p' is |
|---|
| 1021 | + * positioned at the start of realm info, as expected by |
|---|
| 1022 | + * ceph_update_snap_trace(). |
|---|
| 1023 | + */ |
|---|
| 1024 | + p += sizeof(u64) * num_split_inos; |
|---|
| 1025 | + p += sizeof(u64) * num_split_realms; |
|---|
| 968 | 1026 | } |
|---|
| 969 | 1027 | |
|---|
| 970 | 1028 | /* |
|---|
| .. | .. |
|---|
| 993 | 1051 | up_write(&mdsc->snap_rwsem); |
|---|
| 994 | 1052 | return; |
|---|
| 995 | 1053 | } |
|---|
| 1054 | + |
|---|
| 1055 | +struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, |
|---|
| 1056 | + u64 snap) |
|---|
| 1057 | +{ |
|---|
| 1058 | + struct ceph_snapid_map *sm, *exist; |
|---|
| 1059 | + struct rb_node **p, *parent; |
|---|
| 1060 | + int ret; |
|---|
| 1061 | + |
|---|
| 1062 | + exist = NULL; |
|---|
| 1063 | + spin_lock(&mdsc->snapid_map_lock); |
|---|
| 1064 | + p = &mdsc->snapid_map_tree.rb_node; |
|---|
| 1065 | + while (*p) { |
|---|
| 1066 | + exist = rb_entry(*p, struct ceph_snapid_map, node); |
|---|
| 1067 | + if (snap > exist->snap) { |
|---|
| 1068 | + p = &(*p)->rb_left; |
|---|
| 1069 | + } else if (snap < exist->snap) { |
|---|
| 1070 | + p = &(*p)->rb_right; |
|---|
| 1071 | + } else { |
|---|
| 1072 | + if (atomic_inc_return(&exist->ref) == 1) |
|---|
| 1073 | + list_del_init(&exist->lru); |
|---|
| 1074 | + break; |
|---|
| 1075 | + } |
|---|
| 1076 | + exist = NULL; |
|---|
| 1077 | + } |
|---|
| 1078 | + spin_unlock(&mdsc->snapid_map_lock); |
|---|
| 1079 | + if (exist) { |
|---|
| 1080 | + dout("found snapid map %llx -> %x\n", exist->snap, exist->dev); |
|---|
| 1081 | + return exist; |
|---|
| 1082 | + } |
|---|
| 1083 | + |
|---|
| 1084 | + sm = kmalloc(sizeof(*sm), GFP_NOFS); |
|---|
| 1085 | + if (!sm) |
|---|
| 1086 | + return NULL; |
|---|
| 1087 | + |
|---|
| 1088 | + ret = get_anon_bdev(&sm->dev); |
|---|
| 1089 | + if (ret < 0) { |
|---|
| 1090 | + kfree(sm); |
|---|
| 1091 | + return NULL; |
|---|
| 1092 | + } |
|---|
| 1093 | + |
|---|
| 1094 | + INIT_LIST_HEAD(&sm->lru); |
|---|
| 1095 | + atomic_set(&sm->ref, 1); |
|---|
| 1096 | + sm->snap = snap; |
|---|
| 1097 | + |
|---|
| 1098 | + exist = NULL; |
|---|
| 1099 | + parent = NULL; |
|---|
| 1100 | + p = &mdsc->snapid_map_tree.rb_node; |
|---|
| 1101 | + spin_lock(&mdsc->snapid_map_lock); |
|---|
| 1102 | + while (*p) { |
|---|
| 1103 | + parent = *p; |
|---|
| 1104 | + exist = rb_entry(*p, struct ceph_snapid_map, node); |
|---|
| 1105 | + if (snap > exist->snap) |
|---|
| 1106 | + p = &(*p)->rb_left; |
|---|
| 1107 | + else if (snap < exist->snap) |
|---|
| 1108 | + p = &(*p)->rb_right; |
|---|
| 1109 | + else |
|---|
| 1110 | + break; |
|---|
| 1111 | + exist = NULL; |
|---|
| 1112 | + } |
|---|
| 1113 | + if (exist) { |
|---|
| 1114 | + if (atomic_inc_return(&exist->ref) == 1) |
|---|
| 1115 | + list_del_init(&exist->lru); |
|---|
| 1116 | + } else { |
|---|
| 1117 | + rb_link_node(&sm->node, parent, p); |
|---|
| 1118 | + rb_insert_color(&sm->node, &mdsc->snapid_map_tree); |
|---|
| 1119 | + } |
|---|
| 1120 | + spin_unlock(&mdsc->snapid_map_lock); |
|---|
| 1121 | + if (exist) { |
|---|
| 1122 | + free_anon_bdev(sm->dev); |
|---|
| 1123 | + kfree(sm); |
|---|
| 1124 | + dout("found snapid map %llx -> %x\n", exist->snap, exist->dev); |
|---|
| 1125 | + return exist; |
|---|
| 1126 | + } |
|---|
| 1127 | + |
|---|
| 1128 | + dout("create snapid map %llx -> %x\n", sm->snap, sm->dev); |
|---|
| 1129 | + return sm; |
|---|
| 1130 | +} |
|---|
| 1131 | + |
|---|
| 1132 | +void ceph_put_snapid_map(struct ceph_mds_client* mdsc, |
|---|
| 1133 | + struct ceph_snapid_map *sm) |
|---|
| 1134 | +{ |
|---|
| 1135 | + if (!sm) |
|---|
| 1136 | + return; |
|---|
| 1137 | + if (atomic_dec_and_lock(&sm->ref, &mdsc->snapid_map_lock)) { |
|---|
| 1138 | + if (!RB_EMPTY_NODE(&sm->node)) { |
|---|
| 1139 | + sm->last_used = jiffies; |
|---|
| 1140 | + list_add_tail(&sm->lru, &mdsc->snapid_map_lru); |
|---|
| 1141 | + spin_unlock(&mdsc->snapid_map_lock); |
|---|
| 1142 | + } else { |
|---|
| 1143 | + /* already cleaned up by |
|---|
| 1144 | + * ceph_cleanup_snapid_map() */ |
|---|
| 1145 | + spin_unlock(&mdsc->snapid_map_lock); |
|---|
| 1146 | + kfree(sm); |
|---|
| 1147 | + } |
|---|
| 1148 | + } |
|---|
| 1149 | +} |
|---|
| 1150 | + |
|---|
| 1151 | +void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) |
|---|
| 1152 | +{ |
|---|
| 1153 | + struct ceph_snapid_map *sm; |
|---|
| 1154 | + unsigned long now; |
|---|
| 1155 | + LIST_HEAD(to_free); |
|---|
| 1156 | + |
|---|
| 1157 | + spin_lock(&mdsc->snapid_map_lock); |
|---|
| 1158 | + now = jiffies; |
|---|
| 1159 | + |
|---|
| 1160 | + while (!list_empty(&mdsc->snapid_map_lru)) { |
|---|
| 1161 | + sm = list_first_entry(&mdsc->snapid_map_lru, |
|---|
| 1162 | + struct ceph_snapid_map, lru); |
|---|
| 1163 | + if (time_after(sm->last_used + CEPH_SNAPID_MAP_TIMEOUT, now)) |
|---|
| 1164 | + break; |
|---|
| 1165 | + |
|---|
| 1166 | + rb_erase(&sm->node, &mdsc->snapid_map_tree); |
|---|
| 1167 | + list_move(&sm->lru, &to_free); |
|---|
| 1168 | + } |
|---|
| 1169 | + spin_unlock(&mdsc->snapid_map_lock); |
|---|
| 1170 | + |
|---|
| 1171 | + while (!list_empty(&to_free)) { |
|---|
| 1172 | + sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); |
|---|
| 1173 | + list_del(&sm->lru); |
|---|
| 1174 | + dout("trim snapid map %llx -> %x\n", sm->snap, sm->dev); |
|---|
| 1175 | + free_anon_bdev(sm->dev); |
|---|
| 1176 | + kfree(sm); |
|---|
| 1177 | + } |
|---|
| 1178 | +} |
|---|
| 1179 | + |
|---|
| 1180 | +void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) |
|---|
| 1181 | +{ |
|---|
| 1182 | + struct ceph_snapid_map *sm; |
|---|
| 1183 | + struct rb_node *p; |
|---|
| 1184 | + LIST_HEAD(to_free); |
|---|
| 1185 | + |
|---|
| 1186 | + spin_lock(&mdsc->snapid_map_lock); |
|---|
| 1187 | + while ((p = rb_first(&mdsc->snapid_map_tree))) { |
|---|
| 1188 | + sm = rb_entry(p, struct ceph_snapid_map, node); |
|---|
| 1189 | + rb_erase(p, &mdsc->snapid_map_tree); |
|---|
| 1190 | + RB_CLEAR_NODE(p); |
|---|
| 1191 | + list_move(&sm->lru, &to_free); |
|---|
| 1192 | + } |
|---|
| 1193 | + spin_unlock(&mdsc->snapid_map_lock); |
|---|
| 1194 | + |
|---|
| 1195 | + while (!list_empty(&to_free)) { |
|---|
| 1196 | + sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); |
|---|
| 1197 | + list_del(&sm->lru); |
|---|
| 1198 | + free_anon_bdev(sm->dev); |
|---|
| 1199 | + if (WARN_ON_ONCE(atomic_read(&sm->ref))) { |
|---|
| 1200 | + pr_err("snapid map %llx -> %x still in use\n", |
|---|
| 1201 | + sm->snap, sm->dev); |
|---|
| 1202 | + } |
|---|
| 1203 | + kfree(sm); |
|---|
| 1204 | + } |
|---|
| 1205 | +} |
|---|