.. | .. |
---|
3 | 3 | * quota.c - CephFS quota |
---|
4 | 4 | * |
---|
5 | 5 | * Copyright (C) 2017-2018 SUSE |
---|
6 | | - * |
---|
7 | | - * This program is free software; you can redistribute it and/or |
---|
8 | | - * modify it under the terms of the GNU General Public License |
---|
9 | | - * as published by the Free Software Foundation; either version 2 |
---|
10 | | - * of the License, or (at your option) any later version. |
---|
11 | | - * |
---|
12 | | - * This program is distributed in the hope that it will be useful, |
---|
13 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | | - * GNU General Public License for more details. |
---|
16 | | - * |
---|
17 | | - * You should have received a copy of the GNU General Public License |
---|
18 | | - * along with this program; if not, see <http://www.gnu.org/licenses/>. |
---|
19 | 6 | */ |
---|
20 | 7 | |
---|
21 | 8 | #include <linux/statfs.h> |
---|
.. | .. |
---|
25 | 12 | |
---|
26 | 13 | void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) |
---|
27 | 14 | { |
---|
28 | | - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
---|
| 15 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); |
---|
29 | 16 | if (inc) |
---|
30 | 17 | atomic64_inc(&mdsc->quotarealms_count); |
---|
31 | 18 | else |
---|
.. | .. |
---|
34 | 21 | |
---|
35 | 22 | static inline bool ceph_has_realms_with_quotas(struct inode *inode) |
---|
36 | 23 | { |
---|
37 | | - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
---|
38 | | - return atomic64_read(&mdsc->quotarealms_count) > 0; |
---|
| 24 | + struct super_block *sb = inode->i_sb; |
---|
| 25 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); |
---|
| 26 | + struct inode *root = d_inode(sb->s_root); |
---|
| 27 | + |
---|
| 28 | + if (atomic64_read(&mdsc->quotarealms_count) > 0) |
---|
| 29 | + return true; |
---|
| 30 | + /* if root is the real CephFS root, we don't have quota realms */ |
---|
| 31 | + if (root && ceph_ino(root) == CEPH_INO_ROOT) |
---|
| 32 | + return false; |
---|
| 33 | + /* otherwise, we can't know for sure */ |
---|
| 34 | + return true; |
---|
39 | 35 | } |
---|
40 | 36 | |
---|
41 | 37 | void ceph_handle_quota(struct ceph_mds_client *mdsc, |
---|
.. | .. |
---|
57 | 53 | |
---|
58 | 54 | /* increment msg sequence number */ |
---|
59 | 55 | mutex_lock(&session->s_mutex); |
---|
60 | | - session->s_seq++; |
---|
| 56 | + inc_session_sequence(session); |
---|
61 | 57 | mutex_unlock(&session->s_mutex); |
---|
62 | 58 | |
---|
63 | 59 | /* lookup inode */ |
---|
.. | .. |
---|
78 | 74 | le64_to_cpu(h->max_files)); |
---|
79 | 75 | spin_unlock(&ci->i_ceph_lock); |
---|
80 | 76 | |
---|
81 | | - iput(inode); |
---|
| 77 | + /* avoid calling iput_final() in dispatch thread */ |
---|
| 78 | + ceph_async_iput(inode); |
---|
| 79 | +} |
---|
| 80 | + |
---|
| 81 | +static struct ceph_quotarealm_inode * |
---|
| 82 | +find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) |
---|
| 83 | +{ |
---|
| 84 | + struct ceph_quotarealm_inode *qri = NULL; |
---|
| 85 | + struct rb_node **node, *parent = NULL; |
---|
| 86 | + |
---|
| 87 | + mutex_lock(&mdsc->quotarealms_inodes_mutex); |
---|
| 88 | + node = &(mdsc->quotarealms_inodes.rb_node); |
---|
| 89 | + while (*node) { |
---|
| 90 | + parent = *node; |
---|
| 91 | + qri = container_of(*node, struct ceph_quotarealm_inode, node); |
---|
| 92 | + |
---|
| 93 | + if (ino < qri->ino) |
---|
| 94 | + node = &((*node)->rb_left); |
---|
| 95 | + else if (ino > qri->ino) |
---|
| 96 | + node = &((*node)->rb_right); |
---|
| 97 | + else |
---|
| 98 | + break; |
---|
| 99 | + } |
---|
| 100 | + if (!qri || (qri->ino != ino)) { |
---|
| 101 | + /* Not found, create a new one and insert it */ |
---|
| 102 | + qri = kmalloc(sizeof(*qri), GFP_KERNEL); |
---|
| 103 | + if (qri) { |
---|
| 104 | + qri->ino = ino; |
---|
| 105 | + qri->inode = NULL; |
---|
| 106 | + qri->timeout = 0; |
---|
| 107 | + mutex_init(&qri->mutex); |
---|
| 108 | + rb_link_node(&qri->node, parent, node); |
---|
| 109 | + rb_insert_color(&qri->node, &mdsc->quotarealms_inodes); |
---|
| 110 | + } else |
---|
| 111 | + pr_warn("Failed to alloc quotarealms_inode\n"); |
---|
| 112 | + } |
---|
| 113 | + mutex_unlock(&mdsc->quotarealms_inodes_mutex); |
---|
| 114 | + |
---|
| 115 | + return qri; |
---|
| 116 | +} |
---|
| 117 | + |
---|
| 118 | +/* |
---|
| 119 | + * This function will try to lookup a realm inode which isn't visible in the |
---|
| 120 | + * filesystem mountpoint. A list of this kind of inode (not visible) is |
---|
| 121 | + * maintained in the mdsc and freed only when the filesystem is umounted. |
---|
| 122 | + * |
---|
| 123 | + * Note that these inodes are kept in this list even if the lookup fails, which |
---|
| 124 | + * prevents useless lookup requests. |
---|
| 125 | + */ |
---|
| 126 | +static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, |
---|
| 127 | + struct super_block *sb, |
---|
| 128 | + struct ceph_snap_realm *realm) |
---|
| 129 | +{ |
---|
| 130 | + struct ceph_quotarealm_inode *qri; |
---|
| 131 | + struct inode *in; |
---|
| 132 | + |
---|
| 133 | + qri = find_quotarealm_inode(mdsc, realm->ino); |
---|
| 134 | + if (!qri) |
---|
| 135 | + return NULL; |
---|
| 136 | + |
---|
| 137 | + mutex_lock(&qri->mutex); |
---|
| 138 | + if (qri->inode && ceph_is_any_caps(qri->inode)) { |
---|
| 139 | + /* A request has already returned the inode */ |
---|
| 140 | + mutex_unlock(&qri->mutex); |
---|
| 141 | + return qri->inode; |
---|
| 142 | + } |
---|
| 143 | + /* Check if this inode lookup has failed recently */ |
---|
| 144 | + if (qri->timeout && |
---|
| 145 | + time_before_eq(jiffies, qri->timeout)) { |
---|
| 146 | + mutex_unlock(&qri->mutex); |
---|
| 147 | + return NULL; |
---|
| 148 | + } |
---|
| 149 | + if (qri->inode) { |
---|
| 150 | + /* get caps */ |
---|
| 151 | + int ret = __ceph_do_getattr(qri->inode, NULL, |
---|
| 152 | + CEPH_STAT_CAP_INODE, true); |
---|
| 153 | + if (ret >= 0) |
---|
| 154 | + in = qri->inode; |
---|
| 155 | + else |
---|
| 156 | + in = ERR_PTR(ret); |
---|
| 157 | + } else { |
---|
| 158 | + in = ceph_lookup_inode(sb, realm->ino); |
---|
| 159 | + } |
---|
| 160 | + |
---|
| 161 | + if (IS_ERR(in)) { |
---|
| 162 | + dout("Can't lookup inode %llx (err: %ld)\n", |
---|
| 163 | + realm->ino, PTR_ERR(in)); |
---|
| 164 | + qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ |
---|
| 165 | + } else { |
---|
| 166 | + qri->timeout = 0; |
---|
| 167 | + qri->inode = in; |
---|
| 168 | + } |
---|
| 169 | + mutex_unlock(&qri->mutex); |
---|
| 170 | + |
---|
| 171 | + return in; |
---|
| 172 | +} |
---|
| 173 | + |
---|
| 174 | +void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) |
---|
| 175 | +{ |
---|
| 176 | + struct ceph_quotarealm_inode *qri; |
---|
| 177 | + struct rb_node *node; |
---|
| 178 | + |
---|
| 179 | + /* |
---|
| 180 | + * It should now be safe to clean the quotarealms_inodes tree without holding |
---|
| 181 | + * mdsc->quotarealms_inodes_mutex, but the mutex is taken anyway. |
---|
| 182 | + */ |
---|
| 183 | + mutex_lock(&mdsc->quotarealms_inodes_mutex); |
---|
| 184 | + while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) { |
---|
| 185 | + node = rb_first(&mdsc->quotarealms_inodes); |
---|
| 186 | + qri = rb_entry(node, struct ceph_quotarealm_inode, node); |
---|
| 187 | + rb_erase(node, &mdsc->quotarealms_inodes); |
---|
| 188 | + iput(qri->inode); |
---|
| 189 | + kfree(qri); |
---|
| 190 | + } |
---|
| 191 | + mutex_unlock(&mdsc->quotarealms_inodes_mutex); |
---|
82 | 192 | } |
---|
83 | 193 | |
---|
84 | 194 | /* |
---|
.. | .. |
---|
89 | 199 | * |
---|
90 | 200 | * Note that the caller is responsible for calling ceph_put_snap_realm() on the |
---|
91 | 201 | * returned realm. |
---|
| 202 | + * |
---|
| 203 | + * Callers of this function need to hold mdsc->snap_rwsem. However, if there's |
---|
| 204 | + * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence |
---|
| 205 | + * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false' |
---|
| 206 | + * this function will return -EAGAIN; otherwise, the snaprealms walk-through |
---|
| 207 | + * will be restarted. |
---|
92 | 208 | */ |
---|
93 | 209 | static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, |
---|
94 | | - struct inode *inode) |
---|
| 210 | + struct inode *inode, bool retry) |
---|
95 | 211 | { |
---|
96 | 212 | struct ceph_inode_info *ci = NULL; |
---|
97 | 213 | struct ceph_snap_realm *realm, *next; |
---|
.. | .. |
---|
101 | 217 | if (ceph_snap(inode) != CEPH_NOSNAP) |
---|
102 | 218 | return NULL; |
---|
103 | 219 | |
---|
| 220 | +restart: |
---|
104 | 221 | realm = ceph_inode(inode)->i_snap_realm; |
---|
105 | 222 | if (realm) |
---|
106 | 223 | ceph_get_snap_realm(mdsc, realm); |
---|
.. | .. |
---|
108 | 225 | pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " |
---|
109 | 226 | "null i_snap_realm\n", ceph_vinop(inode)); |
---|
110 | 227 | while (realm) { |
---|
| 228 | + bool has_inode; |
---|
| 229 | + |
---|
111 | 230 | spin_lock(&realm->inodes_with_caps_lock); |
---|
112 | | - in = realm->inode ? igrab(realm->inode) : NULL; |
---|
| 231 | + has_inode = realm->inode; |
---|
| 232 | + in = has_inode ? igrab(realm->inode) : NULL; |
---|
113 | 233 | spin_unlock(&realm->inodes_with_caps_lock); |
---|
114 | | - if (!in) |
---|
| 234 | + if (has_inode && !in) |
---|
115 | 235 | break; |
---|
| 236 | + if (!in) { |
---|
| 237 | + up_read(&mdsc->snap_rwsem); |
---|
| 238 | + in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm); |
---|
| 239 | + down_read(&mdsc->snap_rwsem); |
---|
| 240 | + if (IS_ERR_OR_NULL(in)) |
---|
| 241 | + break; |
---|
| 242 | + ceph_put_snap_realm(mdsc, realm); |
---|
| 243 | + if (!retry) |
---|
| 244 | + return ERR_PTR(-EAGAIN); |
---|
| 245 | + goto restart; |
---|
| 246 | + } |
---|
116 | 247 | |
---|
117 | 248 | ci = ceph_inode(in); |
---|
118 | 249 | has_quota = __ceph_has_any_quota(ci); |
---|
119 | | - iput(in); |
---|
| 250 | + /* avoid calling iput_final() while holding mdsc->snap_rwsem */ |
---|
| 251 | + ceph_async_iput(in); |
---|
120 | 252 | |
---|
121 | 253 | next = realm->parent; |
---|
122 | 254 | if (has_quota || !next) |
---|
.. | .. |
---|
132 | 264 | return NULL; |
---|
133 | 265 | } |
---|
134 | 266 | |
---|
135 | | -bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) |
---|
| 267 | +static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) |
---|
136 | 268 | { |
---|
137 | | - struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; |
---|
| 269 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); |
---|
138 | 270 | struct ceph_snap_realm *old_realm, *new_realm; |
---|
139 | 271 | bool is_same; |
---|
140 | 272 | |
---|
| 273 | +restart: |
---|
| 274 | + /* |
---|
| 275 | + * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem. |
---|
| 276 | + * However, get_quota_realm may drop it temporarily. By setting the |
---|
| 277 | + * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was |
---|
| 278 | + * dropped and we can then restart the whole operation. |
---|
| 279 | + */ |
---|
141 | 280 | down_read(&mdsc->snap_rwsem); |
---|
142 | | - old_realm = get_quota_realm(mdsc, old); |
---|
143 | | - new_realm = get_quota_realm(mdsc, new); |
---|
| 281 | + old_realm = get_quota_realm(mdsc, old, true); |
---|
| 282 | + new_realm = get_quota_realm(mdsc, new, false); |
---|
| 283 | + if (PTR_ERR(new_realm) == -EAGAIN) { |
---|
| 284 | + up_read(&mdsc->snap_rwsem); |
---|
| 285 | + if (old_realm) |
---|
| 286 | + ceph_put_snap_realm(mdsc, old_realm); |
---|
| 287 | + goto restart; |
---|
| 288 | + } |
---|
144 | 289 | is_same = (old_realm == new_realm); |
---|
145 | 290 | up_read(&mdsc->snap_rwsem); |
---|
146 | 291 | |
---|
.. | .. |
---|
168 | 313 | static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, |
---|
169 | 314 | loff_t delta) |
---|
170 | 315 | { |
---|
171 | | - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
---|
| 316 | + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); |
---|
172 | 317 | struct ceph_inode_info *ci; |
---|
173 | 318 | struct ceph_snap_realm *realm, *next; |
---|
174 | 319 | struct inode *in; |
---|
.. | .. |
---|
179 | 324 | return false; |
---|
180 | 325 | |
---|
181 | 326 | down_read(&mdsc->snap_rwsem); |
---|
| 327 | +restart: |
---|
182 | 328 | realm = ceph_inode(inode)->i_snap_realm; |
---|
183 | 329 | if (realm) |
---|
184 | 330 | ceph_get_snap_realm(mdsc, realm); |
---|
.. | .. |
---|
186 | 332 | pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " |
---|
187 | 333 | "null i_snap_realm\n", ceph_vinop(inode)); |
---|
188 | 334 | while (realm) { |
---|
189 | | - spin_lock(&realm->inodes_with_caps_lock); |
---|
190 | | - in = realm->inode ? igrab(realm->inode) : NULL; |
---|
191 | | - spin_unlock(&realm->inodes_with_caps_lock); |
---|
192 | | - if (!in) |
---|
193 | | - break; |
---|
| 335 | + bool has_inode; |
---|
194 | 336 | |
---|
| 337 | + spin_lock(&realm->inodes_with_caps_lock); |
---|
| 338 | + has_inode = realm->inode; |
---|
| 339 | + in = has_inode ? igrab(realm->inode) : NULL; |
---|
| 340 | + spin_unlock(&realm->inodes_with_caps_lock); |
---|
| 341 | + if (has_inode && !in) |
---|
| 342 | + break; |
---|
| 343 | + if (!in) { |
---|
| 344 | + up_read(&mdsc->snap_rwsem); |
---|
| 345 | + in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm); |
---|
| 346 | + down_read(&mdsc->snap_rwsem); |
---|
| 347 | + if (IS_ERR_OR_NULL(in)) |
---|
| 348 | + break; |
---|
| 349 | + ceph_put_snap_realm(mdsc, realm); |
---|
| 350 | + goto restart; |
---|
| 351 | + } |
---|
195 | 352 | ci = ceph_inode(in); |
---|
196 | 353 | spin_lock(&ci->i_ceph_lock); |
---|
197 | 354 | if (op == QUOTA_CHECK_MAX_FILES_OP) { |
---|
.. | .. |
---|
204 | 361 | spin_unlock(&ci->i_ceph_lock); |
---|
205 | 362 | switch (op) { |
---|
206 | 363 | case QUOTA_CHECK_MAX_FILES_OP: |
---|
207 | | - exceeded = (max && (rvalue >= max)); |
---|
208 | | - break; |
---|
209 | 364 | case QUOTA_CHECK_MAX_BYTES_OP: |
---|
210 | 365 | exceeded = (max && (rvalue + delta > max)); |
---|
211 | 366 | break; |
---|
.. | .. |
---|
228 | 383 | pr_warn("Invalid quota check op (%d)\n", op); |
---|
229 | 384 | exceeded = true; /* Just break the loop */ |
---|
230 | 385 | } |
---|
231 | | - iput(in); |
---|
| 386 | + /* avoid calling iput_final() while holding mdsc->snap_rwsem */ |
---|
| 387 | + ceph_async_iput(in); |
---|
232 | 388 | |
---|
233 | 389 | next = realm->parent; |
---|
234 | 390 | if (exceeded || !next) |
---|
.. | .. |
---|
259 | 415 | |
---|
260 | 416 | WARN_ON(!S_ISDIR(inode->i_mode)); |
---|
261 | 417 | |
---|
262 | | - return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0); |
---|
| 418 | + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1); |
---|
263 | 419 | } |
---|
264 | 420 | |
---|
265 | 421 | /* |
---|
.. | .. |
---|
327 | 483 | bool is_updated = false; |
---|
328 | 484 | |
---|
329 | 485 | down_read(&mdsc->snap_rwsem); |
---|
330 | | - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); |
---|
| 486 | + realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true); |
---|
331 | 487 | up_read(&mdsc->snap_rwsem); |
---|
332 | 488 | if (!realm) |
---|
333 | 489 | return false; |
---|
.. | .. |
---|
360 | 516 | return is_updated; |
---|
361 | 517 | } |
---|
362 | 518 | |
---|
| 519 | +/* |
---|
| 520 | + * ceph_quota_check_rename - check if a rename can be executed |
---|
| 521 | + * @mdsc: MDS client instance |
---|
| 522 | + * @old: inode to be copied |
---|
| 523 | + * @new: destination inode (directory) |
---|
| 524 | + * |
---|
| 525 | + * This function verifies if a rename (e.g. moving a file or directory) can be |
---|
| 526 | + * executed. It forces an rstat update in the @new target directory (and in the |
---|
| 527 | + * source @old as well, if it's a directory). The actual check is done both for |
---|
| 528 | + * max_files and max_bytes. |
---|
| 529 | + * |
---|
| 530 | + * This function returns 0 if it's OK to do the rename, or, if quotas are |
---|
| 531 | + * exceeded, -EXDEV (if @old is a directory) or -EDQUOT. |
---|
| 532 | + */ |
---|
| 533 | +int ceph_quota_check_rename(struct ceph_mds_client *mdsc, |
---|
| 534 | + struct inode *old, struct inode *new) |
---|
| 535 | +{ |
---|
| 536 | + struct ceph_inode_info *ci_old = ceph_inode(old); |
---|
| 537 | + int ret = 0; |
---|
| 538 | + |
---|
| 539 | + if (ceph_quota_is_same_realm(old, new)) |
---|
| 540 | + return 0; |
---|
| 541 | + |
---|
| 542 | + /* |
---|
| 543 | + * Get the latest rstat for target directory (and for source, if a |
---|
| 544 | + * directory) |
---|
| 545 | + */ |
---|
| 546 | + ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false); |
---|
| 547 | + if (ret) |
---|
| 548 | + return ret; |
---|
| 549 | + |
---|
| 550 | + if (S_ISDIR(old->i_mode)) { |
---|
| 551 | + ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false); |
---|
| 552 | + if (ret) |
---|
| 553 | + return ret; |
---|
| 554 | + ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP, |
---|
| 555 | + ci_old->i_rbytes); |
---|
| 556 | + if (!ret) |
---|
| 557 | + ret = check_quota_exceeded(new, |
---|
| 558 | + QUOTA_CHECK_MAX_FILES_OP, |
---|
| 559 | + ci_old->i_rfiles + |
---|
| 560 | + ci_old->i_rsubdirs); |
---|
| 561 | + if (ret) |
---|
| 562 | + ret = -EXDEV; |
---|
| 563 | + } else { |
---|
| 564 | + ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP, |
---|
| 565 | + i_size_read(old)); |
---|
| 566 | + if (!ret) |
---|
| 567 | + ret = check_quota_exceeded(new, |
---|
| 568 | + QUOTA_CHECK_MAX_FILES_OP, 1); |
---|
| 569 | + if (ret) |
---|
| 570 | + ret = -EDQUOT; |
---|
| 571 | + } |
---|
| 572 | + |
---|
| 573 | + return ret; |
---|
| 574 | +} |
---|