hc
2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/fs/ceph/quota.c
....@@ -3,19 +3,6 @@
33 * quota.c - CephFS quota
44 *
55 * Copyright (C) 2017-2018 SUSE
6
- *
7
- * This program is free software; you can redistribute it and/or
8
- * modify it under the terms of the GNU General Public License
9
- * as published by the Free Software Foundation; either version 2
10
- * of the License, or (at your option) any later version.
11
- *
12
- * This program is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- * GNU General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU General Public License
18
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
196 */
207
218 #include <linux/statfs.h>
....@@ -25,7 +12,7 @@
2512
2613 void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
2714 {
28
- struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
15
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
2916 if (inc)
3017 atomic64_inc(&mdsc->quotarealms_count);
3118 else
....@@ -34,8 +21,17 @@
3421
3522 static inline bool ceph_has_realms_with_quotas(struct inode *inode)
3623 {
37
- struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
38
- return atomic64_read(&mdsc->quotarealms_count) > 0;
24
+ struct super_block *sb = inode->i_sb;
25
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
26
+ struct inode *root = d_inode(sb->s_root);
27
+
28
+ if (atomic64_read(&mdsc->quotarealms_count) > 0)
29
+ return true;
30
+ /* if root is the real CephFS root, we don't have quota realms */
31
+ if (root && ceph_ino(root) == CEPH_INO_ROOT)
32
+ return false;
33
+ /* otherwise, we can't know for sure */
34
+ return true;
3935 }
4036
4137 void ceph_handle_quota(struct ceph_mds_client *mdsc,
....@@ -57,7 +53,7 @@
5753
5854 /* increment msg sequence number */
5955 mutex_lock(&session->s_mutex);
60
- session->s_seq++;
56
+ inc_session_sequence(session);
6157 mutex_unlock(&session->s_mutex);
6258
6359 /* lookup inode */
....@@ -78,7 +74,121 @@
7874 le64_to_cpu(h->max_files));
7975 spin_unlock(&ci->i_ceph_lock);
8076
81
- iput(inode);
77
+ /* avoid calling iput_final() in dispatch thread */
78
+ ceph_async_iput(inode);
79
+}
80
+
81
+static struct ceph_quotarealm_inode *
82
+find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
83
+{
84
+ struct ceph_quotarealm_inode *qri = NULL;
85
+ struct rb_node **node, *parent = NULL;
86
+
87
+ mutex_lock(&mdsc->quotarealms_inodes_mutex);
88
+ node = &(mdsc->quotarealms_inodes.rb_node);
89
+ while (*node) {
90
+ parent = *node;
91
+ qri = container_of(*node, struct ceph_quotarealm_inode, node);
92
+
93
+ if (ino < qri->ino)
94
+ node = &((*node)->rb_left);
95
+ else if (ino > qri->ino)
96
+ node = &((*node)->rb_right);
97
+ else
98
+ break;
99
+ }
100
+ if (!qri || (qri->ino != ino)) {
101
+ /* Not found, create a new one and insert it */
102
+ qri = kmalloc(sizeof(*qri), GFP_KERNEL);
103
+ if (qri) {
104
+ qri->ino = ino;
105
+ qri->inode = NULL;
106
+ qri->timeout = 0;
107
+ mutex_init(&qri->mutex);
108
+ rb_link_node(&qri->node, parent, node);
109
+ rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
110
+ } else
111
+ pr_warn("Failed to alloc quotarealms_inode\n");
112
+ }
113
+ mutex_unlock(&mdsc->quotarealms_inodes_mutex);
114
+
115
+ return qri;
116
+}
117
+
118
+/*
119
+ * This function will try to lookup a realm inode which isn't visible in the
120
+ * filesystem mountpoint. A list of these kind of inodes (not visible) is
121
+ * maintained in the mdsc and freed only when the filesystem is umounted.
122
+ *
123
+ * Note that these inodes are kept in this list even if the lookup fails, which
124
+ * allows to prevent useless lookup requests.
125
+ */
126
+static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
127
+ struct super_block *sb,
128
+ struct ceph_snap_realm *realm)
129
+{
130
+ struct ceph_quotarealm_inode *qri;
131
+ struct inode *in;
132
+
133
+ qri = find_quotarealm_inode(mdsc, realm->ino);
134
+ if (!qri)
135
+ return NULL;
136
+
137
+ mutex_lock(&qri->mutex);
138
+ if (qri->inode && ceph_is_any_caps(qri->inode)) {
139
+ /* A request has already returned the inode */
140
+ mutex_unlock(&qri->mutex);
141
+ return qri->inode;
142
+ }
143
+ /* Check if this inode lookup has failed recently */
144
+ if (qri->timeout &&
145
+ time_before_eq(jiffies, qri->timeout)) {
146
+ mutex_unlock(&qri->mutex);
147
+ return NULL;
148
+ }
149
+ if (qri->inode) {
150
+ /* get caps */
151
+ int ret = __ceph_do_getattr(qri->inode, NULL,
152
+ CEPH_STAT_CAP_INODE, true);
153
+ if (ret >= 0)
154
+ in = qri->inode;
155
+ else
156
+ in = ERR_PTR(ret);
157
+ } else {
158
+ in = ceph_lookup_inode(sb, realm->ino);
159
+ }
160
+
161
+ if (IS_ERR(in)) {
162
+ dout("Can't lookup inode %llx (err: %ld)\n",
163
+ realm->ino, PTR_ERR(in));
164
+ qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
165
+ } else {
166
+ qri->timeout = 0;
167
+ qri->inode = in;
168
+ }
169
+ mutex_unlock(&qri->mutex);
170
+
171
+ return in;
172
+}
173
+
174
+void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
175
+{
176
+ struct ceph_quotarealm_inode *qri;
177
+ struct rb_node *node;
178
+
179
+ /*
180
+ * It should now be safe to clean quotarealms_inode tree without holding
181
+ * mdsc->quotarealms_inodes_mutex...
182
+ */
183
+ mutex_lock(&mdsc->quotarealms_inodes_mutex);
184
+ while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
185
+ node = rb_first(&mdsc->quotarealms_inodes);
186
+ qri = rb_entry(node, struct ceph_quotarealm_inode, node);
187
+ rb_erase(node, &mdsc->quotarealms_inodes);
188
+ iput(qri->inode);
189
+ kfree(qri);
190
+ }
191
+ mutex_unlock(&mdsc->quotarealms_inodes_mutex);
82192 }
83193
84194 /*
....@@ -89,9 +199,15 @@
89199 *
90200 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
91201 * returned realm.
202
+ *
203
+ * Callers of this function need to hold mdsc->snap_rwsem. However, if there's
204
+ * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
205
+ * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
206
+ * this function will return -EAGAIN; otherwise, the snaprealms walk-through
207
+ * will be restarted.
92208 */
93209 static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
94
- struct inode *inode)
210
+ struct inode *inode, bool retry)
95211 {
96212 struct ceph_inode_info *ci = NULL;
97213 struct ceph_snap_realm *realm, *next;
....@@ -101,6 +217,7 @@
101217 if (ceph_snap(inode) != CEPH_NOSNAP)
102218 return NULL;
103219
220
+restart:
104221 realm = ceph_inode(inode)->i_snap_realm;
105222 if (realm)
106223 ceph_get_snap_realm(mdsc, realm);
....@@ -108,15 +225,30 @@
108225 pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
109226 "null i_snap_realm\n", ceph_vinop(inode));
110227 while (realm) {
228
+ bool has_inode;
229
+
111230 spin_lock(&realm->inodes_with_caps_lock);
112
- in = realm->inode ? igrab(realm->inode) : NULL;
231
+ has_inode = realm->inode;
232
+ in = has_inode ? igrab(realm->inode) : NULL;
113233 spin_unlock(&realm->inodes_with_caps_lock);
114
- if (!in)
234
+ if (has_inode && !in)
115235 break;
236
+ if (!in) {
237
+ up_read(&mdsc->snap_rwsem);
238
+ in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
239
+ down_read(&mdsc->snap_rwsem);
240
+ if (IS_ERR_OR_NULL(in))
241
+ break;
242
+ ceph_put_snap_realm(mdsc, realm);
243
+ if (!retry)
244
+ return ERR_PTR(-EAGAIN);
245
+ goto restart;
246
+ }
116247
117248 ci = ceph_inode(in);
118249 has_quota = __ceph_has_any_quota(ci);
119
- iput(in);
250
+ /* avoid calling iput_final() while holding mdsc->snap_rwsem */
251
+ ceph_async_iput(in);
120252
121253 next = realm->parent;
122254 if (has_quota || !next)
....@@ -132,15 +264,28 @@
132264 return NULL;
133265 }
134266
135
-bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
267
+static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
136268 {
137
- struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
269
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
138270 struct ceph_snap_realm *old_realm, *new_realm;
139271 bool is_same;
140272
273
+restart:
274
+ /*
275
+ * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
276
+ * However, get_quota_realm may drop it temporarily. By setting the
277
+ * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
278
+ * dropped and we can then restart the whole operation.
279
+ */
141280 down_read(&mdsc->snap_rwsem);
142
- old_realm = get_quota_realm(mdsc, old);
143
- new_realm = get_quota_realm(mdsc, new);
281
+ old_realm = get_quota_realm(mdsc, old, true);
282
+ new_realm = get_quota_realm(mdsc, new, false);
283
+ if (PTR_ERR(new_realm) == -EAGAIN) {
284
+ up_read(&mdsc->snap_rwsem);
285
+ if (old_realm)
286
+ ceph_put_snap_realm(mdsc, old_realm);
287
+ goto restart;
288
+ }
144289 is_same = (old_realm == new_realm);
145290 up_read(&mdsc->snap_rwsem);
146291
....@@ -168,7 +313,7 @@
168313 static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
169314 loff_t delta)
170315 {
171
- struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
316
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
172317 struct ceph_inode_info *ci;
173318 struct ceph_snap_realm *realm, *next;
174319 struct inode *in;
....@@ -179,6 +324,7 @@
179324 return false;
180325
181326 down_read(&mdsc->snap_rwsem);
327
+restart:
182328 realm = ceph_inode(inode)->i_snap_realm;
183329 if (realm)
184330 ceph_get_snap_realm(mdsc, realm);
....@@ -186,12 +332,23 @@
186332 pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
187333 "null i_snap_realm\n", ceph_vinop(inode));
188334 while (realm) {
189
- spin_lock(&realm->inodes_with_caps_lock);
190
- in = realm->inode ? igrab(realm->inode) : NULL;
191
- spin_unlock(&realm->inodes_with_caps_lock);
192
- if (!in)
193
- break;
335
+ bool has_inode;
194336
337
+ spin_lock(&realm->inodes_with_caps_lock);
338
+ has_inode = realm->inode;
339
+ in = has_inode ? igrab(realm->inode) : NULL;
340
+ spin_unlock(&realm->inodes_with_caps_lock);
341
+ if (has_inode && !in)
342
+ break;
343
+ if (!in) {
344
+ up_read(&mdsc->snap_rwsem);
345
+ in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
346
+ down_read(&mdsc->snap_rwsem);
347
+ if (IS_ERR_OR_NULL(in))
348
+ break;
349
+ ceph_put_snap_realm(mdsc, realm);
350
+ goto restart;
351
+ }
195352 ci = ceph_inode(in);
196353 spin_lock(&ci->i_ceph_lock);
197354 if (op == QUOTA_CHECK_MAX_FILES_OP) {
....@@ -204,8 +361,6 @@
204361 spin_unlock(&ci->i_ceph_lock);
205362 switch (op) {
206363 case QUOTA_CHECK_MAX_FILES_OP:
207
- exceeded = (max && (rvalue >= max));
208
- break;
209364 case QUOTA_CHECK_MAX_BYTES_OP:
210365 exceeded = (max && (rvalue + delta > max));
211366 break;
....@@ -228,7 +383,8 @@
228383 pr_warn("Invalid quota check op (%d)\n", op);
229384 exceeded = true; /* Just break the loop */
230385 }
231
- iput(in);
386
+ /* avoid calling iput_final() while holding mdsc->snap_rwsem */
387
+ ceph_async_iput(in);
232388
233389 next = realm->parent;
234390 if (exceeded || !next)
....@@ -259,7 +415,7 @@
259415
260416 WARN_ON(!S_ISDIR(inode->i_mode));
261417
262
- return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
418
+ return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
263419 }
264420
265421 /*
....@@ -327,7 +483,7 @@
327483 bool is_updated = false;
328484
329485 down_read(&mdsc->snap_rwsem);
330
- realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root));
486
+ realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
331487 up_read(&mdsc->snap_rwsem);
332488 if (!realm)
333489 return false;
....@@ -360,3 +516,59 @@
360516 return is_updated;
361517 }
362518
519
+/*
520
+ * ceph_quota_check_rename - check if a rename can be executed
521
+ * @mdsc: MDS client instance
522
+ * @old: inode to be copied
523
+ * @new: destination inode (directory)
524
+ *
525
+ * This function verifies if a rename (e.g. moving a file or directory) can be
526
+ * executed. It forces an rstat update in the @new target directory (and in the
527
+ * source @old as well, if it's a directory). The actual check is done both for
528
+ * max_files and max_bytes.
529
+ *
530
+ * This function returns 0 if it's OK to do the rename, or, if quotas are
531
+ * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
532
+ */
533
+int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
534
+ struct inode *old, struct inode *new)
535
+{
536
+ struct ceph_inode_info *ci_old = ceph_inode(old);
537
+ int ret = 0;
538
+
539
+ if (ceph_quota_is_same_realm(old, new))
540
+ return 0;
541
+
542
+ /*
543
+ * Get the latest rstat for target directory (and for source, if a
544
+ * directory)
545
+ */
546
+ ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
547
+ if (ret)
548
+ return ret;
549
+
550
+ if (S_ISDIR(old->i_mode)) {
551
+ ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
552
+ if (ret)
553
+ return ret;
554
+ ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
555
+ ci_old->i_rbytes);
556
+ if (!ret)
557
+ ret = check_quota_exceeded(new,
558
+ QUOTA_CHECK_MAX_FILES_OP,
559
+ ci_old->i_rfiles +
560
+ ci_old->i_rsubdirs);
561
+ if (ret)
562
+ ret = -EXDEV;
563
+ } else {
564
+ ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
565
+ i_size_read(old));
566
+ if (!ret)
567
+ ret = check_quota_exceeded(new,
568
+ QUOTA_CHECK_MAX_FILES_OP, 1);
569
+ if (ret)
570
+ ret = -EDQUOT;
571
+ }
572
+
573
+ return ret;
574
+}