hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/ceph/export.c
....@@ -22,18 +22,77 @@
2222 u64 ino, parent_ino;
2323 } __attribute__ ((packed));
2424
25
+/*
26
+ * fh for snapped inode
27
+ */
28
+struct ceph_nfs_snapfh {
29
+ u64 ino;
30
+ u64 snapid;
31
+ u64 parent_ino;
32
+ u32 hash;
33
+} __attribute__ ((packed));
34
+
35
+static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
36
+ struct inode *parent_inode)
37
+{
38
+ static const int snap_handle_length =
39
+ sizeof(struct ceph_nfs_snapfh) >> 2;
40
+ struct ceph_nfs_snapfh *sfh = (void *)rawfh;
41
+ u64 snapid = ceph_snap(inode);
42
+ int ret;
43
+ bool no_parent = true;
44
+
45
+ if (*max_len < snap_handle_length) {
46
+ *max_len = snap_handle_length;
47
+ ret = FILEID_INVALID;
48
+ goto out;
49
+ }
50
+
51
+ ret = -EINVAL;
52
+ if (snapid != CEPH_SNAPDIR) {
53
+ struct inode *dir;
54
+ struct dentry *dentry = d_find_alias(inode);
55
+ if (!dentry)
56
+ goto out;
57
+
58
+ rcu_read_lock();
59
+ dir = d_inode_rcu(dentry->d_parent);
60
+ if (ceph_snap(dir) != CEPH_SNAPDIR) {
61
+ sfh->parent_ino = ceph_ino(dir);
62
+ sfh->hash = ceph_dentry_hash(dir, dentry);
63
+ no_parent = false;
64
+ }
65
+ rcu_read_unlock();
66
+ dput(dentry);
67
+ }
68
+
69
+ if (no_parent) {
70
+ if (!S_ISDIR(inode->i_mode))
71
+ goto out;
72
+ sfh->parent_ino = sfh->ino;
73
+ sfh->hash = 0;
74
+ }
75
+ sfh->ino = ceph_ino(inode);
76
+ sfh->snapid = snapid;
77
+
78
+ *max_len = snap_handle_length;
79
+ ret = FILEID_BTRFS_WITH_PARENT;
80
+out:
81
+ dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
82
+ return ret;
83
+}
84
+
2585 static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
2686 struct inode *parent_inode)
2787 {
88
+ static const int handle_length =
89
+ sizeof(struct ceph_nfs_fh) >> 2;
90
+ static const int connected_handle_length =
91
+ sizeof(struct ceph_nfs_confh) >> 2;
2892 int type;
29
- struct ceph_nfs_fh *fh = (void *)rawfh;
30
- struct ceph_nfs_confh *cfh = (void *)rawfh;
31
- int connected_handle_length = sizeof(*cfh)/4;
32
- int handle_length = sizeof(*fh)/4;
3393
34
- /* don't re-export snaps */
3594 if (ceph_snap(inode) != CEPH_NOSNAP)
36
- return -EINVAL;
95
+ return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
3796
3897 if (parent_inode && (*max_len < connected_handle_length)) {
3998 *max_len = connected_handle_length;
....@@ -44,6 +103,7 @@
44103 }
45104
46105 if (parent_inode) {
106
+ struct ceph_nfs_confh *cfh = (void *)rawfh;
47107 dout("encode_fh %llx with parent %llx\n",
48108 ceph_ino(inode), ceph_ino(parent_inode));
49109 cfh->ino = ceph_ino(inode);
....@@ -51,6 +111,7 @@
51111 *max_len = connected_handle_length;
52112 type = FILEID_INO32_GEN_PARENT;
53113 } else {
114
+ struct ceph_nfs_fh *fh = (void *)rawfh;
54115 dout("encode_fh %llx\n", ceph_ino(inode));
55116 fh->ino = ceph_ino(inode);
56117 *max_len = handle_length;
....@@ -59,7 +120,7 @@
59120 return type;
60121 }
61122
62
-static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
123
+static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
63124 {
64125 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
65126 struct inode *inode;
....@@ -68,6 +129,10 @@
68129
69130 vino.ino = ino;
70131 vino.snap = CEPH_NOSNAP;
132
+
133
+ if (ceph_vino_is_reserved(vino))
134
+ return ERR_PTR(-ESTALE);
135
+
71136 inode = ceph_find_inode(sb, vino);
72137 if (!inode) {
73138 struct ceph_mds_request *req;
....@@ -81,7 +146,7 @@
81146 mask = CEPH_STAT_CAP_INODE;
82147 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
83148 mask |= CEPH_CAP_XATTR_SHARED;
84
- req->r_args.getattr.mask = cpu_to_le32(mask);
149
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
85150
86151 req->r_ino1 = vino;
87152 req->r_num_caps = 1;
....@@ -91,14 +156,125 @@
91156 ihold(inode);
92157 ceph_mdsc_put_request(req);
93158 if (!inode)
94
- return ERR_PTR(-ESTALE);
95
- if (inode->i_nlink == 0) {
96
- iput(inode);
97
- return ERR_PTR(-ESTALE);
159
+ return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
160
+ }
161
+ return inode;
162
+}
163
+
164
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
165
+{
166
+ struct inode *inode = __lookup_inode(sb, ino);
167
+ if (IS_ERR(inode))
168
+ return inode;
169
+ if (inode->i_nlink == 0) {
170
+ iput(inode);
171
+ return ERR_PTR(-ESTALE);
172
+ }
173
+ return inode;
174
+}
175
+
176
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
177
+{
178
+ struct inode *inode = __lookup_inode(sb, ino);
179
+ int err;
180
+
181
+ if (IS_ERR(inode))
182
+ return ERR_CAST(inode);
183
+ /* We need LINK caps to reliably check i_nlink */
184
+ err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
185
+ if (err) {
186
+ iput(inode);
187
+ return ERR_PTR(err);
188
+ }
189
+ /* -ESTALE if inode as been unlinked and no file is open */
190
+ if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
191
+ iput(inode);
192
+ return ERR_PTR(-ESTALE);
193
+ }
194
+ return d_obtain_alias(inode);
195
+}
196
+
197
+static struct dentry *__snapfh_to_dentry(struct super_block *sb,
198
+ struct ceph_nfs_snapfh *sfh,
199
+ bool want_parent)
200
+{
201
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
202
+ struct ceph_mds_request *req;
203
+ struct inode *inode;
204
+ struct ceph_vino vino;
205
+ int mask;
206
+ int err;
207
+ bool unlinked = false;
208
+
209
+ if (want_parent) {
210
+ vino.ino = sfh->parent_ino;
211
+ if (sfh->snapid == CEPH_SNAPDIR)
212
+ vino.snap = CEPH_NOSNAP;
213
+ else if (sfh->ino == sfh->parent_ino)
214
+ vino.snap = CEPH_SNAPDIR;
215
+ else
216
+ vino.snap = sfh->snapid;
217
+ } else {
218
+ vino.ino = sfh->ino;
219
+ vino.snap = sfh->snapid;
220
+ }
221
+
222
+ if (ceph_vino_is_reserved(vino))
223
+ return ERR_PTR(-ESTALE);
224
+
225
+ inode = ceph_find_inode(sb, vino);
226
+ if (inode)
227
+ return d_obtain_alias(inode);
228
+
229
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
230
+ USE_ANY_MDS);
231
+ if (IS_ERR(req))
232
+ return ERR_CAST(req);
233
+
234
+ mask = CEPH_STAT_CAP_INODE;
235
+ if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
236
+ mask |= CEPH_CAP_XATTR_SHARED;
237
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
238
+ if (vino.snap < CEPH_NOSNAP) {
239
+ req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
240
+ if (!want_parent && sfh->ino != sfh->parent_ino) {
241
+ req->r_args.lookupino.parent =
242
+ cpu_to_le64(sfh->parent_ino);
243
+ req->r_args.lookupino.hash =
244
+ cpu_to_le32(sfh->hash);
98245 }
99246 }
100247
101
- return d_obtain_alias(inode);
248
+ req->r_ino1 = vino;
249
+ req->r_num_caps = 1;
250
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
251
+ inode = req->r_target_inode;
252
+ if (inode) {
253
+ if (vino.snap == CEPH_SNAPDIR) {
254
+ if (inode->i_nlink == 0)
255
+ unlinked = true;
256
+ inode = ceph_get_snapdir(inode);
257
+ } else if (ceph_snap(inode) == vino.snap) {
258
+ ihold(inode);
259
+ } else {
260
+ /* mds does not support lookup snapped inode */
261
+ err = -EOPNOTSUPP;
262
+ inode = NULL;
263
+ }
264
+ }
265
+ ceph_mdsc_put_request(req);
266
+
267
+ if (want_parent) {
268
+ dout("snapfh_to_parent %llx.%llx\n err=%d\n",
269
+ vino.ino, vino.snap, err);
270
+ } else {
271
+ dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
272
+ vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
273
+ }
274
+ if (!inode)
275
+ return ERR_PTR(-ESTALE);
276
+ /* see comments in ceph_get_parent() */
277
+ return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
102278 }
103279
104280 /*
....@@ -109,6 +285,11 @@
109285 int fh_len, int fh_type)
110286 {
111287 struct ceph_nfs_fh *fh = (void *)fid->raw;
288
+
289
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
290
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
291
+ return __snapfh_to_dentry(sb, sfh, false);
292
+ }
112293
113294 if (fh_type != FILEID_INO32_GEN &&
114295 fh_type != FILEID_INO32_GEN_PARENT)
....@@ -168,13 +349,49 @@
168349
169350 static struct dentry *ceph_get_parent(struct dentry *child)
170351 {
171
- /* don't re-export snaps */
172
- if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
173
- return ERR_PTR(-EINVAL);
352
+ struct inode *inode = d_inode(child);
353
+ struct dentry *dn;
174354
175
- dout("get_parent %p ino %llx.%llx\n",
176
- child, ceph_vinop(d_inode(child)));
177
- return __get_parent(child->d_sb, child, 0);
355
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
356
+ struct inode* dir;
357
+ bool unlinked = false;
358
+ /* do not support non-directory */
359
+ if (!d_is_dir(child)) {
360
+ dn = ERR_PTR(-EINVAL);
361
+ goto out;
362
+ }
363
+ dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
364
+ if (IS_ERR(dir)) {
365
+ dn = ERR_CAST(dir);
366
+ goto out;
367
+ }
368
+ /* There can be multiple paths to access snapped inode.
369
+ * For simplicity, treat snapdir of head inode as parent */
370
+ if (ceph_snap(inode) != CEPH_SNAPDIR) {
371
+ struct inode *snapdir = ceph_get_snapdir(dir);
372
+ if (dir->i_nlink == 0)
373
+ unlinked = true;
374
+ iput(dir);
375
+ if (IS_ERR(snapdir)) {
376
+ dn = ERR_CAST(snapdir);
377
+ goto out;
378
+ }
379
+ dir = snapdir;
380
+ }
381
+ /* If directory has already been deleted, futher get_parent
382
+ * will fail. Do not mark snapdir dentry as disconnected,
383
+ * this prevent exportfs from doing futher get_parent. */
384
+ if (unlinked)
385
+ dn = d_obtain_root(dir);
386
+ else
387
+ dn = d_obtain_alias(dir);
388
+ } else {
389
+ dn = __get_parent(child->d_sb, child, 0);
390
+ }
391
+out:
392
+ dout("get_parent %p ino %llx.%llx err=%ld\n",
393
+ child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
394
+ return dn;
178395 }
179396
180397 /*
....@@ -186,6 +403,11 @@
186403 {
187404 struct ceph_nfs_confh *cfh = (void *)fid->raw;
188405 struct dentry *dentry;
406
+
407
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
408
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
409
+ return __snapfh_to_dentry(sb, sfh, true);
410
+ }
189411
190412 if (fh_type != FILEID_INO32_GEN_PARENT)
191413 return NULL;
....@@ -199,14 +421,115 @@
199421 return dentry;
200422 }
201423
424
+static int __get_snap_name(struct dentry *parent, char *name,
425
+ struct dentry *child)
426
+{
427
+ struct inode *inode = d_inode(child);
428
+ struct inode *dir = d_inode(parent);
429
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
430
+ struct ceph_mds_request *req = NULL;
431
+ char *last_name = NULL;
432
+ unsigned next_offset = 2;
433
+ int err = -EINVAL;
434
+
435
+ if (ceph_ino(inode) != ceph_ino(dir))
436
+ goto out;
437
+ if (ceph_snap(inode) == CEPH_SNAPDIR) {
438
+ if (ceph_snap(dir) == CEPH_NOSNAP) {
439
+ strcpy(name, fsc->mount_options->snapdir_name);
440
+ err = 0;
441
+ }
442
+ goto out;
443
+ }
444
+ if (ceph_snap(dir) != CEPH_SNAPDIR)
445
+ goto out;
446
+
447
+ while (1) {
448
+ struct ceph_mds_reply_info_parsed *rinfo;
449
+ struct ceph_mds_reply_dir_entry *rde;
450
+ int i;
451
+
452
+ req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
453
+ USE_AUTH_MDS);
454
+ if (IS_ERR(req)) {
455
+ err = PTR_ERR(req);
456
+ req = NULL;
457
+ goto out;
458
+ }
459
+ err = ceph_alloc_readdir_reply_buffer(req, inode);
460
+ if (err)
461
+ goto out;
462
+
463
+ req->r_direct_mode = USE_AUTH_MDS;
464
+ req->r_readdir_offset = next_offset;
465
+ req->r_args.readdir.flags =
466
+ cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
467
+ if (last_name) {
468
+ req->r_path2 = last_name;
469
+ last_name = NULL;
470
+ }
471
+
472
+ req->r_inode = dir;
473
+ ihold(dir);
474
+ req->r_dentry = dget(parent);
475
+
476
+ inode_lock(dir);
477
+ err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
478
+ inode_unlock(dir);
479
+
480
+ if (err < 0)
481
+ goto out;
482
+
483
+ rinfo = &req->r_reply_info;
484
+ for (i = 0; i < rinfo->dir_nr; i++) {
485
+ rde = rinfo->dir_entries + i;
486
+ BUG_ON(!rde->inode.in);
487
+ if (ceph_snap(inode) ==
488
+ le64_to_cpu(rde->inode.in->snapid)) {
489
+ memcpy(name, rde->name, rde->name_len);
490
+ name[rde->name_len] = '\0';
491
+ err = 0;
492
+ goto out;
493
+ }
494
+ }
495
+
496
+ if (rinfo->dir_end)
497
+ break;
498
+
499
+ BUG_ON(rinfo->dir_nr <= 0);
500
+ rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
501
+ next_offset += rinfo->dir_nr;
502
+ last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
503
+ if (!last_name) {
504
+ err = -ENOMEM;
505
+ goto out;
506
+ }
507
+
508
+ ceph_mdsc_put_request(req);
509
+ req = NULL;
510
+ }
511
+ err = -ENOENT;
512
+out:
513
+ if (req)
514
+ ceph_mdsc_put_request(req);
515
+ kfree(last_name);
516
+ dout("get_snap_name %p ino %llx.%llx err=%d\n",
517
+ child, ceph_vinop(inode), err);
518
+ return err;
519
+}
520
+
202521 static int ceph_get_name(struct dentry *parent, char *name,
203522 struct dentry *child)
204523 {
205524 struct ceph_mds_client *mdsc;
206525 struct ceph_mds_request *req;
526
+ struct inode *inode = d_inode(child);
207527 int err;
208528
209
- mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
529
+ if (ceph_snap(inode) != CEPH_NOSNAP)
530
+ return __get_snap_name(parent, name, child);
531
+
532
+ mdsc = ceph_inode_to_client(inode)->mdsc;
210533 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
211534 USE_ANY_MDS);
212535 if (IS_ERR(req))
....@@ -214,8 +537,8 @@
214537
215538 inode_lock(d_inode(parent));
216539
217
- req->r_inode = d_inode(child);
218
- ihold(d_inode(child));
540
+ req->r_inode = inode;
541
+ ihold(inode);
219542 req->r_ino2 = ceph_vino(d_inode(parent));
220543 req->r_parent = d_inode(parent);
221544 set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
....@@ -229,10 +552,10 @@
229552 memcpy(name, rinfo->dname, rinfo->dname_len);
230553 name[rinfo->dname_len] = 0;
231554 dout("get_name %p ino %llx.%llx name %s\n",
232
- child, ceph_vinop(d_inode(child)), name);
555
+ child, ceph_vinop(inode), name);
233556 } else {
234557 dout("get_name %p ino %llx.%llx err %d\n",
235
- child, ceph_vinop(d_inode(child)), err);
558
+ child, ceph_vinop(inode), err);
236559 }
237560
238561 ceph_mdsc_put_request(req);