hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/ceph/super.h
....@@ -16,6 +16,7 @@
1616 #include <linux/slab.h>
1717 #include <linux/posix_acl.h>
1818 #include <linux/refcount.h>
19
+#include <linux/security.h>
1920
2021 #include <linux/ceph/libceph.h>
2122
....@@ -31,6 +32,7 @@
3132 #define CEPH_BLOCK_SHIFT 22 /* 4 MB */
3233 #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
3334
35
+#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reconnect (clean mode) after blocklisted */
3436 #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
3537 #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
3638 #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
....@@ -40,11 +42,17 @@
4042 #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */
4143 #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */
4244 #define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
45
+#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
46
+#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
4347
44
-#define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE
48
+#define CEPH_MOUNT_OPT_DEFAULT \
49
+ (CEPH_MOUNT_OPT_DCACHE | \
50
+ CEPH_MOUNT_OPT_NOCOPYFROM)
4551
4652 #define ceph_set_mount_opt(fsc, opt) \
47
- (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
53
+ (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt
54
+#define ceph_clear_mount_opt(fsc, opt) \
55
+ (fsc)->mount_options->flags &= ~CEPH_MOUNT_OPT_##opt
4856 #define ceph_test_mount_opt(fsc, opt) \
4957 (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
5058
....@@ -68,16 +76,16 @@
6876 #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
6977
7078 struct ceph_mount_options {
71
- int flags;
72
- int sb_flags;
79
+ unsigned int flags;
7380
74
- int wsize; /* max write size */
75
- int rsize; /* max read size */
76
- int rasize; /* max readahead */
77
- int congestion_kb; /* max writeback in flight */
78
- int caps_wanted_delay_min, caps_wanted_delay_max;
79
- int max_readdir; /* max readdir result (entires) */
80
- int max_readdir_bytes; /* max readdir result (bytes) */
81
+ unsigned int wsize; /* max write size */
82
+ unsigned int rsize; /* max read size */
83
+ unsigned int rasize; /* max readahead */
84
+ unsigned int congestion_kb; /* max writeback in flight */
85
+ unsigned int caps_wanted_delay_min, caps_wanted_delay_max;
86
+ int caps_max;
87
+ unsigned int max_readdir; /* max readdir result (entries) */
88
+ unsigned int max_readdir_bytes; /* max readdir result (bytes) */
8189
8290 /*
8391 * everything above this point can be memcmp'd; everything below
....@@ -93,27 +101,34 @@
93101 struct ceph_fs_client {
94102 struct super_block *sb;
95103
104
+ struct list_head metric_wakeup;
105
+
96106 struct ceph_mount_options *mount_options;
97107 struct ceph_client *client;
98108
99109 unsigned long mount_state;
100
- int min_caps; /* min caps i added */
110
+
111
+ unsigned long last_auto_reconnect;
112
+ bool blocklisted;
113
+
114
+ bool have_copy_from2;
115
+
116
+ u32 filp_gen;
101117 loff_t max_file_size;
102118
103119 struct ceph_mds_client *mdsc;
104120
105
- /* writeback */
106
- mempool_t *wb_pagevec_pool;
107
- struct workqueue_struct *wb_wq;
108
- struct workqueue_struct *pg_inv_wq;
109
- struct workqueue_struct *trunc_wq;
110121 atomic_long_t writeback_count;
122
+
123
+ struct workqueue_struct *inode_wq;
124
+ struct workqueue_struct *cap_wq;
111125
112126 #ifdef CONFIG_DEBUG_FS
113127 struct dentry *debugfs_dentry_lru, *debugfs_caps;
114128 struct dentry *debugfs_congestion_kb;
115129 struct dentry *debugfs_bdi;
116130 struct dentry *debugfs_mdsc, *debugfs_mdsmap;
131
+ struct dentry *debugfs_metric;
117132 struct dentry *debugfs_mds_sessions;
118133 #endif
119134
....@@ -145,7 +160,8 @@
145160 int issued; /* latest, from the mds */
146161 int implemented; /* implemented superset of
147162 issued (for revocation) */
148
- int mds, mds_wanted;
163
+ int mds; /* mds index for this cap */
164
+ int mds_wanted; /* caps wanted from this mds */
149165 };
150166 /* caps to release */
151167 struct {
....@@ -159,14 +175,15 @@
159175 struct list_head caps_item;
160176 };
161177
162
-#define CHECK_CAPS_NODELAY 1 /* do not delay any further */
163
-#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */
164
-#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */
178
+#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
179
+#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
180
+#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
165181
166182 struct ceph_cap_flush {
167183 u64 tid;
168
- int caps; /* 0 means capsnap */
184
+ int caps;
169185 bool wake; /* wake up flush waiters when finish ? */
186
+ bool is_capsnap; /* true means capsnap */
170187 struct list_head g_list; // global
171188 struct list_head i_list; // per inode
172189 };
....@@ -194,7 +211,8 @@
194211 u64 xattr_version;
195212
196213 u64 size;
197
- struct timespec64 mtime, atime, ctime;
214
+ u64 change_attr;
215
+ struct timespec64 mtime, atime, ctime, btime;
198216 u64 time_warp_seq;
199217 u64 truncate_size;
200218 u32 truncate_seq;
....@@ -257,16 +275,22 @@
257275 * Ceph dentry state
258276 */
259277 struct ceph_dentry_info {
278
+ struct dentry *dentry;
260279 struct ceph_mds_session *lease_session;
280
+ struct list_head lease_list;
281
+ unsigned flags;
261282 int lease_shared_gen;
262283 u32 lease_gen;
263284 u32 lease_seq;
264285 unsigned long lease_renew_after, lease_renew_from;
265
- struct list_head lru;
266
- struct dentry *dentry;
267286 unsigned long time;
268287 u64 offset;
269288 };
289
+
290
+#define CEPH_DENTRY_REFERENCED 1
291
+#define CEPH_DENTRY_LEASE_LIST 2
292
+#define CEPH_DENTRY_SHRINK_LIST 4
293
+#define CEPH_DENTRY_PRIMARY_LINK 8
270294
271295 struct ceph_inode_xattrs_info {
272296 /*
....@@ -298,13 +322,14 @@
298322 u64 i_inline_version;
299323 u32 i_time_warp_seq;
300324
301
- unsigned i_ceph_flags;
325
+ unsigned long i_ceph_flags;
302326 atomic64_t i_release_count;
303327 atomic64_t i_ordered_count;
304328 atomic64_t i_complete_seq[2];
305329
306330 struct ceph_dir_layout i_dir_layout;
307331 struct ceph_file_layout i_layout;
332
+ struct ceph_file_layout i_cached_layout; // for async creates
308333 char *i_symlink;
309334
310335 /* for dirs */
....@@ -314,6 +339,8 @@
314339
315340 /* quotas */
316341 u64 i_max_bytes, i_max_files;
342
+
343
+ s32 i_dir_pin;
317344
318345 struct rb_root i_fragtree;
319346 int i_fragtree_nsplits;
....@@ -326,14 +353,31 @@
326353 struct rb_root i_caps; /* cap list */
327354 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
328355 unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */
329
- struct list_head i_dirty_item, i_flushing_item;
356
+
357
+ /*
358
+ * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
359
+ * is protected by the mdsc->cap_dirty_lock, but each individual item
360
+ * is also protected by the inode's i_ceph_lock. Walking s_cap_dirty
361
+ * requires the mdsc->cap_dirty_lock. List presence for an item can
362
+ * be tested under the i_ceph_lock. Changing anything requires both.
363
+ */
364
+ struct list_head i_dirty_item;
365
+
366
+ /*
367
+ * Link to session's s_cap_flushing list. Protected in a similar
368
+ * fashion to i_dirty_item, but also by the s_mutex for changes. The
369
+ * s_cap_flushing list can be walked while holding either the s_mutex
370
+ * or mdsc->cap_dirty_lock. List presence can also be checked while
371
+ * holding the i_ceph_lock for this inode.
372
+ */
373
+ struct list_head i_flushing_item;
374
+
330375 /* we need to track cap writeback on a per-cap-bit basis, to allow
331376 * overlapping, pipelined cap flushes to the mds. we can probably
332377 * reduce the tid to 8 bits if we're concerned about inode size. */
333378 struct ceph_cap_flush *i_prealloc_cap_flush;
334379 struct list_head i_cap_flush_list;
335380 wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
336
- unsigned long i_hold_caps_min; /* jiffies */
337381 unsigned long i_hold_caps_max; /* jiffies */
338382 struct list_head i_cap_delay_list; /* for delayed cap release to mds */
339383 struct ceph_cap_reservation i_cap_migration_resv;
....@@ -342,6 +386,8 @@
342386 dirty|flushing caps */
343387 unsigned i_snap_caps; /* cap bits for snapped files */
344388
389
+ unsigned long i_last_rd;
390
+ unsigned long i_last_wr;
345391 int i_nr_by_mode[CEPH_FILE_MODE_BITS]; /* open file counts */
346392
347393 struct mutex i_truncate_mutex;
....@@ -356,7 +402,7 @@
356402
357403 /* held references to caps */
358404 int i_pin_ref;
359
- int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
405
+ int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref, i_fx_ref;
360406 int i_wrbuffer_ref, i_wrbuffer_ref_head;
361407 atomic_t i_filelock_ref;
362408 atomic_t i_shared_gen; /* increment each time we get FILE_SHARED */
....@@ -367,15 +413,18 @@
367413 struct list_head i_unsafe_iops; /* uncommitted mds inode ops */
368414 spinlock_t i_unsafe_lock;
369415
370
- struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
416
+ union {
417
+ struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
418
+ struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
419
+ };
371420 int i_snap_realm_counter; /* snap realm (if caps) */
372421 struct list_head i_snap_realm_item;
373422 struct list_head i_snap_flush_item;
423
+ struct timespec64 i_btime;
424
+ struct timespec64 i_snap_btime;
374425
375
- struct work_struct i_wb_work; /* writeback work */
376
- struct work_struct i_pg_inv_work; /* page invalidation work */
377
-
378
- struct work_struct i_vmtruncate_work;
426
+ struct work_struct i_work;
427
+ unsigned long i_work_mask;
379428
380429 #ifdef CONFIG_CEPH_FSCACHE
381430 struct fscache_cookie *fscache;
....@@ -384,35 +433,37 @@
384433 struct inode vfs_inode; /* at end */
385434 };
386435
387
-static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
436
+static inline struct ceph_inode_info *
437
+ceph_inode(const struct inode *inode)
388438 {
389439 return container_of(inode, struct ceph_inode_info, vfs_inode);
390440 }
391441
392
-static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
442
+static inline struct ceph_fs_client *
443
+ceph_inode_to_client(const struct inode *inode)
393444 {
394445 return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
395446 }
396447
397
-static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
448
+static inline struct ceph_fs_client *
449
+ceph_sb_to_client(const struct super_block *sb)
398450 {
399451 return (struct ceph_fs_client *)sb->s_fs_info;
400452 }
401453
402
-static inline struct ceph_vino ceph_vino(struct inode *inode)
454
+static inline struct ceph_mds_client *
455
+ceph_sb_to_mdsc(const struct super_block *sb)
456
+{
457
+ return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc;
458
+}
459
+
460
+static inline struct ceph_vino
461
+ceph_vino(const struct inode *inode)
403462 {
404463 return ceph_inode(inode)->i_vino;
405464 }
406465
407
-/*
408
- * ino_t is <64 bits on many architectures, blech.
409
- *
410
- * i_ino (kernel inode) st_ino (userspace)
411
- * i386 32 32
412
- * x86_64+ino32 64 32
413
- * x86_64 64 64
414
- */
415
-static inline u32 ceph_ino_to_ino32(__u64 vino)
466
+static inline u32 ceph_ino_to_ino32(u64 vino)
416467 {
417468 u32 ino = vino & 0xffffffff;
418469 ino ^= vino >> 32;
....@@ -422,34 +473,17 @@
422473 }
423474
424475 /*
425
- * kernel i_ino value
476
+ * Inode numbers in cephfs are 64 bits, but inode->i_ino is 32-bits on
477
+ * some arches. We generally do not use this value inside the ceph driver, but
478
+ * we do want to set it to something, so that generic vfs code has an
479
+ * appropriate value for tracepoints and the like.
426480 */
427
-static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
481
+static inline ino_t ceph_vino_to_ino_t(struct ceph_vino vino)
428482 {
429
-#if BITS_PER_LONG == 32
430
- return ceph_ino_to_ino32(vino.ino);
431
-#else
483
+ if (sizeof(ino_t) == sizeof(u32))
484
+ return ceph_ino_to_ino32(vino.ino);
432485 return (ino_t)vino.ino;
433
-#endif
434486 }
435
-
436
-/*
437
- * user-visible ino (stat, filldir)
438
- */
439
-#if BITS_PER_LONG == 32
440
-static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
441
-{
442
- return ino;
443
-}
444
-#else
445
-static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
446
-{
447
- if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
448
- ino = ceph_ino_to_ino32(ino);
449
- return ino;
450
-}
451
-#endif
452
-
453487
454488 /* for printf-style formatting */
455489 #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
....@@ -458,9 +492,32 @@
458492 {
459493 return ceph_inode(inode)->i_vino.ino;
460494 }
495
+
461496 static inline u64 ceph_snap(struct inode *inode)
462497 {
463498 return ceph_inode(inode)->i_vino.snap;
499
+}
500
+
501
+/**
502
+ * ceph_present_ino - format an inode number for presentation to userland
503
+ * @sb: superblock where the inode lives
504
+ * @ino: inode number to (possibly) convert
505
+ *
506
+ * If the user mounted with the ino32 option, then the 64-bit value needs
507
+ * to be converted to something that can fit inside 32 bits. Note that
508
+ * internal kernel code never uses this value, so this is entirely for
509
+ * userland consumption.
510
+ */
511
+static inline u64 ceph_present_ino(struct super_block *sb, u64 ino)
512
+{
513
+ if (unlikely(ceph_test_mount_opt(ceph_sb_to_client(sb), INO32)))
514
+ return ceph_ino_to_ino32(ino);
515
+ return ino;
516
+}
517
+
518
+static inline u64 ceph_present_inode(struct inode *inode)
519
+{
520
+ return ceph_present_ino(inode->i_sb, ceph_ino(inode));
464521 }
465522
466523 static inline int ceph_ino_compare(struct inode *inode, void *data)
....@@ -471,11 +528,40 @@
471528 ci->i_vino.snap == pvino->snap;
472529 }
473530
531
+/*
532
+ * The MDS reserves a set of inodes for its own usage. These should never
533
+ * be accessible by clients, and so the MDS has no reason to ever hand these
534
+ * out. The range is CEPH_MDS_INO_MDSDIR_OFFSET..CEPH_INO_SYSTEM_BASE.
535
+ *
536
+ * These come from src/mds/mdstypes.h in the ceph sources.
537
+ */
538
+#define CEPH_MAX_MDS 0x100
539
+#define CEPH_NUM_STRAY 10
540
+#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
541
+#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
542
+
543
+static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
544
+{
545
+ if (vino.ino < CEPH_INO_SYSTEM_BASE &&
546
+ vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
547
+ WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
548
+ return true;
549
+ }
550
+ return false;
551
+}
552
+
474553 static inline struct inode *ceph_find_inode(struct super_block *sb,
475554 struct ceph_vino vino)
476555 {
477
- ino_t t = ceph_vino_to_ino(vino);
478
- return ilookup5(sb, t, ceph_ino_compare, &vino);
556
+ if (ceph_vino_is_reserved(vino))
557
+ return NULL;
558
+
559
+ /*
560
+ * NB: The hashval will be run through the fs/inode.c hash function
561
+ * anyway, so there is no need to squash the inode number down to
562
+ * 32-bits first. Just use low-order bits on arches with 32-bit long.
563
+ */
564
+ return ilookup5(sb, (unsigned long)vino.ino, ceph_ino_compare, &vino);
479565 }
480566
481567
....@@ -483,19 +569,25 @@
483569 * Ceph inode.
484570 */
485571 #define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */
486
-#define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */
487572 #define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */
488
-#define CEPH_I_NOFLUSH (1 << 3) /* do not flush dirty caps */
489
-#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */
490
-#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */
491
-#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */
492
-#define CEPH_I_SEC_INITED (1 << 7) /* security initialized */
493
-#define CEPH_I_CAP_DROPPED (1 << 8) /* caps were forcibly dropped */
494
-#define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */
495
-#define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */
496
-#define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */
497
-#define CEPH_I_ERROR_FILELOCK (1 << 12) /* have seen file lock errors */
573
+#define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */
574
+#define CEPH_I_POOL_RD (1 << 4) /* can read from pool */
575
+#define CEPH_I_POOL_WR (1 << 5) /* can write to pool */
576
+#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */
577
+#define CEPH_I_KICK_FLUSH (1 << 7) /* kick flushing caps */
578
+#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snaps */
579
+#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */
580
+#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */
581
+#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */
582
+#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */
583
+#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT)
498584
585
+/*
586
+ * Masks of ceph inode work.
587
+ */
588
+#define CEPH_I_WORK_WRITEBACK 0 /* writeback */
589
+#define CEPH_I_WORK_INVALIDATE_PAGES 1 /* invalidate pages */
590
+#define CEPH_I_WORK_VMTRUNCATE 2 /* vmtruncate */
499591
500592 /*
501593 * We set the ERROR_WRITE bit when we start seeing write errors on an inode
....@@ -589,7 +681,7 @@
589681 struct ceph_inode_frag *pfrag,
590682 int *found);
591683
592
-static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
684
+static inline struct ceph_dentry_info *ceph_dentry(const struct dentry *dentry)
593685 {
594686 return (struct ceph_dentry_info *)dentry->d_fsdata;
595687 }
....@@ -604,6 +696,8 @@
604696
605697 extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
606698 extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
699
+extern int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask,
700
+ int t);
607701 extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
608702 struct ceph_cap *cap);
609703
....@@ -616,12 +710,12 @@
616710 return issued;
617711 }
618712
619
-static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
620
- int touch)
713
+static inline int ceph_caps_issued_mask_metric(struct ceph_inode_info *ci,
714
+ int mask, int touch)
621715 {
622716 int r;
623717 spin_lock(&ci->i_ceph_lock);
624
- r = __ceph_caps_issued_mask(ci, mask, touch);
718
+ r = __ceph_caps_issued_mask_metric(ci, mask, touch);
625719 spin_unlock(&ci->i_ceph_lock);
626720 return r;
627721 }
....@@ -640,25 +734,20 @@
640734 extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
641735 extern int __ceph_caps_used(struct ceph_inode_info *ci);
642736
643
-extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
644
-
645
-/*
646
- * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
647
- */
648
-static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
737
+static inline bool __ceph_is_file_opened(struct ceph_inode_info *ci)
649738 {
650
- int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
651
- if (w & CEPH_CAP_FILE_BUFFER)
652
- w |= CEPH_CAP_FILE_EXCL; /* we want EXCL if dirty data */
653
- return w;
739
+ return ci->i_nr_by_mode[0];
654740 }
741
+extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
742
+extern int __ceph_caps_wanted(struct ceph_inode_info *ci);
655743
656744 /* what the mds thinks we want */
657745 extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
658746
659747 extern void ceph_caps_init(struct ceph_mds_client *mdsc);
660748 extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
661
-extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
749
+extern void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
750
+ struct ceph_mount_options *fsopt);
662751 extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
663752 struct ceph_cap_reservation *ctx, int need);
664753 extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
....@@ -681,6 +770,8 @@
681770
682771 spinlock_t rw_contexts_lock;
683772 struct list_head rw_contexts;
773
+
774
+ u32 filp_gen;
684775 };
685776
686777 struct ceph_dir_file_info {
....@@ -812,7 +903,7 @@
812903 * This allows larger machines to have larger/more transfers.
813904 * Limit the default to 256M
814905 */
815
- congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
906
+ congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
816907 if (congestion_kb > 256*1024)
817908 congestion_kb = 256*1024;
818909
....@@ -820,7 +911,8 @@
820911 }
821912
822913
823
-
914
+/* super.c */
915
+extern int ceph_force_reconnect(struct super_block *sb);
824916 /* snap.c */
825917 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
826918 u64 ino);
....@@ -839,6 +931,14 @@
839931 struct ceph_cap_snap *capsnap);
840932 extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
841933
934
+extern struct ceph_snapid_map *ceph_get_snapid_map(struct ceph_mds_client *mdsc,
935
+ u64 snap);
936
+extern void ceph_put_snapid_map(struct ceph_mds_client* mdsc,
937
+ struct ceph_snapid_map *sm);
938
+extern void ceph_trim_snapid_map(struct ceph_mds_client *mdsc);
939
+extern void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc);
940
+
941
+
842942 /*
843943 * a cap_snap is "pending" if it is still awaiting an in-progress
844944 * sync write (that may/may not still update size, mtime, etc.).
....@@ -851,12 +951,14 @@
851951 }
852952
853953 /* inode.c */
954
+struct ceph_mds_reply_info_in;
955
+struct ceph_mds_reply_dirfrag;
956
+
854957 extern const struct inode_operations ceph_file_iops;
855958
856959 extern struct inode *ceph_alloc_inode(struct super_block *sb);
857960 extern void ceph_evict_inode(struct inode *inode);
858
-extern void ceph_destroy_inode(struct inode *inode);
859
-extern int ceph_drop_inode(struct inode *inode);
961
+extern void ceph_free_inode(struct inode *inode);
860962
861963 extern struct inode *ceph_get_inode(struct super_block *sb,
862964 struct ceph_vino vino);
....@@ -867,6 +969,11 @@
867969 u64 time_warp_seq, struct timespec64 *ctime,
868970 struct timespec64 *mtime,
869971 struct timespec64 *atime);
972
+extern int ceph_fill_inode(struct inode *inode, struct page *locked_page,
973
+ struct ceph_mds_reply_info_in *iinfo,
974
+ struct ceph_mds_reply_dirfrag *dirinfo,
975
+ struct ceph_mds_session *session, int cap_fmode,
976
+ struct ceph_cap_reservation *caps_reservation);
870977 extern int ceph_fill_trace(struct super_block *sb,
871978 struct ceph_mds_request *req);
872979 extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
....@@ -877,9 +984,9 @@
877984 extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
878985 extern void __ceph_do_pending_vmtruncate(struct inode *inode);
879986 extern void ceph_queue_vmtruncate(struct inode *inode);
880
-
881987 extern void ceph_queue_invalidate(struct inode *inode);
882988 extern void ceph_queue_writeback(struct inode *inode);
989
+extern void ceph_async_iput(struct inode *inode);
883990
884991 extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
885992 int mask, bool force);
....@@ -899,9 +1006,19 @@
8991006 extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
9001007 extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci);
9011008 extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
902
-extern void __init ceph_xattr_init(void);
903
-extern void ceph_xattr_exit(void);
9041009 extern const struct xattr_handler *ceph_xattr_handlers[];
1010
+
1011
+struct ceph_acl_sec_ctx {
1012
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
1013
+ void *default_acl;
1014
+ void *acl;
1015
+#endif
1016
+#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
1017
+ void *sec_ctx;
1018
+ u32 sec_ctxlen;
1019
+#endif
1020
+ struct ceph_pagelist *pagelist;
1021
+};
9051022
9061023 #ifdef CONFIG_SECURITY
9071024 extern bool ceph_security_xattr_deadlock(struct inode *in);
....@@ -917,21 +1034,35 @@
9171034 }
9181035 #endif
9191036
920
-/* acl.c */
921
-struct ceph_acls_info {
922
- void *default_acl;
923
- void *acl;
924
- struct ceph_pagelist *pagelist;
925
-};
1037
+#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
1038
+extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
1039
+ struct ceph_acl_sec_ctx *ctx);
1040
+static inline void ceph_security_invalidate_secctx(struct inode *inode)
1041
+{
1042
+ security_inode_invalidate_secctx(inode);
1043
+}
1044
+#else
1045
+static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
1046
+ struct ceph_acl_sec_ctx *ctx)
1047
+{
1048
+ return 0;
1049
+}
1050
+static inline void ceph_security_invalidate_secctx(struct inode *inode)
1051
+{
1052
+}
1053
+#endif
9261054
1055
+void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
1056
+
1057
+/* acl.c */
9271058 #ifdef CONFIG_CEPH_FS_POSIX_ACL
9281059
9291060 struct posix_acl *ceph_get_acl(struct inode *, int);
9301061 int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
9311062 int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
932
- struct ceph_acls_info *info);
933
-void ceph_init_inode_acls(struct inode *inode, struct ceph_acls_info *info);
934
-void ceph_release_acls_info(struct ceph_acls_info *info);
1063
+ struct ceph_acl_sec_ctx *as_ctx);
1064
+void ceph_init_inode_acls(struct inode *inode,
1065
+ struct ceph_acl_sec_ctx *as_ctx);
9351066
9361067 static inline void ceph_forget_all_cached_acls(struct inode *inode)
9371068 {
....@@ -944,15 +1075,12 @@
9441075 #define ceph_set_acl NULL
9451076
9461077 static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
947
- struct ceph_acls_info *info)
1078
+ struct ceph_acl_sec_ctx *as_ctx)
9481079 {
9491080 return 0;
9501081 }
9511082 static inline void ceph_init_inode_acls(struct inode *inode,
952
- struct ceph_acls_info *info)
953
-{
954
-}
955
-static inline void ceph_release_acls_info(struct ceph_acls_info *info)
1083
+ struct ceph_acl_sec_ctx *as_ctx)
9561084 {
9571085 }
9581086 static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
....@@ -974,15 +1102,15 @@
9741102 struct ceph_cap_reservation *ctx);
9751103 extern void ceph_add_cap(struct inode *inode,
9761104 struct ceph_mds_session *session, u64 cap_id,
977
- int fmode, unsigned issued, unsigned wanted,
1105
+ unsigned issued, unsigned wanted,
9781106 unsigned cap, unsigned seq, u64 realmino, int flags,
9791107 struct ceph_cap **new_cap);
9801108 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
1109
+extern void __ceph_remove_caps(struct ceph_inode_info *ci);
9811110 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
9821111 struct ceph_cap *cap);
9831112 extern int ceph_is_any_caps(struct inode *inode);
9841113
985
-extern void ceph_queue_caps_release(struct inode *inode);
9861114 extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
9871115 extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
9881116 int datasync);
....@@ -990,19 +1118,30 @@
9901118 struct ceph_mds_session *session);
9911119 extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
9921120 struct ceph_mds_session *session);
1121
+void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
1122
+ struct ceph_inode_info *ci);
9931123 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
9941124 int mds);
995
-extern int ceph_get_cap_mds(struct inode *inode);
1125
+extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
1126
+ bool snap_rwsem_locked);
9961127 extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
9971128 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
1129
+extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
1130
+ int had);
9981131 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
9991132 struct ceph_snap_context *snapc);
1133
+extern void __ceph_remove_capsnap(struct inode *inode,
1134
+ struct ceph_cap_snap *capsnap,
1135
+ bool *wake_ci, bool *wake_mdsc);
1136
+extern void ceph_remove_capsnap(struct inode *inode,
1137
+ struct ceph_cap_snap *capsnap,
1138
+ bool *wake_ci, bool *wake_mdsc);
10001139 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
10011140 struct ceph_mds_session **psession);
10021141 extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
10031142 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
10041143 struct ceph_mds_session *session);
1005
-extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
1144
+extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
10061145 extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
10071146 extern int ceph_drop_caps_for_unlink(struct inode *inode);
10081147 extern int ceph_encode_inode_release(void **p, struct inode *inode,
....@@ -1011,26 +1150,28 @@
10111150 struct inode *dir,
10121151 int mds, int drop, int unless);
10131152
1014
-extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
1153
+extern int ceph_get_caps(struct file *filp, int need, int want,
10151154 loff_t endoff, int *got, struct page **pinned_page);
1016
-extern int ceph_try_get_caps(struct ceph_inode_info *ci,
1017
- int need, int want, int *got);
1155
+extern int ceph_try_get_caps(struct inode *inode,
1156
+ int need, int want, bool nonblock, int *got);
10181157
10191158 /* for counting open files by mode */
1020
-extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
1021
-extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
1159
+extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
1160
+extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
1161
+extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
1162
+ struct ceph_mds_client *mdsc, int fmode);
10221163
10231164 /* addr.c */
10241165 extern const struct address_space_operations ceph_aops;
10251166 extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
10261167 extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
1027
-extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need);
1168
+extern int ceph_pool_perm_check(struct inode *inode, int need);
10281169 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
10291170
10301171 /* file.c */
10311172 extern const struct file_operations ceph_file_fops;
10321173
1033
-extern int ceph_renew_caps(struct inode *inode);
1174
+extern int ceph_renew_caps(struct inode *inode, int fmode);
10341175 extern int ceph_open(struct inode *inode, struct file *file);
10351176 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
10361177 struct file *file, unsigned flags, umode_t mode);
....@@ -1052,10 +1193,10 @@
10521193 extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
10531194 struct dentry *dentry, int err);
10541195
1055
-extern void ceph_dentry_lru_add(struct dentry *dn);
1056
-extern void ceph_dentry_lru_touch(struct dentry *dn);
1057
-extern void ceph_dentry_lru_del(struct dentry *dn);
1196
+extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
1197
+extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
10581198 extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
1199
+extern int ceph_trim_dentries(struct ceph_mds_client *mdsc);
10591200 extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
10601201 extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl);
10611202
....@@ -1064,6 +1205,7 @@
10641205
10651206 /* export.c */
10661207 extern const struct export_operations ceph_export_ops;
1208
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino);
10671209
10681210 /* locks.c */
10691211 extern __init void ceph_flock_init(void);
....@@ -1079,7 +1221,7 @@
10791221 int num_fcntl_locks, int num_flock_locks);
10801222
10811223 /* debugfs.c */
1082
-extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
1224
+extern void ceph_fs_debugfs_init(struct ceph_fs_client *client);
10831225 extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
10841226
10851227 /* quota.c */
....@@ -1107,12 +1249,14 @@
11071249 struct ceph_mds_session *session,
11081250 struct ceph_msg *msg);
11091251 extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
1110
-extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
11111252 extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
11121253 loff_t newlen);
11131254 extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
11141255 loff_t newlen);
11151256 extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
11161257 struct kstatfs *buf);
1258
+extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
1259
+ struct inode *old, struct inode *new);
1260
+extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
11171261
11181262 #endif /* _FS_CEPH_SUPER_H */