| .. | .. |
|---|
| 10 | 10 | #include <linux/spinlock.h> |
|---|
| 11 | 11 | #include <linux/refcount.h> |
|---|
| 12 | 12 | #include <linux/utsname.h> |
|---|
| 13 | +#include <linux/ktime.h> |
|---|
| 13 | 14 | |
|---|
| 14 | 15 | #include <linux/ceph/types.h> |
|---|
| 15 | 16 | #include <linux/ceph/messenger.h> |
|---|
| 16 | 17 | #include <linux/ceph/mdsmap.h> |
|---|
| 17 | 18 | #include <linux/ceph/auth.h> |
|---|
| 18 | 19 | |
|---|
| 20 | +#include "metric.h" |
|---|
| 21 | +#include "super.h" |
|---|
| 22 | + |
|---|
| 19 | 23 | /* The first 8 bits are reserved for old ceph releases */ |
|---|
| 20 | | -#define CEPHFS_FEATURE_MIMIC 8 |
|---|
| 24 | +enum ceph_feature_type { |
|---|
| 25 | + CEPHFS_FEATURE_MIMIC = 8, |
|---|
| 26 | + CEPHFS_FEATURE_REPLY_ENCODING, |
|---|
| 27 | + CEPHFS_FEATURE_RECLAIM_CLIENT, |
|---|
| 28 | + CEPHFS_FEATURE_LAZY_CAP_WANTED, |
|---|
| 29 | + CEPHFS_FEATURE_MULTI_RECONNECT, |
|---|
| 30 | + CEPHFS_FEATURE_DELEG_INO, |
|---|
| 31 | + CEPHFS_FEATURE_METRIC_COLLECT, |
|---|
| 21 | 32 | |
|---|
| 22 | | -#define CEPHFS_FEATURES_ALL { \ |
|---|
| 23 | | - 0, 1, 2, 3, 4, 5, 6, 7, \ |
|---|
| 24 | | - CEPHFS_FEATURE_MIMIC, \ |
|---|
| 33 | + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT, |
|---|
| 34 | +}; |
|---|
| 35 | + |
|---|
| 36 | +#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ |
|---|
| 37 | + 0, 1, 2, 3, 4, 5, 6, 7, \ |
|---|
| 38 | + CEPHFS_FEATURE_MIMIC, \ |
|---|
| 39 | + CEPHFS_FEATURE_REPLY_ENCODING, \ |
|---|
| 40 | + CEPHFS_FEATURE_LAZY_CAP_WANTED, \ |
|---|
| 41 | + CEPHFS_FEATURE_MULTI_RECONNECT, \ |
|---|
| 42 | + CEPHFS_FEATURE_DELEG_INO, \ |
|---|
| 43 | + CEPHFS_FEATURE_METRIC_COLLECT, \ |
|---|
| 25 | 44 | } |
|---|
| 26 | | - |
|---|
| 27 | | -#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL |
|---|
| 28 | 45 | #define CEPHFS_FEATURES_CLIENT_REQUIRED {} |
|---|
| 29 | | - |
|---|
| 30 | 46 | |
|---|
| 31 | 47 | /* |
|---|
| 32 | 48 | * Some lock dependencies: |
|---|
| .. | .. |
|---|
| 63 | 79 | char *pool_ns_data; |
|---|
| 64 | 80 | u64 max_bytes; |
|---|
| 65 | 81 | u64 max_files; |
|---|
| 82 | + s32 dir_pin; |
|---|
| 83 | + struct ceph_timespec btime; |
|---|
| 84 | + struct ceph_timespec snap_btime; |
|---|
| 85 | + u64 change_attr; |
|---|
| 66 | 86 | }; |
|---|
| 67 | 87 | |
|---|
| 68 | 88 | struct ceph_mds_reply_dir_entry { |
|---|
| .. | .. |
|---|
| 139 | 159 | CEPH_MDS_SESSION_OPENING = 2, |
|---|
| 140 | 160 | CEPH_MDS_SESSION_OPEN = 3, |
|---|
| 141 | 161 | CEPH_MDS_SESSION_HUNG = 4, |
|---|
| 142 | | - CEPH_MDS_SESSION_CLOSING = 5, |
|---|
| 143 | | - CEPH_MDS_SESSION_RESTARTING = 6, |
|---|
| 144 | | - CEPH_MDS_SESSION_RECONNECTING = 7, |
|---|
| 145 | | - CEPH_MDS_SESSION_REJECTED = 8, |
|---|
| 162 | + CEPH_MDS_SESSION_RESTARTING = 5, |
|---|
| 163 | + CEPH_MDS_SESSION_RECONNECTING = 6, |
|---|
| 164 | + CEPH_MDS_SESSION_CLOSING = 7, |
|---|
| 165 | + CEPH_MDS_SESSION_CLOSED = 8, |
|---|
| 166 | + CEPH_MDS_SESSION_REJECTED = 9, |
|---|
| 146 | 167 | }; |
|---|
| 147 | 168 | |
|---|
| 148 | 169 | struct ceph_mds_session { |
|---|
| .. | .. |
|---|
| 150 | 171 | int s_mds; |
|---|
| 151 | 172 | int s_state; |
|---|
| 152 | 173 | unsigned long s_ttl; /* time until mds kills us */ |
|---|
| 174 | + unsigned long s_features; |
|---|
| 153 | 175 | u64 s_seq; /* incoming msg seq # */ |
|---|
| 154 | 176 | struct mutex s_mutex; /* serialize session messages */ |
|---|
| 155 | 177 | |
|---|
| .. | .. |
|---|
| 164 | 186 | |
|---|
| 165 | 187 | /* protected by s_cap_lock */ |
|---|
| 166 | 188 | spinlock_t s_cap_lock; |
|---|
| 189 | + refcount_t s_ref; |
|---|
| 167 | 190 | struct list_head s_caps; /* all caps issued by this session */ |
|---|
| 168 | | - int s_nr_caps, s_trim_caps; |
|---|
| 191 | + struct ceph_cap *s_cap_iterator; |
|---|
| 192 | + int s_nr_caps; |
|---|
| 169 | 193 | int s_num_cap_releases; |
|---|
| 170 | 194 | int s_cap_reconnect; |
|---|
| 171 | 195 | int s_readonly; |
|---|
| 172 | 196 | struct list_head s_cap_releases; /* waiting cap_release messages */ |
|---|
| 173 | | - struct ceph_cap *s_cap_iterator; |
|---|
| 197 | + struct work_struct s_cap_release_work; |
|---|
| 174 | 198 | |
|---|
| 175 | | - /* protected by mutex */ |
|---|
| 199 | + /* See ceph_inode_info->i_dirty_item. */ |
|---|
| 200 | + struct list_head s_cap_dirty; /* inodes w/ dirty caps */ |
|---|
| 201 | + |
|---|
| 202 | + /* See ceph_inode_info->i_flushing_item. */ |
|---|
| 176 | 203 | struct list_head s_cap_flushing; /* inodes w/ flushing caps */ |
|---|
| 204 | + |
|---|
| 177 | 205 | unsigned long s_renew_requested; /* last time we sent a renew req */ |
|---|
| 178 | 206 | u64 s_renew_seq; |
|---|
| 179 | 207 | |
|---|
| 180 | | - refcount_t s_ref; |
|---|
| 181 | 208 | struct list_head s_waiting; /* waiting requests */ |
|---|
| 182 | 209 | struct list_head s_unsafe; /* unsafe requests */ |
|---|
| 210 | + struct xarray s_delegated_inos; |
|---|
| 183 | 211 | }; |
|---|
| 184 | 212 | |
|---|
| 185 | 213 | /* |
|---|
| .. | .. |
|---|
| 213 | 241 | struct rb_node r_node; |
|---|
| 214 | 242 | struct ceph_mds_client *r_mdsc; |
|---|
| 215 | 243 | |
|---|
| 244 | + struct kref r_kref; |
|---|
| 216 | 245 | int r_op; /* mds op code */ |
|---|
| 217 | 246 | |
|---|
| 218 | 247 | /* operation on what? */ |
|---|
| .. | .. |
|---|
| 233 | 262 | #define CEPH_MDS_R_GOT_RESULT (5) /* got a result */ |
|---|
| 234 | 263 | #define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */ |
|---|
| 235 | 264 | #define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */ |
|---|
| 265 | +#define CEPH_MDS_R_ASYNC (8) /* async request */ |
|---|
| 236 | 266 | unsigned long r_req_flags; |
|---|
| 237 | 267 | |
|---|
| 238 | 268 | struct mutex r_fill_mutex; |
|---|
| .. | .. |
|---|
| 241 | 271 | int r_fmode; /* file mode, if expecting cap */ |
|---|
| 242 | 272 | kuid_t r_uid; |
|---|
| 243 | 273 | kgid_t r_gid; |
|---|
| 274 | + int r_request_release_offset; |
|---|
| 244 | 275 | struct timespec64 r_stamp; |
|---|
| 245 | 276 | |
|---|
| 246 | 277 | /* for choosing which mds to send this request to */ |
|---|
| .. | .. |
|---|
| 258 | 289 | int r_old_inode_drop, r_old_inode_unless; |
|---|
| 259 | 290 | |
|---|
| 260 | 291 | struct ceph_msg *r_request; /* original request */ |
|---|
| 261 | | - int r_request_release_offset; |
|---|
| 262 | 292 | struct ceph_msg *r_reply; |
|---|
| 263 | 293 | struct ceph_mds_reply_info_parsed r_reply_info; |
|---|
| 264 | | - struct page *r_locked_page; |
|---|
| 265 | 294 | int r_err; |
|---|
| 295 | + |
|---|
| 296 | + |
|---|
| 297 | + struct page *r_locked_page; |
|---|
| 298 | + int r_dir_caps; |
|---|
| 299 | + int r_num_caps; |
|---|
| 300 | + u32 r_readdir_offset; |
|---|
| 266 | 301 | |
|---|
| 267 | 302 | unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ |
|---|
| 268 | 303 | unsigned long r_started; /* start time to measure timeout against */ |
|---|
| 304 | + unsigned long r_start_latency; /* start time to measure latency */ |
|---|
| 305 | + unsigned long r_end_latency; /* finish time to measure latency */ |
|---|
| 269 | 306 | unsigned long r_request_started; /* start time for mds request only, |
|---|
| 270 | 307 | used to measure lease durations */ |
|---|
| 271 | 308 | |
|---|
| .. | .. |
|---|
| 282 | 319 | int r_num_fwd; /* number of forward attempts */ |
|---|
| 283 | 320 | int r_resend_mds; /* mds to resend to next, if any*/ |
|---|
| 284 | 321 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ |
|---|
| 322 | + u64 r_deleg_ino; |
|---|
| 285 | 323 | |
|---|
| 286 | | - struct kref r_kref; |
|---|
| 287 | 324 | struct list_head r_wait; |
|---|
| 288 | 325 | struct completion r_completion; |
|---|
| 289 | 326 | struct completion r_safe_completion; |
|---|
| .. | .. |
|---|
| 294 | 331 | long long r_dir_release_cnt; |
|---|
| 295 | 332 | long long r_dir_ordered_cnt; |
|---|
| 296 | 333 | int r_readdir_cache_idx; |
|---|
| 297 | | - u32 r_readdir_offset; |
|---|
| 298 | 334 | |
|---|
| 299 | 335 | struct ceph_cap_reservation r_caps_reservation; |
|---|
| 300 | | - int r_num_caps; |
|---|
| 301 | 336 | }; |
|---|
| 302 | 337 | |
|---|
| 303 | 338 | struct ceph_pool_perm { |
|---|
| .. | .. |
|---|
| 306 | 341 | s64 pool; |
|---|
| 307 | 342 | size_t pool_ns_len; |
|---|
| 308 | 343 | char pool_ns[]; |
|---|
| 344 | +}; |
|---|
| 345 | + |
|---|
| 346 | +struct ceph_snapid_map { |
|---|
| 347 | + struct rb_node node; |
|---|
| 348 | + struct list_head lru; |
|---|
| 349 | + atomic_t ref; |
|---|
| 350 | + u64 snap; |
|---|
| 351 | + dev_t dev; |
|---|
| 352 | + unsigned long last_used; |
|---|
| 353 | +}; |
|---|
| 354 | + |
|---|
| 355 | +/* |
|---|
| 356 | + * Node for the list of quotarealm inodes that are not visible from the |
|---|
| 357 | + * filesystem mountpoint but are still required, e.g. to handle quotas. |
|---|
| 358 | + */ |
|---|
| 359 | +struct ceph_quotarealm_inode { |
|---|
| 360 | + struct rb_node node; |
|---|
| 361 | + u64 ino; |
|---|
| 362 | + unsigned long timeout; /* last time a lookup failed for this inode */ |
|---|
| 363 | + struct mutex mutex; |
|---|
| 364 | + struct inode *inode; |
|---|
| 365 | +}; |
|---|
| 366 | + |
|---|
| 367 | +struct cap_wait { |
|---|
| 368 | + struct list_head list; |
|---|
| 369 | + u64 ino; |
|---|
| 370 | + pid_t tgid; |
|---|
| 371 | + int need; |
|---|
| 372 | + int want; |
|---|
| 309 | 373 | }; |
|---|
| 310 | 374 | |
|---|
| 311 | 375 | /* |
|---|
| .. | .. |
|---|
| 323 | 387 | |
|---|
| 324 | 388 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ |
|---|
| 325 | 389 | atomic_t num_sessions; |
|---|
| 326 | | - int max_sessions; /* len of s_mds_sessions */ |
|---|
| 390 | + int max_sessions; /* len of sessions array */ |
|---|
| 327 | 391 | int stopping; /* true if shutting down */ |
|---|
| 328 | 392 | |
|---|
| 329 | 393 | atomic64_t quotarealms_count; /* # realms with quota */ |
|---|
| 394 | + /* |
|---|
| 395 | + * We keep a list of inodes that we don't see in the mountpoint but that |
|---|
| 396 | + * we still need in order to track quota realms. |
|---|
| 397 | + */ |
|---|
| 398 | + struct rb_root quotarealms_inodes; |
|---|
| 399 | + struct mutex quotarealms_inodes_mutex; |
|---|
| 330 | 400 | |
|---|
| 331 | 401 | /* |
|---|
| 332 | 402 | * snap_rwsem will cover cap linkage into snaprealms, and |
|---|
| .. | .. |
|---|
| 339 | 409 | struct rw_semaphore snap_rwsem; |
|---|
| 340 | 410 | struct rb_root snap_realms; |
|---|
| 341 | 411 | struct list_head snap_empty; |
|---|
| 412 | + int num_snap_realms; |
|---|
| 342 | 413 | spinlock_t snap_empty_lock; /* protect snap_empty */ |
|---|
| 343 | 414 | |
|---|
| 344 | 415 | u64 last_tid; /* most recent mds request */ |
|---|
| .. | .. |
|---|
| 354 | 425 | |
|---|
| 355 | 426 | u64 last_cap_flush_tid; |
|---|
| 356 | 427 | struct list_head cap_flush_list; |
|---|
| 357 | | - struct list_head cap_dirty; /* inodes with dirty caps */ |
|---|
| 358 | 428 | struct list_head cap_dirty_migrating; /* inodes w/ dirty caps that are migrating */ |
|---|
| 359 | 429 | int num_cap_flushing; /* # caps we are flushing */ |
|---|
| 360 | 430 | spinlock_t cap_dirty_lock; /* protects above items */ |
|---|
| 361 | 431 | wait_queue_head_t cap_flushing_wq; |
|---|
| 432 | + |
|---|
| 433 | + struct work_struct cap_reclaim_work; |
|---|
| 434 | + atomic_t cap_reclaim_pending; |
|---|
| 362 | 435 | |
|---|
| 363 | 436 | /* |
|---|
| 364 | 437 | * Cap reservations |
|---|
| .. | .. |
|---|
| 374 | 447 | spinlock_t caps_list_lock; |
|---|
| 375 | 448 | struct list_head caps_list; /* unused (reserved or |
|---|
| 376 | 449 | unreserved) */ |
|---|
| 450 | + struct list_head cap_wait_list; |
|---|
| 377 | 451 | int caps_total_count; /* total caps allocated */ |
|---|
| 378 | 452 | int caps_use_count; /* in use */ |
|---|
| 453 | + int caps_use_max; /* max used caps */ |
|---|
| 379 | 454 | int caps_reserve_count; /* unused, reserved */ |
|---|
| 380 | 455 | int caps_avail_count; /* unused, unreserved */ |
|---|
| 381 | 456 | int caps_min_count; /* keep at least this many |
|---|
| 382 | 457 | (unreserved) */ |
|---|
| 383 | | - spinlock_t dentry_lru_lock; |
|---|
| 384 | | - struct list_head dentry_lru; |
|---|
| 385 | | - int num_dentry; |
|---|
| 458 | + spinlock_t dentry_list_lock; |
|---|
| 459 | + struct list_head dentry_leases; /* fifo list */ |
|---|
| 460 | + struct list_head dentry_dir_leases; /* lru list */ |
|---|
| 461 | + |
|---|
| 462 | + struct ceph_client_metric metric; |
|---|
| 463 | + |
|---|
| 464 | + spinlock_t snapid_map_lock; |
|---|
| 465 | + struct rb_root snapid_map_tree; |
|---|
| 466 | + struct list_head snapid_map_lru; |
|---|
| 386 | 467 | |
|---|
| 387 | 468 | struct rw_semaphore pool_perm_rwsem; |
|---|
| 388 | 469 | struct rb_root pool_perm_tree; |
|---|
| .. | .. |
|---|
| 392 | 473 | |
|---|
| 393 | 474 | extern const char *ceph_mds_op_name(int op); |
|---|
| 394 | 475 | |
|---|
| 476 | +extern bool check_session_state(struct ceph_mds_session *s); |
|---|
| 477 | +void inc_session_sequence(struct ceph_mds_session *s); |
|---|
| 478 | + |
|---|
| 395 | 479 | extern struct ceph_mds_session * |
|---|
| 396 | 480 | __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); |
|---|
| 397 | 481 | |
|---|
| 398 | | -static inline struct ceph_mds_session * |
|---|
| 399 | | -ceph_get_mds_session(struct ceph_mds_session *s) |
|---|
| 400 | | -{ |
|---|
| 401 | | - refcount_inc(&s->s_ref); |
|---|
| 402 | | - return s; |
|---|
| 403 | | -} |
|---|
| 404 | | - |
|---|
| 405 | 482 | extern const char *ceph_session_state_name(int s); |
|---|
| 406 | 483 | |
|---|
| 484 | +extern struct ceph_mds_session * |
|---|
| 485 | +ceph_get_mds_session(struct ceph_mds_session *s); |
|---|
| 407 | 486 | extern void ceph_put_mds_session(struct ceph_mds_session *s); |
|---|
| 408 | 487 | |
|---|
| 409 | 488 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, |
|---|
| .. | .. |
|---|
| 421 | 500 | struct inode *dir); |
|---|
| 422 | 501 | extern struct ceph_mds_request * |
|---|
| 423 | 502 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
|---|
| 424 | | -extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
|---|
| 425 | | - struct ceph_mds_request *req); |
|---|
| 503 | +extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
|---|
| 504 | + struct inode *dir, |
|---|
| 505 | + struct ceph_mds_request *req); |
|---|
| 426 | 506 | extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, |
|---|
| 427 | 507 | struct inode *dir, |
|---|
| 428 | 508 | struct ceph_mds_request *req); |
|---|
| 509 | +extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req); |
|---|
| 510 | +extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req); |
|---|
| 429 | 511 | static inline void ceph_mdsc_get_request(struct ceph_mds_request *req) |
|---|
| 430 | 512 | { |
|---|
| 431 | 513 | kref_get(&req->r_kref); |
|---|
| .. | .. |
|---|
| 436 | 518 | kref_put(&req->r_kref, ceph_mdsc_release_request); |
|---|
| 437 | 519 | } |
|---|
| 438 | 520 | |
|---|
| 439 | | -extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
|---|
| 440 | | - struct ceph_mds_session *session); |
|---|
| 441 | | - |
|---|
| 521 | +extern void send_flush_mdlog(struct ceph_mds_session *s); |
|---|
| 522 | +extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, |
|---|
| 523 | + void (*cb)(struct ceph_mds_session *), |
|---|
| 524 | + bool check_state); |
|---|
| 525 | +extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq); |
|---|
| 526 | +extern void __ceph_queue_cap_release(struct ceph_mds_session *session, |
|---|
| 527 | + struct ceph_cap *cap); |
|---|
| 528 | +extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, |
|---|
| 529 | + struct ceph_mds_session *session); |
|---|
| 530 | +extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); |
|---|
| 531 | +extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); |
|---|
| 532 | +extern int ceph_iterate_session_caps(struct ceph_mds_session *session, |
|---|
| 533 | + int (*cb)(struct inode *, |
|---|
| 534 | + struct ceph_cap *, void *), |
|---|
| 535 | + void *arg); |
|---|
| 442 | 536 | extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); |
|---|
| 537 | + |
|---|
| 538 | +static inline void ceph_mdsc_free_path(char *path, int len) |
|---|
| 539 | +{ |
|---|
| 540 | + if (!IS_ERR_OR_NULL(path)) |
|---|
| 541 | + __putname(path - (PATH_MAX - 1 - len)); |
|---|
| 542 | +} |
|---|
| 443 | 543 | |
|---|
| 444 | 544 | extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, |
|---|
| 445 | 545 | int stop_on_nosnap); |
|---|
| 446 | 546 | |
|---|
| 447 | 547 | extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); |
|---|
| 448 | 548 | extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, |
|---|
| 449 | | - struct inode *inode, |
|---|
| 450 | 549 | struct dentry *dentry, char action, |
|---|
| 451 | 550 | u32 seq); |
|---|
| 452 | 551 | |
|---|
| .. | .. |
|---|
| 463 | 562 | extern int ceph_trim_caps(struct ceph_mds_client *mdsc, |
|---|
| 464 | 563 | struct ceph_mds_session *session, |
|---|
| 465 | 564 | int max_caps); |
|---|
| 565 | + |
|---|
| 566 | +static inline int ceph_wait_on_async_create(struct inode *inode) |
|---|
| 567 | +{ |
|---|
| 568 | + struct ceph_inode_info *ci = ceph_inode(inode); |
|---|
| 569 | + |
|---|
| 570 | + return wait_on_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT, |
|---|
| 571 | + TASK_INTERRUPTIBLE); |
|---|
| 572 | +} |
|---|
| 573 | + |
|---|
| 574 | +extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session); |
|---|
| 575 | +extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino); |
|---|
| 466 | 576 | #endif |
|---|