.. | .. |
---|
10 | 10 | #include <linux/spinlock.h> |
---|
11 | 11 | #include <linux/refcount.h> |
---|
12 | 12 | #include <linux/utsname.h> |
---|
| 13 | +#include <linux/ktime.h> |
---|
13 | 14 | |
---|
14 | 15 | #include <linux/ceph/types.h> |
---|
15 | 16 | #include <linux/ceph/messenger.h> |
---|
16 | 17 | #include <linux/ceph/mdsmap.h> |
---|
17 | 18 | #include <linux/ceph/auth.h> |
---|
18 | 19 | |
---|
| 20 | +#include "metric.h" |
---|
| 21 | +#include "super.h" |
---|
| 22 | + |
---|
19 | 23 | /* The first 8 bits are reserved for old ceph releases */ |
---|
20 | | -#define CEPHFS_FEATURE_MIMIC 8 |
---|
| 24 | +enum ceph_feature_type { /* named CephFS feature bit numbers; bits 0-7 get no name here */ |
---|
| 25 | + CEPHFS_FEATURE_MIMIC = 8, /* the enumerators that follow count up implicitly from 8 */ |
---|
| 26 | + CEPHFS_FEATURE_REPLY_ENCODING, |
---|
| 27 | + CEPHFS_FEATURE_RECLAIM_CLIENT, |
---|
| 28 | + CEPHFS_FEATURE_LAZY_CAP_WANTED, |
---|
| 29 | + CEPHFS_FEATURE_MULTI_RECONNECT, |
---|
| 30 | + CEPHFS_FEATURE_DELEG_INO, |
---|
| 31 | + CEPHFS_FEATURE_METRIC_COLLECT, |
---|
21 | 32 | |
---|
22 | | -#define CEPHFS_FEATURES_ALL { \ |
---|
23 | | - 0, 1, 2, 3, 4, 5, 6, 7, \ |
---|
24 | | - CEPHFS_FEATURE_MIMIC, \ |
---|
| 33 | + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT, /* same value as the last feature listed above; update when appending a feature */ |
---|
| 34 | +}; |
---|
| 35 | + |
---|
| 36 | +#define CEPHFS_FEATURES_CLIENT_SUPPORTED { /* brace-enclosed list of feature bit numbers: 0-7 plus the named features below; CEPHFS_FEATURE_RECLAIM_CLIENT is not listed */ \ |
---|
| 37 | + 0, 1, 2, 3, 4, 5, 6, 7, \ |
---|
| 38 | + CEPHFS_FEATURE_MIMIC, \ |
---|
| 39 | + CEPHFS_FEATURE_REPLY_ENCODING, \ |
---|
| 40 | + CEPHFS_FEATURE_LAZY_CAP_WANTED, \ |
---|
| 41 | + CEPHFS_FEATURE_MULTI_RECONNECT, \ |
---|
| 42 | + CEPHFS_FEATURE_DELEG_INO, \ |
---|
| 43 | + CEPHFS_FEATURE_METRIC_COLLECT, \ |
---|
25 | 44 | } |
---|
26 | | - |
---|
27 | | -#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL |
---|
28 | 45 | #define CEPHFS_FEATURES_CLIENT_REQUIRED {} |
---|
29 | | - |
---|
30 | 46 | |
---|
31 | 47 | /* |
---|
32 | 48 | * Some lock dependencies: |
---|
.. | .. |
---|
63 | 79 | char *pool_ns_data; |
---|
64 | 80 | u64 max_bytes; |
---|
65 | 81 | u64 max_files; |
---|
| 82 | + s32 dir_pin; |
---|
| 83 | + struct ceph_timespec btime; |
---|
| 84 | + struct ceph_timespec snap_btime; |
---|
| 85 | + u64 change_attr; |
---|
66 | 86 | }; |
---|
67 | 87 | |
---|
68 | 88 | struct ceph_mds_reply_dir_entry { |
---|
.. | .. |
---|
139 | 159 | CEPH_MDS_SESSION_OPENING = 2, |
---|
140 | 160 | CEPH_MDS_SESSION_OPEN = 3, |
---|
141 | 161 | CEPH_MDS_SESSION_HUNG = 4, |
---|
142 | | - CEPH_MDS_SESSION_CLOSING = 5, |
---|
143 | | - CEPH_MDS_SESSION_RESTARTING = 6, |
---|
144 | | - CEPH_MDS_SESSION_RECONNECTING = 7, |
---|
145 | | - CEPH_MDS_SESSION_REJECTED = 8, |
---|
| 162 | + CEPH_MDS_SESSION_RESTARTING = 5, |
---|
| 163 | + CEPH_MDS_SESSION_RECONNECTING = 6, |
---|
| 164 | + CEPH_MDS_SESSION_CLOSING = 7, |
---|
| 165 | + CEPH_MDS_SESSION_CLOSED = 8, |
---|
| 166 | + CEPH_MDS_SESSION_REJECTED = 9, |
---|
146 | 167 | }; |
---|
147 | 168 | |
---|
148 | 169 | struct ceph_mds_session { |
---|
.. | .. |
---|
150 | 171 | int s_mds; |
---|
151 | 172 | int s_state; |
---|
152 | 173 | unsigned long s_ttl; /* time until mds kills us */ |
---|
| 174 | + unsigned long s_features; |
---|
153 | 175 | u64 s_seq; /* incoming msg seq # */ |
---|
154 | 176 | struct mutex s_mutex; /* serialize session messages */ |
---|
155 | 177 | |
---|
.. | .. |
---|
164 | 186 | |
---|
165 | 187 | /* protected by s_cap_lock */ |
---|
166 | 188 | spinlock_t s_cap_lock; |
---|
| 189 | + refcount_t s_ref; |
---|
167 | 190 | struct list_head s_caps; /* all caps issued by this session */ |
---|
168 | | - int s_nr_caps, s_trim_caps; |
---|
| 191 | + struct ceph_cap *s_cap_iterator; |
---|
| 192 | + int s_nr_caps; |
---|
169 | 193 | int s_num_cap_releases; |
---|
170 | 194 | int s_cap_reconnect; |
---|
171 | 195 | int s_readonly; |
---|
172 | 196 | struct list_head s_cap_releases; /* waiting cap_release messages */ |
---|
173 | | - struct ceph_cap *s_cap_iterator; |
---|
| 197 | + struct work_struct s_cap_release_work; |
---|
174 | 198 | |
---|
175 | | - /* protected by mutex */ |
---|
| 199 | + /* See ceph_inode_info->i_dirty_item. */ |
---|
| 200 | + struct list_head s_cap_dirty; /* inodes w/ dirty caps */ |
---|
| 201 | + |
---|
| 202 | + /* See ceph_inode_info->i_flushing_item. */ |
---|
176 | 203 | struct list_head s_cap_flushing; /* inodes w/ flushing caps */ |
---|
| 204 | + |
---|
177 | 205 | unsigned long s_renew_requested; /* last time we sent a renew req */ |
---|
178 | 206 | u64 s_renew_seq; |
---|
179 | 207 | |
---|
180 | | - refcount_t s_ref; |
---|
181 | 208 | struct list_head s_waiting; /* waiting requests */ |
---|
182 | 209 | struct list_head s_unsafe; /* unsafe requests */ |
---|
| 210 | + struct xarray s_delegated_inos; |
---|
183 | 211 | }; |
---|
184 | 212 | |
---|
185 | 213 | /* |
---|
.. | .. |
---|
213 | 241 | struct rb_node r_node; |
---|
214 | 242 | struct ceph_mds_client *r_mdsc; |
---|
215 | 243 | |
---|
| 244 | + struct kref r_kref; |
---|
216 | 245 | int r_op; /* mds op code */ |
---|
217 | 246 | |
---|
218 | 247 | /* operation on what? */ |
---|
.. | .. |
---|
233 | 262 | #define CEPH_MDS_R_GOT_RESULT (5) /* got a result */ |
---|
234 | 263 | #define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */ |
---|
235 | 264 | #define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */ |
---|
| 265 | +#define CEPH_MDS_R_ASYNC (8) /* async request */ |
---|
236 | 266 | unsigned long r_req_flags; |
---|
237 | 267 | |
---|
238 | 268 | struct mutex r_fill_mutex; |
---|
.. | .. |
---|
241 | 271 | int r_fmode; /* file mode, if expecting cap */ |
---|
242 | 272 | kuid_t r_uid; |
---|
243 | 273 | kgid_t r_gid; |
---|
| 274 | + int r_request_release_offset; |
---|
244 | 275 | struct timespec64 r_stamp; |
---|
245 | 276 | |
---|
246 | 277 | /* for choosing which mds to send this request to */ |
---|
.. | .. |
---|
258 | 289 | int r_old_inode_drop, r_old_inode_unless; |
---|
259 | 290 | |
---|
260 | 291 | struct ceph_msg *r_request; /* original request */ |
---|
261 | | - int r_request_release_offset; |
---|
262 | 292 | struct ceph_msg *r_reply; |
---|
263 | 293 | struct ceph_mds_reply_info_parsed r_reply_info; |
---|
264 | | - struct page *r_locked_page; |
---|
265 | 294 | int r_err; |
---|
| 295 | + |
---|
| 296 | + |
---|
| 297 | + struct page *r_locked_page; |
---|
| 298 | + int r_dir_caps; |
---|
| 299 | + int r_num_caps; |
---|
| 300 | + u32 r_readdir_offset; |
---|
266 | 301 | |
---|
267 | 302 | unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ |
---|
268 | 303 | unsigned long r_started; /* start time to measure timeout against */ |
---|
| 304 | + unsigned long r_start_latency; /* start time to measure latency */ |
---|
| 305 | + unsigned long r_end_latency; /* finish time to measure latency */ |
---|
269 | 306 | unsigned long r_request_started; /* start time for mds request only, |
---|
270 | 307 | used to measure lease durations */ |
---|
271 | 308 | |
---|
.. | .. |
---|
282 | 319 | int r_num_fwd; /* number of forward attempts */ |
---|
283 | 320 | int r_resend_mds; /* mds to resend to next, if any*/ |
---|
284 | 321 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ |
---|
| 322 | + u64 r_deleg_ino; |
---|
285 | 323 | |
---|
286 | | - struct kref r_kref; |
---|
287 | 324 | struct list_head r_wait; |
---|
288 | 325 | struct completion r_completion; |
---|
289 | 326 | struct completion r_safe_completion; |
---|
.. | .. |
---|
294 | 331 | long long r_dir_release_cnt; |
---|
295 | 332 | long long r_dir_ordered_cnt; |
---|
296 | 333 | int r_readdir_cache_idx; |
---|
297 | | - u32 r_readdir_offset; |
---|
298 | 334 | |
---|
299 | 335 | struct ceph_cap_reservation r_caps_reservation; |
---|
300 | | - int r_num_caps; |
---|
301 | 336 | }; |
---|
302 | 337 | |
---|
303 | 338 | struct ceph_pool_perm { |
---|
.. | .. |
---|
306 | 341 | s64 pool; |
---|
307 | 342 | size_t pool_ns_len; |
---|
308 | 343 | char pool_ns[]; |
---|
| 344 | +}; |
---|
| 345 | + |
---|
| 346 | +struct ceph_snapid_map { /* pairs a snapshot id with a dev_t; TODO confirm how callers use the pairing */ |
---|
| 347 | + struct rb_node node; /* rb-tree linkage; presumably in ceph_mds_client->snapid_map_tree -- confirm */ |
---|
| 348 | + struct list_head lru; /* list linkage; presumably on ceph_mds_client->snapid_map_lru -- confirm */ |
---|
| 349 | + atomic_t ref; /* NOTE(review): looks like a reference count -- confirm */ |
---|
| 350 | + u64 snap; |
---|
| 351 | + dev_t dev; |
---|
| 352 | + unsigned long last_used; /* presumably a last-use timestamp; units not shown here -- confirm */ |
---|
| 353 | +}; |
---|
| 354 | + |
---|
| 355 | +/* |
---|
| 356 | + * node for list of quotarealm inodes that are not visible from the filesystem |
---|
| 357 | + * mountpoint, but required to handle, e.g. quotas. |
---|
| 358 | + */ |
---|
| 359 | +struct ceph_quotarealm_inode { |
---|
| 360 | + struct rb_node node; /* rb-tree linkage; presumably in ceph_mds_client->quotarealms_inodes -- confirm */ |
---|
| 361 | + u64 ino; /* NOTE(review): presumably the inode number used as the lookup key -- confirm */ |
---|
| 362 | + unsigned long timeout; /* last time a lookup failed for this inode */ |
---|
| 363 | + struct mutex mutex; /* NOTE(review): what this mutex guards is not visible here -- confirm */ |
---|
| 364 | + struct inode *inode; /* NOTE(review): may be NULL when no inode is attached -- confirm */ |
---|
| 365 | +}; |
---|
| 366 | + |
---|
| 367 | +struct cap_wait { /* one record per waiter; presumably describes a task waiting for caps -- confirm */ |
---|
| 368 | + struct list_head list; /* list linkage; presumably on ceph_mds_client->cap_wait_list -- confirm */ |
---|
| 369 | + u64 ino; |
---|
| 370 | + pid_t tgid; |
---|
| 371 | + int need; /* NOTE(review): need/want look like cap bitmasks -- confirm */ |
---|
| 372 | + int want; |
---|
| 373 | +}; |
---|
| 374 | + |
---|
| 375 | +enum { /* presumably values stored in ceph_mds_client->stopping -- confirm */ |
---|
| 376 | + CEPH_MDSC_STOPPING_BEGIN = 1, |
---|
| 377 | + CEPH_MDSC_STOPPING_FLUSHED = 2, |
---|
309 | 378 | }; |
---|
310 | 379 | |
---|
311 | 380 | /* |
---|
.. | .. |
---|
323 | 392 | |
---|
324 | 393 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ |
---|
325 | 394 | atomic_t num_sessions; |
---|
326 | | - int max_sessions; /* len of s_mds_sessions */ |
---|
| 395 | + int max_sessions; /* len of sessions array */ |
---|
327 | 396 | int stopping; /* true if shutting down */ |
---|
328 | 397 | |
---|
329 | 398 | atomic64_t quotarealms_count; /* # realms with quota */ |
---|
| 399 | + /* |
---|
| 400 | + * We keep track of inodes that are not visible from the mountpoint but |
---|
| 401 | + * that we need in order to track quota realms. |
---|
| 402 | + */ |
---|
| 403 | + struct rb_root quotarealms_inodes; |
---|
| 404 | + struct mutex quotarealms_inodes_mutex; |
---|
330 | 405 | |
---|
331 | 406 | /* |
---|
332 | 407 | * snap_rwsem will cover cap linkage into snaprealms, and |
---|
.. | .. |
---|
339 | 414 | struct rw_semaphore snap_rwsem; |
---|
340 | 415 | struct rb_root snap_realms; |
---|
341 | 416 | struct list_head snap_empty; |
---|
| 417 | + int num_snap_realms; |
---|
342 | 418 | spinlock_t snap_empty_lock; /* protect snap_empty */ |
---|
343 | 419 | |
---|
344 | 420 | u64 last_tid; /* most recent mds request */ |
---|
.. | .. |
---|
354 | 430 | |
---|
355 | 431 | u64 last_cap_flush_tid; |
---|
356 | 432 | struct list_head cap_flush_list; |
---|
357 | | - struct list_head cap_dirty; /* inodes with dirty caps */ |
---|
358 | 433 | struct list_head cap_dirty_migrating; /* ...that are migration... */ |
---|
359 | 434 | int num_cap_flushing; /* # caps we are flushing */ |
---|
360 | 435 | spinlock_t cap_dirty_lock; /* protects above items */ |
---|
361 | 436 | wait_queue_head_t cap_flushing_wq; |
---|
| 437 | + |
---|
| 438 | + struct work_struct cap_reclaim_work; |
---|
| 439 | + atomic_t cap_reclaim_pending; |
---|
362 | 440 | |
---|
363 | 441 | /* |
---|
364 | 442 | * Cap reservations |
---|
.. | .. |
---|
374 | 452 | spinlock_t caps_list_lock; |
---|
375 | 453 | struct list_head caps_list; /* unused (reserved or |
---|
376 | 454 | unreserved) */ |
---|
| 455 | + struct list_head cap_wait_list; |
---|
377 | 456 | int caps_total_count; /* total caps allocated */ |
---|
378 | 457 | int caps_use_count; /* in use */ |
---|
| 458 | + int caps_use_max; /* max used caps */ |
---|
379 | 459 | int caps_reserve_count; /* unused, reserved */ |
---|
380 | 460 | int caps_avail_count; /* unused, unreserved */ |
---|
381 | 461 | int caps_min_count; /* keep at least this many |
---|
382 | 462 | (unreserved) */ |
---|
383 | | - spinlock_t dentry_lru_lock; |
---|
384 | | - struct list_head dentry_lru; |
---|
385 | | - int num_dentry; |
---|
| 463 | + spinlock_t dentry_list_lock; |
---|
| 464 | + struct list_head dentry_leases; /* fifo list */ |
---|
| 465 | + struct list_head dentry_dir_leases; /* lru list */ |
---|
| 466 | + |
---|
| 467 | + struct ceph_client_metric metric; |
---|
| 468 | + |
---|
| 469 | + spinlock_t snapid_map_lock; |
---|
| 470 | + struct rb_root snapid_map_tree; |
---|
| 471 | + struct list_head snapid_map_lru; |
---|
386 | 472 | |
---|
387 | 473 | struct rw_semaphore pool_perm_rwsem; |
---|
388 | 474 | struct rb_root pool_perm_tree; |
---|
.. | .. |
---|
392 | 478 | |
---|
393 | 479 | extern const char *ceph_mds_op_name(int op); |
---|
394 | 480 | |
---|
| 481 | +extern bool check_session_state(struct ceph_mds_session *s); |
---|
| 482 | +void inc_session_sequence(struct ceph_mds_session *s); |
---|
| 483 | + |
---|
395 | 484 | extern struct ceph_mds_session * |
---|
396 | 485 | __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); |
---|
397 | 486 | |
---|
398 | | -static inline struct ceph_mds_session * |
---|
399 | | -ceph_get_mds_session(struct ceph_mds_session *s) |
---|
400 | | -{ |
---|
401 | | - refcount_inc(&s->s_ref); |
---|
402 | | - return s; |
---|
403 | | -} |
---|
404 | | - |
---|
405 | 487 | extern const char *ceph_session_state_name(int s); |
---|
406 | 488 | |
---|
| 489 | +extern struct ceph_mds_session * |
---|
| 490 | +ceph_get_mds_session(struct ceph_mds_session *s); |
---|
407 | 491 | extern void ceph_put_mds_session(struct ceph_mds_session *s); |
---|
408 | 492 | |
---|
409 | 493 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, |
---|
.. | .. |
---|
421 | 505 | struct inode *dir); |
---|
422 | 506 | extern struct ceph_mds_request * |
---|
423 | 507 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
---|
424 | | -extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
---|
425 | | - struct ceph_mds_request *req); |
---|
| 508 | +extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
---|
| 509 | + struct inode *dir, |
---|
| 510 | + struct ceph_mds_request *req); |
---|
426 | 511 | extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, |
---|
427 | 512 | struct inode *dir, |
---|
428 | 513 | struct ceph_mds_request *req); |
---|
| 514 | +extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req); |
---|
| 515 | +extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req); |
---|
429 | 516 | static inline void ceph_mdsc_get_request(struct ceph_mds_request *req) |
---|
430 | 517 | { |
---|
431 | 518 | kref_get(&req->r_kref); |
---|
.. | .. |
---|
436 | 523 | kref_put(&req->r_kref, ceph_mdsc_release_request); |
---|
437 | 524 | } |
---|
438 | 525 | |
---|
439 | | -extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
---|
440 | | - struct ceph_mds_session *session); |
---|
441 | | - |
---|
| 526 | +extern void send_flush_mdlog(struct ceph_mds_session *s); |
---|
| 527 | +extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, |
---|
| 528 | + void (*cb)(struct ceph_mds_session *), |
---|
| 529 | + bool check_state); |
---|
| 530 | +extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq); |
---|
| 531 | +extern void __ceph_queue_cap_release(struct ceph_mds_session *session, |
---|
| 532 | + struct ceph_cap *cap); |
---|
| 533 | +extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, |
---|
| 534 | + struct ceph_mds_session *session); |
---|
| 535 | +extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); |
---|
| 536 | +extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); |
---|
| 537 | +extern int ceph_iterate_session_caps(struct ceph_mds_session *session, |
---|
| 538 | + int (*cb)(struct inode *, |
---|
| 539 | + struct ceph_cap *, void *), |
---|
| 540 | + void *arg); |
---|
442 | 541 | extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); |
---|
| 542 | + |
---|
| 543 | +static inline void ceph_mdsc_free_path(char *path, int len) /* release a path buffer; does nothing for NULL or error pointers */ |
---|
| 544 | +{ |
---|
| 545 | + if (!IS_ERR_OR_NULL(path)) |
---|
| 546 | + __putname(path - (PATH_MAX - 1 - len)); /* steps back (PATH_MAX - 1 - len) bytes before freeing; presumably path points into the tail of a PATH_MAX buffer -- confirm against ceph_mdsc_build_path */ |
---|
| 547 | +} |
---|
443 | 548 | |
---|
444 | 549 | extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, |
---|
445 | 550 | int stop_on_nosnap); |
---|
446 | 551 | |
---|
447 | 552 | extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); |
---|
448 | 553 | extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, |
---|
449 | | - struct inode *inode, |
---|
450 | 554 | struct dentry *dentry, char action, |
---|
451 | 555 | u32 seq); |
---|
452 | 556 | |
---|
.. | .. |
---|
463 | 567 | extern int ceph_trim_caps(struct ceph_mds_client *mdsc, |
---|
464 | 568 | struct ceph_mds_session *session, |
---|
465 | 569 | int max_caps); |
---|
| 570 | + |
---|
| 571 | +static inline int ceph_wait_on_async_create(struct inode *inode) /* waits on CEPH_ASYNC_CREATE_BIT of the inode's i_ceph_flags; returns whatever wait_on_bit() returns */ |
---|
| 572 | +{ |
---|
| 573 | + struct ceph_inode_info *ci = ceph_inode(inode); |
---|
| 574 | + |
---|
| 575 | + return wait_on_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT, |
---|
| 576 | + TASK_INTERRUPTIBLE); /* interruptible sleep, so callers should expect a non-zero return on a signal -- confirm */ |
---|
| 577 | +} |
---|
| 578 | + |
---|
| 579 | +extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session); |
---|
| 580 | +extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino); |
---|
466 | 581 | #endif |
---|