From b22da3d8526a935aa31e086e63f60ff3246cb61c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 09 Dec 2023 07:24:11 +0000 Subject: [PATCH] add stmac read mac form eeprom --- kernel/fs/ceph/dir.c | 763 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 603 insertions(+), 160 deletions(-) diff --git a/kernel/fs/ceph/dir.c b/kernel/fs/ceph/dir.c index 7f3f64b..1fddb9c 100644 --- a/kernel/fs/ceph/dir.c +++ b/kernel/fs/ceph/dir.c @@ -29,12 +29,16 @@ const struct dentry_operations ceph_dentry_ops; +static bool __dentry_lease_is_valid(struct ceph_dentry_info *di); +static int __dir_lease_try_check(const struct dentry *dentry); + /* * Initialize ceph dentry state. */ static int ceph_d_init(struct dentry *dentry) { struct ceph_dentry_info *di; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL); if (!di) @@ -44,7 +48,10 @@ di->lease_session = NULL; di->time = jiffies; dentry->d_fsdata = di; - ceph_dentry_lru_add(dentry); + INIT_LIST_HEAD(&di->lease_list); + + atomic64_inc(&mdsc->metric.total_dentries); + return 0; } @@ -241,6 +248,7 @@ goto out; } if (fpos_cmp(ctx->pos, di->offset) <= 0) { + __ceph_dentry_dir_lease_touch(di); emit_dentry = true; } spin_unlock(&dentry->d_lock); @@ -250,9 +258,7 @@ dentry, dentry, d_inode(dentry)); ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, - dentry->d_name.len, - ceph_translate_ino(dentry->d_sb, - d_inode(dentry)->i_ino), + dentry->d_name.len, ceph_present_inode(d_inode(dentry)), d_inode(dentry)->i_mode >> 12)) { dput(dentry); err = 0; @@ -315,30 +321,37 @@ /* always start with . and .. 
*/ if (ctx->pos == 0) { dout("readdir off 0 -> '.'\n"); - if (!dir_emit(ctx, ".", 1, - ceph_translate_ino(inode->i_sb, inode->i_ino), + if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode), inode->i_mode >> 12)) return 0; ctx->pos = 1; } if (ctx->pos == 1) { - ino_t ino = parent_ino(file->f_path.dentry); + u64 ino; + struct dentry *dentry = file->f_path.dentry; + + spin_lock(&dentry->d_lock); + ino = ceph_present_inode(dentry->d_parent->d_inode); + spin_unlock(&dentry->d_lock); + dout("readdir off 1 -> '..'\n"); - if (!dir_emit(ctx, "..", 2, - ceph_translate_ino(inode->i_sb, ino), - inode->i_mode >> 12)) + if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12)) return 0; ctx->pos = 2; } - /* can we use the dcache? */ spin_lock(&ci->i_ceph_lock); + /* request Fx cap. if have Fx, we don't need to release Fs cap + * for later create/unlink. */ + __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR); + /* can we use the dcache? */ if (ceph_test_mount_opt(fsc, DCACHE) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && - __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { + __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { int shared_gen = atomic_read(&ci->i_shared_gen); + spin_unlock(&ci->i_ceph_lock); err = __dcache_readdir(file, ctx, shared_gen); if (err != -EAGAIN) @@ -465,8 +478,11 @@ 2 : (fpos_off(rde->offset) + 1); err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); - if (err) + if (err) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; return err; + } } else if (req->r_reply_info.dir_end) { dfi->next_offset = 2; /* keep last name */ @@ -494,9 +510,6 @@ } for (; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_vino vino; - ino_t ino; - u32 ftype; BUG_ON(rde->offset < ctx->pos); @@ -506,13 +519,16 @@ rde->name_len, rde->name, &rde->inode.in); BUG_ON(!rde->inode.in); - ftype = 
le32_to_cpu(rde->inode.in->mode) >> 12; - vino.ino = le64_to_cpu(rde->inode.in->ino); - vino.snap = le64_to_cpu(rde->inode.in->snapid); - ino = ceph_vino_to_ino(vino); if (!dir_emit(ctx, rde->name, rde->name_len, - ceph_translate_ino(inode->i_sb, ino), ftype)) { + ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), + le32_to_cpu(rde->inode.in->mode) >> 12)) { + /* + * NOTE: Here no need to put the 'dfi->last_readdir', + * because when dir_emit stops us it most likely + * doesn't have enough memory, etc. So for next readdir + * it will continue. + */ dout("filldir stopping us...\n"); return 0; } @@ -730,7 +746,7 @@ unsigned int flags) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int op; int mask; @@ -748,14 +764,15 @@ struct ceph_dentry_info *di = ceph_dentry(dentry); spin_lock(&ci->i_ceph_lock); - dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); + dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags); if (strncmp(dentry->d_name.name, fsc->mount_options->snapdir_name, dentry->d_name.len) && !is_root_ceph_dentry(dir, dentry) && ceph_test_mount_opt(fsc, DCACHE) && __ceph_dir_is_complete(ci) && - (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { + __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { + __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); spin_unlock(&ci->i_ceph_lock); dout(" dir %p complete, -ENOENT\n", dir); d_add(dentry, NULL); @@ -818,10 +835,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; - struct ceph_acls_info acls = {}; + struct ceph_acl_sec_ctx as_ctx = {}; int err; if (ceph_snap(dir) != CEPH_NOSNAP) @@ -832,7 +848,10 @@ 
goto out; } - err = ceph_pre_init_acls(dir, &mode, &acls); + err = ceph_pre_init_acls(dir, &mode, &as_ctx); + if (err < 0) + goto out; + err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out; @@ -851,9 +870,9 @@ req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; - if (acls.pagelist) { - req->r_pagelist = acls.pagelist; - acls.pagelist = NULL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) @@ -861,10 +880,10 @@ ceph_mdsc_put_request(req); out: if (!err) - ceph_init_inode_acls(d_inode(dentry), &acls); + ceph_init_inode_acls(d_inode(dentry), &as_ctx); else d_drop(dentry); - ceph_release_acls_info(&acls); + ceph_release_acl_sec_ctx(&as_ctx); return err; } @@ -877,9 +896,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, const char *dest) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; + struct ceph_acl_sec_ctx as_ctx = {}; int err; if (ceph_snap(dir) != CEPH_NOSNAP) @@ -889,6 +908,10 @@ err = -EDQUOT; goto out; } + + err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx); + if (err < 0) + goto out; dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); @@ -908,6 +931,10 @@ req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; + } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); @@ -915,15 +942,15 @@ out: if 
(err) d_drop(dentry); + ceph_release_acl_sec_ctx(&as_ctx); return err; } static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; - struct ceph_acls_info acls = {}; + struct ceph_acl_sec_ctx as_ctx = {}; int err = -EROFS; int op; @@ -946,7 +973,10 @@ } mode |= S_IFDIR; - err = ceph_pre_init_acls(dir, &mode, &acls); + err = ceph_pre_init_acls(dir, &mode, &as_ctx); + if (err < 0) + goto out; + err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out; @@ -963,9 +993,9 @@ req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; - if (acls.pagelist) { - req->r_pagelist = acls.pagelist; - acls.pagelist = NULL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && @@ -975,18 +1005,17 @@ ceph_mdsc_put_request(req); out: if (!err) - ceph_init_inode_acls(d_inode(dentry), &acls); + ceph_init_inode_acls(d_inode(dentry), &as_ctx); else d_drop(dentry); - ceph_release_acls_info(&acls); + ceph_release_acl_sec_ctx(&as_ctx); return err; } static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int err; @@ -1020,6 +1049,78 @@ return err; } +static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req) +{ + int result = req->r_err ? 
req->r_err : + le32_to_cpu(req->r_reply_info.head->result); + + if (result == -EJUKEBOX) + goto out; + + /* If op failed, mark everyone involved for errors */ + if (result) { + int pathlen = 0; + u64 base = 0; + char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + &base, 0); + + /* mark error on parent + clear complete */ + mapping_set_error(req->r_parent->i_mapping, result); + ceph_dir_clear_complete(req->r_parent); + + /* drop the dentry -- we don't know its status */ + if (!d_unhashed(req->r_dentry)) + d_drop(req->r_dentry); + + /* mark inode itself for an error (since metadata is bogus) */ + mapping_set_error(req->r_old_inode->i_mapping, result); + + pr_warn("ceph: async unlink failure path=(%llx)%s result=%d!\n", + base, IS_ERR(path) ? "<<bad>>" : path, result); + ceph_mdsc_free_path(path, pathlen); + } +out: + iput(req->r_old_inode); + ceph_mdsc_release_dir_caps(req); +} + +static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry) +{ + struct ceph_inode_info *ci = ceph_inode(dir); + struct ceph_dentry_info *di; + int got = 0, want = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_UNLINK; + + spin_lock(&ci->i_ceph_lock); + if ((__ceph_caps_issued(ci, NULL) & want) == want) { + ceph_take_cap_refs(ci, want, false); + got = want; + } + spin_unlock(&ci->i_ceph_lock); + + /* If we didn't get anything, return 0 */ + if (!got) + return 0; + + spin_lock(&dentry->d_lock); + di = ceph_dentry(dentry); + /* + * - We are holding Fx, which implies Fs caps. + * - Only support async unlink for primary linkage + */ + if (atomic_read(&ci->i_shared_gen) != di->lease_shared_gen || + !(di->flags & CEPH_DENTRY_PRIMARY_LINK)) + want = 0; + spin_unlock(&dentry->d_lock); + + /* Do we still want what we've got? 
*/ + if (want == got) + return got; + + ceph_put_cap_refs(ci, got); + return 0; +} + /* * rmdir and unlink are differ only by the metadata op code */ @@ -1029,6 +1130,7 @@ struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; + bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); int err = -EROFS; int op; @@ -1043,6 +1145,7 @@ CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else goto out; +retry: req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1051,13 +1154,39 @@ req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_parent = dir; - set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_inode_drop = ceph_drop_caps_for_unlink(inode); - err = ceph_mdsc_do_request(mdsc, dir, req); - if (!err && !req->r_reply_info.head->is_dentry) - d_delete(dentry); + + if (try_async && op == CEPH_MDS_OP_UNLINK && + (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) { + dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir), + dentry->d_name.len, dentry->d_name.name, + ceph_cap_string(req->r_dir_caps)); + set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags); + req->r_callback = ceph_async_unlink_cb; + req->r_old_inode = d_inode(dentry); + ihold(req->r_old_inode); + err = ceph_mdsc_submit_request(mdsc, dir, req); + if (!err) { + /* + * We have enough caps, so we assume that the unlink + * will succeed. Fix up the target inode and dcache. 
+ */ + drop_nlink(inode); + d_delete(dentry); + } else if (err == -EJUKEBOX) { + try_async = false; + ceph_mdsc_put_request(req); + goto retry; + } + } else { + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + err = ceph_mdsc_do_request(mdsc, dir, req); + if (!err && !req->r_reply_info.head->is_dentry) + d_delete(dentry); + } + ceph_mdsc_put_request(req); out: return err; @@ -1067,8 +1196,7 @@ struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); struct ceph_mds_request *req; int op = CEPH_MDS_OP_RENAME; int err; @@ -1083,11 +1211,12 @@ op = CEPH_MDS_OP_RENAMESNAP; else return -EROFS; + } else if (old_dir != new_dir) { + err = ceph_quota_check_rename(mdsc, d_inode(old_dentry), + new_dir); + if (err) + return err; } - /* don't allow cross-quota renames */ - if ((old_dir != new_dir) && - (!ceph_quota_is_same_realm(old_dir, new_dir))) - return -EXDEV; dout("rename dir %p dentry %p to dir %p dentry %p\n", old_dir, old_dentry, new_dir, new_dentry); @@ -1125,13 +1254,278 @@ } /* + * Move dentry to tail of mdsc->dentry_leases list when lease is updated. + * Leases at front of the list will expire first. (Assume all leases have + * similar duration) + * + * Called under dentry->d_lock. 
+ */ +void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) +{ + struct dentry *dn = di->dentry; + struct ceph_mds_client *mdsc; + + dout("dentry_lease_touch %p %p '%pd'\n", di, dn, dn); + + di->flags |= CEPH_DENTRY_LEASE_LIST; + if (di->flags & CEPH_DENTRY_SHRINK_LIST) { + di->flags |= CEPH_DENTRY_REFERENCED; + return; + } + + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_list_lock); + list_move_tail(&di->lease_list, &mdsc->dentry_leases); + spin_unlock(&mdsc->dentry_list_lock); +} + +static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc, + struct ceph_dentry_info *di) +{ + di->flags &= ~(CEPH_DENTRY_LEASE_LIST | CEPH_DENTRY_REFERENCED); + di->lease_gen = 0; + di->time = jiffies; + list_move_tail(&di->lease_list, &mdsc->dentry_dir_leases); +} + +/* + * When dir lease is used, add dentry to tail of mdsc->dentry_dir_leases + * list if it's not in the list, otherwise set 'referenced' flag. + * + * Called under dentry->d_lock. + */ +void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) +{ + struct dentry *dn = di->dentry; + struct ceph_mds_client *mdsc; + + dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n", + di, dn, dn, di->offset); + + if (!list_empty(&di->lease_list)) { + if (di->flags & CEPH_DENTRY_LEASE_LIST) { + /* don't remove dentry from dentry lease list + * if its lease is valid */ + if (__dentry_lease_is_valid(di)) + return; + } else { + di->flags |= CEPH_DENTRY_REFERENCED; + return; + } + } + + if (di->flags & CEPH_DENTRY_SHRINK_LIST) { + di->flags |= CEPH_DENTRY_REFERENCED; + di->flags &= ~CEPH_DENTRY_LEASE_LIST; + return; + } + + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_list_lock); + __dentry_dir_lease_touch(mdsc, di), + spin_unlock(&mdsc->dentry_list_lock); +} + +static void __dentry_lease_unlist(struct ceph_dentry_info *di) +{ + struct ceph_mds_client *mdsc; + if (di->flags & CEPH_DENTRY_SHRINK_LIST) + return; + if (list_empty(&di->lease_list)) + return; + + mdsc = 
ceph_sb_to_client(di->dentry->d_sb)->mdsc; + spin_lock(&mdsc->dentry_list_lock); + list_del_init(&di->lease_list); + spin_unlock(&mdsc->dentry_list_lock); +} + +enum { + KEEP = 0, + DELETE = 1, + TOUCH = 2, + STOP = 4, +}; + +struct ceph_lease_walk_control { + bool dir_lease; + bool expire_dir_lease; + unsigned long nr_to_scan; + unsigned long dir_lease_ttl; +}; + +static unsigned long +__dentry_leases_walk(struct ceph_mds_client *mdsc, + struct ceph_lease_walk_control *lwc, + int (*check)(struct dentry*, void*)) +{ + struct ceph_dentry_info *di, *tmp; + struct dentry *dentry, *last = NULL; + struct list_head* list; + LIST_HEAD(dispose); + unsigned long freed = 0; + int ret = 0; + + list = lwc->dir_lease ? &mdsc->dentry_dir_leases : &mdsc->dentry_leases; + spin_lock(&mdsc->dentry_list_lock); + list_for_each_entry_safe(di, tmp, list, lease_list) { + if (!lwc->nr_to_scan) + break; + --lwc->nr_to_scan; + + dentry = di->dentry; + if (last == dentry) + break; + + if (!spin_trylock(&dentry->d_lock)) + continue; + + if (__lockref_is_dead(&dentry->d_lockref)) { + list_del_init(&di->lease_list); + goto next; + } + + ret = check(dentry, lwc); + if (ret & TOUCH) { + /* move it into tail of dir lease list */ + __dentry_dir_lease_touch(mdsc, di); + if (!last) + last = dentry; + } + if (ret & DELETE) { + /* stale lease */ + di->flags &= ~CEPH_DENTRY_REFERENCED; + if (dentry->d_lockref.count > 0) { + /* update_dentry_lease() will re-add + * it to lease list, or + * ceph_d_delete() will return 1 when + * last reference is dropped */ + list_del_init(&di->lease_list); + } else { + di->flags |= CEPH_DENTRY_SHRINK_LIST; + list_move_tail(&di->lease_list, &dispose); + dget_dlock(dentry); + } + } +next: + spin_unlock(&dentry->d_lock); + if (ret & STOP) + break; + } + spin_unlock(&mdsc->dentry_list_lock); + + while (!list_empty(&dispose)) { + di = list_first_entry(&dispose, struct ceph_dentry_info, + lease_list); + dentry = di->dentry; + spin_lock(&dentry->d_lock); + + 
list_del_init(&di->lease_list); + di->flags &= ~CEPH_DENTRY_SHRINK_LIST; + if (di->flags & CEPH_DENTRY_REFERENCED) { + spin_lock(&mdsc->dentry_list_lock); + if (di->flags & CEPH_DENTRY_LEASE_LIST) { + list_add_tail(&di->lease_list, + &mdsc->dentry_leases); + } else { + __dentry_dir_lease_touch(mdsc, di); + } + spin_unlock(&mdsc->dentry_list_lock); + } else { + freed++; + } + + spin_unlock(&dentry->d_lock); + /* ceph_d_delete() does the trick */ + dput(dentry); + } + return freed; +} + +static int __dentry_lease_check(struct dentry *dentry, void *arg) +{ + struct ceph_dentry_info *di = ceph_dentry(dentry); + int ret; + + if (__dentry_lease_is_valid(di)) + return STOP; + ret = __dir_lease_try_check(dentry); + if (ret == -EBUSY) + return KEEP; + if (ret > 0) + return TOUCH; + return DELETE; +} + +static int __dir_lease_check(struct dentry *dentry, void *arg) +{ + struct ceph_lease_walk_control *lwc = arg; + struct ceph_dentry_info *di = ceph_dentry(dentry); + + int ret = __dir_lease_try_check(dentry); + if (ret == -EBUSY) + return KEEP; + if (ret > 0) { + if (time_before(jiffies, di->time + lwc->dir_lease_ttl)) + return STOP; + /* Move dentry to tail of dir lease list if we don't want + * to delete it. 
So dentries in the list are checked in a + * round robin manner */ + if (!lwc->expire_dir_lease) + return TOUCH; + if (dentry->d_lockref.count > 0 || + (di->flags & CEPH_DENTRY_REFERENCED)) + return TOUCH; + /* invalidate dir lease */ + di->lease_shared_gen = 0; + } + return DELETE; +} + +int ceph_trim_dentries(struct ceph_mds_client *mdsc) +{ + struct ceph_lease_walk_control lwc; + unsigned long count; + unsigned long freed; + + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_use_max > 0 && + mdsc->caps_use_count > mdsc->caps_use_max) + count = mdsc->caps_use_count - mdsc->caps_use_max; + else + count = 0; + spin_unlock(&mdsc->caps_list_lock); + + lwc.dir_lease = false; + lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2; + freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check); + if (!lwc.nr_to_scan) /* more invalid leases */ + return -EAGAIN; + + if (lwc.nr_to_scan < CEPH_CAPS_PER_RELEASE) + lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE; + + lwc.dir_lease = true; + lwc.expire_dir_lease = freed < count; + lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ; + freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check); + if (!lwc.nr_to_scan) /* more to check */ + return -EAGAIN; + + return freed > 0 ? 1 : 0; +} + +/* * Ensure a dentry lease will no longer revalidate. */ void ceph_invalidate_dentry_lease(struct dentry *dentry) { + struct ceph_dentry_info *di = ceph_dentry(dentry); spin_lock(&dentry->d_lock); - ceph_dentry(dentry)->time = jiffies; - ceph_dentry(dentry)->lease_shared_gen = 0; + di->time = jiffies; + di->lease_shared_gen = 0; + di->flags &= ~CEPH_DENTRY_PRIMARY_LINK; + __dentry_lease_unlist(di); spin_unlock(&dentry->d_lock); } @@ -1139,52 +1533,65 @@ * Check if dentry lease is valid. If not, delete the lease. Try to * renew if the least is more than half up. 
*/ -static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags, - struct inode *dir) +static bool __dentry_lease_is_valid(struct ceph_dentry_info *di) +{ + struct ceph_mds_session *session; + + if (!di->lease_gen) + return false; + + session = di->lease_session; + if (session) { + u32 gen; + unsigned long ttl; + + spin_lock(&session->s_gen_ttl_lock); + gen = session->s_cap_gen; + ttl = session->s_cap_ttl; + spin_unlock(&session->s_gen_ttl_lock); + + if (di->lease_gen == gen && + time_before(jiffies, ttl) && + time_before(jiffies, di->time)) + return true; + } + di->lease_gen = 0; + return false; +} + +static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) { struct ceph_dentry_info *di; - struct ceph_mds_session *s; - int valid = 0; - u32 gen; - unsigned long ttl; struct ceph_mds_session *session = NULL; u32 seq = 0; + int valid = 0; spin_lock(&dentry->d_lock); di = ceph_dentry(dentry); - if (di && di->lease_session) { - s = di->lease_session; - spin_lock(&s->s_gen_ttl_lock); - gen = s->s_cap_gen; - ttl = s->s_cap_ttl; - spin_unlock(&s->s_gen_ttl_lock); + if (di && __dentry_lease_is_valid(di)) { + valid = 1; - if (di->lease_gen == gen && - time_before(jiffies, di->time) && - time_before(jiffies, ttl)) { - valid = 1; - if (di->lease_renew_after && - time_after(jiffies, di->lease_renew_after)) { - /* - * We should renew. If we're in RCU walk mode - * though, we can't do that so just return - * -ECHILD. - */ - if (flags & LOOKUP_RCU) { - valid = -ECHILD; - } else { - session = ceph_get_mds_session(s); - seq = di->lease_seq; - di->lease_renew_after = 0; - di->lease_renew_from = jiffies; - } + if (di->lease_renew_after && + time_after(jiffies, di->lease_renew_after)) { + /* + * We should renew. If we're in RCU walk mode + * though, we can't do that so just return + * -ECHILD. 
+ */ + if (flags & LOOKUP_RCU) { + valid = -ECHILD; + } else { + session = ceph_get_mds_session(di->lease_session); + seq = di->lease_seq; + di->lease_renew_after = 0; + di->lease_renew_from = jiffies; } } } spin_unlock(&dentry->d_lock); if (session) { - ceph_mdsc_lease_send_msg(session, dir, dentry, + ceph_mdsc_lease_send_msg(session, dentry, CEPH_MDS_LEASE_RENEW, seq); ceph_put_mds_session(session); } @@ -1193,21 +1600,67 @@ } /* - * Check if directory-wide content lease/cap is valid. + * Called under dentry->d_lock. */ -static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) +static int __dir_lease_try_check(const struct dentry *dentry) { - struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_dentry_info *di = ceph_dentry(dentry); + struct inode *dir; + struct ceph_inode_info *ci; int valid = 0; + if (!di->lease_shared_gen) + return 0; + if (IS_ROOT(dentry)) + return 0; + + dir = d_inode(dentry->d_parent); + ci = ceph_inode(dir); + + if (spin_trylock(&ci->i_ceph_lock)) { + if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen && + __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0)) + valid = 1; + spin_unlock(&ci->i_ceph_lock); + } else { + valid = -EBUSY; + } + + if (!valid) + di->lease_shared_gen = 0; + return valid; +} + +/* + * Check if directory-wide content lease/cap is valid. 
+ */ +static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, + struct ceph_mds_client *mdsc) +{ + struct ceph_inode_info *ci = ceph_inode(dir); + int valid; + int shared_gen; + spin_lock(&ci->i_ceph_lock); - if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen) - valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); + valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); + if (valid) { + __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); + shared_gen = atomic_read(&ci->i_shared_gen); + } spin_unlock(&ci->i_ceph_lock); - dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", - dir, (unsigned)atomic_read(&ci->i_shared_gen), - dentry, (unsigned)di->lease_shared_gen, valid); + if (valid) { + struct ceph_dentry_info *di; + spin_lock(&dentry->d_lock); + di = ceph_dentry(dentry); + if (dir == d_inode(dentry->d_parent) && + di && di->lease_shared_gen == shared_gen) + __ceph_dentry_dir_lease_touch(di); + else + valid = 0; + spin_unlock(&dentry->d_lock); + } + dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n", + dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid); return valid; } @@ -1218,50 +1671,54 @@ { int valid = 0; struct dentry *parent; - struct inode *dir; + struct inode *dir, *inode; + struct ceph_mds_client *mdsc; if (flags & LOOKUP_RCU) { parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; + inode = d_inode_rcu(dentry); } else { parent = dget_parent(dentry); dir = d_inode(parent); + inode = d_inode(dentry); } - dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry, - dentry, d_inode(dentry), ceph_dentry(dentry)->offset); + dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry, + dentry, inode, ceph_dentry(dentry)->offset); + + mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, - dentry, d_inode(dentry)); + dentry, 
inode); valid = 1; - } else if (d_really_is_positive(dentry) && - ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) { + } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { valid = 1; } else { - valid = dentry_lease_is_valid(dentry, flags, dir); + valid = dentry_lease_is_valid(dentry, flags); if (valid == -ECHILD) return valid; - if (valid || dir_lease_is_valid(dir, dentry)) { - if (d_really_is_positive(dentry)) - valid = ceph_is_any_caps(d_inode(dentry)); + if (valid || dir_lease_is_valid(dir, dentry, mdsc)) { + if (inode) + valid = ceph_is_any_caps(inode); else valid = 1; } } if (!valid) { - struct ceph_mds_client *mdsc = - ceph_sb_to_client(dir->i_sb)->mdsc; struct ceph_mds_request *req; int op, err; u32 mask; if (flags & LOOKUP_RCU) return -ECHILD; + + percpu_counter_inc(&mdsc->metric.d_lease_mis); op = ceph_snap(dir) == CEPH_SNAPDIR ? CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; @@ -1286,7 +1743,7 @@ case -ENOENT: if (d_really_is_negative(dentry)) valid = 1; - /* Fallthrough */ + fallthrough; default: break; } @@ -1294,18 +1751,42 @@ dout("d_revalidate %p lookup result=%d\n", dentry, err); } + } else { + percpu_counter_inc(&mdsc->metric.d_lease_hit); } dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); - if (valid) { - ceph_dentry_lru_touch(dentry); - } else { + if (!valid) ceph_dir_clear_complete(dir); - } if (!(flags & LOOKUP_RCU)) dput(parent); return valid; +} + +/* + * Delete unused dentry that doesn't have valid lease + * + * Called under dentry->d_lock. + */ +static int ceph_d_delete(const struct dentry *dentry) +{ + struct ceph_dentry_info *di; + + /* won't release caps */ + if (d_really_is_negative(dentry)) + return 0; + if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) + return 0; + /* valid lease? 
*/ + di = ceph_dentry(dentry); + if (di) { + if (__dentry_lease_is_valid(di)) + return 0; + if (__dir_lease_try_check(dentry)) + return 0; + } + return 1; } /* @@ -1314,16 +1795,18 @@ static void ceph_d_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); + struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); dout("d_release %p\n", dentry); - ceph_dentry_lru_del(dentry); + + atomic64_dec(&fsc->mdsc->metric.total_dentries); spin_lock(&dentry->d_lock); + __dentry_lease_unlist(di); dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (di->lease_session) - ceph_put_mds_session(di->lease_session); + ceph_put_mds_session(di->lease_session); kmem_cache_free(ceph_dentry_cachep, di); } @@ -1419,49 +1902,7 @@ return size - left; } -/* - * We maintain a private dentry LRU. - * - * FIXME: this needs to be changed to a per-mds lru to be useful. - */ -void ceph_dentry_lru_add(struct dentry *dn) -{ - struct ceph_dentry_info *di = ceph_dentry(dn); - struct ceph_mds_client *mdsc; - dout("dentry_lru_add %p %p '%pd'\n", di, dn, dn); - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_add_tail(&di->lru, &mdsc->dentry_lru); - mdsc->num_dentry++; - spin_unlock(&mdsc->dentry_lru_lock); -} - -void ceph_dentry_lru_touch(struct dentry *dn) -{ - struct ceph_dentry_info *di = ceph_dentry(dn); - struct ceph_mds_client *mdsc; - - dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", di, dn, dn, - di->offset); - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_move_tail(&di->lru, &mdsc->dentry_lru); - spin_unlock(&mdsc->dentry_lru_lock); -} - -void ceph_dentry_lru_del(struct dentry *dn) -{ - struct ceph_dentry_info *di = ceph_dentry(dn); - struct ceph_mds_client *mdsc; - - dout("dentry_lru_del %p %p '%pd'\n", di, dn, dn); - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_del_init(&di->lru); - mdsc->num_dentry--; - 
spin_unlock(&mdsc->dentry_lru_lock); -} /* * Return name hash for a given dentry. This is dependent on @@ -1493,6 +1934,7 @@ .open = ceph_open, .release = ceph_release, .unlocked_ioctl = ceph_ioctl, + .compat_ioctl = compat_ptr_ioctl, .fsync = ceph_fsync, .lock = ceph_lock, .flock = ceph_flock, @@ -1535,6 +1977,7 @@ const struct dentry_operations ceph_dentry_ops = { .d_revalidate = ceph_d_revalidate, + .d_delete = ceph_d_delete, .d_release = ceph_d_release, .d_prune = ceph_d_prune, .d_init = ceph_d_init, -- Gitblit v1.6.2