From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/nfsd/vfs.c | 757 +++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 514 insertions(+), 243 deletions(-) diff --git a/kernel/fs/nfsd/vfs.c b/kernel/fs/nfsd/vfs.c index 28e7f86..b09ead0 100644 --- a/kernel/fs/nfsd/vfs.c +++ b/kernel/fs/nfsd/vfs.c @@ -44,37 +44,10 @@ #include "nfsd.h" #include "vfs.h" +#include "filecache.h" #include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_FILEOP - - -/* - * This is a cache of readahead params that help us choose the proper - * readahead strategy. Initially, we set all readahead parameters to 0 - * and let the VFS handle things. - * If you increase the number of cached files very much, you'll need to - * add a hash table here. - */ -struct raparms { - struct raparms *p_next; - unsigned int p_count; - ino_t p_ino; - dev_t p_dev; - int p_set; - struct file_ra_state p_ra; - unsigned int p_hindex; -}; - -struct raparm_hbucket { - struct raparms *pb_head; - spinlock_t pb_lock; -} ____cacheline_aligned_in_smp; - -#define RAPARM_HASH_BITS 4 -#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) -#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) -static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed @@ -307,17 +280,23 @@ * Commit metadata changes to stable storage. */ static int -commit_metadata(struct svc_fh *fhp) +commit_inode_metadata(struct inode *inode) { - struct inode *inode = d_inode(fhp->fh_dentry); const struct export_operations *export_ops = inode->i_sb->s_export_op; - - if (!EX_ISSYNC(fhp->fh_export)) - return 0; if (export_ops->commit_metadata) return export_ops->commit_metadata(inode); return sync_inode_metadata(inode, 1); +} + +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = d_inode(fhp->fh_dentry); + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + return commit_inode_metadata(inode); } /* @@ -385,7 +364,7 @@ */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) + int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; @@ -404,7 +383,7 @@ /* * If utimes(2) and friends are called with times not NULL, we should * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission - * will return EACCESS, when the caller's effective UID does not match + * will return EACCES, when the caller's effective UID does not match * the owner of the file, and the caller is not privileged. In this * situation, we should return EPERM(notify_change will return this). */ @@ -551,16 +530,47 @@ } #endif -__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, - u64 dst_pos, u64 count) +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) { - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, - count)); + struct file *src = nf_src->nf_file; + struct file *dst = nf_dst->nf_file; + errseq_t since; + loff_t cloned; + __be32 ret = 0; + + since = READ_ONCE(dst->f_wb_err); + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); + if (cloned < 0) { + ret = nfserrno(cloned); + goto out_err; + } + if (count && cloned != count) { + ret = nfserrno(-EINVAL); + goto out_err; + } + if (sync) { + loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; + int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + + if (!status) + status = filemap_check_wb_err(dst->f_mapping, since); + if (!status) + status = commit_inode_metadata(file_inode(src)); + if (status < 0) { + nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, + nfsd_net_id)); + ret = nfserrno(status); + } + } +out_err: + return ret; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -571,7 +581,12 @@ * limit like this and pipeline multiple COPY requests. */ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -605,6 +620,12 @@ { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, +#ifdef CONFIG_NFSD_V4 + { NFS4_ACCESS_XAREAD, NFSD_MAY_READ }, + { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE }, + { NFS4_ACCESS_XALIST, NFSD_MAY_READ }, +#endif + { 0, 0 } }; @@ -614,6 +635,12 @@ { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC}, { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, + +#ifdef CONFIG_NFSD_V4 + { NFS4_ACCESS_XAREAD, NFSD_MAY_READ }, + { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE }, + { NFS4_ACCESS_XALIST, NFSD_MAY_READ }, +#endif { 0, 0 } }; @@ -693,7 +720,7 @@ } #endif /* CONFIG_NFSD_V3 */ -static int nfsd_open_break_lease(struct inode *inode, int access) +int nfsd_open_break_lease(struct inode *inode, int access) { unsigned int mode; @@ -709,8 +736,8 @@ * and additional flags. * N.B. After this call fhp needs an fh_put */ -__be32 -nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, +static __be32 +__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { struct path path; @@ -719,25 +746,6 @@ int flags = O_RDONLY|O_LARGEFILE; __be32 err; int host_err = 0; - - validate_process_creds(); - - /* - * If we get here, then the client has already done an "open", - * and (hopefully) checked permission - so allow OWNER_OVERRIDE - * in case a chmod has now revoked permission. - * - * Arguably we should also allow the owner override for - * directories, but we never have and it doesn't seem to have - * caused anyone a problem. If we were to change this, note - * also that our filldir callbacks would need a variant of - * lookup_one_len that doesn't check permissions. - */ - if (type == S_IFREG) - may_flags |= NFSD_MAY_OWNER_OVERRIDE; - err = fh_verify(rqstp, fhp, type, may_flags); - if (err) - goto out; path.mnt = fhp->fh_export->ex_path.mnt; path.dentry = fhp->fh_dentry; @@ -792,67 +800,46 @@ out_nfserr: err = nfserrno(host_err); out: + return err; +} + +__be32 +nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, + int may_flags, struct file **filp) +{ + __be32 err; + + validate_process_creds(); + /* + * If we get here, then the client has already done an "open", + * and (hopefully) checked permission - so allow OWNER_OVERRIDE + * in case a chmod has now revoked permission. + * + * Arguably we should also allow the owner override for + * directories, but we never have and it doesn't seem to have + * caused anyone a problem. If we were to change this, note + * also that our filldir callbacks would need a variant of + * lookup_one_len that doesn't check permissions. + */ + if (type == S_IFREG) + may_flags |= NFSD_MAY_OWNER_OVERRIDE; + err = fh_verify(rqstp, fhp, type, may_flags); + if (!err) + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); validate_process_creds(); return err; } -struct raparms * -nfsd_init_raparms(struct file *file) +__be32 +nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, + int may_flags, struct file **filp) { - struct inode *inode = file_inode(file); - dev_t dev = inode->i_sb->s_dev; - ino_t ino = inode->i_ino; - struct raparms *ra, **rap, **frap = NULL; - int depth = 0; - unsigned int hash; - struct raparm_hbucket *rab; + __be32 err; - hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; - rab = &raparm_hash[hash]; - - spin_lock(&rab->pb_lock); - for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { - if (ra->p_ino == ino && ra->p_dev == dev) - goto found; - depth++; - if (ra->p_count == 0) - frap = rap; - } - depth = nfsdstats.ra_size; - if (!frap) { - spin_unlock(&rab->pb_lock); - return NULL; - } - rap = frap; - ra = *frap; - ra->p_dev = dev; - ra->p_ino = ino; - ra->p_set = 0; - ra->p_hindex = hash; -found: - if (rap != &rab->pb_head) { - *rap = ra->p_next; - ra->p_next = rab->pb_head; - rab->pb_head = ra; - } - ra->p_count++; - nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; - spin_unlock(&rab->pb_lock); - - if (ra->p_set) - file->f_ra = ra->p_ra; - return ra; -} - -void nfsd_put_raparams(struct file *file, struct raparms *ra) -{ - struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; - - spin_lock(&rab->pb_lock); - ra->p_ra = file->f_ra; - ra->p_set = 1; - ra->p_count--; - spin_unlock(&rab->pb_lock); + validate_process_creds(); + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + validate_process_creds(); + return err; } /* @@ -895,12 +882,23 @@ return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } +static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len, + size_t expected) +{ + if (expected != 0 && len == 0) + return 1; + if (offset+len >= i_size_read(file_inode(file))) + return 1; + return 0; +} + static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, - unsigned long *count, int host_err) + unsigned long *count, u32 *eof, ssize_t host_err) { if (host_err >= 0) { nfsdstats.io_read += host_err; + *eof = nfsd_eof_on_read(file, offset, host_err, *count); *count = host_err; fsnotify_access(file); trace_nfsd_read_io_done(rqstp, fhp, offset, *count); @@ -912,7 +910,8 @@ } __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, unsigned long *count) + struct file *file, loff_t offset, unsigned long *count, + u32 *eof) { struct splice_desc sd = { .len = 0, @@ -920,25 +919,27 @@ .pos = offset, .u.data = rqstp, }; - int host_err; + ssize_t host_err; trace_nfsd_read_splice(rqstp, fhp, offset, *count); rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); + return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, - struct kvec *vec, int vlen, unsigned long *count) + struct kvec *vec, int vlen, unsigned long *count, + u32 *eof) { struct iov_iter iter; - int host_err; + loff_t ppos = offset; + ssize_t host_err; trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count); - host_err = vfs_iter_read(file, &iter, &offset, 0); - return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); + iov_iter_kvec(&iter, READ, vec, vlen, *count); + host_err = vfs_iter_read(file, &iter, &ppos, 0); + return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } /* @@ -979,12 +980,15 @@ } __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable) + unsigned long *cnt, int stable, + __be32 *verf) { + struct file *file = nf->nf_file; struct svc_export *exp; struct iov_iter iter; + errseq_t since; __be32 nfserr; int host_err; int use_wgather; @@ -996,12 +1000,13 @@ if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* - * We want less throttling in balance_dirty_pages() - * and shrink_inactive_list() so that nfs to + * We want throttling in balance_dirty_pages() + * and shrink_inactive_list() to only consider + * the backingdev we are writing to, so that nfs to * localhost doesn't cause nfsd to lock up due to all * the client's dirty pages or its congested queue. */ - current->flags |= PF_LESS_THROTTLE; + current->flags |= PF_LOCAL_THROTTLE; exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); @@ -1012,16 +1017,42 @@ if (stable && !use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt); - host_err = vfs_iter_write(file, &iter, &pos, flags); - if (host_err < 0) + iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); + since = READ_ONCE(file->f_wb_err); + if (flags & RWF_SYNC) { + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); + } else { + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); + } + if (host_err < 0) { + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); goto out_nfserr; + } *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); + host_err = filemap_check_wb_err(file->f_mapping, since); + if (host_err < 0) + goto out_nfserr; - if (stable && use_wgather) + if (stable && use_wgather) { host_err = wait_for_concurrent_writes(file); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); + } out_nfserr: if (host_err >= 0) { @@ -1032,7 +1063,7 @@ nfserr = nfserrno(host_err); } if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) - current_restore_flags(pflags, PF_LESS_THROTTLE); + current_restore_flags(pflags, PF_LOCAL_THROTTLE); return nfserr; } @@ -1042,27 +1073,25 @@ * N.B. After this call fhp needs an fh_put */ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) + loff_t offset, struct kvec *vec, int vlen, unsigned long *count, + u32 *eof) { + struct nfsd_file *nf; struct file *file; - struct raparms *ra; __be32 err; trace_nfsd_read_start(rqstp, fhp, offset, *count); - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; - ra = nfsd_init_raparms(file); - + file = nf->nf_file; if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) - err = nfsd_splice_read(rqstp, fhp, file, offset, count); + err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof); else - err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count); + err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof); - if (ra) - nfsd_put_raparams(file, ra); - fput(file); + nfsd_file_put(nf); trace_nfsd_read_done(rqstp, fhp, offset, *count); @@ -1076,19 +1105,21 @@ */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, int stable) + struct kvec *vec, int vlen, unsigned long *cnt, int stable, + __be32 *verf) { - struct file *file = NULL; - __be32 err = 0; + struct nfsd_file *nf; + __be32 err; trace_nfsd_write_start(rqstp, fhp, offset, *cnt); - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); + err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); - fput(file); + err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, + vlen, cnt, stable, verf); + nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); return err; @@ -1106,11 +1137,11 @@ */ __be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, unsigned long count) + loff_t offset, unsigned long count, __be32 *verf) { - struct file *file; - loff_t end = LLONG_MAX; - __be32 err = nfserr_inval; + struct nfsd_file *nf; + loff_t end = LLONG_MAX; + __be32 err = nfserr_inval; if (offset < 0) goto out; @@ -1120,20 +1151,36 @@ goto out; } - err = nfsd_open(rqstp, fhp, S_IFREG, - NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file); + err = nfsd_file_acquire(rqstp, fhp, + NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { - int err2 = vfs_fsync_range(file, offset, end, 0); + errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); + int err2; - if (err2 != -EINVAL) + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + switch (err2) { + case 0: + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); + err2 = filemap_check_wb_err(nf->nf_file->f_mapping, + since); err = nfserrno(err2); - else + break; + case -EINVAL: err = nfserr_notsupp; - } + break; + default: + nfsd_reset_boot_verifier(net_generic(nf->nf_net, + nfsd_net_id)); + err = nfserrno(err2); + } + } else + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); - fput(file); + nfsd_file_put(nf); out: return err; } @@ -1155,7 +1202,7 @@ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) - return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); /* Callers expect file metadata to be committed here */ return nfserrno(commit_metadata(resfhp)); } @@ -1293,7 +1340,6 @@ int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; - struct inode *dirp; __be32 err; int host_err; @@ -1305,7 +1351,6 @@ return err; dentry = fhp->fh_dentry; - dirp = d_inode(dentry); host_err = fh_want_write(fhp); if (host_err) @@ -1423,18 +1468,19 @@ && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; break; } + fallthrough; case NFS4_CREATE_EXCLUSIVE4_1: if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; goto set_attr; } - /* fallthru */ + fallthrough; case NFS3_CREATE_GUARDED: err = nfserr_exist; } @@ -1451,7 +1497,7 @@ goto out_nfserr; } if (created) - *created = 1; + *created = true; nfsd_check_ignore_resizing(iap); @@ -1661,6 +1707,26 @@ goto out_unlock; } +static void +nfsd_close_cached_files(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + + if (inode && S_ISREG(inode->i_mode)) + nfsd_file_close_inode_sync(inode); +} + +static bool +nfsd_has_cached_files(struct dentry *dentry) +{ + bool ret = false; + struct inode *inode = d_inode(dentry); + + if (inode && S_ISREG(inode->i_mode)) + ret = nfsd_file_is_cached(inode); + return ret; +} + /* * Rename a file * N.B. After this call _both_ ffhp and tfhp need an fh_put @@ -1673,6 +1739,7 @@ struct inode *fdir, *tdir; __be32 err; int host_err; + bool has_cached = false; err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) @@ -1691,6 +1758,7 @@ if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; +retry: host_err = fh_want_write(ffhp); if (host_err) { err = nfserrno(host_err); @@ -1730,11 +1798,16 @@ if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); - if (!host_err) { - host_err = commit_metadata(tfhp); - if (!host_err) - host_err = commit_metadata(ffhp); + if (nfsd_has_cached_files(ndentry)) { + has_cached = true; + goto out_dput_old; + } else { + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); + if (!host_err) { + host_err = commit_metadata(tfhp); + if (!host_err) + host_err = commit_metadata(ffhp); + } } out_dput_new: dput(ndentry); @@ -1747,12 +1820,26 @@ * as that would do the wrong thing if the two directories * were the same, so again we do it by hand. */ - fill_post_wcc(ffhp); - fill_post_wcc(tfhp); + if (!has_cached) { + fill_post_wcc(ffhp); + fill_post_wcc(tfhp); + } unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); + /* + * If the target dentry has cached open files, then we need to try to + * close them prior to doing the rename. Flushing delayed fput + * shouldn't be done with locks held however, so we delay it until this + * point and then reattempt the whole shebang. + */ + if (has_cached) { + has_cached = false; + nfsd_close_cached_files(ndentry); + dput(ndentry); + goto retry; + } out: return err; } @@ -1788,27 +1875,42 @@ rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_nfserr; + goto out_drop_write; if (d_really_is_negative(rdentry)) { dput(rdentry); - err = nfserr_noent; - goto out; + host_err = -ENOENT; + goto out_drop_write; } if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; - if (type != S_IFDIR) + if (type != S_IFDIR) { + nfsd_close_cached_files(rdentry); host_err = vfs_unlink(dirp, rdentry, NULL); - else + } else { host_err = vfs_rmdir(dirp, rdentry); + } + if (!host_err) host_err = commit_metadata(fhp); dput(rdentry); +out_drop_write: + fh_drop_write(fhp); out_nfserr: - err = nfserrno(host_err); + if (host_err == -EBUSY) { + /* name is mounted-on. There is no perfect + * error status. + */ + if (nfsd_v4client(rqstp)) + err = nfserr_file_open; + else + err = nfserr_acces; + } else { + err = nfserrno(host_err); + } out: return err; } @@ -1990,6 +2092,235 @@ return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; } +#ifdef CONFIG_NFSD_V4 +/* + * Helper function to translate error numbers. In the case of xattr operations, + * some error codes need to be translated outside of the standard translations. + * + * ENODATA needs to be translated to nfserr_noxattr. + * E2BIG to nfserr_xattr2big. + * + * Additionally, vfs_listxattr can return -ERANGE. This means that the + * file has too many extended attributes to retrieve inside an + * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation: + * filesystems will allow the adding of extended attributes until they hit + * their own internal limit. This limit may be larger than XATTR_LIST_MAX. + * So, at that point, the attributes are present and valid, but can't + * be retrieved using listxattr, since the upper level xattr code enforces + * the XATTR_LIST_MAX limit. + * + * This bug means that we need to deal with listxattr returning -ERANGE. The + * best mapping is to return TOOSMALL. + */ +static __be32 +nfsd_xattr_errno(int err) +{ + switch (err) { + case -ENODATA: + return nfserr_noxattr; + case -E2BIG: + return nfserr_xattr2big; + case -ERANGE: + return nfserr_toosmall; + } + return nfserrno(err); +} + +/* + * Retrieve the specified user extended attribute. To avoid always + * having to allocate the maximum size (since we are not getting + * a maximum size from the RPC), do a probe + alloc. Hold a reader + * lock on i_rwsem to prevent the extended attribute from changing + * size while we're doing this. + */ +__be32 +nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, + void **bufp, int *lenp) +{ + ssize_t len; + __be32 err; + char *buf; + struct inode *inode; + struct dentry *dentry; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ); + if (err) + return err; + + err = nfs_ok; + dentry = fhp->fh_dentry; + inode = d_inode(dentry); + + inode_lock_shared(inode); + + len = vfs_getxattr(dentry, name, NULL, 0); + + /* + * Zero-length attribute, just return. + */ + if (len == 0) { + *bufp = NULL; + *lenp = 0; + goto out; + } + + if (len < 0) { + err = nfsd_xattr_errno(len); + goto out; + } + + if (len > *lenp) { + err = nfserr_toosmall; + goto out; + } + + buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); + if (buf == NULL) { + err = nfserr_jukebox; + goto out; + } + + len = vfs_getxattr(dentry, name, buf, len); + if (len <= 0) { + kvfree(buf); + buf = NULL; + err = nfsd_xattr_errno(len); + } + + *lenp = len; + *bufp = buf; + +out: + inode_unlock_shared(inode); + + return err; +} + +/* + * Retrieve the xattr names. Since we can't know how many are + * user extended attributes, we must get all attributes here, + * and have the XDR encode filter out the "user." ones. + * + * While this could always just allocate an XATTR_LIST_MAX + * buffer, that's a waste, so do a probe + allocate. To + * avoid any changes between the probe and allocate, wrap + * this in inode_lock. + */ +__be32 +nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp, + int *lenp) +{ + ssize_t len; + __be32 err; + char *buf; + struct inode *inode; + struct dentry *dentry; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ); + if (err) + return err; + + dentry = fhp->fh_dentry; + inode = d_inode(dentry); + *lenp = 0; + + inode_lock_shared(inode); + + len = vfs_listxattr(dentry, NULL, 0); + if (len <= 0) { + err = nfsd_xattr_errno(len); + goto out; + } + + if (len > XATTR_LIST_MAX) { + err = nfserr_xattr2big; + goto out; + } + + /* + * We're holding i_rwsem - use GFP_NOFS. + */ + buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); + if (buf == NULL) { + err = nfserr_jukebox; + goto out; + } + + len = vfs_listxattr(dentry, buf, len); + if (len <= 0) { + kvfree(buf); + err = nfsd_xattr_errno(len); + goto out; + } + + *lenp = len; + *bufp = buf; + + err = nfs_ok; +out: + inode_unlock_shared(inode); + + return err; +} + +/* + * Removexattr and setxattr need to call fh_lock to both lock the inode + * and set the change attribute. Since the top-level vfs_removexattr + * and vfs_setxattr calls already do their own inode_lock calls, call + * the _locked variant. Pass in a NULL pointer for delegated_inode, + * and let the client deal with NFS4ERR_DELAY (same as with e.g. + * setattr and remove). + */ +__be32 +nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) +{ + __be32 err; + int ret; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE); + if (err) + return err; + + ret = fh_want_write(fhp); + if (ret) + return nfserrno(ret); + + fh_lock(fhp); + + ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL); + + fh_unlock(fhp); + fh_drop_write(fhp); + + return nfsd_xattr_errno(ret); +} + +__be32 +nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, + void *buf, u32 len, u32 flags) +{ + __be32 err; + int ret; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE); + if (err) + return err; + + ret = fh_want_write(fhp); + if (ret) + return nfserrno(ret); + fh_lock(fhp); + + ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags, + NULL); + + fh_unlock(fhp); + fh_drop_write(fhp); + + return nfsd_xattr_errno(ret); +} +#endif + /* * Check for a user's access permissions to this inode. */ @@ -2073,64 +2404,4 @@ err = inode_permission(inode, MAY_EXEC); return err? nfserrno(err) : 0; -} - -void -nfsd_racache_shutdown(void) -{ - struct raparms *raparm, *last_raparm; - unsigned int i; - - dprintk("nfsd: freeing readahead buffers.\n"); - - for (i = 0; i < RAPARM_HASH_SIZE; i++) { - raparm = raparm_hash[i].pb_head; - while(raparm) { - last_raparm = raparm; - raparm = raparm->p_next; - kfree(last_raparm); - } - raparm_hash[i].pb_head = NULL; - } -} -/* - * Initialize readahead param cache - */ -int -nfsd_racache_init(int cache_size) -{ - int i; - int j = 0; - int nperbucket; - struct raparms **raparm = NULL; - - - if (raparm_hash[0].pb_head) - return 0; - nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); - nperbucket = max(2, nperbucket); - cache_size = nperbucket * RAPARM_HASH_SIZE; - - dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); - - for (i = 0; i < RAPARM_HASH_SIZE; i++) { - spin_lock_init(&raparm_hash[i].pb_lock); - - raparm = &raparm_hash[i].pb_head; - for (j = 0; j < nperbucket; j++) { - *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); - if (!*raparm) - goto out_nomem; - raparm = &(*raparm)->p_next; - } - *raparm = NULL; - } - - nfsdstats.ra_size = cache_size; - return 0; - -out_nomem: - dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); - nfsd_racache_shutdown(); - return -ENOMEM; } -- Gitblit v1.6.2