From 01573e231f18eb2d99162747186f59511f56b64d Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 08 Dec 2023 10:40:48 +0000
Subject: [PATCH] 移去rt
---
kernel/fs/nfsd/vfs.c | 757 +++++++++++++++++++++++++++++++++++++++------------------
1 files changed, 514 insertions(+), 243 deletions(-)
diff --git a/kernel/fs/nfsd/vfs.c b/kernel/fs/nfsd/vfs.c
index 28e7f86..b09ead0 100644
--- a/kernel/fs/nfsd/vfs.c
+++ b/kernel/fs/nfsd/vfs.c
@@ -44,37 +44,10 @@
#include "nfsd.h"
#include "vfs.h"
+#include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
-
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
- struct raparms *p_next;
- unsigned int p_count;
- ino_t p_ino;
- dev_t p_dev;
- int p_set;
- struct file_ra_state p_ra;
- unsigned int p_hindex;
-};
-
-struct raparm_hbucket {
- struct raparms *pb_head;
- spinlock_t pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS 4
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -307,17 +280,23 @@
* Commit metadata changes to stable storage.
*/
static int
-commit_metadata(struct svc_fh *fhp)
+commit_inode_metadata(struct inode *inode)
{
- struct inode *inode = d_inode(fhp->fh_dentry);
const struct export_operations *export_ops = inode->i_sb->s_export_op;
-
- if (!EX_ISSYNC(fhp->fh_export))
- return 0;
if (export_ops->commit_metadata)
return export_ops->commit_metadata(inode);
return sync_inode_metadata(inode, 1);
+}
+
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = d_inode(fhp->fh_dentry);
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+ return commit_inode_metadata(inode);
}
/*
@@ -385,7 +364,7 @@
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+ int check_guard, time64_t guardtime)
{
struct dentry *dentry;
struct inode *inode;
@@ -404,7 +383,7 @@
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
- * will return EACCESS, when the caller's effective UID does not match
+ * will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
@@ -551,16 +530,47 @@
}
#endif
-__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
- u64 dst_pos, u64 count)
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
{
- return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
- count));
+ struct file *src = nf_src->nf_file;
+ struct file *dst = nf_dst->nf_file;
+ errseq_t since;
+ loff_t cloned;
+ __be32 ret = 0;
+
+ since = READ_ONCE(dst->f_wb_err);
+ cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+ if (cloned < 0) {
+ ret = nfserrno(cloned);
+ goto out_err;
+ }
+ if (count && cloned != count) {
+ ret = nfserrno(-EINVAL);
+ goto out_err;
+ }
+ if (sync) {
+ loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
+ int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
+
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
+ nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+ nfsd_net_id));
+ ret = nfserrno(status);
+ }
+ }
+out_err:
+ return ret;
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
+ ssize_t ret;
/*
* Limit copy to 4MB to prevent indefinitely blocking an nfsd
@@ -571,7 +581,12 @@
* limit like this and pipeline multiple COPY requests.
*/
count = min_t(u64, count, 1 << 22);
- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+ COPY_FILE_SPLICE);
+ return ret;
}
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -605,6 +620,12 @@
{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
+#ifdef CONFIG_NFSD_V4
+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
+#endif
+
{ 0, 0 }
};
@@ -614,6 +635,12 @@
{ NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
+
+#ifdef CONFIG_NFSD_V4
+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
+#endif
{ 0, 0 }
};
@@ -693,7 +720,7 @@
}
#endif /* CONFIG_NFSD_V3 */
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
{
unsigned int mode;
@@ -709,8 +736,8 @@
* and additional flags.
* N.B. After this call fhp needs an fh_put
*/
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
struct path path;
@@ -719,25 +746,6 @@
int flags = O_RDONLY|O_LARGEFILE;
__be32 err;
int host_err = 0;
-
- validate_process_creds();
-
- /*
- * If we get here, then the client has already done an "open",
- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
- * in case a chmod has now revoked permission.
- *
- * Arguably we should also allow the owner override for
- * directories, but we never have and it doesn't seem to have
- * caused anyone a problem. If we were to change this, note
- * also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
- */
- if (type == S_IFREG)
- may_flags |= NFSD_MAY_OWNER_OVERRIDE;
- err = fh_verify(rqstp, fhp, type, may_flags);
- if (err)
- goto out;
path.mnt = fhp->fh_export->ex_path.mnt;
path.dentry = fhp->fh_dentry;
@@ -792,67 +800,46 @@
out_nfserr:
err = nfserrno(host_err);
out:
+ return err;
+}
+
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
+{
+ __be32 err;
+
+ validate_process_creds();
+ /*
+ * If we get here, then the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ *
+ * Arguably we should also allow the owner override for
+ * directories, but we never have and it doesn't seem to have
+ * caused anyone a problem. If we were to change this, note
+ * also that our filldir callbacks would need a variant of
+ * lookup_one_len that doesn't check permissions.
+ */
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+ err = fh_verify(rqstp, fhp, type, may_flags);
+ if (!err)
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
validate_process_creds();
return err;
}
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
- struct inode *inode = file_inode(file);
- dev_t dev = inode->i_sb->s_dev;
- ino_t ino = inode->i_ino;
- struct raparms *ra, **rap, **frap = NULL;
- int depth = 0;
- unsigned int hash;
- struct raparm_hbucket *rab;
+ __be32 err;
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
- rab = &raparm_hash[hash];
-
- spin_lock(&rab->pb_lock);
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
- if (ra->p_ino == ino && ra->p_dev == dev)
- goto found;
- depth++;
- if (ra->p_count == 0)
- frap = rap;
- }
- depth = nfsdstats.ra_size;
- if (!frap) {
- spin_unlock(&rab->pb_lock);
- return NULL;
- }
- rap = frap;
- ra = *frap;
- ra->p_dev = dev;
- ra->p_ino = ino;
- ra->p_set = 0;
- ra->p_hindex = hash;
-found:
- if (rap != &rab->pb_head) {
- *rap = ra->p_next;
- ra->p_next = rab->pb_head;
- rab->pb_head = ra;
- }
- ra->p_count++;
- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&rab->pb_lock);
-
- if (ra->p_set)
- file->f_ra = ra->p_ra;
- return ra;
-}
-
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
-{
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
+ validate_process_creds();
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ validate_process_creds();
+ return err;
}
/*
@@ -895,12 +882,23 @@
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+ size_t expected)
+{
+ if (expected != 0 && len == 0)
+ return 1;
+ if (offset+len >= i_size_read(file_inode(file)))
+ return 1;
+ return 0;
+}
+
static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count, int host_err)
+ unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
nfsdstats.io_read += host_err;
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -912,7 +910,8 @@
}
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset, unsigned long *count)
+ struct file *file, loff_t offset, unsigned long *count,
+ u32 *eof)
{
struct splice_desc sd = {
.len = 0,
@@ -920,25 +919,27 @@
.pos = offset,
.u.data = rqstp,
};
- int host_err;
+ ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *count)
+ struct kvec *vec, int vlen, unsigned long *count,
+ u32 *eof)
{
struct iov_iter iter;
- int host_err;
+ loff_t ppos = offset;
+ ssize_t host_err;
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
- host_err = vfs_iter_read(file, &iter, &offset, 0);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ iov_iter_kvec(&iter, READ, vec, vlen, *count);
+ host_err = vfs_iter_read(file, &iter, &ppos, 0);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
/*
@@ -979,12 +980,15 @@
}
__be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable)
+ unsigned long *cnt, int stable,
+ __be32 *verf)
{
+ struct file *file = nf->nf_file;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
int use_wgather;
@@ -996,12 +1000,13 @@
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/*
- * We want less throttling in balance_dirty_pages()
- * and shrink_inactive_list() so that nfs to
+ * We want throttling in balance_dirty_pages()
+ * and shrink_inactive_list() to only consider
+ * the backingdev we are writing to, so that nfs to
* localhost doesn't cause nfsd to lock up due to all
* the client's dirty pages or its congested queue.
*/
- current->flags |= PF_LESS_THROTTLE;
+ current->flags |= PF_LOCAL_THROTTLE;
exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
@@ -1012,16 +1017,42 @@
if (stable && !use_wgather)
flags |= RWF_SYNC;
- iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ since = READ_ONCE(file->f_wb_err);
+ if (flags & RWF_SYNC) {
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ } else {
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ }
+ if (host_err < 0) {
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
goto out_nfserr;
+ }
*cnt = host_err;
nfsdstats.io_write += *cnt;
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;
- if (stable && use_wgather)
+ if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ }
out_nfserr:
if (host_err >= 0) {
@@ -1032,7 +1063,7 @@
nfserr = nfserrno(host_err);
}
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
- current_restore_flags(pflags, PF_LESS_THROTTLE);
+ current_restore_flags(pflags, PF_LOCAL_THROTTLE);
return nfserr;
}
@@ -1042,27 +1073,25 @@
* N.B. After this call fhp needs an fh_put
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+ u32 *eof)
{
+ struct nfsd_file *nf;
struct file *file;
- struct raparms *ra;
__be32 err;
trace_nfsd_read_start(rqstp, fhp, offset, *count);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
if (err)
return err;
- ra = nfsd_init_raparms(file);
-
+ file = nf->nf_file;
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+ err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+ err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
- if (ra)
- nfsd_put_raparams(file, ra);
- fput(file);
+ nfsd_file_put(nf);
trace_nfsd_read_done(rqstp, fhp, offset, *count);
@@ -1076,19 +1105,21 @@
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable)
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+ __be32 *verf)
{
- struct file *file = NULL;
- __be32 err = 0;
+ struct nfsd_file *nf;
+ __be32 err;
trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
- fput(file);
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
+ vlen, cnt, stable, verf);
+ nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
return err;
@@ -1106,11 +1137,11 @@
*/
__be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count)
+ loff_t offset, unsigned long count, __be32 *verf)
{
- struct file *file;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
+ struct nfsd_file *nf;
+ loff_t end = LLONG_MAX;
+ __be32 err = nfserr_inval;
if (offset < 0)
goto out;
@@ -1120,20 +1151,36 @@
goto out;
}
- err = nfsd_open(rqstp, fhp, S_IFREG,
- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+ err = nfsd_file_acquire(rqstp, fhp,
+ NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = vfs_fsync_range(file, offset, end, 0);
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;
- if (err2 != -EINVAL)
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ switch (err2) {
+ case 0:
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+ nfsd_net_id));
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
err = nfserrno(err2);
- else
+ break;
+ case -EINVAL:
err = nfserr_notsupp;
- }
+ break;
+ default:
+ nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+ nfsd_net_id));
+ err = nfserrno(err2);
+ }
+ } else
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+ nfsd_net_id));
- fput(file);
+ nfsd_file_put(nf);
out:
return err;
}
@@ -1155,7 +1202,7 @@
if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
/* Callers expect file metadata to be committed here */
return nfserrno(commit_metadata(resfhp));
}
@@ -1293,7 +1340,6 @@
int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild = NULL;
- struct inode *dirp;
__be32 err;
int host_err;
@@ -1305,7 +1351,6 @@
return err;
dentry = fhp->fh_dentry;
- dirp = d_inode(dentry);
host_err = fh_want_write(fhp);
if (host_err)
@@ -1423,18 +1468,19 @@
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
- *created = 1;
+ *created = true;
break;
}
+ fallthrough;
case NFS4_CREATE_EXCLUSIVE4_1:
if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
- *created = 1;
+ *created = true;
goto set_attr;
}
- /* fallthru */
+ fallthrough;
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
}
@@ -1451,7 +1497,7 @@
goto out_nfserr;
}
if (created)
- *created = 1;
+ *created = true;
nfsd_check_ignore_resizing(iap);
@@ -1661,6 +1707,26 @@
goto out_unlock;
}
+static void
+nfsd_close_cached_files(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ nfsd_file_close_inode_sync(inode);
+}
+
+static bool
+nfsd_has_cached_files(struct dentry *dentry)
+{
+ bool ret = false;
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ ret = nfsd_file_is_cached(inode);
+ return ret;
+}
+
/*
* Rename a file
* N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1673,6 +1739,7 @@
struct inode *fdir, *tdir;
__be32 err;
int host_err;
+ bool has_cached = false;
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
@@ -1691,6 +1758,7 @@
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+retry:
host_err = fh_want_write(ffhp);
if (host_err) {
err = nfserrno(host_err);
@@ -1730,11 +1798,16 @@
if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
goto out_dput_new;
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
- if (!host_err) {
- host_err = commit_metadata(tfhp);
- if (!host_err)
- host_err = commit_metadata(ffhp);
+ if (nfsd_has_cached_files(ndentry)) {
+ has_cached = true;
+ goto out_dput_old;
+ } else {
+ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
+ if (!host_err)
+ host_err = commit_metadata(ffhp);
+ }
}
out_dput_new:
dput(ndentry);
@@ -1747,12 +1820,26 @@
* as that would do the wrong thing if the two directories
* were the same, so again we do it by hand.
*/
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
+ if (!has_cached) {
+ fill_post_wcc(ffhp);
+ fill_post_wcc(tfhp);
+ }
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp);
+ /*
+ * If the target dentry has cached open files, then we need to try to
+ * close them prior to doing the rename. Flushing delayed fput
+ * shouldn't be done with locks held however, so we delay it until this
+ * point and then reattempt the whole shebang.
+ */
+ if (has_cached) {
+ has_cached = false;
+ nfsd_close_cached_files(ndentry);
+ dput(ndentry);
+ goto retry;
+ }
out:
return err;
}
@@ -1788,27 +1875,42 @@
rdentry = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
- goto out_nfserr;
+ goto out_drop_write;
if (d_really_is_negative(rdentry)) {
dput(rdentry);
- err = nfserr_noent;
- goto out;
+ host_err = -ENOENT;
+ goto out_drop_write;
}
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- if (type != S_IFDIR)
+ if (type != S_IFDIR) {
+ nfsd_close_cached_files(rdentry);
host_err = vfs_unlink(dirp, rdentry, NULL);
- else
+ } else {
host_err = vfs_rmdir(dirp, rdentry);
+ }
+
if (!host_err)
host_err = commit_metadata(fhp);
dput(rdentry);
+out_drop_write:
+ fh_drop_write(fhp);
out_nfserr:
- err = nfserrno(host_err);
+ if (host_err == -EBUSY) {
+ /* name is mounted-on. There is no perfect
+ * error status.
+ */
+ if (nfsd_v4client(rqstp))
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ } else {
+ err = nfserrno(host_err);
+ }
out:
return err;
}
@@ -1990,6 +2092,235 @@
return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
}
+#ifdef CONFIG_NFSD_V4
+/*
+ * Helper function to translate error numbers. In the case of xattr operations,
+ * some error codes need to be translated outside of the standard translations.
+ *
+ * ENODATA needs to be translated to nfserr_noxattr.
+ * E2BIG to nfserr_xattr2big.
+ *
+ * Additionally, vfs_listxattr can return -ERANGE. This means that the
+ * file has too many extended attributes to retrieve inside an
+ * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation:
+ * filesystems will allow the adding of extended attributes until they hit
+ * their own internal limit. This limit may be larger than XATTR_LIST_MAX.
+ * So, at that point, the attributes are present and valid, but can't
+ * be retrieved using listxattr, since the upper level xattr code enforces
+ * the XATTR_LIST_MAX limit.
+ *
+ * This bug means that we need to deal with listxattr returning -ERANGE. The
+ * best mapping is to return TOOSMALL.
+ */
+static __be32
+nfsd_xattr_errno(int err)
+{
+ switch (err) {
+ case -ENODATA:
+ return nfserr_noxattr;
+ case -E2BIG:
+ return nfserr_xattr2big;
+ case -ERANGE:
+ return nfserr_toosmall;
+ }
+ return nfserrno(err);
+}
+
+/*
+ * Retrieve the specified user extended attribute. To avoid always
+ * having to allocate the maximum size (since we are not getting
+ * a maximum size from the RPC), do a probe + alloc. Hold a reader
+ * lock on i_rwsem to prevent the extended attribute from changing
+ * size while we're doing this.
+ */
+__be32
+nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
+ void **bufp, int *lenp)
+{
+ ssize_t len;
+ __be32 err;
+ char *buf;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
+ if (err)
+ return err;
+
+ err = nfs_ok;
+ dentry = fhp->fh_dentry;
+ inode = d_inode(dentry);
+
+ inode_lock_shared(inode);
+
+ len = vfs_getxattr(dentry, name, NULL, 0);
+
+ /*
+ * Zero-length attribute, just return.
+ */
+ if (len == 0) {
+ *bufp = NULL;
+ *lenp = 0;
+ goto out;
+ }
+
+ if (len < 0) {
+ err = nfsd_xattr_errno(len);
+ goto out;
+ }
+
+ if (len > *lenp) {
+ err = nfserr_toosmall;
+ goto out;
+ }
+
+ buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS);
+ if (buf == NULL) {
+ err = nfserr_jukebox;
+ goto out;
+ }
+
+ len = vfs_getxattr(dentry, name, buf, len);
+ if (len <= 0) {
+ kvfree(buf);
+ buf = NULL;
+ err = nfsd_xattr_errno(len);
+ }
+
+ *lenp = len;
+ *bufp = buf;
+
+out:
+ inode_unlock_shared(inode);
+
+ return err;
+}
+
+/*
+ * Retrieve the xattr names. Since we can't know how many are
+ * user extended attributes, we must get all attributes here,
+ * and have the XDR encode filter out the "user." ones.
+ *
+ * While this could always just allocate an XATTR_LIST_MAX
+ * buffer, that's a waste, so do a probe + allocate. To
+ * avoid any changes between the probe and allocate, wrap
+ * this in inode_lock.
+ */
+__be32
+nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp,
+ int *lenp)
+{
+ ssize_t len;
+ __be32 err;
+ char *buf;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
+ if (err)
+ return err;
+
+ dentry = fhp->fh_dentry;
+ inode = d_inode(dentry);
+ *lenp = 0;
+
+ inode_lock_shared(inode);
+
+ len = vfs_listxattr(dentry, NULL, 0);
+ if (len <= 0) {
+ err = nfsd_xattr_errno(len);
+ goto out;
+ }
+
+ if (len > XATTR_LIST_MAX) {
+ err = nfserr_xattr2big;
+ goto out;
+ }
+
+ /*
+ * We're holding i_rwsem - use GFP_NOFS.
+ */
+ buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS);
+ if (buf == NULL) {
+ err = nfserr_jukebox;
+ goto out;
+ }
+
+ len = vfs_listxattr(dentry, buf, len);
+ if (len <= 0) {
+ kvfree(buf);
+ err = nfsd_xattr_errno(len);
+ goto out;
+ }
+
+ *lenp = len;
+ *bufp = buf;
+
+ err = nfs_ok;
+out:
+ inode_unlock_shared(inode);
+
+ return err;
+}
+
+/*
+ * Removexattr and setxattr need to call fh_lock to both lock the inode
+ * and set the change attribute. Since the top-level vfs_removexattr
+ * and vfs_setxattr calls already do their own inode_lock calls, call
+ * the _locked variant. Pass in a NULL pointer for delegated_inode,
+ * and let the client deal with NFS4ERR_DELAY (same as with e.g.
+ * setattr and remove).
+ */
+__be32
+nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
+{
+ __be32 err;
+ int ret;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
+ if (err)
+ return err;
+
+ ret = fh_want_write(fhp);
+ if (ret)
+ return nfserrno(ret);
+
+ fh_lock(fhp);
+
+ ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL);
+
+ fh_unlock(fhp);
+ fh_drop_write(fhp);
+
+ return nfsd_xattr_errno(ret);
+}
+
+__be32
+nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
+ void *buf, u32 len, u32 flags)
+{
+ __be32 err;
+ int ret;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
+ if (err)
+ return err;
+
+ ret = fh_want_write(fhp);
+ if (ret)
+ return nfserrno(ret);
+ fh_lock(fhp);
+
+ ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags,
+ NULL);
+
+ fh_unlock(fhp);
+ fh_drop_write(fhp);
+
+ return nfsd_xattr_errno(ret);
+}
+#endif
+
/*
* Check for a user's access permissions to this inode.
*/
@@ -2073,64 +2404,4 @@
err = inode_permission(inode, MAY_EXEC);
return err? nfserrno(err) : 0;
-}
-
-void
-nfsd_racache_shutdown(void)
-{
- struct raparms *raparm, *last_raparm;
- unsigned int i;
-
- dprintk("nfsd: freeing readahead buffers.\n");
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- raparm = raparm_hash[i].pb_head;
- while(raparm) {
- last_raparm = raparm;
- raparm = raparm->p_next;
- kfree(last_raparm);
- }
- raparm_hash[i].pb_head = NULL;
- }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
- int i;
- int j = 0;
- int nperbucket;
- struct raparms **raparm = NULL;
-
-
- if (raparm_hash[0].pb_head)
- return 0;
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
- nperbucket = max(2, nperbucket);
- cache_size = nperbucket * RAPARM_HASH_SIZE;
-
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- spin_lock_init(&raparm_hash[i].pb_lock);
-
- raparm = &raparm_hash[i].pb_head;
- for (j = 0; j < nperbucket; j++) {
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
- if (!*raparm)
- goto out_nomem;
- raparm = &(*raparm)->p_next;
- }
- *raparm = NULL;
- }
-
- nfsdstats.ra_size = cache_size;
- return 0;
-
-out_nomem:
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
- nfsd_racache_shutdown();
- return -ENOMEM;
}
--
Gitblit v1.6.2