~hc/RK356X_SDK_RELEASE.git

..	..	@@ -44,37 +44,10 @@
44	44
45	45	#include "nfsd.h"
46	46	#include "vfs.h"
	47	+#include "filecache.h"
47	48	#include "trace.h"
48	49
49	50	#define NFSDDBG_FACILITY NFSDDBG_FILEOP
50		-
51		-
52		-/*
53		- * This is a cache of readahead params that help us choose the proper
54		- * readahead strategy. Initially, we set all readahead parameters to 0
55		- * and let the VFS handle things.
56		- * If you increase the number of cached files very much, you'll need to
57		- * add a hash table here.
		 *
		 * Review annotation: every line of this cache sits on the removed side
		 * of the diff. The field notes below are taken from how the removed
		 * nfsd_init_raparms() and nfsd_put_raparams() code uses each field.
58		- */
59		-struct raparms {
60		- struct raparms *p_next; /* next entry on the same bucket list */
61		- unsigned int p_count; /* incremented by nfsd_init_raparms(), decremented by nfsd_put_raparams() */
62		- ino_t p_ino; /* inode number, half of the lookup key */
63		- dev_t p_dev; /* i_sb->s_dev, other half of the lookup key */
64		- int p_set; /* 0 when the slot is (re)assigned, 1 once p_ra has been saved */
65		- struct file_ra_state p_ra; /* saved copy of file->f_ra */
66		- unsigned int p_hindex; /* index of the owning raparm_hash[] bucket */
67		-};
68		-
69		-struct raparm_hbucket {
70		- struct raparms *pb_head; /* first entry of this bucket's list */
71		- spinlock_t pb_lock; /* taken around every list walk/update in the removed code */
72		-} ____cacheline_aligned_in_smp;
73		-
		/* 4 hash bits give 1<<4 buckets; the mask keeps the low 4 bits of the hash. */
74		-#define RAPARM_HASH_BITS 4
75		-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
76		-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
77		-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
78	51
79	52	/*
80	53	* Called from nfsd_lookup and encode_dirent. Check if we have crossed
..	..	@@ -307,17 +280,23 @@
307	280	* Commit metadata changes to stable storage.
308	281	*/
309	282	static int
310		-commit_metadata(struct svc_fh *fhp)
	283	+commit_inode_metadata(struct inode *inode)
311	284	{
		/*
		 * Added side: the body that used to belong to commit_metadata() now
		 * takes the inode directly and no longer tests EX_ISSYNC(). It calls
		 * the export operations' commit_metadata hook when one is set and
		 * sync_inode_metadata(inode, 1) otherwise, returning that call's
		 * result.
		 */
312		- struct inode *inode = d_inode(fhp->fh_dentry);
313	285	const struct export_operations *export_ops = inode->i_sb->s_export_op;
314		-
315		- if (!EX_ISSYNC(fhp->fh_export))
316		- return 0;
317	286
318	287	if (export_ops->commit_metadata)
319	288	return export_ops->commit_metadata(inode);
320	289	return sync_inode_metadata(inode, 1);
	290	+}
	291	+
	292	+static int
	293	+commit_metadata(struct svc_fh *fhp)
	294	+{
		/*
		 * Added side: wrapper that keeps the old entry point. It returns 0
		 * without doing any work unless EX_ISSYNC() holds for the handle's
		 * export, and otherwise returns commit_inode_metadata() for the
		 * inode behind fhp->fh_dentry.
		 */
	295	+ struct inode *inode = d_inode(fhp->fh_dentry);
	296	+
	297	+ if (!EX_ISSYNC(fhp->fh_export))
	298	+ return 0;
	299	+ return commit_inode_metadata(inode);
321	300	}
322	301
323	302	/*
..	..	@@ -385,7 +364,7 @@
385	364	*/
386	365	__be32
387	366	nfsd_setattr(struct svc_rqst rqstp, struct svc_fh fhp, struct iattr *iap,
388		- int check_guard, time_t guardtime)
	367	+ int check_guard, time64_t guardtime)
389	368	{
390	369	struct dentry *dentry;
391	370	struct inode *inode;
..	..	@@ -404,7 +383,7 @@
404	383	/*
405	384	* If utimes(2) and friends are called with times not NULL, we should
406	385	* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
407		- * will return EACCESS, when the caller's effective UID does not match
	386	+ * will return EACCES, when the caller's effective UID does not match
408	387	* the owner of the file, and the caller is not privileged. In this
409	388	* situation, we should return EPERM(notify_change will return this).
410	389	*/
..	..	@@ -551,16 +530,47 @@
551	530	}
552	531	#endif
553	532
554		-__be32 nfsd4_clone_file_range(struct file src, u64 src_pos, struct file dst,
555		- u64 dst_pos, u64 count)
	533	+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
	534	+ struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
556	535	{
		/*
		 * Added side: clone @count bytes from nf_src->nf_file at @src_pos
		 * into nf_dst->nf_file at @dst_pos.
		 *  - A negative vfs_clone_file_range() result is returned through
		 *    nfserrno().
		 *  - A nonzero @count that differs from the cloned length is
		 *    reported as nfserrno(-EINVAL).
		 *  - With @sync set, the destination range is passed to
		 *    vfs_fsync_range() (ending at LLONG_MAX when @count is 0), then
		 *    filemap_check_wb_err() is called with the f_wb_err value that
		 *    was sampled before the clone, then commit_inode_metadata() is
		 *    called. A negative status from any of these steps calls
		 *    nfsd_reset_boot_verifier() for nf_dst->nf_net and is returned
		 *    through nfserrno().
		 * Returns 0 when none of the steps failed.
		 *
		 * NOTE(review): the metadata commit is issued on file_inode(src)
		 * while the fsync targets dst - confirm that src is the intended
		 * inode.
		 */
557		- return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
558		- count));
	536	+ struct file *src = nf_src->nf_file;
	537	+ struct file *dst = nf_dst->nf_file;
	538	+ errseq_t since;
	539	+ loff_t cloned;
	540	+ __be32 ret = 0;
	541	+
	542	+ since = READ_ONCE(dst->f_wb_err);
	543	+ cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
	544	+ if (cloned < 0) {
	545	+ ret = nfserrno(cloned);
	546	+ goto out_err;
	547	+ }
	548	+ if (count && cloned != count) {
	549	+ ret = nfserrno(-EINVAL);
	550	+ goto out_err;
	551	+ }
	552	+ if (sync) {
	553	+ loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
	554	+ int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
	555	+
	556	+ if (!status)
	557	+ status = filemap_check_wb_err(dst->f_mapping, since);
	558	+ if (!status)
	559	+ status = commit_inode_metadata(file_inode(src));
	560	+ if (status < 0) {
	561	+ nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
	562	+ nfsd_net_id));
	563	+ ret = nfserrno(status);
	564	+ }
	565	+ }
	566	+out_err:
	567	+ return ret;
559	568	}
560	569
561	570	ssize_t nfsd_copy_file_range(struct file src, u64 src_pos, struct file dst,
562	571	u64 dst_pos, u64 count)
563	572	{
	573	+ ssize_t ret;
564	574
565	575	/*
566	576	* Limit copy to 4MB to prevent indefinitely blocking an nfsd
..	..	@@ -571,7 +581,12 @@
571	581	* limit like this and pipeline multiple COPY requests.
572	582	*/
573	583	count = min_t(u64, count, 1 << 22);
574		- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
	584	+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
	585	+
	586	+ if (ret == -EOPNOTSUPP \|\| ret == -EXDEV)
	587	+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
	588	+ COPY_FILE_SPLICE);
	589	+ return ret;
575	590	}
576	591
577	592	__be32 nfsd4_vfs_fallocate(struct svc_rqst rqstp, struct svc_fh fhp,
..	..	@@ -605,6 +620,12 @@
605	620	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_TRUNC },
606	621	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
607	622
	623	+#ifdef CONFIG_NFSD_V4
	624	+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
	625	+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
	626	+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
	627	+#endif
	628	+
608	629	{ 0, 0 }
609	630	};
610	631
..	..	@@ -614,6 +635,12 @@
614	635	{ NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC\|NFSD_MAY_WRITE\|NFSD_MAY_TRUNC},
615	636	{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC\|NFSD_MAY_WRITE },
616	637	{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
	638	+
	639	+#ifdef CONFIG_NFSD_V4
	640	+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
	641	+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
	642	+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
	643	+#endif
617	644
618	645	{ 0, 0 }
619	646	};
..	..	@@ -693,7 +720,7 @@
693	720	}
694	721	#endif /* CONFIG_NFSD_V3 */
695	722
696		-static int nfsd_open_break_lease(struct inode *inode, int access)
	723	+int nfsd_open_break_lease(struct inode *inode, int access)
697	724	{
698	725	unsigned int mode;
699	726
..	..	@@ -709,8 +736,8 @@
709	736	* and additional flags.
710	737	* N.B. After this call fhp needs an fh_put
711	738	*/
712		-__be32
713		-nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
	739	+static __be32
	740	+__nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
714	741	int may_flags, struct file **filp)
715	742	{
716	743	struct path path;
..	..	@@ -719,25 +746,6 @@
719	746	int flags = O_RDONLY\|O_LARGEFILE;
720	747	__be32 err;
721	748	int host_err = 0;
722		-
723		- validate_process_creds();
724		-
725		- /*
726		- * If we get here, then the client has already done an "open",
727		- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
728		- * in case a chmod has now revoked permission.
729		- *
730		- * Arguably we should also allow the owner override for
731		- * directories, but we never have and it doesn't seem to have
732		- * caused anyone a problem. If we were to change this, note
733		- * also that our filldir callbacks would need a variant of
734		- * lookup_one_len that doesn't check permissions.
735		- */
736		- if (type == S_IFREG)
737		- may_flags \|= NFSD_MAY_OWNER_OVERRIDE;
738		- err = fh_verify(rqstp, fhp, type, may_flags);
739		- if (err)
740		- goto out;
741	749
742	750	path.mnt = fhp->fh_export->ex_path.mnt;
743	751	path.dentry = fhp->fh_dentry;
..	..	@@ -792,67 +800,46 @@
792	800	out_nfserr:
793	801	err = nfserrno(host_err);
794	802	out:
	803	+ return err;
	804	+}
	805	+
		/*
		 * nfsd_open (added side): verify the file handle, then open it.
		 *
		 * validate_process_creds() is called on entry and again before
		 * returning. For S_IFREG requests NFSD_MAY_OWNER_OVERRIDE is OR-ed
		 * into @may_flags before fh_verify(); __nfsd_open() only runs when
		 * fh_verify() returned 0.
		 *
		 * Returns the fh_verify() error if there was one, otherwise the
		 * result of __nfsd_open().
		 */
	806	+__be32
	807	+nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
	808	+ int may_flags, struct file **filp)
	809	+{
	810	+ __be32 err;
	811	+
	812	+ validate_process_creds();
	813	+ /*
	814	+ * If we get here, then the client has already done an "open",
	815	+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
	816	+ * in case a chmod has now revoked permission.
	817	+ *
	818	+ * Arguably we should also allow the owner override for
	819	+ * directories, but we never have and it doesn't seem to have
	820	+ * caused anyone a problem. If we were to change this, note
	821	+ * also that our filldir callbacks would need a variant of
	822	+ * lookup_one_len that doesn't check permissions.
	823	+ */
	824	+ if (type == S_IFREG)
	825	+ may_flags \|= NFSD_MAY_OWNER_OVERRIDE;
	826	+ err = fh_verify(rqstp, fhp, type, may_flags);
	827	+ if (!err)
	828	+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
795	829	validate_process_creds();
796	830	return err;
797	831	}
798	832
		/*
		 * Review annotation for this hunk.
		 *
		 * Removed side: the readahead-parameter cache helpers
		 * nfsd_init_raparms() and nfsd_put_raparams().
		 *
		 * Added side: nfsd_open_verified(), which calls __nfsd_open()
		 * between two validate_process_creds() calls and returns its result.
		 * It does not call fh_verify() and does not modify @may_flags.
		 * NOTE(review): presumably callers have already verified @fhp -
		 * confirm against the call sites.
		 */
799		-struct raparms *
800		-nfsd_init_raparms(struct file *file)
	833	+__be32
	834	+nfsd_open_verified(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
	835	+ int may_flags, struct file **filp)
801	836	{
802		- struct inode *inode = file_inode(file);
803		- dev_t dev = inode->i_sb->s_dev;
804		- ino_t ino = inode->i_ino;
805		- struct raparms ra, rap, *frap = NULL;
806		- int depth = 0;
807		- unsigned int hash;
808		- struct raparm_hbucket *rab;
	837	+ __be32 err;
809	838
810		- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
811		- rab = &raparm_hash[hash];
812		-
813		- spin_lock(&rab->pb_lock);
		/* removed: walk the bucket looking for (ino, dev); remember a slot whose p_count is 0 */
814		- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
815		- if (ra->p_ino == ino && ra->p_dev == dev)
816		- goto found;
817		- depth++;
818		- if (ra->p_count == 0)
819		- frap = rap;
820		- }
821		- depth = nfsdstats.ra_size;
		/* removed: no match and no idle slot - give up without caching */
822		- if (!frap) {
823		- spin_unlock(&rab->pb_lock);
824		- return NULL;
825		- }
		/* removed: take over the idle slot for this file */
826		- rap = frap;
827		- ra = *frap;
828		- ra->p_dev = dev;
829		- ra->p_ino = ino;
830		- ra->p_set = 0;
831		- ra->p_hindex = hash;
832		-found:
		/* removed: unlink the entry and reinsert it at the head of the bucket */
833		- if (rap != &rab->pb_head) {
834		- *rap = ra->p_next;
835		- ra->p_next = rab->pb_head;
836		- rab->pb_head = ra;
837		- }
838		- ra->p_count++;
839		- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
840		- spin_unlock(&rab->pb_lock);
841		-
		/* removed: restore previously saved readahead state into the file */
842		- if (ra->p_set)
843		- file->f_ra = ra->p_ra;
844		- return ra;
845		-}
846		-
		/* removed: save the file's readahead state back and drop the use count, under the bucket lock */
847		-void nfsd_put_raparams(struct file file, struct raparms ra)
848		-{
849		- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
850		-
851		- spin_lock(&rab->pb_lock);
852		- ra->p_ra = file->f_ra;
853		- ra->p_set = 1;
854		- ra->p_count--;
855		- spin_unlock(&rab->pb_lock);
	839	+ validate_process_creds();
	840	+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
	841	+ validate_process_creds();
	842	+ return err;
856	843	}
857	844
858	845	/*
..	..	@@ -895,12 +882,23 @@
895	882	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
896	883	}
897	884
		/*
		 * nfsd_eof_on_read (added side): decide whether a read hit end of file.
		 *
		 * @file:     file that was read; only its inode size is consulted
		 * @offset:   offset the read started at
		 * @len:      number of bytes the read produced
		 * @expected: number of bytes that had been requested
		 *
		 * Returns 1 when a nonzero request produced no data, or when
		 * @offset + @len is at or beyond i_size_read() of the file's inode;
		 * returns 0 otherwise.
		 */
	885	+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
	886	+ size_t expected)
	887	+{
	888	+ if (expected != 0 && len == 0)
	889	+ return 1;
	890	+ if (offset+len >= i_size_read(file_inode(file)))
	891	+ return 1;
	892	+ return 0;
	893	+}
	894	+
898	895	static __be32 nfsd_finish_read(struct svc_rqst rqstp, struct svc_fh fhp,
899	896	struct file *file, loff_t offset,
900		- unsigned long *count, int host_err)
	897	+ unsigned long count, u32 eof, ssize_t host_err)
901	898	{
902	899	if (host_err >= 0) {
903	900	nfsdstats.io_read += host_err;
	901	+ eof = nfsd_eof_on_read(file, offset, host_err, count);
904	902	*count = host_err;
905	903	fsnotify_access(file);
906	904	trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
..	..	@@ -912,7 +910,8 @@
912	910	}
913	911
914	912	__be32 nfsd_splice_read(struct svc_rqst rqstp, struct svc_fh fhp,
915		- struct file file, loff_t offset, unsigned long count)
	913	+ struct file file, loff_t offset, unsigned long count,
	914	+ u32 *eof)
916	915	{
917	916	struct splice_desc sd = {
918	917	.len = 0,
..	..	@@ -920,25 +919,27 @@
920	919	.pos = offset,
921	920	.u.data = rqstp,
922	921	};
923		- int host_err;
	922	+ ssize_t host_err;
924	923
925	924	trace_nfsd_read_splice(rqstp, fhp, offset, *count);
926	925	rqstp->rq_next_page = rqstp->rq_respages + 1;
927	926	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
928		- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
	927	+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
929	928	}
930	929
		/*
		 * nfsd_readv: read into the caller's kvec array with vfs_iter_read()
		 * and hand the result to nfsd_finish_read().
		 *
		 * Changes on the added side:
		 *  - a new @eof argument is forwarded to nfsd_finish_read();
		 *  - vfs_iter_read() now gets a local copy (ppos) as its position
		 *    argument instead of &offset, and nfsd_finish_read() is passed
		 *    @offset itself;
		 *  - host_err is an ssize_t instead of an int;
		 *  - iov_iter_kvec() is passed READ without the ITER_KVEC flag.
		 *
		 * NOTE(review): the parameter list shows "struct kvec vec" and
		 * "unsigned long count" while the body uses *count, so some '*'
		 * characters appear to have been lost in this rendering.
		 */
931	930	__be32 nfsd_readv(struct svc_rqst rqstp, struct svc_fh fhp,
932	931	struct file *file, loff_t offset,
933		- struct kvec vec, int vlen, unsigned long count)
	932	+ struct kvec vec, int vlen, unsigned long count,
	933	+ u32 *eof)
934	934	{
935	935	struct iov_iter iter;
936		- int host_err;
	936	+ loff_t ppos = offset;
	937	+ ssize_t host_err;
937	938
938	939	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
939		- iov_iter_kvec(&iter, READ \| ITER_KVEC, vec, vlen, *count);
940		- host_err = vfs_iter_read(file, &iter, &offset, 0);
941		- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
	940	+ iov_iter_kvec(&iter, READ, vec, vlen, *count);
	941	+ host_err = vfs_iter_read(file, &iter, &ppos, 0);
	942	+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
942	943	}
943	944
944	945	/*
..	..	@@ -979,12 +980,15 @@
979	980	}
980	981
981	982	__be32
982		-nfsd_vfs_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
	983	+nfsd_vfs_write(struct svc_rqst rqstp, struct svc_fh fhp, struct nfsd_file *nf,
983	984	loff_t offset, struct kvec *vec, int vlen,
984		- unsigned long *cnt, int stable)
	985	+ unsigned long *cnt, int stable,
	986	+ __be32 *verf)
985	987	{
	988	+ struct file *file = nf->nf_file;
986	989	struct svc_export *exp;
987	990	struct iov_iter iter;
	991	+ errseq_t since;
988	992	__be32 nfserr;
989	993	int host_err;
990	994	int use_wgather;
..	..	@@ -996,12 +1000,13 @@
996	1000
997	1001	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
998	1002	/*
999		- * We want less throttling in balance_dirty_pages()
1000		- * and shrink_inactive_list() so that nfs to
	1003	+ * We want throttling in balance_dirty_pages()
	1004	+ * and shrink_inactive_list() to only consider
	1005	+ * the backingdev we are writing to, so that nfs to
1001	1006	* localhost doesn't cause nfsd to lock up due to all
1002	1007	* the client's dirty pages or its congested queue.
1003	1008	*/
1004		- current->flags \|= PF_LESS_THROTTLE;
	1009	+ current->flags \|= PF_LOCAL_THROTTLE;
1005	1010
1006	1011	exp = fhp->fh_export;
1007	1012	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
..	..	@@ -1012,16 +1017,42 @@
1012	1017	if (stable && !use_wgather)
1013	1018	flags \|= RWF_SYNC;
1014	1019
1015		- iov_iter_kvec(&iter, WRITE \| ITER_KVEC, vec, vlen, *cnt);
1016		- host_err = vfs_iter_write(file, &iter, &pos, flags);
1017		- if (host_err < 0)
	1020	+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
	1021	+ since = READ_ONCE(file->f_wb_err);
	1022	+ if (flags & RWF_SYNC) {
	1023	+ if (verf)
	1024	+ nfsd_copy_boot_verifier(verf,
	1025	+ net_generic(SVC_NET(rqstp),
	1026	+ nfsd_net_id));
	1027	+ host_err = vfs_iter_write(file, &iter, &pos, flags);
	1028	+ if (host_err < 0)
	1029	+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
	1030	+ nfsd_net_id));
	1031	+ } else {
	1032	+ if (verf)
	1033	+ nfsd_copy_boot_verifier(verf,
	1034	+ net_generic(SVC_NET(rqstp),
	1035	+ nfsd_net_id));
	1036	+ host_err = vfs_iter_write(file, &iter, &pos, flags);
	1037	+ }
	1038	+ if (host_err < 0) {
	1039	+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
	1040	+ nfsd_net_id));
1018	1041	goto out_nfserr;
	1042	+ }
1019	1043	*cnt = host_err;
1020	1044	nfsdstats.io_write += *cnt;
1021	1045	fsnotify_modify(file);
	1046	+ host_err = filemap_check_wb_err(file->f_mapping, since);
	1047	+ if (host_err < 0)
	1048	+ goto out_nfserr;
1022	1049
1023		- if (stable && use_wgather)
	1050	+ if (stable && use_wgather) {
1024	1051	host_err = wait_for_concurrent_writes(file);
	1052	+ if (host_err < 0)
	1053	+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
	1054	+ nfsd_net_id));
	1055	+ }
1025	1056
1026	1057	out_nfserr:
1027	1058	if (host_err >= 0) {
..	..	@@ -1032,7 +1063,7 @@
1032	1063	nfserr = nfserrno(host_err);
1033	1064	}
1034	1065	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
1035		- current_restore_flags(pflags, PF_LESS_THROTTLE);
	1066	+ current_restore_flags(pflags, PF_LOCAL_THROTTLE);
1036	1067	return nfserr;
1037	1068	}
1038	1069
..	..	@@ -1042,27 +1073,25 @@
1042	1073	* N.B. After this call fhp needs an fh_put
1043	1074	*/
1044	1075	__be32 nfsd_read(struct svc_rqst rqstp, struct svc_fh fhp,
1045		- loff_t offset, struct kvec vec, int vlen, unsigned long count)
	1076	+ loff_t offset, struct kvec vec, int vlen, unsigned long count,
	1077	+ u32 *eof)
1046	1078	{
	1079	+ struct nfsd_file *nf;
1047	1080	struct file *file;
1048		- struct raparms *ra;
1049	1081	__be32 err;
1050	1082
1051	1083	trace_nfsd_read_start(rqstp, fhp, offset, *count);
1052		- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
	1084	+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
1053	1085	if (err)
1054	1086	return err;
1055	1087
1056		- ra = nfsd_init_raparms(file);
1057		-
	1088	+ file = nf->nf_file;
1058	1089	if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
1059		- err = nfsd_splice_read(rqstp, fhp, file, offset, count);
	1090	+ err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
1060	1091	else
1061		- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
	1092	+ err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
1062	1093
1063		- if (ra)
1064		- nfsd_put_raparams(file, ra);
1065		- fput(file);
	1094	+ nfsd_file_put(nf);
1066	1095
1067	1096	trace_nfsd_read_done(rqstp, fhp, offset, *count);
1068	1097
..	..	@@ -1076,19 +1105,21 @@
1076	1105	*/
1077	1106	__be32
1078	1107	nfsd_write(struct svc_rqst rqstp, struct svc_fh fhp, loff_t offset,
1079		- struct kvec vec, int vlen, unsigned long cnt, int stable)
	1108	+ struct kvec vec, int vlen, unsigned long cnt, int stable,
	1109	+ __be32 *verf)
1080	1110	{
1081		- struct file *file = NULL;
1082		- __be32 err = 0;
	1111	+ struct nfsd_file *nf;
	1112	+ __be32 err;
1083	1113
1084	1114	trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
1085	1115
1086		- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
	1116	+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
1087	1117	if (err)
1088	1118	goto out;
1089	1119
1090		- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
1091		- fput(file);
	1120	+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
	1121	+ vlen, cnt, stable, verf);
	1122	+ nfsd_file_put(nf);
1092	1123	out:
1093	1124	trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
1094	1125	return err;
..	..	@@ -1106,11 +1137,11 @@
1106	1137	*/
1107	1138	__be32
1108	1139	nfsd_commit(struct svc_rqst rqstp, struct svc_fh fhp,
1109		- loff_t offset, unsigned long count)
	1140	+ loff_t offset, unsigned long count, __be32 *verf)
1110	1141	{
1111		- struct file *file;
1112		- loff_t end = LLONG_MAX;
1113		- __be32 err = nfserr_inval;
	1142	+ struct nfsd_file *nf;
	1143	+ loff_t end = LLONG_MAX;
	1144	+ __be32 err = nfserr_inval;
1114	1145
1115	1146	if (offset < 0)
1116	1147	goto out;
..	..	@@ -1120,20 +1151,36 @@
1120	1151	goto out;
1121	1152	}
1122	1153
1123		- err = nfsd_open(rqstp, fhp, S_IFREG,
1124		- NFSD_MAY_WRITE\|NFSD_MAY_NOT_BREAK_LEASE, &file);
	1154	+ err = nfsd_file_acquire(rqstp, fhp,
	1155	+ NFSD_MAY_WRITE\|NFSD_MAY_NOT_BREAK_LEASE, &nf);
1125	1156	if (err)
1126	1157	goto out;
1127	1158	if (EX_ISSYNC(fhp->fh_export)) {
1128		- int err2 = vfs_fsync_range(file, offset, end, 0);
	1159	+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
	1160	+ int err2;
1129	1161
1130		- if (err2 != -EINVAL)
	1162	+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
	1163	+ switch (err2) {
	1164	+ case 0:
	1165	+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
	1166	+ nfsd_net_id));
	1167	+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
	1168	+ since);
1131	1169	err = nfserrno(err2);
1132		- else
	1170	+ break;
	1171	+ case -EINVAL:
1133	1172	err = nfserr_notsupp;
1134		- }
	1173	+ break;
	1174	+ default:
	1175	+ nfsd_reset_boot_verifier(net_generic(nf->nf_net,
	1176	+ nfsd_net_id));
	1177	+ err = nfserrno(err2);
	1178	+ }
	1179	+ } else
	1180	+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
	1181	+ nfsd_net_id));
1135	1182
1136		- fput(file);
	1183	+ nfsd_file_put(nf);
1137	1184	out:
1138	1185	return err;
1139	1186	}
..	..	@@ -1155,7 +1202,7 @@
1155	1202	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
1156	1203	iap->ia_valid &= ~(ATTR_UID\|ATTR_GID);
1157	1204	if (iap->ia_valid)
1158		- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
	1205	+ return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
1159	1206	/* Callers expect file metadata to be committed here */
1160	1207	return nfserrno(commit_metadata(resfhp));
1161	1208	}
..	..	@@ -1293,7 +1340,6 @@
1293	1340	int type, dev_t rdev, struct svc_fh *resfhp)
1294	1341	{
1295	1342	struct dentry dentry, dchild = NULL;
1296		- struct inode *dirp;
1297	1343	__be32 err;
1298	1344	int host_err;
1299	1345
..	..	@@ -1305,7 +1351,6 @@
1305	1351	return err;
1306	1352
1307	1353	dentry = fhp->fh_dentry;
1308		- dirp = d_inode(dentry);
1309	1354
1310	1355	host_err = fh_want_write(fhp);
1311	1356	if (host_err)
..	..	@@ -1423,18 +1468,19 @@
1423	1468	&& d_inode(dchild)->i_atime.tv_sec == v_atime
1424	1469	&& d_inode(dchild)->i_size == 0 ) {
1425	1470	if (created)
1426		- *created = 1;
	1471	+ *created = true;
1427	1472	break;
1428	1473	}
	1474	+ fallthrough;
1429	1475	case NFS4_CREATE_EXCLUSIVE4_1:
1430	1476	if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
1431	1477	&& d_inode(dchild)->i_atime.tv_sec == v_atime
1432	1478	&& d_inode(dchild)->i_size == 0 ) {
1433	1479	if (created)
1434		- *created = 1;
	1480	+ *created = true;
1435	1481	goto set_attr;
1436	1482	}
1437		- /* fallthru */
	1483	+ fallthrough;
1438	1484	case NFS3_CREATE_GUARDED:
1439	1485	err = nfserr_exist;
1440	1486	}
..	..	@@ -1451,7 +1497,7 @@
1451	1497	goto out_nfserr;
1452	1498	}
1453	1499	if (created)
1454		- *created = 1;
	1500	+ *created = true;
1455	1501
1456	1502	nfsd_check_ignore_resizing(iap);
1457	1503
..	..	@@ -1661,6 +1707,26 @@
1661	1707	goto out_unlock;
1662	1708	}
1663	1709
		/*
		 * nfsd_close_cached_files (added side): when @dentry has an inode and
		 * that inode is a regular file, call nfsd_file_close_inode_sync() on
		 * it. Negative dentries and non-regular inodes are left alone.
		 */
	1710	+static void
	1711	+nfsd_close_cached_files(struct dentry *dentry)
	1712	+{
	1713	+ struct inode *inode = d_inode(dentry);
	1714	+
	1715	+ if (inode && S_ISREG(inode->i_mode))
	1716	+ nfsd_file_close_inode_sync(inode);
	1717	+}
	1718	+
		/*
		 * nfsd_has_cached_files (added side): returns the result of
		 * nfsd_file_is_cached() for @dentry's inode when that inode exists and
		 * is a regular file; returns false for a negative dentry or any other
		 * inode type.
		 */
	1719	+static bool
	1720	+nfsd_has_cached_files(struct dentry *dentry)
	1721	+{
	1722	+ bool ret = false;
	1723	+ struct inode *inode = d_inode(dentry);
	1724	+
	1725	+ if (inode && S_ISREG(inode->i_mode))
	1726	+ ret = nfsd_file_is_cached(inode);
	1727	+ return ret;
	1728	+}
	1729	+
1664	1730	/*
1665	1731	* Rename a file
1666	1732	* N.B. After this call _both_ ffhp and tfhp need an fh_put
..	..	@@ -1673,6 +1739,7 @@
1673	1739	struct inode fdir, tdir;
1674	1740	__be32 err;
1675	1741	int host_err;
	1742	+ bool has_cached = false;
1676	1743
1677	1744	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
1678	1745	if (err)
..	..	@@ -1691,6 +1758,7 @@
1691	1758	if (!flen \|\| isdotent(fname, flen) \|\| !tlen \|\| isdotent(tname, tlen))
1692	1759	goto out;
1693	1760
	1761	+retry:
1694	1762	host_err = fh_want_write(ffhp);
1695	1763	if (host_err) {
1696	1764	err = nfserrno(host_err);
..	..	@@ -1730,11 +1798,16 @@
1730	1798	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
1731	1799	goto out_dput_new;
1732	1800
1733		- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
1734		- if (!host_err) {
1735		- host_err = commit_metadata(tfhp);
1736		- if (!host_err)
1737		- host_err = commit_metadata(ffhp);
	1801	+ if (nfsd_has_cached_files(ndentry)) {
	1802	+ has_cached = true;
	1803	+ goto out_dput_old;
	1804	+ } else {
	1805	+ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
	1806	+ if (!host_err) {
	1807	+ host_err = commit_metadata(tfhp);
	1808	+ if (!host_err)
	1809	+ host_err = commit_metadata(ffhp);
	1810	+ }
1738	1811	}
1739	1812	out_dput_new:
1740	1813	dput(ndentry);
..	..	@@ -1747,12 +1820,26 @@
1747	1820	* as that would do the wrong thing if the two directories
1748	1821	* were the same, so again we do it by hand.
1749	1822	*/
1750		- fill_post_wcc(ffhp);
1751		- fill_post_wcc(tfhp);
	1823	+ if (!has_cached) {
	1824	+ fill_post_wcc(ffhp);
	1825	+ fill_post_wcc(tfhp);
	1826	+ }
1752	1827	unlock_rename(tdentry, fdentry);
1753	1828	ffhp->fh_locked = tfhp->fh_locked = false;
1754	1829	fh_drop_write(ffhp);
1755	1830
	1831	+ /*
	1832	+ * If the target dentry has cached open files, then we need to try to
	1833	+ * close them prior to doing the rename. Flushing delayed fput
	1834	+ * shouldn't be done with locks held however, so we delay it until this
	1835	+ * point and then reattempt the whole shebang.
	1836	+ */
	1837	+ if (has_cached) {
	1838	+ has_cached = false;
	1839	+ nfsd_close_cached_files(ndentry);
	1840	+ dput(ndentry);
	1841	+ goto retry;
	1842	+ }
1756	1843	out:
1757	1844	return err;
1758	1845	}
..	..	@@ -1788,27 +1875,42 @@
1788	1875	rdentry = lookup_one_len(fname, dentry, flen);
1789	1876	host_err = PTR_ERR(rdentry);
1790	1877	if (IS_ERR(rdentry))
1791		- goto out_nfserr;
	1878	+ goto out_drop_write;
1792	1879
1793	1880	if (d_really_is_negative(rdentry)) {
1794	1881	dput(rdentry);
1795		- err = nfserr_noent;
1796		- goto out;
	1882	+ host_err = -ENOENT;
	1883	+ goto out_drop_write;
1797	1884	}
1798	1885
1799	1886	if (!type)
1800	1887	type = d_inode(rdentry)->i_mode & S_IFMT;
1801	1888
1802		- if (type != S_IFDIR)
	1889	+ if (type != S_IFDIR) {
	1890	+ nfsd_close_cached_files(rdentry);
1803	1891	host_err = vfs_unlink(dirp, rdentry, NULL);
1804		- else
	1892	+ } else {
1805	1893	host_err = vfs_rmdir(dirp, rdentry);
	1894	+ }
	1895	+
1806	1896	if (!host_err)
1807	1897	host_err = commit_metadata(fhp);
1808	1898	dput(rdentry);
1809	1899
	1900	+out_drop_write:
	1901	+ fh_drop_write(fhp);
1810	1902	out_nfserr:
1811		- err = nfserrno(host_err);
	1903	+ if (host_err == -EBUSY) {
	1904	+ /* name is mounted-on. There is no perfect
	1905	+ * error status.
	1906	+ */
	1907	+ if (nfsd_v4client(rqstp))
	1908	+ err = nfserr_file_open;
	1909	+ else
	1910	+ err = nfserr_acces;
	1911	+ } else {
	1912	+ err = nfserrno(host_err);
	1913	+ }
1812	1914	out:
1813	1915	return err;
1814	1916	}
..	..	@@ -1990,6 +2092,235 @@
1990	2092	return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
1991	2093	}
1992	2094
	2095	+#ifdef CONFIG_NFSD_V4
	2096	+/*
	2097	+ * Helper function to translate error numbers. In the case of xattr operations,
	2098	+ * some error codes need to be translated outside of the standard translations.
	2099	+ *
	2100	+ * ENODATA needs to be translated to nfserr_noxattr.
	2101	+ * E2BIG to nfserr_xattr2big.
	2102	+ *
	2103	+ * Additionally, vfs_listxattr can return -ERANGE. This means that the
	2104	+ * file has too many extended attributes to retrieve inside an
	2105	+ * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation:
	2106	+ * filesystems will allow the adding of extended attributes until they hit
	2107	+ * their own internal limit. This limit may be larger than XATTR_LIST_MAX.
	2108	+ * So, at that point, the attributes are present and valid, but can't
	2109	+ * be retrieved using listxattr, since the upper level xattr code enforces
	2110	+ * the XATTR_LIST_MAX limit.
	2111	+ *
	2112	+ * This bug means that we need to deal with listxattr returning -ERANGE. The
	2113	+ * best mapping is to return TOOSMALL.
		 *
		 * @err: value returned by an xattr VFS call (the cases below match
		 *       negative errno values).
		 *
		 * Every value other than -ENODATA, -E2BIG and -ERANGE is passed
		 * through to nfserrno() unchanged, including 0.
	2114	+ */
	2115	+static __be32
	2116	+nfsd_xattr_errno(int err)
	2117	+{
	2118	+ switch (err) {
	2119	+ case -ENODATA:
	2120	+ return nfserr_noxattr;
	2121	+ case -E2BIG:
	2122	+ return nfserr_xattr2big;
	2123	+ case -ERANGE:
	2124	+ return nfserr_toosmall;
	2125	+ }
	2126	+ return nfserrno(err);
	2127	+}
	2128	+
	2129	+/*
	2130	+ * Retrieve the specified user extended attribute. To avoid always
	2131	+ * having to allocate the maximum size (since we are not getting
	2132	+ * a maximum size from the RPC), do a probe + alloc. Hold a reader
	2133	+ * lock on i_rwsem to prevent the extended attribute from changing
	2134	+ * size while we're doing this.
		 *
		 * @name: attribute name handed to vfs_getxattr()
		 * @bufp: out - receives the kvmalloc()ed value buffer, or NULL
		 * @lenp: in  - largest value length the caller accepts
		 *        out - length reported by vfs_getxattr()
		 *
		 * Returns nfs_ok on success, the fh_verify() error, nfserr_toosmall
		 * when the probed length exceeds the caller's limit, nfserr_jukebox
		 * when the allocation fails, or the nfsd_xattr_errno() mapping of a
		 * vfs_getxattr() failure.
		 *
		 * NOTE(review): presumably the caller releases the returned buffer
		 * with kvfree() - confirm against the caller.
		 * NOTE(review): the prototype is rendered as "void *bufp, int lenp"
		 * while the body dereferences both, so some '*' characters appear to
		 * have been lost in this rendering.
	2135	+ */
	2136	+__be32
	2137	+nfsd_getxattr(struct svc_rqst rqstp, struct svc_fh fhp, char *name,
	2138	+ void *bufp, int lenp)
	2139	+{
	2140	+ ssize_t len;
	2141	+ __be32 err;
	2142	+ char *buf;
	2143	+ struct inode *inode;
	2144	+ struct dentry *dentry;
	2145	+
	2146	+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
	2147	+ if (err)
	2148	+ return err;
	2149	+
	2150	+ err = nfs_ok;
	2151	+ dentry = fhp->fh_dentry;
	2152	+ inode = d_inode(dentry);
	2153	+
	2154	+ inode_lock_shared(inode);
	2155	+
		/* Probe: a NULL buffer of size 0 asks only for the value's length. */
	2156	+ len = vfs_getxattr(dentry, name, NULL, 0);
	2157	+
	2158	+ /*
	2159	+ * Zero-length attribute, just return.
	2160	+ */
	2161	+ if (len == 0) {
	2162	+ *bufp = NULL;
	2163	+ *lenp = 0;
	2164	+ goto out;
	2165	+ }
	2166	+
	2167	+ if (len < 0) {
	2168	+ err = nfsd_xattr_errno(len);
	2169	+ goto out;
	2170	+ }
	2171	+
	2172	+ if (len > *lenp) {
	2173	+ err = nfserr_toosmall;
	2174	+ goto out;
	2175	+ }
	2176	+
		/* NOTE(review): OR-ing GFP_NOFS into GFP_KERNEL presumably leaves the FS bit set - confirm the intended mask. */
	2177	+ buf = kvmalloc(len, GFP_KERNEL \| GFP_NOFS);
	2178	+ if (buf == NULL) {
	2179	+ err = nfserr_jukebox;
	2180	+ goto out;
	2181	+ }
	2182	+
	2183	+ len = vfs_getxattr(dentry, name, buf, len);
	2184	+ if (len <= 0) {
	2185	+ kvfree(buf);
	2186	+ buf = NULL;
	2187	+ err = nfsd_xattr_errno(len);
	2188	+ }
	2189	+
		/* Written on the failure path above as well: *lenp then holds the non-positive result and *bufp is NULL. */
	2190	+ *lenp = len;
	2191	+ *bufp = buf;
	2192	+
	2193	+out:
	2194	+ inode_unlock_shared(inode);
	2195	+
	2196	+ return err;
	2197	+}
	2198	+
	2199	+/*
	2200	+ * Retrieve the xattr names. Since we can't know how many are
	2201	+ * user extended attributes, we must get all attributes here,
	2202	+ * and have the XDR encode filter out the "user." ones.
	2203	+ *
	2204	+ * While this could always just allocate an XATTR_LIST_MAX
	2205	+ * buffer, that's a waste, so do a probe + allocate. To
	2206	+ * avoid any changes between the probe and allocate, wrap
	2207	+ * this in inode_lock.
		 *
		 * @bufp: out - receives the kvmalloc()ed name list on success only;
		 *        it is not written on any error path
		 * @lenp: out - set to 0 up front, then to the list length on success
		 *
		 * Returns nfs_ok on success, the fh_verify() error, nfserr_xattr2big
		 * when the probed length exceeds XATTR_LIST_MAX, nfserr_jukebox when
		 * the allocation fails, or nfsd_xattr_errno() of a non-positive
		 * vfs_listxattr() result (a result of 0 is passed through that
		 * helper too).
		 *
		 * NOTE(review): presumably the caller releases the returned buffer
		 * with kvfree() - confirm against the caller.
	2208	+ */
	2209	+__be32
	2210	+nfsd_listxattr(struct svc_rqst rqstp, struct svc_fh fhp, char **bufp,
	2211	+ int *lenp)
	2212	+{
	2213	+ ssize_t len;
	2214	+ __be32 err;
	2215	+ char *buf;
	2216	+ struct inode *inode;
	2217	+ struct dentry *dentry;
	2218	+
	2219	+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
	2220	+ if (err)
	2221	+ return err;
	2222	+
	2223	+ dentry = fhp->fh_dentry;
	2224	+ inode = d_inode(dentry);
	2225	+ *lenp = 0;
	2226	+
	2227	+ inode_lock_shared(inode);
	2228	+
		/* Probe: a NULL buffer of size 0 asks only for the list's length. */
	2229	+ len = vfs_listxattr(dentry, NULL, 0);
	2230	+ if (len <= 0) {
	2231	+ err = nfsd_xattr_errno(len);
	2232	+ goto out;
	2233	+ }
	2234	+
	2235	+ if (len > XATTR_LIST_MAX) {
	2236	+ err = nfserr_xattr2big;
	2237	+ goto out;
	2238	+ }
	2239	+
	2240	+ /*
	2241	+ * We're holding i_rwsem - use GFP_NOFS.
		 * NOTE(review): the mask below also includes GFP_KERNEL, which
		 * presumably keeps the FS bit set - confirm the intended mask.
	2242	+ */
	2243	+ buf = kvmalloc(len, GFP_KERNEL \| GFP_NOFS);
	2244	+ if (buf == NULL) {
	2245	+ err = nfserr_jukebox;
	2246	+ goto out;
	2247	+ }
	2248	+
	2249	+ len = vfs_listxattr(dentry, buf, len);
	2250	+ if (len <= 0) {
	2251	+ kvfree(buf);
	2252	+ err = nfsd_xattr_errno(len);
	2253	+ goto out;
	2254	+ }
	2255	+
	2256	+ *lenp = len;
	2257	+ *bufp = buf;
	2258	+
	2259	+ err = nfs_ok;
	2260	+out:
	2261	+ inode_unlock_shared(inode);
	2262	+
	2263	+ return err;
	2264	+}
	2265	+
	2266	+/*
	2267	+ * Removexattr and setxattr need to call fh_lock to both lock the inode
	2268	+ * and set the change attribute. Since the top-level vfs_removexattr
	2269	+ * and vfs_setxattr calls already do their own inode_lock calls, call
	2270	+ * the _locked variant. Pass in a NULL pointer for delegated_inode,
	2271	+ * and let the client deal with NFS4ERR_DELAY (same as with e.g.
	2272	+ * setattr and remove).
		 *
		 * nfsd_removexattr: remove attribute @name from the file behind @fhp.
		 *
		 * Sequence: fh_verify() with NFSD_MAY_WRITE, fh_want_write(),
		 * fh_lock(), __vfs_removexattr_locked(), fh_unlock(),
		 * fh_drop_write().
		 *
		 * Returns the fh_verify() error, nfserrno() of a fh_want_write()
		 * failure, or nfsd_xattr_errno() of the removal result.
	2273	+ */
	2274	+__be32
	2275	+nfsd_removexattr(struct svc_rqst rqstp, struct svc_fh fhp, char *name)
	2276	+{
	2277	+ __be32 err;
	2278	+ int ret;
	2279	+
	2280	+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
	2281	+ if (err)
	2282	+ return err;
	2283	+
	2284	+ ret = fh_want_write(fhp);
	2285	+ if (ret)
	2286	+ return nfserrno(ret);
	2287	+
	2288	+ fh_lock(fhp);
	2289	+
	2290	+ ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL);
	2291	+
	2292	+ fh_unlock(fhp);
	2293	+ fh_drop_write(fhp);
	2294	+
	2295	+ return nfsd_xattr_errno(ret);
	2296	+}
	2297	+
		/*
		 * nfsd_setxattr (added side): set attribute @name on the file behind
		 * @fhp to the @len bytes at @buf, passing @flags through to
		 * __vfs_setxattr_locked().
		 *
		 * Sequence: fh_verify() with NFSD_MAY_WRITE, fh_want_write(),
		 * fh_lock(), __vfs_setxattr_locked() with a NULL final argument,
		 * fh_unlock(), fh_drop_write().
		 *
		 * Returns the fh_verify() error, nfserrno() of a fh_want_write()
		 * failure, or nfsd_xattr_errno() of the set result.
		 */
	2298	+__be32
	2299	+nfsd_setxattr(struct svc_rqst rqstp, struct svc_fh fhp, char *name,
	2300	+ void *buf, u32 len, u32 flags)
	2301	+{
	2302	+ __be32 err;
	2303	+ int ret;
	2304	+
	2305	+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
	2306	+ if (err)
	2307	+ return err;
	2308	+
	2309	+ ret = fh_want_write(fhp);
	2310	+ if (ret)
	2311	+ return nfserrno(ret);
	2312	+ fh_lock(fhp);
	2313	+
	2314	+ ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags,
	2315	+ NULL);
	2316	+
	2317	+ fh_unlock(fhp);
	2318	+ fh_drop_write(fhp);
	2319	+
	2320	+ return nfsd_xattr_errno(ret);
	2321	+}
	2322	+#endif
	2323	+
1993	2324	/*
1994	2325	* Check for a user's access permissions to this inode.
1995	2326	*/
..	..	@@ -2073,64 +2404,4 @@
2073	2404	err = inode_permission(inode, MAY_EXEC);
2074	2405
2075	2406	return err? nfserrno(err) : 0;
2076		-}
2077		-
		/*
		 * nfsd_racache_shutdown (removed side): for each of the
		 * RAPARM_HASH_SIZE buckets, kfree() every entry on the bucket's list
		 * and reset pb_head to NULL. No bucket lock is taken here.
		 */
2078		-void
2079		-nfsd_racache_shutdown(void)
2080		-{
2081		- struct raparms raparm, last_raparm;
2082		- unsigned int i;
2083		-
2084		- dprintk("nfsd: freeing readahead buffers.\n");
2085		-
2086		- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2087		- raparm = raparm_hash[i].pb_head;
2088		- while(raparm) {
		/* step to the next entry before freeing the current one */
2089		- last_raparm = raparm;
2090		- raparm = raparm->p_next;
2091		- kfree(last_raparm);
2092		- }
2093		- raparm_hash[i].pb_head = NULL;
2094		- }
2095		-}
2096		-/*
2097		- * Initialize readahead param cache
		 *
		 * Review annotation (removed side):
		 * @cache_size: requested number of cache entries.
		 *
		 * Returns 0 straight away when bucket 0 already has entries.
		 * Otherwise the per-bucket count is DIV_ROUND_UP(cache_size,
		 * RAPARM_HASH_SIZE), raised to at least 2, and that many zeroed
		 * entries are allocated with kzalloc() for every bucket. The
		 * resulting total is stored in nfsdstats.ra_size and 0 is returned.
		 * If an allocation fails, nfsd_racache_shutdown() is called and
		 * -ENOMEM is returned.
2098		- */
2099		-int
2100		-nfsd_racache_init(int cache_size)
2101		-{
2102		- int i;
2103		- int j = 0;
2104		- int nperbucket;
2105		- struct raparms **raparm = NULL;
2106		-
2107		-
2108		- if (raparm_hash[0].pb_head)
2109		- return 0;
2110		- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2111		- nperbucket = max(2, nperbucket);
2112		- cache_size = nperbucket * RAPARM_HASH_SIZE;
2113		-
2114		- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
2115		-
2116		- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2117		- spin_lock_init(&raparm_hash[i].pb_lock);
2118		-
		/* raparm always points at the link to fill next, starting at the bucket head */
2119		- raparm = &raparm_hash[i].pb_head;
2120		- for (j = 0; j < nperbucket; j++) {
2121		- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
2122		- if (!*raparm)
2123		- goto out_nomem;
2124		- raparm = &(*raparm)->p_next;
2125		- }
2126		- *raparm = NULL;
2127		- }
2128		-
2129		- nfsdstats.ra_size = cache_size;
2130		- return 0;
2131		-
2132		-out_nomem:
2133		- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
2134		- nfsd_racache_shutdown();
2135		- return -ENOMEM;
2136	2407	}