hc
2024-05-11 297b60346df8beafee954a0fd7c2d64f33f3b9bc
kernel/fs/nfsd/vfs.c
....@@ -44,37 +44,10 @@
4444
4545 #include "nfsd.h"
4646 #include "vfs.h"
47
+#include "filecache.h"
4748 #include "trace.h"
4849
4950 #define NFSDDBG_FACILITY NFSDDBG_FILEOP
50
-
51
-
52
-/*
53
- * This is a cache of readahead params that help us choose the proper
54
- * readahead strategy. Initially, we set all readahead parameters to 0
55
- * and let the VFS handle things.
56
- * If you increase the number of cached files very much, you'll need to
57
- * add a hash table here.
58
- */
59
-struct raparms {
60
- struct raparms *p_next;
61
- unsigned int p_count;
62
- ino_t p_ino;
63
- dev_t p_dev;
64
- int p_set;
65
- struct file_ra_state p_ra;
66
- unsigned int p_hindex;
67
-};
68
-
69
-struct raparm_hbucket {
70
- struct raparms *pb_head;
71
- spinlock_t pb_lock;
72
-} ____cacheline_aligned_in_smp;
73
-
74
-#define RAPARM_HASH_BITS 4
75
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
76
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
77
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
7851
7952 /*
8053 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
....@@ -307,17 +280,23 @@
307280 * Commit metadata changes to stable storage.
308281 */
309282 static int
310
-commit_metadata(struct svc_fh *fhp)
283
+commit_inode_metadata(struct inode *inode)
311284 {
312
- struct inode *inode = d_inode(fhp->fh_dentry);
313285 const struct export_operations *export_ops = inode->i_sb->s_export_op;
314
-
315
- if (!EX_ISSYNC(fhp->fh_export))
316
- return 0;
317286
318287 if (export_ops->commit_metadata)
319288 return export_ops->commit_metadata(inode);
320289 return sync_inode_metadata(inode, 1);
290
+}
291
+
292
+static int
293
+commit_metadata(struct svc_fh *fhp)
294
+{
295
+ struct inode *inode = d_inode(fhp->fh_dentry);
296
+
297
+ if (!EX_ISSYNC(fhp->fh_export))
298
+ return 0;
299
+ return commit_inode_metadata(inode);
321300 }
322301
323302 /*
....@@ -385,7 +364,7 @@
385364 */
386365 __be32
387366 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
388
- int check_guard, time_t guardtime)
367
+ int check_guard, time64_t guardtime)
389368 {
390369 struct dentry *dentry;
391370 struct inode *inode;
....@@ -404,7 +383,7 @@
404383 /*
405384 * If utimes(2) and friends are called with times not NULL, we should
406385 * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
407
- * will return EACCESS, when the caller's effective UID does not match
386
+ * will return EACCES, when the caller's effective UID does not match
408387 * the owner of the file, and the caller is not privileged. In this
409388 * situation, we should return EPERM(notify_change will return this).
410389 */
....@@ -551,16 +530,47 @@
551530 }
552531 #endif
553532
554
-__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
555
- u64 dst_pos, u64 count)
533
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
534
+ struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
556535 {
557
- return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
558
- count));
536
+ struct file *src = nf_src->nf_file;
537
+ struct file *dst = nf_dst->nf_file;
538
+ errseq_t since;
539
+ loff_t cloned;
540
+ __be32 ret = 0;
541
+
542
+ since = READ_ONCE(dst->f_wb_err);
543
+ cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
544
+ if (cloned < 0) {
545
+ ret = nfserrno(cloned);
546
+ goto out_err;
547
+ }
548
+ if (count && cloned != count) {
549
+ ret = nfserrno(-EINVAL);
550
+ goto out_err;
551
+ }
552
+ if (sync) {
553
+ loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
554
+ int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
555
+
556
+ if (!status)
557
+ status = filemap_check_wb_err(dst->f_mapping, since);
558
+ if (!status)
559
+ status = commit_inode_metadata(file_inode(src));
560
+ if (status < 0) {
561
+ nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
562
+ nfsd_net_id));
563
+ ret = nfserrno(status);
564
+ }
565
+ }
566
+out_err:
567
+ return ret;
559568 }
560569
561570 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
562571 u64 dst_pos, u64 count)
563572 {
573
+ ssize_t ret;
564574
565575 /*
566576 * Limit copy to 4MB to prevent indefinitely blocking an nfsd
....@@ -571,7 +581,12 @@
571581 * limit like this and pipeline multiple COPY requests.
572582 */
573583 count = min_t(u64, count, 1 << 22);
574
- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
584
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
585
+
586
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
587
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
588
+ COPY_FILE_SPLICE);
589
+ return ret;
575590 }
576591
577592 __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
....@@ -605,6 +620,12 @@
605620 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
606621 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
607622
623
+#ifdef CONFIG_NFSD_V4
624
+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
625
+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
626
+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
627
+#endif
628
+
608629 { 0, 0 }
609630 };
610631
....@@ -614,6 +635,12 @@
614635 { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
615636 { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
616637 { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
638
+
639
+#ifdef CONFIG_NFSD_V4
640
+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
641
+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
642
+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
643
+#endif
617644
618645 { 0, 0 }
619646 };
....@@ -693,7 +720,7 @@
693720 }
694721 #endif /* CONFIG_NFSD_V3 */
695722
696
-static int nfsd_open_break_lease(struct inode *inode, int access)
723
+int nfsd_open_break_lease(struct inode *inode, int access)
697724 {
698725 unsigned int mode;
699726
....@@ -709,8 +736,8 @@
709736 * and additional flags.
710737 * N.B. After this call fhp needs an fh_put
711738 */
712
-__be32
713
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
739
+static __be32
740
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
714741 int may_flags, struct file **filp)
715742 {
716743 struct path path;
....@@ -719,25 +746,6 @@
719746 int flags = O_RDONLY|O_LARGEFILE;
720747 __be32 err;
721748 int host_err = 0;
722
-
723
- validate_process_creds();
724
-
725
- /*
726
- * If we get here, then the client has already done an "open",
727
- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
728
- * in case a chmod has now revoked permission.
729
- *
730
- * Arguably we should also allow the owner override for
731
- * directories, but we never have and it doesn't seem to have
732
- * caused anyone a problem. If we were to change this, note
733
- * also that our filldir callbacks would need a variant of
734
- * lookup_one_len that doesn't check permissions.
735
- */
736
- if (type == S_IFREG)
737
- may_flags |= NFSD_MAY_OWNER_OVERRIDE;
738
- err = fh_verify(rqstp, fhp, type, may_flags);
739
- if (err)
740
- goto out;
741749
742750 path.mnt = fhp->fh_export->ex_path.mnt;
743751 path.dentry = fhp->fh_dentry;
....@@ -792,67 +800,46 @@
792800 out_nfserr:
793801 err = nfserrno(host_err);
794802 out:
803
+ return err;
804
+}
805
+
806
+__be32
807
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
808
+ int may_flags, struct file **filp)
809
+{
810
+ __be32 err;
811
+
812
+ validate_process_creds();
813
+ /*
814
+ * If we get here, then the client has already done an "open",
815
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
816
+ * in case a chmod has now revoked permission.
817
+ *
818
+ * Arguably we should also allow the owner override for
819
+ * directories, but we never have and it doesn't seem to have
820
+ * caused anyone a problem. If we were to change this, note
821
+ * also that our filldir callbacks would need a variant of
822
+ * lookup_one_len that doesn't check permissions.
823
+ */
824
+ if (type == S_IFREG)
825
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
826
+ err = fh_verify(rqstp, fhp, type, may_flags);
827
+ if (!err)
828
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
795829 validate_process_creds();
796830 return err;
797831 }
798832
799
-struct raparms *
800
-nfsd_init_raparms(struct file *file)
833
+__be32
834
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
835
+ int may_flags, struct file **filp)
801836 {
802
- struct inode *inode = file_inode(file);
803
- dev_t dev = inode->i_sb->s_dev;
804
- ino_t ino = inode->i_ino;
805
- struct raparms *ra, **rap, **frap = NULL;
806
- int depth = 0;
807
- unsigned int hash;
808
- struct raparm_hbucket *rab;
837
+ __be32 err;
809838
810
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
811
- rab = &raparm_hash[hash];
812
-
813
- spin_lock(&rab->pb_lock);
814
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
815
- if (ra->p_ino == ino && ra->p_dev == dev)
816
- goto found;
817
- depth++;
818
- if (ra->p_count == 0)
819
- frap = rap;
820
- }
821
- depth = nfsdstats.ra_size;
822
- if (!frap) {
823
- spin_unlock(&rab->pb_lock);
824
- return NULL;
825
- }
826
- rap = frap;
827
- ra = *frap;
828
- ra->p_dev = dev;
829
- ra->p_ino = ino;
830
- ra->p_set = 0;
831
- ra->p_hindex = hash;
832
-found:
833
- if (rap != &rab->pb_head) {
834
- *rap = ra->p_next;
835
- ra->p_next = rab->pb_head;
836
- rab->pb_head = ra;
837
- }
838
- ra->p_count++;
839
- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
840
- spin_unlock(&rab->pb_lock);
841
-
842
- if (ra->p_set)
843
- file->f_ra = ra->p_ra;
844
- return ra;
845
-}
846
-
847
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
848
-{
849
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
850
-
851
- spin_lock(&rab->pb_lock);
852
- ra->p_ra = file->f_ra;
853
- ra->p_set = 1;
854
- ra->p_count--;
855
- spin_unlock(&rab->pb_lock);
839
+ validate_process_creds();
840
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
841
+ validate_process_creds();
842
+ return err;
856843 }
857844
858845 /*
....@@ -895,12 +882,23 @@
895882 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
896883 }
897884
885
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
886
+ size_t expected)
887
+{
888
+ if (expected != 0 && len == 0)
889
+ return 1;
890
+ if (offset+len >= i_size_read(file_inode(file)))
891
+ return 1;
892
+ return 0;
893
+}
894
+
898895 static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
899896 struct file *file, loff_t offset,
900
- unsigned long *count, int host_err)
897
+ unsigned long *count, u32 *eof, ssize_t host_err)
901898 {
902899 if (host_err >= 0) {
903900 nfsdstats.io_read += host_err;
901
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
904902 *count = host_err;
905903 fsnotify_access(file);
906904 trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
....@@ -912,7 +910,8 @@
912910 }
913911
914912 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
915
- struct file *file, loff_t offset, unsigned long *count)
913
+ struct file *file, loff_t offset, unsigned long *count,
914
+ u32 *eof)
916915 {
917916 struct splice_desc sd = {
918917 .len = 0,
....@@ -920,25 +919,27 @@
920919 .pos = offset,
921920 .u.data = rqstp,
922921 };
923
- int host_err;
922
+ ssize_t host_err;
924923
925924 trace_nfsd_read_splice(rqstp, fhp, offset, *count);
926925 rqstp->rq_next_page = rqstp->rq_respages + 1;
927926 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
928
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
927
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
929928 }
930929
931930 __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
932931 struct file *file, loff_t offset,
933
- struct kvec *vec, int vlen, unsigned long *count)
932
+ struct kvec *vec, int vlen, unsigned long *count,
933
+ u32 *eof)
934934 {
935935 struct iov_iter iter;
936
- int host_err;
936
+ loff_t ppos = offset;
937
+ ssize_t host_err;
937938
938939 trace_nfsd_read_vector(rqstp, fhp, offset, *count);
939
- iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
940
- host_err = vfs_iter_read(file, &iter, &offset, 0);
941
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
940
+ iov_iter_kvec(&iter, READ, vec, vlen, *count);
941
+ host_err = vfs_iter_read(file, &iter, &ppos, 0);
942
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
942943 }
943944
944945 /*
....@@ -979,12 +980,15 @@
979980 }
980981
981982 __be32
982
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
983
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
983984 loff_t offset, struct kvec *vec, int vlen,
984
- unsigned long *cnt, int stable)
985
+ unsigned long *cnt, int stable,
986
+ __be32 *verf)
985987 {
988
+ struct file *file = nf->nf_file;
986989 struct svc_export *exp;
987990 struct iov_iter iter;
991
+ errseq_t since;
988992 __be32 nfserr;
989993 int host_err;
990994 int use_wgather;
....@@ -996,12 +1000,13 @@
9961000
9971001 if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
9981002 /*
999
- * We want less throttling in balance_dirty_pages()
1000
- * and shrink_inactive_list() so that nfs to
1003
+ * We want throttling in balance_dirty_pages()
1004
+ * and shrink_inactive_list() to only consider
1005
+ * the backingdev we are writing to, so that nfs to
10011006 * localhost doesn't cause nfsd to lock up due to all
10021007 * the client's dirty pages or its congested queue.
10031008 */
1004
- current->flags |= PF_LESS_THROTTLE;
1009
+ current->flags |= PF_LOCAL_THROTTLE;
10051010
10061011 exp = fhp->fh_export;
10071012 use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
....@@ -1012,16 +1017,42 @@
10121017 if (stable && !use_wgather)
10131018 flags |= RWF_SYNC;
10141019
1015
- iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
1016
- host_err = vfs_iter_write(file, &iter, &pos, flags);
1017
- if (host_err < 0)
1020
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
1021
+ since = READ_ONCE(file->f_wb_err);
1022
+ if (flags & RWF_SYNC) {
1023
+ if (verf)
1024
+ nfsd_copy_boot_verifier(verf,
1025
+ net_generic(SVC_NET(rqstp),
1026
+ nfsd_net_id));
1027
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
1028
+ if (host_err < 0)
1029
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
1030
+ nfsd_net_id));
1031
+ } else {
1032
+ if (verf)
1033
+ nfsd_copy_boot_verifier(verf,
1034
+ net_generic(SVC_NET(rqstp),
1035
+ nfsd_net_id));
1036
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
1037
+ }
1038
+ if (host_err < 0) {
1039
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
1040
+ nfsd_net_id));
10181041 goto out_nfserr;
1042
+ }
10191043 *cnt = host_err;
10201044 nfsdstats.io_write += *cnt;
10211045 fsnotify_modify(file);
1046
+ host_err = filemap_check_wb_err(file->f_mapping, since);
1047
+ if (host_err < 0)
1048
+ goto out_nfserr;
10221049
1023
- if (stable && use_wgather)
1050
+ if (stable && use_wgather) {
10241051 host_err = wait_for_concurrent_writes(file);
1052
+ if (host_err < 0)
1053
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
1054
+ nfsd_net_id));
1055
+ }
10251056
10261057 out_nfserr:
10271058 if (host_err >= 0) {
....@@ -1032,7 +1063,7 @@
10321063 nfserr = nfserrno(host_err);
10331064 }
10341065 if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
1035
- current_restore_flags(pflags, PF_LESS_THROTTLE);
1066
+ current_restore_flags(pflags, PF_LOCAL_THROTTLE);
10361067 return nfserr;
10371068 }
10381069
....@@ -1042,27 +1073,25 @@
10421073 * N.B. After this call fhp needs an fh_put
10431074 */
10441075 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1045
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
1076
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
1077
+ u32 *eof)
10461078 {
1079
+ struct nfsd_file *nf;
10471080 struct file *file;
1048
- struct raparms *ra;
10491081 __be32 err;
10501082
10511083 trace_nfsd_read_start(rqstp, fhp, offset, *count);
1052
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1084
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
10531085 if (err)
10541086 return err;
10551087
1056
- ra = nfsd_init_raparms(file);
1057
-
1088
+ file = nf->nf_file;
10581089 if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
1059
- err = nfsd_splice_read(rqstp, fhp, file, offset, count);
1090
+ err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
10601091 else
1061
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
1092
+ err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
10621093
1063
- if (ra)
1064
- nfsd_put_raparams(file, ra);
1065
- fput(file);
1094
+ nfsd_file_put(nf);
10661095
10671096 trace_nfsd_read_done(rqstp, fhp, offset, *count);
10681097
....@@ -1076,19 +1105,21 @@
10761105 */
10771106 __be32
10781107 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
1079
- struct kvec *vec, int vlen, unsigned long *cnt, int stable)
1108
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable,
1109
+ __be32 *verf)
10801110 {
1081
- struct file *file = NULL;
1082
- __be32 err = 0;
1111
+ struct nfsd_file *nf;
1112
+ __be32 err;
10831113
10841114 trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
10851115
1086
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1116
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
10871117 if (err)
10881118 goto out;
10891119
1090
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
1091
- fput(file);
1120
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
1121
+ vlen, cnt, stable, verf);
1122
+ nfsd_file_put(nf);
10921123 out:
10931124 trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
10941125 return err;
....@@ -1106,11 +1137,11 @@
11061137 */
11071138 __be32
11081139 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1109
- loff_t offset, unsigned long count)
1140
+ loff_t offset, unsigned long count, __be32 *verf)
11101141 {
1111
- struct file *file;
1112
- loff_t end = LLONG_MAX;
1113
- __be32 err = nfserr_inval;
1142
+ struct nfsd_file *nf;
1143
+ loff_t end = LLONG_MAX;
1144
+ __be32 err = nfserr_inval;
11141145
11151146 if (offset < 0)
11161147 goto out;
....@@ -1120,20 +1151,36 @@
11201151 goto out;
11211152 }
11221153
1123
- err = nfsd_open(rqstp, fhp, S_IFREG,
1124
- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
1154
+ err = nfsd_file_acquire(rqstp, fhp,
1155
+ NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
11251156 if (err)
11261157 goto out;
11271158 if (EX_ISSYNC(fhp->fh_export)) {
1128
- int err2 = vfs_fsync_range(file, offset, end, 0);
1159
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
1160
+ int err2;
11291161
1130
- if (err2 != -EINVAL)
1162
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
1163
+ switch (err2) {
1164
+ case 0:
1165
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
1166
+ nfsd_net_id));
1167
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
1168
+ since);
11311169 err = nfserrno(err2);
1132
- else
1170
+ break;
1171
+ case -EINVAL:
11331172 err = nfserr_notsupp;
1134
- }
1173
+ break;
1174
+ default:
1175
+ nfsd_reset_boot_verifier(net_generic(nf->nf_net,
1176
+ nfsd_net_id));
1177
+ err = nfserrno(err2);
1178
+ }
1179
+ } else
1180
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
1181
+ nfsd_net_id));
11351182
1136
- fput(file);
1183
+ nfsd_file_put(nf);
11371184 out:
11381185 return err;
11391186 }
....@@ -1155,7 +1202,7 @@
11551202 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
11561203 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
11571204 if (iap->ia_valid)
1158
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1205
+ return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
11591206 /* Callers expect file metadata to be committed here */
11601207 return nfserrno(commit_metadata(resfhp));
11611208 }
....@@ -1293,7 +1340,6 @@
12931340 int type, dev_t rdev, struct svc_fh *resfhp)
12941341 {
12951342 struct dentry *dentry, *dchild = NULL;
1296
- struct inode *dirp;
12971343 __be32 err;
12981344 int host_err;
12991345
....@@ -1305,7 +1351,6 @@
13051351 return err;
13061352
13071353 dentry = fhp->fh_dentry;
1308
- dirp = d_inode(dentry);
13091354
13101355 host_err = fh_want_write(fhp);
13111356 if (host_err)
....@@ -1423,18 +1468,19 @@
14231468 && d_inode(dchild)->i_atime.tv_sec == v_atime
14241469 && d_inode(dchild)->i_size == 0 ) {
14251470 if (created)
1426
- *created = 1;
1471
+ *created = true;
14271472 break;
14281473 }
1474
+ fallthrough;
14291475 case NFS4_CREATE_EXCLUSIVE4_1:
14301476 if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
14311477 && d_inode(dchild)->i_atime.tv_sec == v_atime
14321478 && d_inode(dchild)->i_size == 0 ) {
14331479 if (created)
1434
- *created = 1;
1480
+ *created = true;
14351481 goto set_attr;
14361482 }
1437
- /* fallthru */
1483
+ fallthrough;
14381484 case NFS3_CREATE_GUARDED:
14391485 err = nfserr_exist;
14401486 }
....@@ -1451,7 +1497,7 @@
14511497 goto out_nfserr;
14521498 }
14531499 if (created)
1454
- *created = 1;
1500
+ *created = true;
14551501
14561502 nfsd_check_ignore_resizing(iap);
14571503
....@@ -1661,6 +1707,26 @@
16611707 goto out_unlock;
16621708 }
16631709
1710
+static void
1711
+nfsd_close_cached_files(struct dentry *dentry)
1712
+{
1713
+ struct inode *inode = d_inode(dentry);
1714
+
1715
+ if (inode && S_ISREG(inode->i_mode))
1716
+ nfsd_file_close_inode_sync(inode);
1717
+}
1718
+
1719
+static bool
1720
+nfsd_has_cached_files(struct dentry *dentry)
1721
+{
1722
+ bool ret = false;
1723
+ struct inode *inode = d_inode(dentry);
1724
+
1725
+ if (inode && S_ISREG(inode->i_mode))
1726
+ ret = nfsd_file_is_cached(inode);
1727
+ return ret;
1728
+}
1729
+
16641730 /*
16651731 * Rename a file
16661732 * N.B. After this call _both_ ffhp and tfhp need an fh_put
....@@ -1673,6 +1739,7 @@
16731739 struct inode *fdir, *tdir;
16741740 __be32 err;
16751741 int host_err;
1742
+ bool has_cached = false;
16761743
16771744 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
16781745 if (err)
....@@ -1691,6 +1758,7 @@
16911758 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
16921759 goto out;
16931760
1761
+retry:
16941762 host_err = fh_want_write(ffhp);
16951763 if (host_err) {
16961764 err = nfserrno(host_err);
....@@ -1730,11 +1798,16 @@
17301798 if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
17311799 goto out_dput_new;
17321800
1733
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
1734
- if (!host_err) {
1735
- host_err = commit_metadata(tfhp);
1736
- if (!host_err)
1737
- host_err = commit_metadata(ffhp);
1801
+ if (nfsd_has_cached_files(ndentry)) {
1802
+ has_cached = true;
1803
+ goto out_dput_old;
1804
+ } else {
1805
+ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
1806
+ if (!host_err) {
1807
+ host_err = commit_metadata(tfhp);
1808
+ if (!host_err)
1809
+ host_err = commit_metadata(ffhp);
1810
+ }
17381811 }
17391812 out_dput_new:
17401813 dput(ndentry);
....@@ -1747,12 +1820,26 @@
17471820 * as that would do the wrong thing if the two directories
17481821 * were the same, so again we do it by hand.
17491822 */
1750
- fill_post_wcc(ffhp);
1751
- fill_post_wcc(tfhp);
1823
+ if (!has_cached) {
1824
+ fill_post_wcc(ffhp);
1825
+ fill_post_wcc(tfhp);
1826
+ }
17521827 unlock_rename(tdentry, fdentry);
17531828 ffhp->fh_locked = tfhp->fh_locked = false;
17541829 fh_drop_write(ffhp);
17551830
1831
+ /*
1832
+ * If the target dentry has cached open files, then we need to try to
1833
+ * close them prior to doing the rename. Flushing delayed fput
1834
+ * shouldn't be done with locks held however, so we delay it until this
1835
+ * point and then reattempt the whole shebang.
1836
+ */
1837
+ if (has_cached) {
1838
+ has_cached = false;
1839
+ nfsd_close_cached_files(ndentry);
1840
+ dput(ndentry);
1841
+ goto retry;
1842
+ }
17561843 out:
17571844 return err;
17581845 }
....@@ -1788,27 +1875,42 @@
17881875 rdentry = lookup_one_len(fname, dentry, flen);
17891876 host_err = PTR_ERR(rdentry);
17901877 if (IS_ERR(rdentry))
1791
- goto out_nfserr;
1878
+ goto out_drop_write;
17921879
17931880 if (d_really_is_negative(rdentry)) {
17941881 dput(rdentry);
1795
- err = nfserr_noent;
1796
- goto out;
1882
+ host_err = -ENOENT;
1883
+ goto out_drop_write;
17971884 }
17981885
17991886 if (!type)
18001887 type = d_inode(rdentry)->i_mode & S_IFMT;
18011888
1802
- if (type != S_IFDIR)
1889
+ if (type != S_IFDIR) {
1890
+ nfsd_close_cached_files(rdentry);
18031891 host_err = vfs_unlink(dirp, rdentry, NULL);
1804
- else
1892
+ } else {
18051893 host_err = vfs_rmdir(dirp, rdentry);
1894
+ }
1895
+
18061896 if (!host_err)
18071897 host_err = commit_metadata(fhp);
18081898 dput(rdentry);
18091899
1900
+out_drop_write:
1901
+ fh_drop_write(fhp);
18101902 out_nfserr:
1811
- err = nfserrno(host_err);
1903
+ if (host_err == -EBUSY) {
1904
+ /* name is mounted-on. There is no perfect
1905
+ * error status.
1906
+ */
1907
+ if (nfsd_v4client(rqstp))
1908
+ err = nfserr_file_open;
1909
+ else
1910
+ err = nfserr_acces;
1911
+ } else {
1912
+ err = nfserrno(host_err);
1913
+ }
18121914 out:
18131915 return err;
18141916 }
....@@ -1990,6 +2092,235 @@
19902092 return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
19912093 }
19922094
2095
+#ifdef CONFIG_NFSD_V4
2096
+/*
2097
+ * Helper function to translate error numbers. In the case of xattr operations,
2098
+ * some error codes need to be translated outside of the standard translations.
2099
+ *
2100
+ * ENODATA needs to be translated to nfserr_noxattr.
2101
+ * E2BIG to nfserr_xattr2big.
2102
+ *
2103
+ * Additionally, vfs_listxattr can return -ERANGE. This means that the
2104
+ * file has too many extended attributes to retrieve inside an
2105
+ * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation:
2106
+ * filesystems will allow the adding of extended attributes until they hit
2107
+ * their own internal limit. This limit may be larger than XATTR_LIST_MAX.
2108
+ * So, at that point, the attributes are present and valid, but can't
2109
+ * be retrieved using listxattr, since the upper level xattr code enforces
2110
+ * the XATTR_LIST_MAX limit.
2111
+ *
2112
+ * This bug means that we need to deal with listxattr returning -ERANGE. The
2113
+ * best mapping is to return TOOSMALL.
2114
+ */
2115
+static __be32
2116
+nfsd_xattr_errno(int err)
2117
+{
2118
+ switch (err) {
2119
+ case -ENODATA:
2120
+ return nfserr_noxattr;
2121
+ case -E2BIG:
2122
+ return nfserr_xattr2big;
2123
+ case -ERANGE:
2124
+ return nfserr_toosmall;
2125
+ }
2126
+ return nfserrno(err);
2127
+}
2128
+
2129
+/*
2130
+ * Retrieve the specified user extended attribute. To avoid always
2131
+ * having to allocate the maximum size (since we are not getting
2132
+ * a maximum size from the RPC), do a probe + alloc. Hold a reader
2133
+ * lock on i_rwsem to prevent the extended attribute from changing
2134
+ * size while we're doing this.
2135
+ */
2136
+__be32
2137
+nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
2138
+ void **bufp, int *lenp)
2139
+{
2140
+ ssize_t len;
2141
+ __be32 err;
2142
+ char *buf;
2143
+ struct inode *inode;
2144
+ struct dentry *dentry;
2145
+
2146
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
2147
+ if (err)
2148
+ return err;
2149
+
2150
+ err = nfs_ok;
2151
+ dentry = fhp->fh_dentry;
2152
+ inode = d_inode(dentry);
2153
+
2154
+ inode_lock_shared(inode);
2155
+
2156
+ len = vfs_getxattr(dentry, name, NULL, 0);
2157
+
2158
+ /*
2159
+ * Zero-length attribute, just return.
2160
+ */
2161
+ if (len == 0) {
2162
+ *bufp = NULL;
2163
+ *lenp = 0;
2164
+ goto out;
2165
+ }
2166
+
2167
+ if (len < 0) {
2168
+ err = nfsd_xattr_errno(len);
2169
+ goto out;
2170
+ }
2171
+
2172
+ if (len > *lenp) {
2173
+ err = nfserr_toosmall;
2174
+ goto out;
2175
+ }
2176
+
2177
+ buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS);
2178
+ if (buf == NULL) {
2179
+ err = nfserr_jukebox;
2180
+ goto out;
2181
+ }
2182
+
2183
+ len = vfs_getxattr(dentry, name, buf, len);
2184
+ if (len <= 0) {
2185
+ kvfree(buf);
2186
+ buf = NULL;
2187
+ err = nfsd_xattr_errno(len);
2188
+ }
2189
+
2190
+ *lenp = len;
2191
+ *bufp = buf;
2192
+
2193
+out:
2194
+ inode_unlock_shared(inode);
2195
+
2196
+ return err;
2197
+}
2198
+
2199
+/*
2200
+ * Retrieve the xattr names. Since we can't know how many are
2201
+ * user extended attributes, we must get all attributes here,
2202
+ * and have the XDR encode filter out the "user." ones.
2203
+ *
2204
+ * While this could always just allocate an XATTR_LIST_MAX
2205
+ * buffer, that's a waste, so do a probe + allocate. To
2206
+ * avoid any changes between the probe and allocate, wrap
2207
+ * this in inode_lock.
2208
+ */
2209
+__be32
2210
+nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp,
2211
+ int *lenp)
2212
+{
2213
+ ssize_t len;
2214
+ __be32 err;
2215
+ char *buf;
2216
+ struct inode *inode;
2217
+ struct dentry *dentry;
2218
+
2219
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
2220
+ if (err)
2221
+ return err;
2222
+
2223
+ dentry = fhp->fh_dentry;
2224
+ inode = d_inode(dentry);
2225
+ *lenp = 0;
2226
+
2227
+ inode_lock_shared(inode);
2228
+
2229
+ len = vfs_listxattr(dentry, NULL, 0);
2230
+ if (len <= 0) {
2231
+ err = nfsd_xattr_errno(len);
2232
+ goto out;
2233
+ }
2234
+
2235
+ if (len > XATTR_LIST_MAX) {
2236
+ err = nfserr_xattr2big;
2237
+ goto out;
2238
+ }
2239
+
2240
+ /*
2241
+ * We're holding i_rwsem - use GFP_NOFS.
2242
+ */
2243
+ buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS);
2244
+ if (buf == NULL) {
2245
+ err = nfserr_jukebox;
2246
+ goto out;
2247
+ }
2248
+
2249
+ len = vfs_listxattr(dentry, buf, len);
2250
+ if (len <= 0) {
2251
+ kvfree(buf);
2252
+ err = nfsd_xattr_errno(len);
2253
+ goto out;
2254
+ }
2255
+
2256
+ *lenp = len;
2257
+ *bufp = buf;
2258
+
2259
+ err = nfs_ok;
2260
+out:
2261
+ inode_unlock_shared(inode);
2262
+
2263
+ return err;
2264
+}
2265
+
2266
+/*
2267
+ * Removexattr and setxattr need to call fh_lock to both lock the inode
2268
+ * and set the change attribute. Since the top-level vfs_removexattr
2269
+ * and vfs_setxattr calls already do their own inode_lock calls, call
2270
+ * the _locked variant. Pass in a NULL pointer for delegated_inode,
2271
+ * and let the client deal with NFS4ERR_DELAY (same as with e.g.
2272
+ * setattr and remove).
2273
+ */
2274
+__be32
2275
+nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
2276
+{
2277
+ __be32 err;
2278
+ int ret;
2279
+
2280
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
2281
+ if (err)
2282
+ return err;
2283
+
2284
+ ret = fh_want_write(fhp);
2285
+ if (ret)
2286
+ return nfserrno(ret);
2287
+
2288
+ fh_lock(fhp);
2289
+
2290
+ ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL);
2291
+
2292
+ fh_unlock(fhp);
2293
+ fh_drop_write(fhp);
2294
+
2295
+ return nfsd_xattr_errno(ret);
2296
+}
2297
+
2298
+__be32
2299
+nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
2300
+ void *buf, u32 len, u32 flags)
2301
+{
2302
+ __be32 err;
2303
+ int ret;
2304
+
2305
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
2306
+ if (err)
2307
+ return err;
2308
+
2309
+ ret = fh_want_write(fhp);
2310
+ if (ret)
2311
+ return nfserrno(ret);
2312
+ fh_lock(fhp);
2313
+
2314
+ ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags,
2315
+ NULL);
2316
+
2317
+ fh_unlock(fhp);
2318
+ fh_drop_write(fhp);
2319
+
2320
+ return nfsd_xattr_errno(ret);
2321
+}
2322
+#endif
2323
+
19932324 /*
19942325 * Check for a user's access permissions to this inode.
19952326 */
....@@ -2073,64 +2404,4 @@
20732404 err = inode_permission(inode, MAY_EXEC);
20742405
20752406 return err? nfserrno(err) : 0;
2076
-}
2077
-
2078
-void
2079
-nfsd_racache_shutdown(void)
2080
-{
2081
- struct raparms *raparm, *last_raparm;
2082
- unsigned int i;
2083
-
2084
- dprintk("nfsd: freeing readahead buffers.\n");
2085
-
2086
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2087
- raparm = raparm_hash[i].pb_head;
2088
- while(raparm) {
2089
- last_raparm = raparm;
2090
- raparm = raparm->p_next;
2091
- kfree(last_raparm);
2092
- }
2093
- raparm_hash[i].pb_head = NULL;
2094
- }
2095
-}
2096
-/*
2097
- * Initialize readahead param cache
2098
- */
2099
-int
2100
-nfsd_racache_init(int cache_size)
2101
-{
2102
- int i;
2103
- int j = 0;
2104
- int nperbucket;
2105
- struct raparms **raparm = NULL;
2106
-
2107
-
2108
- if (raparm_hash[0].pb_head)
2109
- return 0;
2110
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2111
- nperbucket = max(2, nperbucket);
2112
- cache_size = nperbucket * RAPARM_HASH_SIZE;
2113
-
2114
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
2115
-
2116
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2117
- spin_lock_init(&raparm_hash[i].pb_lock);
2118
-
2119
- raparm = &raparm_hash[i].pb_head;
2120
- for (j = 0; j < nperbucket; j++) {
2121
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
2122
- if (!*raparm)
2123
- goto out_nomem;
2124
- raparm = &(*raparm)->p_next;
2125
- }
2126
- *raparm = NULL;
2127
- }
2128
-
2129
- nfsdstats.ra_size = cache_size;
2130
- return 0;
2131
-
2132
-out_nomem:
2133
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
2134
- nfsd_racache_shutdown();
2135
- return -ENOMEM;
21362407 }