hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/fs/nfs/flexfilelayout/flexfilelayout.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Module for pnfs flexfile layout driver.
34 *
....@@ -7,8 +8,10 @@
78 */
89
910 #include <linux/nfs_fs.h>
11
+#include <linux/nfs_mount.h>
1012 #include <linux/nfs_page.h>
1113 #include <linux/module.h>
14
+#include <linux/sched/mm.h>
1215
1316 #include <linux/sunrpc/metrics.h>
1417
....@@ -27,9 +30,9 @@
2730 #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
2831 #define FF_LAYOUTRETURN_MAXERR 20
2932
33
+static unsigned short io_maxretrans;
3034
31
-static struct group_info *ff_zero_group;
32
-
35
+static const struct pnfs_commit_ops ff_layout_commit_ops;
3336 static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
3437 struct nfs_pgio_header *hdr);
3538 static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
....@@ -46,9 +49,11 @@
4649
4750 ffl = kzalloc(sizeof(*ffl), gfp_flags);
4851 if (ffl) {
52
+ pnfs_init_ds_commit_info(&ffl->commit_info);
4953 INIT_LIST_HEAD(&ffl->error_list);
5054 INIT_LIST_HEAD(&ffl->mirrors);
5155 ffl->last_report_time = ktime_get();
56
+ ffl->commit_info.ops = &ff_layout_commit_ops;
5257 return &ffl->generic_hdr;
5358 } else
5459 return NULL;
....@@ -57,14 +62,14 @@
5762 static void
5863 ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
5964 {
65
+ struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(lo);
6066 struct nfs4_ff_layout_ds_err *err, *n;
6167
62
- list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list,
63
- list) {
68
+ list_for_each_entry_safe(err, n, &ffl->error_list, list) {
6469 list_del(&err->list);
6570 kfree(err);
6671 }
67
- kfree(FF_LAYOUT_FROM_HDR(lo));
72
+ kfree_rcu(ffl, generic_hdr.plh_rcu);
6873 }
6974
7075 static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
....@@ -226,16 +231,14 @@
226231
227232 static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
228233 {
229
- struct rpc_cred *cred;
234
+ const struct cred *cred;
230235
231236 ff_layout_remove_mirror(mirror);
232237 kfree(mirror->fh_versions);
233238 cred = rcu_access_pointer(mirror->ro_cred);
234
- if (cred)
235
- put_rpccred(cred);
239
+ put_cred(cred);
236240 cred = rcu_access_pointer(mirror->rw_cred);
237
- if (cred)
238
- put_rpccred(cred);
241
+ put_cred(cred);
239242 nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
240243 kfree(mirror);
241244 }
....@@ -248,36 +251,10 @@
248251
249252 static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
250253 {
251
- int i;
254
+ u32 i;
252255
253
- if (fls->mirror_array) {
254
- for (i = 0; i < fls->mirror_array_cnt; i++) {
255
- /* normally mirror_ds is freed in
256
- * .free_deviceid_node but we still do it here
257
- * for .alloc_lseg error path */
258
- ff_layout_put_mirror(fls->mirror_array[i]);
259
- }
260
- kfree(fls->mirror_array);
261
- fls->mirror_array = NULL;
262
- }
263
-}
264
-
265
-static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr)
266
-{
267
- int ret = 0;
268
-
269
- dprintk("--> %s\n", __func__);
270
-
271
- /* FIXME: remove this check when layout segment support is added */
272
- if (lgr->range.offset != 0 ||
273
- lgr->range.length != NFS4_MAX_UINT64) {
274
- dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
275
- __func__);
276
- ret = -EINVAL;
277
- }
278
-
279
- dprintk("--> %s returns %d\n", __func__, ret);
280
- return ret;
256
+ for (i = 0; i < fls->mirror_array_cnt; i++)
257
+ ff_layout_put_mirror(fls->mirror_array[i]);
281258 }
282259
283260 static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
....@@ -286,6 +263,23 @@
286263 ff_layout_free_mirror_array(fls);
287264 kfree(fls);
288265 }
266
+}
267
+
268
+/*
+ * Report whether two layout segments reference the same set of mirrors:
+ * the mirror counts must be equal and the mirror_array entries must be
+ * pointer-identical at every index.  Returns true on a full match and
+ * false otherwise.
+ *
+ * Each segment must be viewed through its own pointer; deriving both
+ * views from l1 would compare a segment with itself and always match.
+ */
+static bool
269
+ff_lseg_match_mirrors(struct pnfs_layout_segment *l1,
270
+ struct pnfs_layout_segment *l2)
271
+{
272
+ const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1);
273
+ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2);
274
+ u32 i;
275
+
276
+ if (fl1->mirror_array_cnt != fl2->mirror_array_cnt)
277
+ return false;
278
+ for (i = 0; i < fl1->mirror_array_cnt; i++) {
279
+ if (fl1->mirror_array[i] != fl2->mirror_array[i])
280
+ return false;
281
+ }
282
+ return true;
289283 }
290284
291285 static bool
....@@ -322,6 +316,8 @@
322316 new_end = pnfs_calc_offset_end(new->pls_range.offset,
323317 new->pls_range.length);
324318 if (new_end < old->pls_range.offset)
319
+ return false;
320
+ if (!ff_lseg_match_mirrors(new, old))
325321 return false;
326322
327323 /* Mergeable: copy info from 'old' to 'new' */
....@@ -400,21 +396,20 @@
400396 goto out_err_free;
401397
402398 rc = -ENOMEM;
403
- fls = kzalloc(sizeof(*fls), gfp_flags);
399
+ fls = kzalloc(struct_size(fls, mirror_array, mirror_array_cnt),
400
+ gfp_flags);
404401 if (!fls)
405402 goto out_err_free;
406403
407404 fls->mirror_array_cnt = mirror_array_cnt;
408405 fls->stripe_unit = stripe_unit;
409
- fls->mirror_array = kcalloc(fls->mirror_array_cnt,
410
- sizeof(fls->mirror_array[0]), gfp_flags);
411
- if (fls->mirror_array == NULL)
412
- goto out_err_free;
413406
414407 for (i = 0; i < fls->mirror_array_cnt; i++) {
415408 struct nfs4_ff_layout_mirror *mirror;
416
- struct auth_cred acred = { .group_info = ff_zero_group };
417
- struct rpc_cred __rcu *cred;
409
+ struct cred *kcred;
410
+ const struct cred __rcu *cred;
411
+ kuid_t uid;
412
+ kgid_t gid;
418413 u32 ds_count, fh_count, id;
419414 int j;
420415
....@@ -482,21 +477,28 @@
482477 if (rc)
483478 goto out_err_free;
484479
485
- acred.uid = make_kuid(&init_user_ns, id);
480
+ uid = make_kuid(&init_user_ns, id);
486481
487482 /* group */
488483 rc = decode_name(&stream, &id);
489484 if (rc)
490485 goto out_err_free;
491486
492
- acred.gid = make_kgid(&init_user_ns, id);
487
+ gid = make_kgid(&init_user_ns, id);
493488
494
- /* find the cred for it */
495
- rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags));
496
- if (IS_ERR(cred)) {
497
- rc = PTR_ERR(cred);
498
- goto out_err_free;
489
+ if (gfp_flags & __GFP_FS)
490
+ kcred = prepare_kernel_cred(NULL);
491
+ else {
492
+ unsigned int nofs_flags = memalloc_nofs_save();
493
+ kcred = prepare_kernel_cred(NULL);
494
+ memalloc_nofs_restore(nofs_flags);
499495 }
496
+ rc = -ENOMEM;
497
+ if (!kcred)
498
+ goto out_err_free;
499
+ kcred->fsuid = uid;
500
+ kcred->fsgid = gid;
501
+ cred = RCU_INITIALIZER(kcred);
500502
501503 if (lgr->range.iomode == IOMODE_READ)
502504 rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
....@@ -519,8 +521,8 @@
519521
520522 dprintk("%s: iomode %s uid %u gid %u\n", __func__,
521523 lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
522
- from_kuid(&init_user_ns, acred.uid),
523
- from_kgid(&init_user_ns, acred.gid));
524
+ from_kuid(&init_user_ns, uid),
525
+ from_kgid(&init_user_ns, gid));
524526 }
525527
526528 p = xdr_inline_decode(&stream, 4);
....@@ -536,9 +538,6 @@
536538
537539 out_sort_mirrors:
538540 ff_layout_sort_mirrors(fls);
539
- rc = ff_layout_check_layout(lgr);
540
- if (rc)
541
- goto out_err_free;
542541 ret = &fls->generic_hdr;
543542 dprintk("<-- %s (success)\n", __func__);
544543 out_free_page:
....@@ -549,17 +548,6 @@
549548 ret = ERR_PTR(rc);
550549 dprintk("<-- %s (%d)\n", __func__, rc);
551550 goto out_free_page;
552
-}
553
-
554
-static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
555
-{
556
- struct pnfs_layout_segment *lseg;
557
-
558
- list_for_each_entry(lseg, &layout->plh_segs, pls_list)
559
- if (lseg->pls_range.iomode == IOMODE_RW)
560
- return true;
561
-
562
- return false;
563551 }
564552
565553 static void
....@@ -576,21 +564,10 @@
576564 ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
577565 inode = ffl->generic_hdr.plh_inode;
578566 spin_lock(&inode->i_lock);
579
- if (!ff_layout_has_rw_segments(lseg->pls_layout)) {
580
- ffl->commit_info.nbuckets = 0;
581
- kfree(ffl->commit_info.buckets);
582
- ffl->commit_info.buckets = NULL;
583
- }
567
+ pnfs_generic_ds_cinfo_release_lseg(&ffl->commit_info, lseg);
584568 spin_unlock(&inode->i_lock);
585569 }
586570 _ff_layout_free_lseg(fls);
587
-}
588
-
589
-/* Return 1 until we have multiple lsegs support */
590
-static int
591
-ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
592
-{
593
- return 1;
594571 }
595572
596573 static void
....@@ -737,74 +714,94 @@
737714 spin_unlock(&mirror->lock);
738715 }
739716
740
-static int
741
-ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
742
- struct nfs_commit_info *cinfo,
743
- gfp_t gfp_flags)
717
+static void
718
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
719
+{
720
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
721
+
722
+ if (devid)
723
+ nfs4_mark_deviceid_unavailable(devid);
724
+}
725
+
726
+static void
727
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
728
+{
729
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
730
+
731
+ if (devid)
732
+ nfs4_mark_deviceid_available(devid);
733
+}
734
+
735
+static struct nfs4_pnfs_ds *
736
+ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
737
+ u32 start_idx, u32 *best_idx,
738
+ bool check_device)
744739 {
745740 struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
746
- struct pnfs_commit_bucket *buckets;
747
- int size;
741
+ struct nfs4_ff_layout_mirror *mirror;
742
+ struct nfs4_pnfs_ds *ds;
743
+ bool fail_return = false;
744
+ u32 idx;
748745
749
- if (cinfo->ds->nbuckets != 0) {
750
- /* This assumes there is only one RW lseg per file.
751
- * To support multiple lseg per file, we need to
752
- * change struct pnfs_commit_bucket to allow dynamic
753
- * increasing nbuckets.
754
- */
755
- return 0;
746
+ /* mirrors are initially sorted by efficiency */
747
+ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
748
+ if (idx+1 == fls->mirror_array_cnt)
749
+ fail_return = !check_device;
750
+
751
+ mirror = FF_LAYOUT_COMP(lseg, idx);
752
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
753
+ if (!ds)
754
+ continue;
755
+
756
+ if (check_device &&
757
+ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
758
+ continue;
759
+
760
+ *best_idx = idx;
761
+ return ds;
756762 }
757763
758
- size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg);
764
+ return NULL;
765
+}
759766
760
- buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
761
- gfp_flags);
762
- if (!buckets)
763
- return -ENOMEM;
764
- else {
765
- int i;
767
+static struct nfs4_pnfs_ds *
768
+ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
769
+ u32 start_idx, u32 *best_idx)
770
+{
771
+ return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
772
+}
766773
767
- spin_lock(&cinfo->inode->i_lock);
768
- if (cinfo->ds->nbuckets != 0)
769
- kfree(buckets);
770
- else {
771
- cinfo->ds->buckets = buckets;
772
- cinfo->ds->nbuckets = size;
773
- for (i = 0; i < size; i++) {
774
- INIT_LIST_HEAD(&buckets[i].written);
775
- INIT_LIST_HEAD(&buckets[i].committing);
776
- /* mark direct verifier as unset */
777
- buckets[i].direct_verf.committed =
778
- NFS_INVALID_STABLE_HOW;
779
- }
780
- }
781
- spin_unlock(&cinfo->inode->i_lock);
782
- return 0;
783
- }
774
+static struct nfs4_pnfs_ds *
775
+ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
776
+ u32 start_idx, u32 *best_idx)
777
+{
778
+ return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
784779 }
785780
786781 static struct nfs4_pnfs_ds *
787782 ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
788
- int start_idx,
789
- int *best_idx)
783
+ u32 start_idx, u32 *best_idx)
790784 {
791
- struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
792785 struct nfs4_pnfs_ds *ds;
793
- bool fail_return = false;
794
- int idx;
795786
796
- /* mirrors are sorted by efficiency */
797
- for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
798
- if (idx+1 == fls->mirror_array_cnt)
799
- fail_return = true;
800
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
801
- if (ds) {
802
- *best_idx = idx;
803
- return ds;
804
- }
805
- }
787
+ ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
788
+ if (ds)
789
+ return ds;
790
+ return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
791
+}
806792
807
- return NULL;
793
+static struct nfs4_pnfs_ds *
794
+ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
795
+ u32 *best_idx)
796
+{
797
+ struct pnfs_layout_segment *lseg = pgio->pg_lseg;
798
+ struct nfs4_pnfs_ds *ds;
799
+
800
+ ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
801
+ best_idx);
802
+ if (ds || !pgio->pg_mirror_idx)
803
+ return ds;
804
+ return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
808805 }
809806
810807 static void
....@@ -814,9 +811,9 @@
814811 {
815812 pnfs_put_lseg(pgio->pg_lseg);
816813 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
817
- req->wb_context,
818
- 0,
819
- NFS4_MAX_UINT64,
814
+ nfs_req_openctx(req),
815
+ req_offset(req),
816
+ req->wb_bytes,
820817 IOMODE_READ,
821818 strict_iomode,
822819 GFP_KERNEL);
....@@ -827,16 +824,24 @@
827824 }
828825
829826 static void
827
+ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
828
+ struct nfs_page *req)
829
+{
830
+ pnfs_generic_pg_check_layout(pgio);
831
+ pnfs_generic_pg_check_range(pgio, req);
832
+}
833
+
834
+static void
830835 ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
831836 struct nfs_page *req)
832837 {
833838 struct nfs_pgio_mirror *pgm;
834839 struct nfs4_ff_layout_mirror *mirror;
835840 struct nfs4_pnfs_ds *ds;
836
- int ds_idx;
841
+ u32 ds_idx;
837842
838843 retry:
839
- pnfs_generic_pg_check_layout(pgio);
844
+ ff_layout_pg_check_layout(pgio, req);
840845 /* Use full layout for now */
841846 if (!pgio->pg_lseg) {
842847 ff_layout_pg_get_read(pgio, req, false);
....@@ -849,32 +854,35 @@
849854 goto out_nolseg;
850855 }
851856
852
- ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
857
+ ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
853858 if (!ds) {
854859 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
855860 goto out_mds;
856
- pnfs_put_lseg(pgio->pg_lseg);
857
- pgio->pg_lseg = NULL;
861
+ pnfs_generic_pg_cleanup(pgio);
858862 /* Sleep for 1 second before retrying */
859863 ssleep(1);
860864 goto retry;
861865 }
862866
863867 mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
864
-
865
- pgio->pg_mirror_idx = ds_idx;
866
-
867
- /* read always uses only one mirror - idx 0 for pgio layer */
868868 pgm = &pgio->pg_mirrors[0];
869869 pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
870870
871
+ pgio->pg_mirror_idx = ds_idx;
872
+
873
+ if (NFS_SERVER(pgio->pg_inode)->flags &
874
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
875
+ pgio->pg_maxretrans = io_maxretrans;
871876 return;
872877 out_nolseg:
873878 if (pgio->pg_error < 0)
874879 return;
875880 out_mds:
876
- pnfs_put_lseg(pgio->pg_lseg);
877
- pgio->pg_lseg = NULL;
881
+ trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
882
+ 0, NFS4_MAX_UINT64, IOMODE_READ,
883
+ NFS_I(pgio->pg_inode)->layout,
884
+ pgio->pg_lseg);
885
+ pgio->pg_maxretrans = 0;
878886 nfs_pageio_reset_read_mds(pgio);
879887 }
880888
....@@ -884,18 +892,16 @@
884892 {
885893 struct nfs4_ff_layout_mirror *mirror;
886894 struct nfs_pgio_mirror *pgm;
887
- struct nfs_commit_info cinfo;
888895 struct nfs4_pnfs_ds *ds;
889
- int i;
890
- int status;
896
+ u32 i;
891897
892898 retry:
893
- pnfs_generic_pg_check_layout(pgio);
899
+ ff_layout_pg_check_layout(pgio, req);
894900 if (!pgio->pg_lseg) {
895901 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
896
- req->wb_context,
897
- 0,
898
- NFS4_MAX_UINT64,
902
+ nfs_req_openctx(req),
903
+ req_offset(req),
904
+ req->wb_bytes,
899905 IOMODE_RW,
900906 false,
901907 GFP_NOFS);
....@@ -909,39 +915,39 @@
909915 if (pgio->pg_lseg == NULL)
910916 goto out_mds;
911917
912
- nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
913
- status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
914
- if (status < 0)
915
- goto out_mds;
916
-
917918 /* Use a direct mapping of ds_idx to pgio mirror_idx */
918919 if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
919920 goto out_eagain;
920921
921922 for (i = 0; i < pgio->pg_mirror_count; i++) {
922
- ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
923
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
924
+ ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
923925 if (!ds) {
924926 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
925927 goto out_mds;
926
- pnfs_put_lseg(pgio->pg_lseg);
927
- pgio->pg_lseg = NULL;
928
+ pnfs_generic_pg_cleanup(pgio);
928929 /* Sleep for 1 second before retrying */
929930 ssleep(1);
930931 goto retry;
931932 }
932933 pgm = &pgio->pg_mirrors[i];
933
- mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
934934 pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
935935 }
936936
937
+ if (NFS_SERVER(pgio->pg_inode)->flags &
938
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
939
+ pgio->pg_maxretrans = io_maxretrans;
937940 return;
938941 out_eagain:
939942 pnfs_generic_pg_cleanup(pgio);
940943 pgio->pg_error = -EAGAIN;
941944 return;
942945 out_mds:
943
- pnfs_put_lseg(pgio->pg_lseg);
944
- pgio->pg_lseg = NULL;
946
+ trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
947
+ 0, NFS4_MAX_UINT64, IOMODE_RW,
948
+ NFS_I(pgio->pg_inode)->layout,
949
+ pgio->pg_lseg);
950
+ pgio->pg_maxretrans = 0;
945951 nfs_pageio_reset_write_mds(pgio);
946952 pgio->pg_error = -EAGAIN;
947953 }
....@@ -952,9 +958,9 @@
952958 {
953959 if (!pgio->pg_lseg) {
954960 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
955
- req->wb_context,
956
- 0,
957
- NFS4_MAX_UINT64,
961
+ nfs_req_openctx(req),
962
+ req_offset(req),
963
+ req->wb_bytes,
958964 IOMODE_RW,
959965 false,
960966 GFP_NOFS);
....@@ -967,10 +973,29 @@
967973 if (pgio->pg_lseg)
968974 return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
969975
976
+ trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode,
977
+ 0, NFS4_MAX_UINT64, IOMODE_RW,
978
+ NFS_I(pgio->pg_inode)->layout,
979
+ pgio->pg_lseg);
970980 /* no lseg means that pnfs is not in use, so no mirroring here */
971981 nfs_pageio_reset_write_mds(pgio);
972982 out:
973983 return 1;
984
+}
985
+
986
+static u32
987
+ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
988
+{
989
+ u32 old = desc->pg_mirror_idx;
990
+
991
+ desc->pg_mirror_idx = idx;
992
+ return old;
993
+}
994
+
995
+static struct nfs_pgio_mirror *
996
+ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
997
+{
998
+ return &desc->pg_mirrors[idx];
974999 }
9751000
9761001 static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
....@@ -986,6 +1011,8 @@
9861011 .pg_doio = pnfs_generic_pg_writepages,
9871012 .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
9881013 .pg_cleanup = pnfs_generic_pg_cleanup,
1014
+ .pg_get_mirror = ff_layout_pg_get_mirror_write,
1015
+ .pg_set_mirror = ff_layout_pg_set_mirror_write,
9891016 };
9901017
9911018 static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
....@@ -1016,8 +1043,24 @@
10161043 hdr->args.count,
10171044 (unsigned long long)hdr->args.offset);
10181045
1046
+ trace_pnfs_mds_fallback_write_done(hdr->inode,
1047
+ hdr->args.offset, hdr->args.count,
1048
+ IOMODE_RW, NFS_I(hdr->inode)->layout,
1049
+ hdr->lseg);
10191050 task->tk_status = pnfs_write_done_resend_to_mds(hdr);
10201051 }
1052
+}
1053
+
1054
+static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
1055
+{
1056
+ u32 idx = hdr->pgio_mirror_idx + 1;
1057
+ u32 new_idx = 0;
1058
+
1059
+ if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
1060
+ ff_layout_send_layouterror(hdr->lseg);
1061
+ else
1062
+ pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
1063
+ pnfs_read_resend_pnfs(hdr, new_idx);
10211064 }
10221065
10231066 static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
....@@ -1025,6 +1068,7 @@
10251068 struct rpc_task *task = &hdr->task;
10261069
10271070 pnfs_layoutcommit_inode(hdr->inode, false);
1071
+ pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
10281072
10291073 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
10301074 dprintk("%s Reset task %5u for i/o through MDS "
....@@ -1035,6 +1079,10 @@
10351079 hdr->args.count,
10361080 (unsigned long long)hdr->args.offset);
10371081
1082
+ trace_pnfs_mds_fallback_read_done(hdr->inode,
1083
+ hdr->args.offset, hdr->args.count,
1084
+ IOMODE_READ, NFS_I(hdr->inode)->layout,
1085
+ hdr->lseg);
10381086 task->tk_status = pnfs_read_done_resend_to_mds(hdr);
10391087 }
10401088 }
....@@ -1043,7 +1091,7 @@
10431091 struct nfs4_state *state,
10441092 struct nfs_client *clp,
10451093 struct pnfs_layout_segment *lseg,
1046
- int idx)
1094
+ u32 idx)
10471095 {
10481096 struct pnfs_layout_hdr *lo = lseg->pls_layout;
10491097 struct inode *inode = lo->plh_inode;
....@@ -1101,7 +1149,7 @@
11011149 nfs4_delete_deviceid(devid->ld, devid->nfs_client,
11021150 &devid->deviceid);
11031151 rpc_wake_up(&tbl->slot_tbl_waitq);
1104
- /* fall through */
1152
+ fallthrough;
11051153 default:
11061154 if (ff_layout_avoid_mds_available_ds(lseg))
11071155 return -NFS4ERR_RESET_TO_PNFS;
....@@ -1117,7 +1165,7 @@
11171165 /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
11181166 static int ff_layout_async_handle_error_v3(struct rpc_task *task,
11191167 struct pnfs_layout_segment *lseg,
1120
- int idx)
1168
+ u32 idx)
11211169 {
11221170 struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11231171
....@@ -1152,12 +1200,14 @@
11521200 struct nfs4_state *state,
11531201 struct nfs_client *clp,
11541202 struct pnfs_layout_segment *lseg,
1155
- int idx)
1203
+ u32 idx)
11561204 {
11571205 int vers = clp->cl_nfs_mod->rpc_vers->number;
11581206
1159
- if (task->tk_status >= 0)
1207
+ if (task->tk_status >= 0) {
1208
+ ff_layout_mark_ds_reachable(lseg, idx);
11601209 return 0;
1210
+ }
11611211
11621212 /* Handle the case of an invalid layout segment */
11631213 if (!pnfs_is_valid_lseg(lseg))
....@@ -1177,10 +1227,11 @@
11771227 }
11781228
11791229 static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
1180
- int idx, u64 offset, u64 length,
1181
- u32 status, int opnum, int error)
1230
+ u32 idx, u64 offset, u64 length,
1231
+ u32 *op_status, int opnum, int error)
11821232 {
11831233 struct nfs4_ff_layout_mirror *mirror;
1234
+ u32 status = *op_status;
11841235 int err;
11851236
11861237 if (status == 0) {
....@@ -1198,29 +1249,39 @@
11981249 case -ENOBUFS:
11991250 case -EPIPE:
12001251 case -EPERM:
1201
- status = NFS4ERR_NXIO;
1252
+ *op_status = status = NFS4ERR_NXIO;
12021253 break;
12031254 case -EACCES:
1204
- status = NFS4ERR_ACCESS;
1255
+ *op_status = status = NFS4ERR_ACCESS;
12051256 break;
12061257 default:
12071258 return;
12081259 }
12091260 }
12101261
1211
- switch (status) {
1212
- case NFS4ERR_DELAY:
1213
- case NFS4ERR_GRACE:
1214
- return;
1215
- default:
1216
- break;
1217
- }
1218
-
12191262 mirror = FF_LAYOUT_COMP(lseg, idx);
12201263 err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
12211264 mirror, offset, length, status, opnum,
12221265 GFP_NOIO);
1223
- pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
1266
+
1267
+ switch (status) {
1268
+ case NFS4ERR_DELAY:
1269
+ case NFS4ERR_GRACE:
1270
+ break;
1271
+ case NFS4ERR_NXIO:
1272
+ ff_layout_mark_ds_unreachable(lseg, idx);
1273
+ /*
1274
+ * Don't return the layout if this is a read and we still
1275
+ * have layouts to try
1276
+ */
1277
+ if (opnum == OP_READ)
1278
+ break;
1279
+ fallthrough;
1280
+ default:
1281
+ pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
1282
+ lseg);
1283
+ }
1284
+
12241285 dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
12251286 }
12261287
....@@ -1230,24 +1291,23 @@
12301291 {
12311292 int err;
12321293
1233
- trace_nfs4_pnfs_read(hdr, task->tk_status);
1234
- if (task->tk_status < 0)
1294
+ if (task->tk_status < 0) {
12351295 ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
12361296 hdr->args.offset, hdr->args.count,
1237
- hdr->res.op_status, OP_READ,
1297
+ &hdr->res.op_status, OP_READ,
12381298 task->tk_status);
1299
+ trace_ff_layout_read_error(hdr);
1300
+ }
1301
+
12391302 err = ff_layout_async_handle_error(task, hdr->args.context->state,
12401303 hdr->ds_clp, hdr->lseg,
12411304 hdr->pgio_mirror_idx);
12421305
1306
+ trace_nfs4_pnfs_read(hdr, err);
12431307 clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
12441308 clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
12451309 switch (err) {
12461310 case -NFS4ERR_RESET_TO_PNFS:
1247
- if (ff_layout_choose_best_ds_for_read(hdr->lseg,
1248
- hdr->pgio_mirror_idx + 1,
1249
- &hdr->pgio_mirror_idx))
1250
- goto out_eagain;
12511311 set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
12521312 return task->tk_status;
12531313 case -NFS4ERR_RESET_TO_MDS:
....@@ -1291,15 +1351,6 @@
12911351 (unsigned long long) NFS_I(inode)->layout->plh_lwb);
12921352 }
12931353
1294
-static bool
1295
-ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)
1296
-{
1297
- /* No mirroring for now */
1298
- struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);
1299
-
1300
- return ff_layout_test_devid_unavailable(node);
1301
-}
1302
-
13031354 static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
13041355 struct nfs_pgio_header *hdr)
13051356 {
....@@ -1329,10 +1380,6 @@
13291380 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
13301381 rpc_exit(task, -EIO);
13311382 return -EIO;
1332
- }
1333
- if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
1334
- rpc_exit(task, -EHOSTDOWN);
1335
- return -EAGAIN;
13361383 }
13371384
13381385 ff_layout_read_record_layoutstats_start(task, hdr);
....@@ -1398,7 +1445,7 @@
13981445
13991446 ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
14001447 if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
1401
- pnfs_read_resend_pnfs(hdr);
1448
+ ff_layout_resend_pnfs_read(hdr);
14021449 else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
14031450 ff_layout_reset_read(hdr);
14041451 pnfs_generic_rw_release(data);
....@@ -1411,16 +1458,19 @@
14111458 loff_t end_offs = 0;
14121459 int err;
14131460
1414
- trace_nfs4_pnfs_write(hdr, task->tk_status);
1415
- if (task->tk_status < 0)
1461
+ if (task->tk_status < 0) {
14161462 ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
14171463 hdr->args.offset, hdr->args.count,
1418
- hdr->res.op_status, OP_WRITE,
1464
+ &hdr->res.op_status, OP_WRITE,
14191465 task->tk_status);
1466
+ trace_ff_layout_write_error(hdr);
1467
+ }
1468
+
14201469 err = ff_layout_async_handle_error(task, hdr->args.context->state,
14211470 hdr->ds_clp, hdr->lseg,
14221471 hdr->pgio_mirror_idx);
14231472
1473
+ trace_nfs4_pnfs_write(hdr, err);
14241474 clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
14251475 clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
14261476 switch (err) {
....@@ -1454,15 +1504,18 @@
14541504 {
14551505 int err;
14561506
1457
- trace_nfs4_pnfs_commit_ds(data, task->tk_status);
1458
- if (task->tk_status < 0)
1507
+ if (task->tk_status < 0) {
14591508 ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
14601509 data->args.offset, data->args.count,
1461
- data->res.op_status, OP_COMMIT,
1510
+ &data->res.op_status, OP_COMMIT,
14621511 task->tk_status);
1512
+ trace_ff_layout_commit_error(data);
1513
+ }
1514
+
14631515 err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
14641516 data->lseg, data->ds_commit_index);
14651517
1518
+ trace_nfs4_pnfs_commit_ds(data, err);
14661519 switch (err) {
14671520 case -NFS4ERR_RESET_TO_PNFS:
14681521 pnfs_generic_prepare_to_resend_writes(data);
....@@ -1509,11 +1562,6 @@
15091562 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
15101563 rpc_exit(task, -EIO);
15111564 return -EIO;
1512
- }
1513
-
1514
- if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
1515
- rpc_exit(task, -EHOSTDOWN);
1516
- return -EAGAIN;
15171565 }
15181566
15191567 ff_layout_write_record_layoutstats_start(task, hdr);
....@@ -1571,9 +1619,10 @@
15711619 struct nfs_pgio_header *hdr = data;
15721620
15731621 ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
1574
- if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
1622
+ if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
1623
+ ff_layout_send_layouterror(hdr->lseg);
15751624 ff_layout_reset_write(hdr, true);
1576
- else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
1625
+ } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
15771626 ff_layout_reset_write(hdr, false);
15781627 pnfs_generic_rw_release(data);
15791628 }
....@@ -1701,7 +1750,8 @@
17011750 struct pnfs_layout_segment *lseg = hdr->lseg;
17021751 struct nfs4_pnfs_ds *ds;
17031752 struct rpc_clnt *ds_clnt;
1704
- struct rpc_cred *ds_cred;
1753
+ struct nfs4_ff_layout_mirror *mirror;
1754
+ const struct cred *ds_cred;
17051755 loff_t offset = hdr->args.offset;
17061756 u32 idx = hdr->pgio_mirror_idx;
17071757 int vers;
....@@ -1711,20 +1761,21 @@
17111761 __func__, hdr->inode->i_ino,
17121762 hdr->args.pgbase, (size_t)hdr->args.count, offset);
17131763
1714
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
1764
+ mirror = FF_LAYOUT_COMP(lseg, idx);
1765
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
17151766 if (!ds)
17161767 goto out_failed;
17171768
1718
- ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
1769
+ ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
17191770 hdr->inode);
17201771 if (IS_ERR(ds_clnt))
17211772 goto out_failed;
17221773
1723
- ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
1774
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
17241775 if (!ds_cred)
17251776 goto out_failed;
17261777
1727
- vers = nfs4_ff_layout_ds_version(lseg, idx);
1778
+ vers = nfs4_ff_layout_ds_version(mirror);
17281779
17291780 dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
17301781 ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
....@@ -1732,13 +1783,11 @@
17321783 hdr->pgio_done_cb = ff_layout_read_done_cb;
17331784 refcount_inc(&ds->ds_clp->cl_count);
17341785 hdr->ds_clp = ds->ds_clp;
1735
- fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
1786
+ fh = nfs4_ff_layout_select_ds_fh(mirror);
17361787 if (fh)
17371788 hdr->args.fh = fh;
17381789
1739
- if (vers == 4 &&
1740
- !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
1741
- goto out_failed;
1790
+ nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
17421791
17431792 /*
17441793 * Note that if we ever decide to split across DSes,
....@@ -1752,12 +1801,15 @@
17521801 vers == 3 ? &ff_layout_read_call_ops_v3 :
17531802 &ff_layout_read_call_ops_v4,
17541803 0, RPC_TASK_SOFTCONN);
1755
- put_rpccred(ds_cred);
1804
+ put_cred(ds_cred);
17561805 return PNFS_ATTEMPTED;
17571806
17581807 out_failed:
17591808 if (ff_layout_avoid_mds_available_ds(lseg))
17601809 return PNFS_TRY_AGAIN;
1810
+ trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
1811
+ hdr->args.offset, hdr->args.count,
1812
+ IOMODE_READ, NFS_I(hdr->inode)->layout, lseg);
17611813 return PNFS_NOT_ATTEMPTED;
17621814 }
17631815
....@@ -1768,26 +1820,28 @@
17681820 struct pnfs_layout_segment *lseg = hdr->lseg;
17691821 struct nfs4_pnfs_ds *ds;
17701822 struct rpc_clnt *ds_clnt;
1771
- struct rpc_cred *ds_cred;
1823
+ struct nfs4_ff_layout_mirror *mirror;
1824
+ const struct cred *ds_cred;
17721825 loff_t offset = hdr->args.offset;
17731826 int vers;
17741827 struct nfs_fh *fh;
1775
- int idx = hdr->pgio_mirror_idx;
1828
+ u32 idx = hdr->pgio_mirror_idx;
17761829
1777
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
1830
+ mirror = FF_LAYOUT_COMP(lseg, idx);
1831
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
17781832 if (!ds)
17791833 goto out_failed;
17801834
1781
- ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
1835
+ ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
17821836 hdr->inode);
17831837 if (IS_ERR(ds_clnt))
17841838 goto out_failed;
17851839
1786
- ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
1840
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
17871841 if (!ds_cred)
17881842 goto out_failed;
17891843
1790
- vers = nfs4_ff_layout_ds_version(lseg, idx);
1844
+ vers = nfs4_ff_layout_ds_version(mirror);
17911845
17921846 dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
17931847 __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
....@@ -1798,13 +1852,11 @@
17981852 refcount_inc(&ds->ds_clp->cl_count);
17991853 hdr->ds_clp = ds->ds_clp;
18001854 hdr->ds_commit_idx = idx;
1801
- fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
1855
+ fh = nfs4_ff_layout_select_ds_fh(mirror);
18021856 if (fh)
18031857 hdr->args.fh = fh;
18041858
1805
- if (vers == 4 &&
1806
- !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
1807
- goto out_failed;
1859
+ nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
18081860
18091861 /*
18101862 * Note that if we ever decide to split across DSes,
....@@ -1817,12 +1869,15 @@
18171869 vers == 3 ? &ff_layout_write_call_ops_v3 :
18181870 &ff_layout_write_call_ops_v4,
18191871 sync, RPC_TASK_SOFTCONN);
1820
- put_rpccred(ds_cred);
1872
+ put_cred(ds_cred);
18211873 return PNFS_ATTEMPTED;
18221874
18231875 out_failed:
18241876 if (ff_layout_avoid_mds_available_ds(lseg))
18251877 return PNFS_TRY_AGAIN;
1878
+ trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
1879
+ hdr->args.offset, hdr->args.count,
1880
+ IOMODE_RW, NFS_I(hdr->inode)->layout, lseg);
18261881 return PNFS_NOT_ATTEMPTED;
18271882 }
18281883
....@@ -1847,7 +1902,8 @@
18471902 struct pnfs_layout_segment *lseg = data->lseg;
18481903 struct nfs4_pnfs_ds *ds;
18491904 struct rpc_clnt *ds_clnt;
1850
- struct rpc_cred *ds_cred;
1905
+ struct nfs4_ff_layout_mirror *mirror;
1906
+ const struct cred *ds_cred;
18511907 u32 idx;
18521908 int vers, ret;
18531909 struct nfs_fh *fh;
....@@ -1857,20 +1913,21 @@
18571913 goto out_err;
18581914
18591915 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
1860
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
1916
+ mirror = FF_LAYOUT_COMP(lseg, idx);
1917
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
18611918 if (!ds)
18621919 goto out_err;
18631920
1864
- ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
1921
+ ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
18651922 data->inode);
18661923 if (IS_ERR(ds_clnt))
18671924 goto out_err;
18681925
1869
- ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
1926
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
18701927 if (!ds_cred)
18711928 goto out_err;
18721929
1873
- vers = nfs4_ff_layout_ds_version(lseg, idx);
1930
+ vers = nfs4_ff_layout_ds_version(mirror);
18741931
18751932 dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
18761933 data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
....@@ -1887,7 +1944,7 @@
18871944 vers == 3 ? &ff_layout_commit_call_ops_v3 :
18881945 &ff_layout_commit_call_ops_v4,
18891946 how, RPC_TASK_SOFTCONN);
1890
- put_rpccred(ds_cred);
1947
+ put_cred(ds_cred);
18911948 return ret;
18921949 out_err:
18931950 pnfs_generic_prepare_to_resend_writes(data);
....@@ -1912,6 +1969,33 @@
19121969 return NULL;
19131970
19141971 return &FF_LAYOUT_FROM_HDR(layout)->commit_info;
1972
+}
1973
+
1974
+static void
1975
+ff_layout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
1976
+ struct pnfs_layout_segment *lseg)
1977
+{
1978
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
1979
+ struct inode *inode = lseg->pls_layout->plh_inode;
1980
+ struct pnfs_commit_array *array, *new;
1981
+
1982
+ new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, GFP_NOIO);
1983
+ if (new) {
1984
+ spin_lock(&inode->i_lock);
1985
+ array = pnfs_add_commit_array(fl_cinfo, new, lseg);
1986
+ spin_unlock(&inode->i_lock);
1987
+ if (array != new)
1988
+ pnfs_free_commit_array(new);
1989
+ }
1990
+}
1991
+
1992
+static void
1993
+ff_layout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
1994
+ struct inode *inode)
1995
+{
1996
+ spin_lock(&inode->i_lock);
1997
+ pnfs_generic_ds_cinfo_destroy(fl_cinfo);
1998
+ spin_unlock(&inode->i_lock);
19151999 }
19162000
19172001 static void
....@@ -2034,7 +2118,7 @@
20342118
20352119 dprintk("%s: Begin\n", __func__);
20362120
2037
- xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);
2121
+ xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);
20382122
20392123 ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
20402124 ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);
....@@ -2099,6 +2183,52 @@
20992183 out_nomem:
21002184 return -ENOMEM;
21012185 }
2186
+
2187
+#ifdef CONFIG_NFS_V4_2
2188
+void
2189
+ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
2190
+{
2191
+ struct pnfs_layout_hdr *lo = lseg->pls_layout;
2192
+ struct nfs42_layout_error *errors;
2193
+ LIST_HEAD(head);
2194
+
2195
+ if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR))
2196
+ return;
2197
+ ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1);
2198
+ if (list_empty(&head))
2199
+ return;
2200
+
2201
+ errors = kmalloc_array(NFS42_LAYOUTERROR_MAX,
2202
+ sizeof(*errors), GFP_NOFS);
2203
+ if (errors != NULL) {
2204
+ const struct nfs4_ff_layout_ds_err *pos;
2205
+ size_t n = 0;
2206
+
2207
+ list_for_each_entry(pos, &head, list) {
2208
+ errors[n].offset = pos->offset;
2209
+ errors[n].length = pos->length;
2210
+ nfs4_stateid_copy(&errors[n].stateid, &pos->stateid);
2211
+ errors[n].errors[0].dev_id = pos->deviceid;
2212
+ errors[n].errors[0].status = pos->status;
2213
+ errors[n].errors[0].opnum = pos->opnum;
2214
+ n++;
2215
+ if (!list_is_last(&pos->list, &head) &&
2216
+ n < NFS42_LAYOUTERROR_MAX)
2217
+ continue;
2218
+ if (nfs42_proc_layouterror(lseg, errors, n) < 0)
2219
+ break;
2220
+ n = 0;
2221
+ }
2222
+ kfree(errors);
2223
+ }
2224
+ ff_layout_free_ds_ioerr(&head);
2225
+}
2226
+#else
2227
+void
2228
+ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
2229
+{
2230
+}
2231
+#endif
21022232
21032233 static int
21042234 ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
....@@ -2353,11 +2483,22 @@
23532483 return 0;
23542484 }
23552485
2486
+static const struct pnfs_commit_ops ff_layout_commit_ops = {
2487
+ .setup_ds_info = ff_layout_setup_ds_info,
2488
+ .release_ds_info = ff_layout_release_ds_info,
2489
+ .mark_request_commit = pnfs_layout_mark_request_commit,
2490
+ .clear_request_commit = pnfs_generic_clear_request_commit,
2491
+ .scan_commit_lists = pnfs_generic_scan_commit_lists,
2492
+ .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
2493
+ .commit_pagelist = ff_layout_commit_pagelist,
2494
+};
2495
+
23562496 static struct pnfs_layoutdriver_type flexfilelayout_type = {
23572497 .id = LAYOUT_FLEX_FILES,
23582498 .name = "LAYOUT_FLEX_FILES",
23592499 .owner = THIS_MODULE,
23602500 .flags = PNFS_LAYOUTGET_ON_OPEN,
2501
+ .max_layoutget_response = 4096, /* 1 page or so... */
23612502 .set_layoutdriver = ff_layout_set_layoutdriver,
23622503 .alloc_layout_hdr = ff_layout_alloc_layout_hdr,
23632504 .free_layout_hdr = ff_layout_free_layout_hdr,
....@@ -2368,11 +2509,6 @@
23682509 .pg_write_ops = &ff_layout_pg_write_ops,
23692510 .get_ds_info = ff_layout_get_ds_info,
23702511 .free_deviceid_node = ff_layout_free_deviceid_node,
2371
- .mark_request_commit = pnfs_layout_mark_request_commit,
2372
- .clear_request_commit = pnfs_generic_clear_request_commit,
2373
- .scan_commit_lists = pnfs_generic_scan_commit_lists,
2374
- .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
2375
- .commit_pagelist = ff_layout_commit_pagelist,
23762512 .read_pagelist = ff_layout_read_pagelist,
23772513 .write_pagelist = ff_layout_write_pagelist,
23782514 .alloc_deviceid_node = ff_layout_alloc_deviceid_node,
....@@ -2385,11 +2521,6 @@
23852521 {
23862522 printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
23872523 __func__);
2388
- if (!ff_zero_group) {
2389
- ff_zero_group = groups_alloc(0);
2390
- if (!ff_zero_group)
2391
- return -ENOMEM;
2392
- }
23932524 return pnfs_register_layoutdriver(&flexfilelayout_type);
23942525 }
23952526
....@@ -2398,10 +2529,6 @@
23982529 printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
23992530 __func__);
24002531 pnfs_unregister_layoutdriver(&flexfilelayout_type);
2401
- if (ff_zero_group) {
2402
- put_group_info(ff_zero_group);
2403
- ff_zero_group = NULL;
2404
- }
24052532 }
24062533
24072534 MODULE_ALIAS("nfs-layouttype4-4");
....@@ -2411,3 +2538,7 @@
24112538
24122539 module_init(nfs4flexfilelayout_init);
24132540 module_exit(nfs4flexfilelayout_exit);
2541
+
2542
+module_param(io_maxretrans, ushort, 0644);
2543
+MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client "
2544
+ "retries an I/O request before returning an error. ");