| .. | .. |
|---|
| 92 | 92 | return local; |
|---|
| 93 | 93 | } |
|---|
| 94 | 94 | |
|---|
| 95 | +const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) |
|---|
| 96 | +{ |
|---|
| 97 | + return find_pnfs_driver(id); |
|---|
| 98 | +} |
|---|
| 99 | + |
|---|
| 100 | +void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) |
|---|
| 101 | +{ |
|---|
| 102 | + if (ld) |
|---|
| 103 | + module_put(ld->owner); |
|---|
| 104 | +} |
|---|
| 105 | + |
|---|
| 95 | 106 | void |
|---|
| 96 | 107 | unset_pnfs_layoutdriver(struct nfs_server *nfss) |
|---|
| 97 | 108 | { |
|---|
| .. | .. |
|---|
| 268 | 279 | struct nfs_server *server = NFS_SERVER(lo->plh_inode); |
|---|
| 269 | 280 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
|---|
| 270 | 281 | |
|---|
| 271 | | - if (!list_empty(&lo->plh_layouts)) { |
|---|
| 282 | + if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) { |
|---|
| 272 | 283 | struct nfs_client *clp = server->nfs_client; |
|---|
| 273 | 284 | |
|---|
| 274 | 285 | spin_lock(&clp->cl_lock); |
|---|
| 275 | | - list_del_init(&lo->plh_layouts); |
|---|
| 286 | + list_del_rcu(&lo->plh_layouts); |
|---|
| 276 | 287 | spin_unlock(&clp->cl_lock); |
|---|
| 277 | 288 | } |
|---|
| 278 | | - put_rpccred(lo->plh_lc_cred); |
|---|
| 289 | + put_cred(lo->plh_lc_cred); |
|---|
| 279 | 290 | return ld->free_layout_hdr(lo); |
|---|
| 280 | 291 | } |
|---|
| 281 | 292 | |
|---|
| .. | .. |
|---|
| 314 | 325 | } |
|---|
| 315 | 326 | } |
|---|
| 316 | 327 | |
|---|
| 328 | +static struct inode * |
|---|
| 329 | +pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo) |
|---|
| 330 | +{ |
|---|
| 331 | + struct inode *inode = igrab(lo->plh_inode); |
|---|
| 332 | + if (inode) |
|---|
| 333 | + return inode; |
|---|
| 334 | + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); |
|---|
| 335 | + return NULL; |
|---|
| 336 | +} |
|---|
| 337 | + |
|---|
| 338 | +/* |
|---|
| 339 | + * Compare 2 layout stateid sequence ids, to see which is newer, |
|---|
| 340 | + * taking into account wraparound issues. |
|---|
| 341 | + */ |
|---|
| 342 | +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) |
|---|
| 343 | +{ |
|---|
| 344 | + return (s32)(s1 - s2) > 0; |
|---|
| 345 | +} |
|---|
| 346 | + |
|---|
| 347 | +static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) |
|---|
| 348 | +{ |
|---|
| 349 | + if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier) |
|---|
| 350 | + lo->plh_barrier = newseq; |
|---|
| 351 | +} |
|---|
| 352 | + |
|---|
| 317 | 353 | static void |
|---|
| 318 | 354 | pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, |
|---|
| 319 | 355 | u32 seq) |
|---|
| .. | .. |
|---|
| 322 | 358 | iomode = IOMODE_ANY; |
|---|
| 323 | 359 | lo->plh_return_iomode = iomode; |
|---|
| 324 | 360 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); |
|---|
| 325 | | - if (seq != 0) { |
|---|
| 326 | | - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); |
|---|
| 361 | + /* |
|---|
| 362 | + * We must set lo->plh_return_seq to avoid livelocks with |
|---|
| 363 | + * pnfs_layout_need_return() |
|---|
| 364 | + */ |
|---|
| 365 | + if (seq == 0) |
|---|
| 366 | + seq = be32_to_cpu(lo->plh_stateid.seqid); |
|---|
| 367 | + if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) |
|---|
| 327 | 368 | lo->plh_return_seq = seq; |
|---|
| 328 | | - } |
|---|
| 369 | + pnfs_barrier_update(lo, seq); |
|---|
| 329 | 370 | } |
|---|
| 330 | 371 | |
|---|
| 331 | 372 | static void |
|---|
| .. | .. |
|---|
| 364 | 405 | } |
|---|
| 365 | 406 | |
|---|
| 366 | 407 | /* |
|---|
| 367 | | - * Update the seqid of a layout stateid |
|---|
| 408 | + * Update the seqid of a layout stateid after receiving |
|---|
| 409 | + * NFS4ERR_OLD_STATEID |
|---|
| 368 | 410 | */ |
|---|
| 369 | | -bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst, |
|---|
| 411 | +bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst, |
|---|
| 370 | 412 | struct pnfs_layout_range *dst_range, |
|---|
| 371 | 413 | struct inode *inode) |
|---|
| 372 | 414 | { |
|---|
| .. | .. |
|---|
| 382 | 424 | |
|---|
| 383 | 425 | spin_lock(&inode->i_lock); |
|---|
| 384 | 426 | lo = NFS_I(inode)->layout; |
|---|
| 385 | | - if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) { |
|---|
| 427 | + if (lo && pnfs_layout_is_valid(lo) && |
|---|
| 428 | + nfs4_stateid_match_other(dst, &lo->plh_stateid)) { |
|---|
| 429 | + /* Is our call using the most recent seqid? If so, bump it */ |
|---|
| 430 | + if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) { |
|---|
| 431 | + nfs4_stateid_seqid_inc(dst); |
|---|
| 432 | + ret = true; |
|---|
| 433 | + goto out; |
|---|
| 434 | + } |
|---|
| 435 | + /* Try to update the seqid to the most recent */ |
|---|
| 386 | 436 | err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0); |
|---|
| 387 | 437 | if (err != -EBUSY) { |
|---|
| 388 | 438 | dst->seqid = lo->plh_stateid.seqid; |
|---|
| .. | .. |
|---|
| 390 | 440 | ret = true; |
|---|
| 391 | 441 | } |
|---|
| 392 | 442 | } |
|---|
| 443 | +out: |
|---|
| 393 | 444 | spin_unlock(&inode->i_lock); |
|---|
| 394 | 445 | pnfs_free_lseg_list(&head); |
|---|
| 395 | 446 | return ret; |
|---|
| .. | .. |
|---|
| 418 | 469 | pnfs_clear_lseg_state(lseg, lseg_list); |
|---|
| 419 | 470 | pnfs_clear_layoutreturn_info(lo); |
|---|
| 420 | 471 | pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); |
|---|
| 472 | + set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); |
|---|
| 421 | 473 | if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && |
|---|
| 422 | 474 | !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) |
|---|
| 423 | 475 | pnfs_clear_layoutreturn_waitbit(lo); |
|---|
| .. | .. |
|---|
| 491 | 543 | { |
|---|
| 492 | 544 | INIT_LIST_HEAD(&lseg->pls_list); |
|---|
| 493 | 545 | INIT_LIST_HEAD(&lseg->pls_lc_list); |
|---|
| 546 | + INIT_LIST_HEAD(&lseg->pls_commits); |
|---|
| 494 | 547 | refcount_set(&lseg->pls_refcount, 1); |
|---|
| 495 | 548 | set_bit(NFS_LSEG_VALID, &lseg->pls_flags); |
|---|
| 496 | 549 | lseg->pls_layout = lo; |
|---|
| .. | .. |
|---|
| 616 | 669 | rv = 1; |
|---|
| 617 | 670 | } |
|---|
| 618 | 671 | return rv; |
|---|
| 619 | | -} |
|---|
| 620 | | - |
|---|
| 621 | | -/* |
|---|
| 622 | | - * Compare 2 layout stateid sequence ids, to see which is newer, |
|---|
| 623 | | - * taking into account wraparound issues. |
|---|
| 624 | | - */ |
|---|
| 625 | | -static bool pnfs_seqid_is_newer(u32 s1, u32 s2) |
|---|
| 626 | | -{ |
|---|
| 627 | | - return (s32)(s1 - s2) > 0; |
|---|
| 628 | 672 | } |
|---|
| 629 | 673 | |
|---|
| 630 | 674 | static bool |
|---|
| .. | .. |
|---|
| 801 | 845 | /* If the sb is being destroyed, just bail */ |
|---|
| 802 | 846 | if (!nfs_sb_active(server->super)) |
|---|
| 803 | 847 | break; |
|---|
| 804 | | - inode = igrab(lo->plh_inode); |
|---|
| 848 | + inode = pnfs_grab_inode_layout_hdr(lo); |
|---|
| 805 | 849 | if (inode != NULL) { |
|---|
| 806 | | - list_del_init(&lo->plh_layouts); |
|---|
| 850 | + if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) |
|---|
| 851 | + list_del_rcu(&lo->plh_layouts); |
|---|
| 807 | 852 | if (pnfs_layout_add_bulk_destroy_list(inode, |
|---|
| 808 | 853 | layout_list)) |
|---|
| 809 | 854 | continue; |
|---|
| .. | .. |
|---|
| 813 | 858 | } else { |
|---|
| 814 | 859 | rcu_read_unlock(); |
|---|
| 815 | 860 | spin_unlock(&clp->cl_lock); |
|---|
| 816 | | - set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); |
|---|
| 817 | 861 | } |
|---|
| 818 | 862 | nfs_sb_deactive(server->super); |
|---|
| 819 | 863 | spin_lock(&clp->cl_lock); |
|---|
| .. | .. |
|---|
| 910 | 954 | } |
|---|
| 911 | 955 | |
|---|
| 912 | 956 | /* |
|---|
| 913 | | - * Called by the state manger to remove all layouts established under an |
|---|
| 957 | + * Called by the state manager to remove all layouts established under an |
|---|
| 914 | 958 | * expired lease. |
|---|
| 915 | 959 | */ |
|---|
| 916 | 960 | void |
|---|
| .. | .. |
|---|
| 922 | 966 | pnfs_destroy_layouts_byclid(clp, false); |
|---|
| 923 | 967 | } |
|---|
| 924 | 968 | |
|---|
| 969 | +static void |
|---|
| 970 | +pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred) |
|---|
| 971 | +{ |
|---|
| 972 | + const struct cred *old; |
|---|
| 973 | + |
|---|
| 974 | + if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) { |
|---|
| 975 | + old = xchg(&lo->plh_lc_cred, get_cred(cred)); |
|---|
| 976 | + put_cred(old); |
|---|
| 977 | + } |
|---|
| 978 | +} |
|---|
| 979 | + |
|---|
| 925 | 980 | /* update lo->plh_stateid with new if is more recent */ |
|---|
| 926 | 981 | void |
|---|
| 927 | 982 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
|---|
| 928 | | - bool update_barrier) |
|---|
| 983 | + const struct cred *cred, bool update_barrier) |
|---|
| 929 | 984 | { |
|---|
| 930 | | - u32 oldseq, newseq, new_barrier = 0; |
|---|
| 931 | | - |
|---|
| 932 | | - oldseq = be32_to_cpu(lo->plh_stateid.seqid); |
|---|
| 933 | | - newseq = be32_to_cpu(new->seqid); |
|---|
| 985 | + u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid); |
|---|
| 986 | + u32 newseq = be32_to_cpu(new->seqid); |
|---|
| 934 | 987 | |
|---|
| 935 | 988 | if (!pnfs_layout_is_valid(lo)) { |
|---|
| 989 | + pnfs_set_layout_cred(lo, cred); |
|---|
| 936 | 990 | nfs4_stateid_copy(&lo->plh_stateid, new); |
|---|
| 937 | 991 | lo->plh_barrier = newseq; |
|---|
| 938 | 992 | pnfs_clear_layoutreturn_info(lo); |
|---|
| 939 | 993 | clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); |
|---|
| 940 | 994 | return; |
|---|
| 941 | 995 | } |
|---|
| 942 | | - if (pnfs_seqid_is_newer(newseq, oldseq)) { |
|---|
| 996 | + |
|---|
| 997 | + if (pnfs_seqid_is_newer(newseq, oldseq)) |
|---|
| 943 | 998 | nfs4_stateid_copy(&lo->plh_stateid, new); |
|---|
| 944 | | - /* |
|---|
| 945 | | - * Because of wraparound, we want to keep the barrier |
|---|
| 946 | | - * "close" to the current seqids. |
|---|
| 947 | | - */ |
|---|
| 948 | | - new_barrier = newseq - atomic_read(&lo->plh_outstanding); |
|---|
| 949 | | - } |
|---|
| 950 | | - if (update_barrier) |
|---|
| 951 | | - new_barrier = be32_to_cpu(new->seqid); |
|---|
| 952 | | - else if (new_barrier == 0) |
|---|
| 999 | + |
|---|
| 1000 | + if (update_barrier) { |
|---|
| 1001 | + pnfs_barrier_update(lo, newseq); |
|---|
| 953 | 1002 | return; |
|---|
| 954 | | - if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) |
|---|
| 955 | | - lo->plh_barrier = new_barrier; |
|---|
| 1003 | + } |
|---|
| 1004 | + /* |
|---|
| 1005 | + * Because of wraparound, we want to keep the barrier |
|---|
| 1006 | + * "close" to the current seqids. We really only want to |
|---|
| 1007 | + * get here from a layoutget call. |
|---|
| 1008 | + */ |
|---|
| 1009 | + if (atomic_read(&lo->plh_outstanding) == 1) |
|---|
| 1010 | + pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid)); |
|---|
| 956 | 1011 | } |
|---|
| 957 | 1012 | |
|---|
| 958 | 1013 | static bool |
|---|
| .. | .. |
|---|
| 961 | 1016 | { |
|---|
| 962 | 1017 | u32 seqid = be32_to_cpu(stateid->seqid); |
|---|
| 963 | 1018 | |
|---|
| 964 | | - return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); |
|---|
| 1019 | + return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid); |
|---|
| 965 | 1020 | } |
|---|
| 966 | 1021 | |
|---|
| 967 | 1022 | /* lget is set to 1 if called from inside send_layoutget call chain */ |
|---|
| .. | .. |
|---|
| 1007 | 1062 | struct page **pages; |
|---|
| 1008 | 1063 | int i; |
|---|
| 1009 | 1064 | |
|---|
| 1010 | | - pages = kcalloc(size, sizeof(struct page *), gfp_flags); |
|---|
| 1065 | + pages = kmalloc_array(size, sizeof(struct page *), gfp_flags); |
|---|
| 1011 | 1066 | if (!pages) { |
|---|
| 1012 | 1067 | dprintk("%s: can't alloc array of %zu pages\n", __func__, size); |
|---|
| 1013 | 1068 | return NULL; |
|---|
| .. | .. |
|---|
| 1017 | 1072 | pages[i] = alloc_page(gfp_flags); |
|---|
| 1018 | 1073 | if (!pages[i]) { |
|---|
| 1019 | 1074 | dprintk("%s: failed to allocate page\n", __func__); |
|---|
| 1020 | | - nfs4_free_pages(pages, size); |
|---|
| 1075 | + nfs4_free_pages(pages, i); |
|---|
| 1021 | 1076 | return NULL; |
|---|
| 1022 | 1077 | } |
|---|
| 1023 | 1078 | } |
|---|
| .. | .. |
|---|
| 1033 | 1088 | gfp_t gfp_flags) |
|---|
| 1034 | 1089 | { |
|---|
| 1035 | 1090 | struct nfs_server *server = pnfs_find_server(ino, ctx); |
|---|
| 1091 | + size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response; |
|---|
| 1036 | 1092 | size_t max_pages = max_response_pages(server); |
|---|
| 1037 | 1093 | struct nfs4_layoutget *lgp; |
|---|
| 1038 | 1094 | |
|---|
| .. | .. |
|---|
| 1041 | 1097 | lgp = kzalloc(sizeof(*lgp), gfp_flags); |
|---|
| 1042 | 1098 | if (lgp == NULL) |
|---|
| 1043 | 1099 | return NULL; |
|---|
| 1100 | + |
|---|
| 1101 | + if (max_reply_sz) { |
|---|
| 1102 | + size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT; |
|---|
| 1103 | + if (npages < max_pages) |
|---|
| 1104 | + max_pages = npages; |
|---|
| 1105 | + } |
|---|
| 1044 | 1106 | |
|---|
| 1045 | 1107 | lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); |
|---|
| 1046 | 1108 | if (!lgp->args.layout.pages) { |
|---|
| .. | .. |
|---|
| 1073 | 1135 | lgp->args.ctx = get_nfs_open_context(ctx); |
|---|
| 1074 | 1136 | nfs4_stateid_copy(&lgp->args.stateid, stateid); |
|---|
| 1075 | 1137 | lgp->gfp_flags = gfp_flags; |
|---|
| 1076 | | - lgp->cred = get_rpccred(ctx->cred); |
|---|
| 1138 | + lgp->cred = ctx->cred; |
|---|
| 1077 | 1139 | return lgp; |
|---|
| 1078 | 1140 | } |
|---|
| 1079 | 1141 | |
|---|
| .. | .. |
|---|
| 1084 | 1146 | nfs4_free_pages(lgp->args.layout.pages, max_pages); |
|---|
| 1085 | 1147 | if (lgp->args.inode) |
|---|
| 1086 | 1148 | pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout); |
|---|
| 1087 | | - put_rpccred(lgp->cred); |
|---|
| 1088 | 1149 | put_nfs_open_context(lgp->args.ctx); |
|---|
| 1089 | 1150 | kfree(lgp); |
|---|
| 1090 | 1151 | } |
|---|
| .. | .. |
|---|
| 1121 | 1182 | |
|---|
| 1122 | 1183 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); |
|---|
| 1123 | 1184 | pnfs_free_returned_lsegs(lo, &freeme, range, seq); |
|---|
| 1124 | | - pnfs_set_layout_stateid(lo, stateid, true); |
|---|
| 1185 | + pnfs_set_layout_stateid(lo, stateid, NULL, true); |
|---|
| 1125 | 1186 | } else |
|---|
| 1126 | 1187 | pnfs_mark_layout_stateid_invalid(lo, &freeme); |
|---|
| 1127 | 1188 | out_unlock: |
|---|
| .. | .. |
|---|
| 1134 | 1195 | static bool |
|---|
| 1135 | 1196 | pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, |
|---|
| 1136 | 1197 | nfs4_stateid *stateid, |
|---|
| 1198 | + const struct cred **cred, |
|---|
| 1137 | 1199 | enum pnfs_iomode *iomode) |
|---|
| 1138 | 1200 | { |
|---|
| 1139 | 1201 | /* Serialise LAYOUTGET/LAYOUTRETURN */ |
|---|
| .. | .. |
|---|
| 1143 | 1205 | return false; |
|---|
| 1144 | 1206 | set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); |
|---|
| 1145 | 1207 | pnfs_get_layout_hdr(lo); |
|---|
| 1208 | + nfs4_stateid_copy(stateid, &lo->plh_stateid); |
|---|
| 1209 | + *cred = get_cred(lo->plh_lc_cred); |
|---|
| 1146 | 1210 | if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { |
|---|
| 1147 | | - if (stateid != NULL) { |
|---|
| 1148 | | - nfs4_stateid_copy(stateid, &lo->plh_stateid); |
|---|
| 1149 | | - if (lo->plh_return_seq != 0) |
|---|
| 1150 | | - stateid->seqid = cpu_to_be32(lo->plh_return_seq); |
|---|
| 1151 | | - } |
|---|
| 1211 | + if (lo->plh_return_seq != 0) |
|---|
| 1212 | + stateid->seqid = cpu_to_be32(lo->plh_return_seq); |
|---|
| 1152 | 1213 | if (iomode != NULL) |
|---|
| 1153 | 1214 | *iomode = lo->plh_return_iomode; |
|---|
| 1154 | 1215 | pnfs_clear_layoutreturn_info(lo); |
|---|
| 1155 | | - return true; |
|---|
| 1156 | | - } |
|---|
| 1157 | | - if (stateid != NULL) |
|---|
| 1158 | | - nfs4_stateid_copy(stateid, &lo->plh_stateid); |
|---|
| 1159 | | - if (iomode != NULL) |
|---|
| 1216 | + } else if (iomode != NULL) |
|---|
| 1160 | 1217 | *iomode = IOMODE_ANY; |
|---|
| 1218 | + pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid)); |
|---|
| 1161 | 1219 | return true; |
|---|
| 1162 | 1220 | } |
|---|
| 1163 | 1221 | |
|---|
| .. | .. |
|---|
| 1179 | 1237 | } |
|---|
| 1180 | 1238 | |
|---|
| 1181 | 1239 | static int |
|---|
| 1182 | | -pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, |
|---|
| 1183 | | - enum pnfs_iomode iomode, bool sync) |
|---|
| 1240 | +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, |
|---|
| 1241 | + const nfs4_stateid *stateid, |
|---|
| 1242 | + const struct cred **pcred, |
|---|
| 1243 | + enum pnfs_iomode iomode, |
|---|
| 1244 | + bool sync) |
|---|
| 1184 | 1245 | { |
|---|
| 1185 | 1246 | struct inode *ino = lo->plh_inode; |
|---|
| 1186 | 1247 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; |
|---|
| 1187 | 1248 | struct nfs4_layoutreturn *lrp; |
|---|
| 1249 | + const struct cred *cred = *pcred; |
|---|
| 1188 | 1250 | int status = 0; |
|---|
| 1189 | 1251 | |
|---|
| 1252 | + *pcred = NULL; |
|---|
| 1190 | 1253 | lrp = kzalloc(sizeof(*lrp), GFP_NOFS); |
|---|
| 1191 | 1254 | if (unlikely(lrp == NULL)) { |
|---|
| 1192 | 1255 | status = -ENOMEM; |
|---|
| 1193 | 1256 | spin_lock(&ino->i_lock); |
|---|
| 1194 | 1257 | pnfs_clear_layoutreturn_waitbit(lo); |
|---|
| 1195 | 1258 | spin_unlock(&ino->i_lock); |
|---|
| 1259 | + put_cred(cred); |
|---|
| 1196 | 1260 | pnfs_put_layout_hdr(lo); |
|---|
| 1197 | 1261 | goto out; |
|---|
| 1198 | 1262 | } |
|---|
| .. | .. |
|---|
| 1200 | 1264 | pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode); |
|---|
| 1201 | 1265 | lrp->args.ld_private = &lrp->ld_private; |
|---|
| 1202 | 1266 | lrp->clp = NFS_SERVER(ino)->nfs_client; |
|---|
| 1203 | | - lrp->cred = lo->plh_lc_cred; |
|---|
| 1267 | + lrp->cred = cred; |
|---|
| 1204 | 1268 | if (ld->prepare_layoutreturn) |
|---|
| 1205 | 1269 | ld->prepare_layoutreturn(&lrp->args); |
|---|
| 1206 | 1270 | |
|---|
| .. | .. |
|---|
| 1241 | 1305 | return; |
|---|
| 1242 | 1306 | spin_lock(&inode->i_lock); |
|---|
| 1243 | 1307 | if (pnfs_layout_need_return(lo)) { |
|---|
| 1308 | + const struct cred *cred; |
|---|
| 1244 | 1309 | nfs4_stateid stateid; |
|---|
| 1245 | 1310 | enum pnfs_iomode iomode; |
|---|
| 1246 | 1311 | bool send; |
|---|
| 1247 | 1312 | |
|---|
| 1248 | | - send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); |
|---|
| 1313 | + send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); |
|---|
| 1249 | 1314 | spin_unlock(&inode->i_lock); |
|---|
| 1250 | 1315 | if (send) { |
|---|
| 1251 | 1316 | /* Send an async layoutreturn so we dont deadlock */ |
|---|
| 1252 | | - pnfs_send_layoutreturn(lo, &stateid, iomode, false); |
|---|
| 1317 | + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); |
|---|
| 1253 | 1318 | } |
|---|
| 1254 | 1319 | } else |
|---|
| 1255 | 1320 | spin_unlock(&inode->i_lock); |
|---|
| .. | .. |
|---|
| 1274 | 1339 | .length = NFS4_MAX_UINT64, |
|---|
| 1275 | 1340 | }; |
|---|
| 1276 | 1341 | LIST_HEAD(tmp_list); |
|---|
| 1342 | + const struct cred *cred; |
|---|
| 1277 | 1343 | nfs4_stateid stateid; |
|---|
| 1278 | 1344 | int status = 0; |
|---|
| 1279 | 1345 | bool send, valid_layout; |
|---|
| .. | .. |
|---|
| 1309 | 1375 | !valid_layout) { |
|---|
| 1310 | 1376 | spin_unlock(&ino->i_lock); |
|---|
| 1311 | 1377 | dprintk("NFS: %s no layout segments to return\n", __func__); |
|---|
| 1312 | | - goto out_put_layout_hdr; |
|---|
| 1378 | + goto out_wait_layoutreturn; |
|---|
| 1313 | 1379 | } |
|---|
| 1314 | 1380 | |
|---|
| 1315 | | - send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); |
|---|
| 1381 | + send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); |
|---|
| 1316 | 1382 | spin_unlock(&ino->i_lock); |
|---|
| 1317 | 1383 | if (send) |
|---|
| 1318 | | - status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); |
|---|
| 1384 | + status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); |
|---|
| 1385 | +out_wait_layoutreturn: |
|---|
| 1386 | + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); |
|---|
| 1319 | 1387 | out_put_layout_hdr: |
|---|
| 1320 | 1388 | pnfs_free_lseg_list(&tmp_list); |
|---|
| 1321 | 1389 | pnfs_put_layout_hdr(lo); |
|---|
| .. | .. |
|---|
| 1354 | 1422 | bool pnfs_roc(struct inode *ino, |
|---|
| 1355 | 1423 | struct nfs4_layoutreturn_args *args, |
|---|
| 1356 | 1424 | struct nfs4_layoutreturn_res *res, |
|---|
| 1357 | | - const struct rpc_cred *cred) |
|---|
| 1425 | + const struct cred *cred) |
|---|
| 1358 | 1426 | { |
|---|
| 1359 | 1427 | struct nfs_inode *nfsi = NFS_I(ino); |
|---|
| 1360 | 1428 | struct nfs_open_context *ctx; |
|---|
| 1361 | 1429 | struct nfs4_state *state; |
|---|
| 1362 | 1430 | struct pnfs_layout_hdr *lo; |
|---|
| 1363 | 1431 | struct pnfs_layout_segment *lseg, *next; |
|---|
| 1432 | + const struct cred *lc_cred; |
|---|
| 1364 | 1433 | nfs4_stateid stateid; |
|---|
| 1365 | 1434 | enum pnfs_iomode iomode = 0; |
|---|
| 1366 | 1435 | bool layoutreturn = false, roc = false; |
|---|
| .. | .. |
|---|
| 1369 | 1438 | if (!nfs_have_layout(ino)) |
|---|
| 1370 | 1439 | return false; |
|---|
| 1371 | 1440 | retry: |
|---|
| 1441 | + rcu_read_lock(); |
|---|
| 1372 | 1442 | spin_lock(&ino->i_lock); |
|---|
| 1373 | 1443 | lo = nfsi->layout; |
|---|
| 1374 | 1444 | if (!lo || !pnfs_layout_is_valid(lo) || |
|---|
| .. | .. |
|---|
| 1379 | 1449 | pnfs_get_layout_hdr(lo); |
|---|
| 1380 | 1450 | if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { |
|---|
| 1381 | 1451 | spin_unlock(&ino->i_lock); |
|---|
| 1452 | + rcu_read_unlock(); |
|---|
| 1382 | 1453 | wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, |
|---|
| 1383 | 1454 | TASK_UNINTERRUPTIBLE); |
|---|
| 1384 | 1455 | pnfs_put_layout_hdr(lo); |
|---|
| .. | .. |
|---|
| 1392 | 1463 | skip_read = true; |
|---|
| 1393 | 1464 | } |
|---|
| 1394 | 1465 | |
|---|
| 1395 | | - list_for_each_entry(ctx, &nfsi->open_files, list) { |
|---|
| 1466 | + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { |
|---|
| 1396 | 1467 | state = ctx->state; |
|---|
| 1397 | 1468 | if (state == NULL) |
|---|
| 1398 | 1469 | continue; |
|---|
| .. | .. |
|---|
| 1428 | 1499 | * 2. we don't send layoutreturn |
|---|
| 1429 | 1500 | */ |
|---|
| 1430 | 1501 | /* lo ref dropped in pnfs_roc_release() */ |
|---|
| 1431 | | - layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); |
|---|
| 1502 | + layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode); |
|---|
| 1432 | 1503 | /* If the creds don't match, we can't compound the layoutreturn */ |
|---|
| 1433 | | - if (!layoutreturn || cred != lo->plh_lc_cred) |
|---|
| 1504 | + if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0) |
|---|
| 1434 | 1505 | goto out_noroc; |
|---|
| 1435 | 1506 | |
|---|
| 1436 | 1507 | roc = layoutreturn; |
|---|
| 1437 | 1508 | pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); |
|---|
| 1438 | 1509 | res->lrs_present = 0; |
|---|
| 1439 | 1510 | layoutreturn = false; |
|---|
| 1511 | + put_cred(lc_cred); |
|---|
| 1440 | 1512 | |
|---|
| 1441 | 1513 | out_noroc: |
|---|
| 1442 | 1514 | spin_unlock(&ino->i_lock); |
|---|
| 1515 | + rcu_read_unlock(); |
|---|
| 1443 | 1516 | pnfs_layoutcommit_inode(ino, true); |
|---|
| 1444 | 1517 | if (roc) { |
|---|
| 1445 | 1518 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; |
|---|
| .. | .. |
|---|
| 1449 | 1522 | return true; |
|---|
| 1450 | 1523 | } |
|---|
| 1451 | 1524 | if (layoutreturn) |
|---|
| 1452 | | - pnfs_send_layoutreturn(lo, &stateid, iomode, true); |
|---|
| 1525 | + pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true); |
|---|
| 1453 | 1526 | pnfs_put_layout_hdr(lo); |
|---|
| 1454 | 1527 | return false; |
|---|
| 1528 | +} |
|---|
| 1529 | + |
|---|
| 1530 | +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, |
|---|
| 1531 | + struct nfs4_layoutreturn_res **respp, int *ret) |
|---|
| 1532 | +{ |
|---|
| 1533 | + struct nfs4_layoutreturn_args *arg = *argpp; |
|---|
| 1534 | + int retval = -EAGAIN; |
|---|
| 1535 | + |
|---|
| 1536 | + if (!arg) |
|---|
| 1537 | + return 0; |
|---|
| 1538 | + /* Handle Layoutreturn errors */ |
|---|
| 1539 | + switch (*ret) { |
|---|
| 1540 | + case 0: |
|---|
| 1541 | + retval = 0; |
|---|
| 1542 | + break; |
|---|
| 1543 | + case -NFS4ERR_NOMATCHING_LAYOUT: |
|---|
| 1544 | + /* Was there an RPC level error? If not, retry */ |
|---|
| 1545 | + if (task->tk_rpc_status == 0) |
|---|
| 1546 | + break; |
|---|
| 1547 | + /* If the call was not sent, let caller handle it */ |
|---|
| 1548 | + if (!RPC_WAS_SENT(task)) |
|---|
| 1549 | + return 0; |
|---|
| 1550 | + /* |
|---|
| 1551 | + * Otherwise, assume the call succeeded and |
|---|
| 1552 | + * that we need to release the layout |
|---|
| 1553 | + */ |
|---|
| 1554 | + *ret = 0; |
|---|
| 1555 | + (*respp)->lrs_present = 0; |
|---|
| 1556 | + retval = 0; |
|---|
| 1557 | + break; |
|---|
| 1558 | + case -NFS4ERR_DELAY: |
|---|
| 1559 | + /* Let the caller handle the retry */ |
|---|
| 1560 | + *ret = -NFS4ERR_NOMATCHING_LAYOUT; |
|---|
| 1561 | + return 0; |
|---|
| 1562 | + case -NFS4ERR_OLD_STATEID: |
|---|
| 1563 | + if (!nfs4_layout_refresh_old_stateid(&arg->stateid, |
|---|
| 1564 | + &arg->range, arg->inode)) |
|---|
| 1565 | + break; |
|---|
| 1566 | + *ret = -NFS4ERR_NOMATCHING_LAYOUT; |
|---|
| 1567 | + return -EAGAIN; |
|---|
| 1568 | + } |
|---|
| 1569 | + *argpp = NULL; |
|---|
| 1570 | + *respp = NULL; |
|---|
| 1571 | + return retval; |
|---|
| 1455 | 1572 | } |
|---|
| 1456 | 1573 | |
|---|
| 1457 | 1574 | void pnfs_roc_release(struct nfs4_layoutreturn_args *args, |
|---|
| .. | .. |
|---|
| 1475 | 1592 | case 0: |
|---|
| 1476 | 1593 | if (res->lrs_present) |
|---|
| 1477 | 1594 | res_stateid = &res->stateid; |
|---|
| 1478 | | - /* Fallthrough */ |
|---|
| 1595 | + fallthrough; |
|---|
| 1479 | 1596 | default: |
|---|
| 1480 | 1597 | arg_stateid = &args->stateid; |
|---|
| 1481 | 1598 | } |
|---|
| 1599 | + trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); |
|---|
| 1482 | 1600 | pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, |
|---|
| 1483 | 1601 | res_stateid); |
|---|
| 1484 | 1602 | if (ld_private && ld_private->ops && ld_private->ops->free) |
|---|
| 1485 | 1603 | ld_private->ops->free(ld_private); |
|---|
| 1486 | 1604 | pnfs_put_layout_hdr(lo); |
|---|
| 1487 | | - trace_nfs4_layoutreturn_on_close(args->inode, 0); |
|---|
| 1488 | 1605 | } |
|---|
| 1489 | 1606 | |
|---|
| 1490 | 1607 | bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) |
|---|
| .. | .. |
|---|
| 1621 | 1738 | INIT_LIST_HEAD(&lo->plh_return_segs); |
|---|
| 1622 | 1739 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); |
|---|
| 1623 | 1740 | lo->plh_inode = ino; |
|---|
| 1624 | | - lo->plh_lc_cred = get_rpccred(ctx->cred); |
|---|
| 1741 | + lo->plh_lc_cred = get_cred(ctx->cred); |
|---|
| 1625 | 1742 | lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; |
|---|
| 1626 | 1743 | return lo; |
|---|
| 1627 | 1744 | } |
|---|
| .. | .. |
|---|
| 1807 | 1924 | |
|---|
| 1808 | 1925 | static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) |
|---|
| 1809 | 1926 | { |
|---|
| 1810 | | - if (atomic_dec_and_test(&lo->plh_outstanding)) |
|---|
| 1811 | | - wake_up_var(&lo->plh_outstanding); |
|---|
| 1927 | + if (atomic_dec_and_test(&lo->plh_outstanding) && |
|---|
| 1928 | + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) |
|---|
| 1929 | + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); |
|---|
| 1930 | +} |
|---|
| 1931 | + |
|---|
| 1932 | +static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) |
|---|
| 1933 | +{ |
|---|
| 1934 | + return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags); |
|---|
| 1812 | 1935 | } |
|---|
| 1813 | 1936 | |
|---|
| 1814 | 1937 | static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) |
|---|
| .. | .. |
|---|
| 1823 | 1946 | static void _add_to_server_list(struct pnfs_layout_hdr *lo, |
|---|
| 1824 | 1947 | struct nfs_server *server) |
|---|
| 1825 | 1948 | { |
|---|
| 1826 | | - if (list_empty(&lo->plh_layouts)) { |
|---|
| 1949 | + if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) { |
|---|
| 1827 | 1950 | struct nfs_client *clp = server->nfs_client; |
|---|
| 1828 | 1951 | |
|---|
| 1829 | 1952 | /* The lo must be on the clp list if there is any |
|---|
| 1830 | 1953 | * chance of a CB_LAYOUTRECALL(FILE) coming in. |
|---|
| 1831 | 1954 | */ |
|---|
| 1832 | 1955 | spin_lock(&clp->cl_lock); |
|---|
| 1833 | | - if (list_empty(&lo->plh_layouts)) |
|---|
| 1834 | | - list_add_tail(&lo->plh_layouts, &server->layouts); |
|---|
| 1956 | + list_add_tail_rcu(&lo->plh_layouts, &server->layouts); |
|---|
| 1835 | 1957 | spin_unlock(&clp->cl_lock); |
|---|
| 1836 | 1958 | } |
|---|
| 1837 | 1959 | } |
|---|
| .. | .. |
|---|
| 1886 | 2008 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
|---|
| 1887 | 2009 | if (lo == NULL) { |
|---|
| 1888 | 2010 | spin_unlock(&ino->i_lock); |
|---|
| 2011 | + lseg = ERR_PTR(-ENOMEM); |
|---|
| 1889 | 2012 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
|---|
| 1890 | 2013 | PNFS_UPDATE_LAYOUT_NOMEM); |
|---|
| 1891 | 2014 | goto out; |
|---|
| .. | .. |
|---|
| 1910 | 2033 | * If the layout segment list is empty, but there are outstanding |
|---|
| 1911 | 2034 | * layoutget calls, then they might be subject to a layoutrecall. |
|---|
| 1912 | 2035 | */ |
|---|
| 1913 | | - if (list_empty(&lo->plh_segs) && |
|---|
| 2036 | + if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && |
|---|
| 1914 | 2037 | atomic_read(&lo->plh_outstanding) != 0) { |
|---|
| 1915 | 2038 | spin_unlock(&ino->i_lock); |
|---|
| 1916 | | - lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, |
|---|
| 1917 | | - !atomic_read(&lo->plh_outstanding))); |
|---|
| 2039 | + lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN, |
|---|
| 2040 | + TASK_KILLABLE)); |
|---|
| 1918 | 2041 | if (IS_ERR(lseg)) |
|---|
| 1919 | 2042 | goto out_put_layout_hdr; |
|---|
| 1920 | 2043 | pnfs_put_layout_hdr(lo); |
|---|
| 1921 | 2044 | goto lookup_again; |
|---|
| 2045 | + } |
|---|
| 2046 | + |
|---|
| 2047 | + /* |
|---|
| 2048 | + * Because we free lsegs when sending LAYOUTRETURN, we need to wait |
|---|
| 2049 | + * for LAYOUTRETURN. |
|---|
| 2050 | + */ |
|---|
| 2051 | + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { |
|---|
| 2052 | + spin_unlock(&ino->i_lock); |
|---|
| 2053 | + dprintk("%s wait for layoutreturn\n", __func__); |
|---|
| 2054 | + lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); |
|---|
| 2055 | + if (!IS_ERR(lseg)) { |
|---|
| 2056 | + pnfs_put_layout_hdr(lo); |
|---|
| 2057 | + dprintk("%s retrying\n", __func__); |
|---|
| 2058 | + trace_pnfs_update_layout(ino, pos, count, iomode, lo, |
|---|
| 2059 | + lseg, |
|---|
| 2060 | + PNFS_UPDATE_LAYOUT_RETRY); |
|---|
| 2061 | + goto lookup_again; |
|---|
| 2062 | + } |
|---|
| 2063 | + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
|---|
| 2064 | + PNFS_UPDATE_LAYOUT_RETURN); |
|---|
| 2065 | + goto out_put_layout_hdr; |
|---|
| 1922 | 2066 | } |
|---|
| 1923 | 2067 | |
|---|
| 1924 | 2068 | lseg = pnfs_find_lseg(lo, &arg, strict_iomode); |
|---|
| .. | .. |
|---|
| 1928 | 2072 | goto out_unlock; |
|---|
| 1929 | 2073 | } |
|---|
| 1930 | 2074 | |
|---|
| 1931 | | - if (!nfs4_valid_open_stateid(ctx->state)) { |
|---|
| 1932 | | - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
|---|
| 1933 | | - PNFS_UPDATE_LAYOUT_INVALID_OPEN); |
|---|
| 1934 | | - goto out_unlock; |
|---|
| 1935 | | - } |
|---|
| 1936 | | - |
|---|
| 1937 | 2075 | /* |
|---|
| 1938 | 2076 | * Choose a stateid for the LAYOUTGET. If we don't have a layout |
|---|
| 1939 | 2077 | * stateid, or it has been invalidated, then we must use the open |
|---|
| 1940 | 2078 | * stateid. |
|---|
| 1941 | 2079 | */ |
|---|
| 1942 | 2080 | if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { |
|---|
| 2081 | + int status; |
|---|
| 1943 | 2082 | |
|---|
| 1944 | 2083 | /* |
|---|
| 1945 | 2084 | * The first layoutget for the file. Need to serialize per |
|---|
| .. | .. |
|---|
| 1958 | 2097 | goto lookup_again; |
|---|
| 1959 | 2098 | } |
|---|
| 1960 | 2099 | |
|---|
| 2100 | + spin_unlock(&ino->i_lock); |
|---|
| 1961 | 2101 | first = true; |
|---|
| 1962 | | - if (nfs4_select_rw_stateid(ctx->state, |
|---|
| 2102 | + status = nfs4_select_rw_stateid(ctx->state, |
|---|
| 1963 | 2103 | iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, |
|---|
| 1964 | | - NULL, &stateid, NULL) != 0) { |
|---|
| 2104 | + NULL, &stateid, NULL); |
|---|
| 2105 | + if (status != 0) { |
|---|
| 2106 | + lseg = ERR_PTR(status); |
|---|
| 1965 | 2107 | trace_pnfs_update_layout(ino, pos, count, |
|---|
| 1966 | 2108 | iomode, lo, lseg, |
|---|
| 1967 | 2109 | PNFS_UPDATE_LAYOUT_INVALID_OPEN); |
|---|
| 1968 | | - goto out_unlock; |
|---|
| 1969 | | - } |
|---|
| 1970 | | - } else { |
|---|
| 1971 | | - nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
|---|
| 1972 | | - } |
|---|
| 1973 | | - |
|---|
| 1974 | | - /* |
|---|
| 1975 | | - * Because we free lsegs before sending LAYOUTRETURN, we need to wait |
|---|
| 1976 | | - * for LAYOUTRETURN even if first is true. |
|---|
| 1977 | | - */ |
|---|
| 1978 | | - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { |
|---|
| 1979 | | - spin_unlock(&ino->i_lock); |
|---|
| 1980 | | - dprintk("%s wait for layoutreturn\n", __func__); |
|---|
| 1981 | | - lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); |
|---|
| 1982 | | - if (!IS_ERR(lseg)) { |
|---|
| 1983 | | - if (first) |
|---|
| 1984 | | - pnfs_clear_first_layoutget(lo); |
|---|
| 2110 | + nfs4_schedule_stateid_recovery(server, ctx->state); |
|---|
| 2111 | + pnfs_clear_first_layoutget(lo); |
|---|
| 1985 | 2112 | pnfs_put_layout_hdr(lo); |
|---|
| 1986 | | - dprintk("%s retrying\n", __func__); |
|---|
| 1987 | | - trace_pnfs_update_layout(ino, pos, count, iomode, lo, |
|---|
| 1988 | | - lseg, PNFS_UPDATE_LAYOUT_RETRY); |
|---|
| 1989 | 2113 | goto lookup_again; |
|---|
| 1990 | 2114 | } |
|---|
| 1991 | | - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
|---|
| 1992 | | - PNFS_UPDATE_LAYOUT_RETURN); |
|---|
| 1993 | | - goto out_put_layout_hdr; |
|---|
| 2115 | + spin_lock(&ino->i_lock); |
|---|
| 2116 | + } else { |
|---|
| 2117 | + nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
|---|
| 1994 | 2118 | } |
|---|
| 1995 | 2119 | |
|---|
| 1996 | 2120 | if (pnfs_layoutgets_blocked(lo)) { |
|---|
| .. | .. |
|---|
| 2013 | 2137 | |
|---|
| 2014 | 2138 | lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); |
|---|
| 2015 | 2139 | if (!lgp) { |
|---|
| 2140 | + lseg = ERR_PTR(-ENOMEM); |
|---|
| 2016 | 2141 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, |
|---|
| 2017 | 2142 | PNFS_UPDATE_LAYOUT_NOMEM); |
|---|
| 2018 | 2143 | nfs_layoutget_end(lo); |
|---|
| .. | .. |
|---|
| 2032 | 2157 | case -ERECALLCONFLICT: |
|---|
| 2033 | 2158 | case -EAGAIN: |
|---|
| 2034 | 2159 | break; |
|---|
| 2160 | + case -ENODATA: |
|---|
| 2161 | + /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ |
|---|
| 2162 | + pnfs_layout_set_fail_bit( |
|---|
| 2163 | + lo, pnfs_iomode_to_fail_bit(iomode)); |
|---|
| 2164 | + lseg = NULL; |
|---|
| 2165 | + goto out_put_layout_hdr; |
|---|
| 2035 | 2166 | default: |
|---|
| 2036 | 2167 | if (!nfs_error_is_fatal(PTR_ERR(lseg))) { |
|---|
| 2037 | 2168 | pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); |
|---|
| .. | .. |
|---|
| 2054 | 2185 | out_put_layout_hdr: |
|---|
| 2055 | 2186 | if (first) |
|---|
| 2056 | 2187 | pnfs_clear_first_layoutget(lo); |
|---|
| 2188 | + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
|---|
| 2189 | + PNFS_UPDATE_LAYOUT_EXIT); |
|---|
| 2057 | 2190 | pnfs_put_layout_hdr(lo); |
|---|
| 2058 | 2191 | out: |
|---|
| 2059 | 2192 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " |
|---|
| .. | .. |
|---|
| 2118 | 2251 | pnfs_put_layout_hdr(lo); |
|---|
| 2119 | 2252 | return NULL; |
|---|
| 2120 | 2253 | } |
|---|
| 2121 | | - |
|---|
| 2122 | | -extern const nfs4_stateid current_stateid; |
|---|
| 2123 | 2254 | |
|---|
| 2124 | 2255 | static void _lgopen_prepare_attached(struct nfs4_opendata *data, |
|---|
| 2125 | 2256 | struct nfs_open_context *ctx) |
|---|
| .. | .. |
|---|
| 2285 | 2416 | goto out_forget; |
|---|
| 2286 | 2417 | } |
|---|
| 2287 | 2418 | |
|---|
| 2288 | | - if (!pnfs_layout_is_valid(lo)) { |
|---|
| 2289 | | - /* We have a completely new layout */ |
|---|
| 2290 | | - pnfs_set_layout_stateid(lo, &res->stateid, true); |
|---|
| 2291 | | - } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { |
|---|
| 2419 | + if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && |
|---|
| 2420 | + !pnfs_is_first_layoutget(lo)) |
|---|
| 2421 | + goto out_forget; |
|---|
| 2422 | + |
|---|
| 2423 | + if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { |
|---|
| 2292 | 2424 | /* existing state ID, make sure the sequence number matches. */ |
|---|
| 2293 | 2425 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { |
|---|
| 2426 | + if (!pnfs_layout_is_valid(lo)) |
|---|
| 2427 | + lo->plh_barrier = 0; |
|---|
| 2294 | 2428 | dprintk("%s forget reply due to sequence\n", __func__); |
|---|
| 2295 | 2429 | goto out_forget; |
|---|
| 2296 | 2430 | } |
|---|
| 2297 | | - pnfs_set_layout_stateid(lo, &res->stateid, false); |
|---|
| 2298 | | - } else { |
|---|
| 2431 | + pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false); |
|---|
| 2432 | + } else if (pnfs_layout_is_valid(lo)) { |
|---|
| 2299 | 2433 | /* |
|---|
| 2300 | 2434 | * We got an entirely new state ID. Mark all segments for the |
|---|
| 2301 | 2435 | * inode invalid, and retry the layoutget |
|---|
| .. | .. |
|---|
| 2308 | 2442 | pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, |
|---|
| 2309 | 2443 | &range, 0); |
|---|
| 2310 | 2444 | goto out_forget; |
|---|
| 2445 | + } else { |
|---|
| 2446 | + /* We have a completely new layout */ |
|---|
| 2447 | + pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); |
|---|
| 2311 | 2448 | } |
|---|
| 2312 | 2449 | |
|---|
| 2313 | 2450 | pnfs_get_lseg(lseg); |
|---|
| .. | .. |
|---|
| 2389 | 2526 | return -ENOENT; |
|---|
| 2390 | 2527 | } |
|---|
| 2391 | 2528 | |
|---|
| 2392 | | -void pnfs_error_mark_layout_for_return(struct inode *inode, |
|---|
| 2393 | | - struct pnfs_layout_segment *lseg) |
|---|
| 2529 | +static void |
|---|
| 2530 | +pnfs_mark_layout_for_return(struct inode *inode, |
|---|
| 2531 | + const struct pnfs_layout_range *range) |
|---|
| 2394 | 2532 | { |
|---|
| 2395 | | - struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; |
|---|
| 2396 | | - struct pnfs_layout_range range = { |
|---|
| 2397 | | - .iomode = lseg->pls_range.iomode, |
|---|
| 2398 | | - .offset = 0, |
|---|
| 2399 | | - .length = NFS4_MAX_UINT64, |
|---|
| 2400 | | - }; |
|---|
| 2533 | + struct pnfs_layout_hdr *lo; |
|---|
| 2401 | 2534 | bool return_now = false; |
|---|
| 2402 | 2535 | |
|---|
| 2403 | 2536 | spin_lock(&inode->i_lock); |
|---|
| 2537 | + lo = NFS_I(inode)->layout; |
|---|
| 2404 | 2538 | if (!pnfs_layout_is_valid(lo)) { |
|---|
| 2405 | 2539 | spin_unlock(&inode->i_lock); |
|---|
| 2406 | 2540 | return; |
|---|
| 2407 | 2541 | } |
|---|
| 2408 | | - pnfs_set_plh_return_info(lo, range.iomode, 0); |
|---|
| 2542 | + pnfs_set_plh_return_info(lo, range->iomode, 0); |
|---|
| 2409 | 2543 | /* |
|---|
| 2410 | 2544 | * mark all matching lsegs so that we are sure to have no live |
|---|
| 2411 | 2545 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() |
|---|
| 2412 | 2546 | * for how it works. |
|---|
| 2413 | 2547 | */ |
|---|
| 2414 | | - if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) { |
|---|
| 2548 | + if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) { |
|---|
| 2549 | + const struct cred *cred; |
|---|
| 2415 | 2550 | nfs4_stateid stateid; |
|---|
| 2416 | 2551 | enum pnfs_iomode iomode; |
|---|
| 2417 | 2552 | |
|---|
| 2418 | | - return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); |
|---|
| 2553 | + return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); |
|---|
| 2419 | 2554 | spin_unlock(&inode->i_lock); |
|---|
| 2420 | 2555 | if (return_now) |
|---|
| 2421 | | - pnfs_send_layoutreturn(lo, &stateid, iomode, false); |
|---|
| 2556 | + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); |
|---|
| 2422 | 2557 | } else { |
|---|
| 2423 | 2558 | spin_unlock(&inode->i_lock); |
|---|
| 2424 | 2559 | nfs_commit_inode(inode, 0); |
|---|
| 2425 | 2560 | } |
|---|
| 2426 | 2561 | } |
|---|
| 2562 | + |
|---|
| 2563 | +void pnfs_error_mark_layout_for_return(struct inode *inode, |
|---|
| 2564 | + struct pnfs_layout_segment *lseg) |
|---|
| 2565 | +{ |
|---|
| 2566 | + struct pnfs_layout_range range = { |
|---|
| 2567 | + .iomode = lseg->pls_range.iomode, |
|---|
| 2568 | + .offset = 0, |
|---|
| 2569 | + .length = NFS4_MAX_UINT64, |
|---|
| 2570 | + }; |
|---|
| 2571 | + |
|---|
| 2572 | + pnfs_mark_layout_for_return(inode, &range); |
|---|
| 2573 | +} |
|---|
| 2427 | 2574 | EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); |
|---|
| 2575 | + |
|---|
| 2576 | +static bool |
|---|
| 2577 | +pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo) |
|---|
| 2578 | +{ |
|---|
| 2579 | + return pnfs_layout_is_valid(lo) && |
|---|
| 2580 | + !test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) && |
|---|
| 2581 | + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); |
|---|
| 2582 | +} |
|---|
| 2583 | + |
|---|
| 2584 | +static struct pnfs_layout_segment * |
|---|
| 2585 | +pnfs_find_first_lseg(struct pnfs_layout_hdr *lo, |
|---|
| 2586 | + const struct pnfs_layout_range *range, |
|---|
| 2587 | + enum pnfs_iomode iomode) |
|---|
| 2588 | +{ |
|---|
| 2589 | + struct pnfs_layout_segment *lseg; |
|---|
| 2590 | + |
|---|
| 2591 | + list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
|---|
| 2592 | + if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) |
|---|
| 2593 | + continue; |
|---|
| 2594 | + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) |
|---|
| 2595 | + continue; |
|---|
| 2596 | + if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY) |
|---|
| 2597 | + continue; |
|---|
| 2598 | + if (pnfs_lseg_range_intersecting(&lseg->pls_range, range)) |
|---|
| 2599 | + return lseg; |
|---|
| 2600 | + } |
|---|
| 2601 | + return NULL; |
|---|
| 2602 | +} |
|---|
| 2603 | + |
|---|
| 2604 | +/* Find open file states whose mode matches that of the range */ |
|---|
| 2605 | +static bool |
|---|
| 2606 | +pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo, |
|---|
| 2607 | + const struct pnfs_layout_range *range) |
|---|
| 2608 | +{ |
|---|
| 2609 | + struct list_head *head; |
|---|
| 2610 | + struct nfs_open_context *ctx; |
|---|
| 2611 | + fmode_t mode = 0; |
|---|
| 2612 | + |
|---|
| 2613 | + if (!pnfs_layout_can_be_returned(lo) || |
|---|
| 2614 | + !pnfs_find_first_lseg(lo, range, range->iomode)) |
|---|
| 2615 | + return false; |
|---|
| 2616 | + |
|---|
| 2617 | + head = &NFS_I(lo->plh_inode)->open_files; |
|---|
| 2618 | + list_for_each_entry_rcu(ctx, head, list) { |
|---|
| 2619 | + if (ctx->state) |
|---|
| 2620 | + mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE); |
|---|
| 2621 | + } |
|---|
| 2622 | + |
|---|
| 2623 | + switch (range->iomode) { |
|---|
| 2624 | + default: |
|---|
| 2625 | + break; |
|---|
| 2626 | + case IOMODE_READ: |
|---|
| 2627 | + mode &= ~FMODE_WRITE; |
|---|
| 2628 | + break; |
|---|
| 2629 | + case IOMODE_RW: |
|---|
| 2630 | + if (pnfs_find_first_lseg(lo, range, IOMODE_READ)) |
|---|
| 2631 | + mode &= ~FMODE_READ; |
|---|
| 2632 | + } |
|---|
| 2633 | + return mode == 0; |
|---|
| 2634 | +} |
|---|
| 2635 | + |
|---|
| 2636 | +static int |
|---|
| 2637 | +pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data) |
|---|
| 2638 | +{ |
|---|
| 2639 | + const struct pnfs_layout_range *range = data; |
|---|
| 2640 | + struct pnfs_layout_hdr *lo; |
|---|
| 2641 | + struct inode *inode; |
|---|
| 2642 | +restart: |
|---|
| 2643 | + rcu_read_lock(); |
|---|
| 2644 | + list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) { |
|---|
| 2645 | + if (!pnfs_layout_can_be_returned(lo) || |
|---|
| 2646 | + test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) |
|---|
| 2647 | + continue; |
|---|
| 2648 | + inode = lo->plh_inode; |
|---|
| 2649 | + spin_lock(&inode->i_lock); |
|---|
| 2650 | + if (!pnfs_should_return_unused_layout(lo, range)) { |
|---|
| 2651 | + spin_unlock(&inode->i_lock); |
|---|
| 2652 | + continue; |
|---|
| 2653 | + } |
|---|
| 2654 | + spin_unlock(&inode->i_lock); |
|---|
| 2655 | + inode = pnfs_grab_inode_layout_hdr(lo); |
|---|
| 2656 | + if (!inode) |
|---|
| 2657 | + continue; |
|---|
| 2658 | + rcu_read_unlock(); |
|---|
| 2659 | + pnfs_mark_layout_for_return(inode, range); |
|---|
| 2660 | + iput(inode); |
|---|
| 2661 | + cond_resched(); |
|---|
| 2662 | + goto restart; |
|---|
| 2663 | + } |
|---|
| 2664 | + rcu_read_unlock(); |
|---|
| 2665 | + return 0; |
|---|
| 2666 | +} |
|---|
| 2667 | + |
|---|
| 2668 | +void |
|---|
| 2669 | +pnfs_layout_return_unused_byclid(struct nfs_client *clp, |
|---|
| 2670 | + enum pnfs_iomode iomode) |
|---|
| 2671 | +{ |
|---|
| 2672 | + struct pnfs_layout_range range = { |
|---|
| 2673 | + .iomode = iomode, |
|---|
| 2674 | + .offset = 0, |
|---|
| 2675 | + .length = NFS4_MAX_UINT64, |
|---|
| 2676 | + }; |
|---|
| 2677 | + |
|---|
| 2678 | + nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver, |
|---|
| 2679 | + &range); |
|---|
| 2680 | +} |
|---|
| 2428 | 2681 | |
|---|
| 2429 | 2682 | void |
|---|
| 2430 | 2683 | pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) |
|---|
| .. | .. |
|---|
| 2441 | 2694 | * Check for any intersection between the request and the pgio->pg_lseg, |
|---|
| 2442 | 2695 | * and if none, put this pgio->pg_lseg away. |
|---|
| 2443 | 2696 | */ |
|---|
| 2444 | | -static void |
|---|
| 2697 | +void |
|---|
| 2445 | 2698 | pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
|---|
| 2446 | 2699 | { |
|---|
| 2447 | 2700 | if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { |
|---|
| .. | .. |
|---|
| 2449 | 2702 | pgio->pg_lseg = NULL; |
|---|
| 2450 | 2703 | } |
|---|
| 2451 | 2704 | } |
|---|
| 2705 | +EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range); |
|---|
| 2452 | 2706 | |
|---|
| 2453 | 2707 | void |
|---|
| 2454 | 2708 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
|---|
| .. | .. |
|---|
| 2464 | 2718 | rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); |
|---|
| 2465 | 2719 | |
|---|
| 2466 | 2720 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
|---|
| 2467 | | - req->wb_context, |
|---|
| 2721 | + nfs_req_openctx(req), |
|---|
| 2468 | 2722 | req_offset(req), |
|---|
| 2469 | 2723 | rd_size, |
|---|
| 2470 | 2724 | IOMODE_READ, |
|---|
| .. | .. |
|---|
| 2491 | 2745 | pnfs_generic_pg_check_range(pgio, req); |
|---|
| 2492 | 2746 | if (pgio->pg_lseg == NULL) { |
|---|
| 2493 | 2747 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
|---|
| 2494 | | - req->wb_context, |
|---|
| 2748 | + nfs_req_openctx(req), |
|---|
| 2495 | 2749 | req_offset(req), |
|---|
| 2496 | 2750 | wb_size, |
|---|
| 2497 | 2751 | IOMODE_RW, |
|---|
| 2498 | 2752 | false, |
|---|
| 2499 | | - GFP_NOFS); |
|---|
| 2753 | + GFP_KERNEL); |
|---|
| 2500 | 2754 | if (IS_ERR(pgio->pg_lseg)) { |
|---|
| 2501 | 2755 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); |
|---|
| 2502 | 2756 | pgio->pg_lseg = NULL; |
|---|
| .. | .. |
|---|
| 2768 | 3022 | } |
|---|
| 2769 | 3023 | |
|---|
| 2770 | 3024 | /* Resend all requests through pnfs. */ |
|---|
| 2771 | | -void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) |
|---|
| 3025 | +void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr, |
|---|
| 3026 | + unsigned int mirror_idx) |
|---|
| 2772 | 3027 | { |
|---|
| 2773 | 3028 | struct nfs_pageio_descriptor pgio; |
|---|
| 2774 | 3029 | |
|---|
| .. | .. |
|---|
| 2779 | 3034 | |
|---|
| 2780 | 3035 | nfs_pageio_init_read(&pgio, hdr->inode, false, |
|---|
| 2781 | 3036 | hdr->completion_ops); |
|---|
| 3037 | + pgio.pg_mirror_idx = mirror_idx; |
|---|
| 2782 | 3038 | hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); |
|---|
| 2783 | 3039 | } |
|---|
| 2784 | 3040 | } |
|---|
| .. | .. |
|---|
| 2966 | 3222 | end_pos = nfsi->layout->plh_lwb; |
|---|
| 2967 | 3223 | |
|---|
| 2968 | 3224 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); |
|---|
| 3225 | + data->cred = get_cred(nfsi->layout->plh_lc_cred); |
|---|
| 2969 | 3226 | spin_unlock(&inode->i_lock); |
|---|
| 2970 | 3227 | |
|---|
| 2971 | 3228 | data->args.inode = inode; |
|---|
| 2972 | | - data->cred = get_rpccred(nfsi->layout->plh_lc_cred); |
|---|
| 2973 | 3229 | nfs_fattr_init(&data->fattr); |
|---|
| 2974 | 3230 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; |
|---|
| 2975 | 3231 | data->res.fattr = &data->fattr; |
|---|
| .. | .. |
|---|
| 2982 | 3238 | if (ld->prepare_layoutcommit) { |
|---|
| 2983 | 3239 | status = ld->prepare_layoutcommit(&data->args); |
|---|
| 2984 | 3240 | if (status) { |
|---|
| 2985 | | - put_rpccred(data->cred); |
|---|
| 3241 | + put_cred(data->cred); |
|---|
| 2986 | 3242 | spin_lock(&inode->i_lock); |
|---|
| 2987 | 3243 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); |
|---|
| 2988 | 3244 | if (end_pos > nfsi->layout->plh_lwb) |
|---|