| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * linux/fs/nfs/file.c |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 89 | 90 | |
|---|
| 90 | 91 | /** |
|---|
| 91 | 92 | * nfs_revalidate_size - Revalidate the file size |
|---|
| 92 | | - * @inode - pointer to inode struct |
|---|
| 93 | | - * @file - pointer to struct file |
|---|
| 93 | + * @inode: pointer to inode struct |
|---|
| 94 | + * @filp: pointer to struct file |
|---|
| 94 | 95 | * |
|---|
| 95 | 96 | * Revalidates the file length. This is basically a wrapper around |
|---|
| 96 | 97 | * nfs_revalidate_inode() that takes into account the fact that we may |
|---|
| .. | .. |
|---|
| 139 | 140 | nfs_file_flush(struct file *file, fl_owner_t id) |
|---|
| 140 | 141 | { |
|---|
| 141 | 142 | struct inode *inode = file_inode(file); |
|---|
| 143 | + errseq_t since; |
|---|
| 142 | 144 | |
|---|
| 143 | 145 | dprintk("NFS: flush(%pD2)\n", file); |
|---|
| 144 | 146 | |
|---|
| .. | .. |
|---|
| 147 | 149 | return 0; |
|---|
| 148 | 150 | |
|---|
| 149 | 151 | /* Flush writes to the server and return any errors */ |
|---|
| 150 | | - return vfs_fsync(file, 0); |
|---|
| 152 | + since = filemap_sample_wb_err(file->f_mapping); |
|---|
| 153 | + nfs_wb_all(inode); |
|---|
| 154 | + return filemap_check_wb_err(file->f_mapping, since); |
|---|
| 151 | 155 | } |
|---|
| 152 | 156 | |
|---|
| 153 | 157 | ssize_t |
|---|
| .. | .. |
|---|
| 157 | 161 | ssize_t result; |
|---|
| 158 | 162 | |
|---|
| 159 | 163 | if (iocb->ki_flags & IOCB_DIRECT) |
|---|
| 160 | | - return nfs_file_direct_read(iocb, to); |
|---|
| 164 | + return nfs_file_direct_read(iocb, to, false); |
|---|
| 161 | 165 | |
|---|
| 162 | 166 | dprintk("NFS: read(%pD2, %zu@%lu)\n", |
|---|
| 163 | 167 | iocb->ki_filp, |
|---|
| .. | .. |
|---|
| 199 | 203 | * Flush any dirty pages for this process, and check for write errors. |
|---|
| 200 | 204 | * The return status from this call provides a reliable indication of |
|---|
| 201 | 205 | * whether any write errors occurred for this process. |
|---|
| 202 | | - * |
|---|
| 203 | | - * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to |
|---|
| 204 | | - * disk, but it retrieves and clears ctx->error after synching, despite |
|---|
| 205 | | - * the two being set at the same time in nfs_context_set_write_error(). |
|---|
| 206 | | - * This is because the former is used to notify the _next_ call to |
|---|
| 207 | | - * nfs_file_write() that a write error occurred, and hence cause it to |
|---|
| 208 | | - * fall back to doing a synchronous write. |
|---|
| 209 | 206 | */ |
|---|
| 210 | 207 | static int |
|---|
| 211 | 208 | nfs_file_fsync_commit(struct file *file, int datasync) |
|---|
| 212 | 209 | { |
|---|
| 213 | | - struct nfs_open_context *ctx = nfs_file_open_context(file); |
|---|
| 214 | 210 | struct inode *inode = file_inode(file); |
|---|
| 215 | | - int do_resend, status; |
|---|
| 216 | | - int ret = 0; |
|---|
| 211 | + int ret, ret2; |
|---|
| 217 | 212 | |
|---|
| 218 | 213 | dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); |
|---|
| 219 | 214 | |
|---|
| 220 | 215 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
|---|
| 221 | | - do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); |
|---|
| 222 | | - status = nfs_commit_inode(inode, FLUSH_SYNC); |
|---|
| 223 | | - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { |
|---|
| 224 | | - ret = xchg(&ctx->error, 0); |
|---|
| 225 | | - if (ret) |
|---|
| 226 | | - goto out; |
|---|
| 227 | | - } |
|---|
| 228 | | - if (status < 0) { |
|---|
| 229 | | - ret = status; |
|---|
| 230 | | - goto out; |
|---|
| 231 | | - } |
|---|
| 232 | | - do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); |
|---|
| 233 | | - if (do_resend) |
|---|
| 234 | | - ret = -EAGAIN; |
|---|
| 235 | | -out: |
|---|
| 216 | + ret = nfs_commit_inode(inode, FLUSH_SYNC); |
|---|
| 217 | + ret2 = file_check_and_advance_wb_err(file); |
|---|
| 218 | + if (ret2 < 0) |
|---|
| 219 | + return ret2; |
|---|
| 236 | 220 | return ret; |
|---|
| 237 | 221 | } |
|---|
| 238 | 222 | |
|---|
| 239 | 223 | int |
|---|
| 240 | 224 | nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
|---|
| 241 | 225 | { |
|---|
| 242 | | - int ret; |
|---|
| 226 | + struct nfs_open_context *ctx = nfs_file_open_context(file); |
|---|
| 243 | 227 | struct inode *inode = file_inode(file); |
|---|
| 228 | + int ret; |
|---|
| 244 | 229 | |
|---|
| 245 | 230 | trace_nfs_fsync_enter(inode); |
|---|
| 246 | 231 | |
|---|
| 247 | | - do { |
|---|
| 248 | | - struct nfs_open_context *ctx = nfs_file_open_context(file); |
|---|
| 249 | | - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
|---|
| 250 | | - if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { |
|---|
| 251 | | - int ret2 = xchg(&ctx->error, 0); |
|---|
| 252 | | - if (ret2) |
|---|
| 253 | | - ret = ret2; |
|---|
| 254 | | - } |
|---|
| 232 | + for (;;) { |
|---|
| 233 | + ret = file_write_and_wait_range(file, start, end); |
|---|
| 255 | 234 | if (ret != 0) |
|---|
| 256 | 235 | break; |
|---|
| 257 | 236 | ret = nfs_file_fsync_commit(file, datasync); |
|---|
| 258 | | - if (!ret) |
|---|
| 259 | | - ret = pnfs_sync_inode(inode, !!datasync); |
|---|
| 237 | + if (ret != 0) |
|---|
| 238 | + break; |
|---|
| 239 | + ret = pnfs_sync_inode(inode, !!datasync); |
|---|
| 240 | + if (ret != 0) |
|---|
| 241 | + break; |
|---|
| 242 | + if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) |
|---|
| 243 | + break; |
|---|
| 260 | 244 | /* |
|---|
| 261 | 245 | * If nfs_file_fsync_commit detected a server reboot, then |
|---|
| 262 | 246 | * resend all dirty pages that might have been covered by |
|---|
| .. | .. |
|---|
| 264 | 248 | */ |
|---|
| 265 | 249 | start = 0; |
|---|
| 266 | 250 | end = LLONG_MAX; |
|---|
| 267 | | - } while (ret == -EAGAIN); |
|---|
| 251 | + } |
|---|
| 268 | 252 | |
|---|
| 269 | 253 | trace_nfs_fsync_exit(inode, ret); |
|---|
| 270 | 254 | return ret; |
|---|
| .. | .. |
|---|
| 275 | 259 | * Decide whether a read/modify/write cycle may be more efficient |
|---|
| 276 | 260 | * than a modify/write/read cycle when writing to a page in the |
|---|
| 277 | 261 | * page cache. |
|---|
| 262 | + * |
|---|
| 263 | + * Some pNFS layout drivers can only read/write at a certain block |
|---|
| 264 | + * granularity like all block devices and therefore we must perform |
|---|
| 265 | + * read/modify/write whenever a page hasn't been read yet and the data |
|---|
| 266 | + * to be written there is not aligned to a block boundary and/or |
|---|
| 267 | + * smaller than the block size. |
|---|
| 278 | 268 | * |
|---|
| 279 | 269 | * The modify/write/read cycle may occur if a page is read before |
|---|
| 280 | 270 | * being completely filled by the writer. In this situation, the |
|---|
| .. | .. |
|---|
| 291 | 281 | * and that the new data won't completely replace the old data in |
|---|
| 292 | 282 | * that range of the file. |
|---|
| 293 | 283 | */ |
|---|
| 294 | | -static int nfs_want_read_modify_write(struct file *file, struct page *page, |
|---|
| 295 | | - loff_t pos, unsigned len) |
|---|
| 284 | +static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len) |
|---|
| 296 | 285 | { |
|---|
| 297 | 286 | unsigned int pglen = nfs_page_length(page); |
|---|
| 298 | 287 | unsigned int offset = pos & (PAGE_SIZE - 1); |
|---|
| 299 | 288 | unsigned int end = offset + len; |
|---|
| 300 | 289 | |
|---|
| 301 | | - if (pnfs_ld_read_whole_page(file->f_mapping->host)) { |
|---|
| 302 | | - if (!PageUptodate(page)) |
|---|
| 303 | | - return 1; |
|---|
| 304 | | - return 0; |
|---|
| 305 | | - } |
|---|
| 290 | + return !pglen || (end >= pglen && !offset); |
|---|
| 291 | +} |
|---|
| 306 | 292 | |
|---|
| 307 | | - if ((file->f_mode & FMODE_READ) && /* open for read? */ |
|---|
| 308 | | - !PageUptodate(page) && /* Uptodate? */ |
|---|
| 309 | | - !PagePrivate(page) && /* i/o request already? */ |
|---|
| 310 | | - pglen && /* valid bytes of file? */ |
|---|
| 311 | | - (end < pglen || offset)) /* replace all valid bytes? */ |
|---|
| 312 | | - return 1; |
|---|
| 313 | | - return 0; |
|---|
| 293 | +static bool nfs_want_read_modify_write(struct file *file, struct page *page, |
|---|
| 294 | + loff_t pos, unsigned int len) |
|---|
| 295 | +{ |
|---|
| 296 | + /* |
|---|
| 297 | + * Up-to-date pages, those with ongoing or full-page write |
|---|
| 298 | + * don't need read/modify/write |
|---|
| 299 | + */ |
|---|
| 300 | + if (PageUptodate(page) || PagePrivate(page) || |
|---|
| 301 | + nfs_full_page_write(page, pos, len)) |
|---|
| 302 | + return false; |
|---|
| 303 | + |
|---|
| 304 | + if (pnfs_ld_read_whole_page(file->f_mapping->host)) |
|---|
| 305 | + return true; |
|---|
| 306 | + /* Open for reading too? */ |
|---|
| 307 | + if (file->f_mode & FMODE_READ) |
|---|
| 308 | + return true; |
|---|
| 309 | + return false; |
|---|
| 314 | 310 | } |
|---|
| 315 | 311 | |
|---|
| 316 | 312 | /* |
|---|
| .. | .. |
|---|
| 394 | 390 | return status; |
|---|
| 395 | 391 | NFS_I(mapping->host)->write_io += copied; |
|---|
| 396 | 392 | |
|---|
| 397 | | - if (nfs_ctx_key_to_expire(ctx, mapping->host)) { |
|---|
| 398 | | - status = nfs_wb_all(mapping->host); |
|---|
| 399 | | - if (status < 0) |
|---|
| 400 | | - return status; |
|---|
| 401 | | - } |
|---|
| 393 | + if (nfs_ctx_key_to_expire(ctx, mapping->host)) |
|---|
| 394 | + nfs_wb_all(mapping->host); |
|---|
| 402 | 395 | |
|---|
| 403 | 396 | return copied; |
|---|
| 404 | 397 | } |
|---|
| .. | .. |
|---|
| 492 | 485 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
|---|
| 493 | 486 | sector_t *span) |
|---|
| 494 | 487 | { |
|---|
| 495 | | - struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
|---|
| 488 | + unsigned long blocks; |
|---|
| 489 | + long long isize; |
|---|
| 490 | + struct inode *inode = file_inode(file); |
|---|
| 491 | + struct rpc_clnt *clnt = NFS_CLIENT(inode); |
|---|
| 492 | + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; |
|---|
| 493 | + |
|---|
| 494 | + spin_lock(&inode->i_lock); |
|---|
| 495 | + blocks = inode->i_blocks; |
|---|
| 496 | + isize = inode->i_size; |
|---|
| 497 | + spin_unlock(&inode->i_lock); |
|---|
| 498 | + if (blocks*512 < isize) { |
|---|
| 499 | + pr_warn("swap activate: swapfile has holes\n"); |
|---|
| 500 | + return -EINVAL; |
|---|
| 501 | + } |
|---|
| 496 | 502 | |
|---|
| 497 | 503 | *span = sis->pages; |
|---|
| 504 | + |
|---|
| 505 | + |
|---|
| 506 | + if (cl->rpc_ops->enable_swap) |
|---|
| 507 | + cl->rpc_ops->enable_swap(inode); |
|---|
| 498 | 508 | |
|---|
| 499 | 509 | return rpc_clnt_swap_activate(clnt); |
|---|
| 500 | 510 | } |
|---|
| 501 | 511 | |
|---|
| 502 | 512 | static void nfs_swap_deactivate(struct file *file) |
|---|
| 503 | 513 | { |
|---|
| 504 | | - struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
|---|
| 514 | + struct inode *inode = file_inode(file); |
|---|
| 515 | + struct rpc_clnt *clnt = NFS_CLIENT(inode); |
|---|
| 516 | + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; |
|---|
| 505 | 517 | |
|---|
| 506 | 518 | rpc_clnt_swap_deactivate(clnt); |
|---|
| 519 | + if (cl->rpc_ops->disable_swap) |
|---|
| 520 | + cl->rpc_ops->disable_swap(file_inode(file)); |
|---|
| 507 | 521 | } |
|---|
| 508 | 522 | |
|---|
| 509 | 523 | const struct address_space_operations nfs_file_aops = { |
|---|
| .. | .. |
|---|
| 583 | 597 | .page_mkwrite = nfs_vm_page_mkwrite, |
|---|
| 584 | 598 | }; |
|---|
| 585 | 599 | |
|---|
| 586 | | -static int nfs_need_check_write(struct file *filp, struct inode *inode) |
|---|
| 600 | +static int nfs_need_check_write(struct file *filp, struct inode *inode, |
|---|
| 601 | + int error) |
|---|
| 587 | 602 | { |
|---|
| 588 | 603 | struct nfs_open_context *ctx; |
|---|
| 589 | 604 | |
|---|
| 590 | 605 | ctx = nfs_file_open_context(filp); |
|---|
| 591 | | - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || |
|---|
| 606 | + if (nfs_error_is_fatal_on_server(error) || |
|---|
| 592 | 607 | nfs_ctx_key_to_expire(ctx, inode)) |
|---|
| 593 | 608 | return 1; |
|---|
| 594 | 609 | return 0; |
|---|
| .. | .. |
|---|
| 600 | 615 | struct inode *inode = file_inode(file); |
|---|
| 601 | 616 | unsigned long written = 0; |
|---|
| 602 | 617 | ssize_t result; |
|---|
| 618 | + errseq_t since; |
|---|
| 619 | + int error; |
|---|
| 603 | 620 | |
|---|
| 604 | 621 | result = nfs_key_timeout_notify(file, inode); |
|---|
| 605 | 622 | if (result) |
|---|
| 606 | 623 | return result; |
|---|
| 607 | 624 | |
|---|
| 608 | 625 | if (iocb->ki_flags & IOCB_DIRECT) |
|---|
| 609 | | - return nfs_file_direct_write(iocb, from); |
|---|
| 626 | + return nfs_file_direct_write(iocb, from, false); |
|---|
| 610 | 627 | |
|---|
| 611 | 628 | dprintk("NFS: write(%pD2, %zu@%Ld)\n", |
|---|
| 612 | 629 | file, iov_iter_count(from), (long long) iocb->ki_pos); |
|---|
| .. | .. |
|---|
| 624 | 641 | if (iocb->ki_pos > i_size_read(inode)) |
|---|
| 625 | 642 | nfs_revalidate_mapping(inode, file->f_mapping); |
|---|
| 626 | 643 | |
|---|
| 644 | + since = filemap_sample_wb_err(file->f_mapping); |
|---|
| 627 | 645 | nfs_start_io_write(inode); |
|---|
| 628 | 646 | result = generic_write_checks(iocb, from); |
|---|
| 629 | 647 | if (result > 0) { |
|---|
| .. | .. |
|---|
| 642 | 660 | goto out; |
|---|
| 643 | 661 | |
|---|
| 644 | 662 | /* Return error values */ |
|---|
| 645 | | - if (nfs_need_check_write(file, inode)) { |
|---|
| 646 | | - int err = vfs_fsync(file, 0); |
|---|
| 663 | + error = filemap_check_wb_err(file->f_mapping, since); |
|---|
| 664 | + if (nfs_need_check_write(file, inode, error)) { |
|---|
| 665 | + int err = nfs_wb_all(inode); |
|---|
| 647 | 666 | if (err < 0) |
|---|
| 648 | 667 | result = err; |
|---|
| 649 | 668 | } |
|---|
| .. | .. |
|---|
| 653 | 672 | |
|---|
| 654 | 673 | out_swapfile: |
|---|
| 655 | 674 | printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); |
|---|
| 656 | | - return -EBUSY; |
|---|
| 675 | + return -ETXTBSY; |
|---|
| 657 | 676 | } |
|---|
| 658 | 677 | EXPORT_SYMBOL_GPL(nfs_file_write); |
|---|
| 659 | 678 | |
|---|
| .. | .. |
|---|
| 697 | 716 | * Flush all pending writes before doing anything |
|---|
| 698 | 717 | * with locks.. |
|---|
| 699 | 718 | */ |
|---|
| 700 | | - vfs_fsync(filp, 0); |
|---|
| 719 | + nfs_wb_all(inode); |
|---|
| 701 | 720 | |
|---|
| 702 | 721 | l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); |
|---|
| 703 | 722 | if (!IS_ERR(l_ctx)) { |
|---|