.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * linux/fs/nfs/file.c |
---|
3 | 4 | * |
---|
.. | .. |
---|
89 | 90 | |
---|
90 | 91 | /** |
---|
91 | 92 | * nfs_revalidate_size - Revalidate the file size |
---|
92 | | - * @inode - pointer to inode struct |
---|
93 | | - * @file - pointer to struct file |
---|
| 93 | + * @inode: pointer to inode struct |
---|
| 94 | + * @filp: pointer to struct file |
---|
94 | 95 | * |
---|
95 | 96 | * Revalidates the file length. This is basically a wrapper around |
---|
96 | 97 | * nfs_revalidate_inode() that takes into account the fact that we may |
---|
.. | .. |
---|
139 | 140 | nfs_file_flush(struct file *file, fl_owner_t id) |
---|
140 | 141 | { |
---|
141 | 142 | struct inode *inode = file_inode(file); |
---|
| 143 | + errseq_t since; |
---|
142 | 144 | |
---|
143 | 145 | dprintk("NFS: flush(%pD2)\n", file); |
---|
144 | 146 | |
---|
.. | .. |
---|
147 | 149 | return 0; |
---|
148 | 150 | |
---|
149 | 151 | /* Flush writes to the server and return any errors */ |
---|
150 | | - return vfs_fsync(file, 0); |
---|
| 152 | + since = filemap_sample_wb_err(file->f_mapping); |
---|
| 153 | + nfs_wb_all(inode); |
---|
| 154 | + return filemap_check_wb_err(file->f_mapping, since); |
---|
151 | 155 | } |
---|
152 | 156 | |
---|
153 | 157 | ssize_t |
---|
.. | .. |
---|
157 | 161 | ssize_t result; |
---|
158 | 162 | |
---|
159 | 163 | if (iocb->ki_flags & IOCB_DIRECT) |
---|
160 | | - return nfs_file_direct_read(iocb, to); |
---|
| 164 | + return nfs_file_direct_read(iocb, to, false); |
---|
161 | 165 | |
---|
162 | 166 | dprintk("NFS: read(%pD2, %zu@%lu)\n", |
---|
163 | 167 | iocb->ki_filp, |
---|
.. | .. |
---|
199 | 203 | * Flush any dirty pages for this process, and check for write errors. |
---|
200 | 204 | * The return status from this call provides a reliable indication of |
---|
201 | 205 | * whether any write errors occurred for this process. |
---|
202 | | - * |
---|
203 | | - * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to |
---|
204 | | - * disk, but it retrieves and clears ctx->error after synching, despite |
---|
205 | | - * the two being set at the same time in nfs_context_set_write_error(). |
---|
206 | | - * This is because the former is used to notify the _next_ call to |
---|
207 | | - * nfs_file_write() that a write error occurred, and hence cause it to |
---|
208 | | - * fall back to doing a synchronous write. |
---|
209 | 206 | */ |
---|
210 | 207 | static int |
---|
211 | 208 | nfs_file_fsync_commit(struct file *file, int datasync) |
---|
212 | 209 | { |
---|
213 | | - struct nfs_open_context *ctx = nfs_file_open_context(file); |
---|
214 | 210 | struct inode *inode = file_inode(file); |
---|
215 | | - int do_resend, status; |
---|
216 | | - int ret = 0; |
---|
| 211 | + int ret, ret2; |
---|
217 | 212 | |
---|
218 | 213 | dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); |
---|
219 | 214 | |
---|
220 | 215 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
---|
221 | | - do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); |
---|
222 | | - status = nfs_commit_inode(inode, FLUSH_SYNC); |
---|
223 | | - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { |
---|
224 | | - ret = xchg(&ctx->error, 0); |
---|
225 | | - if (ret) |
---|
226 | | - goto out; |
---|
227 | | - } |
---|
228 | | - if (status < 0) { |
---|
229 | | - ret = status; |
---|
230 | | - goto out; |
---|
231 | | - } |
---|
232 | | - do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); |
---|
233 | | - if (do_resend) |
---|
234 | | - ret = -EAGAIN; |
---|
235 | | -out: |
---|
| 216 | + ret = nfs_commit_inode(inode, FLUSH_SYNC); |
---|
| 217 | + ret2 = file_check_and_advance_wb_err(file); |
---|
| 218 | + if (ret2 < 0) |
---|
| 219 | + return ret2; |
---|
236 | 220 | return ret; |
---|
237 | 221 | } |
---|
238 | 222 | |
---|
239 | 223 | int |
---|
240 | 224 | nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
---|
241 | 225 | { |
---|
242 | | - int ret; |
---|
| 226 | + struct nfs_open_context *ctx = nfs_file_open_context(file); |
---|
243 | 227 | struct inode *inode = file_inode(file); |
---|
| 228 | + int ret; |
---|
244 | 229 | |
---|
245 | 230 | trace_nfs_fsync_enter(inode); |
---|
246 | 231 | |
---|
247 | | - do { |
---|
248 | | - struct nfs_open_context *ctx = nfs_file_open_context(file); |
---|
249 | | - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
---|
250 | | - if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { |
---|
251 | | - int ret2 = xchg(&ctx->error, 0); |
---|
252 | | - if (ret2) |
---|
253 | | - ret = ret2; |
---|
254 | | - } |
---|
| 232 | + for (;;) { |
---|
| 233 | + ret = file_write_and_wait_range(file, start, end); |
---|
255 | 234 | if (ret != 0) |
---|
256 | 235 | break; |
---|
257 | 236 | ret = nfs_file_fsync_commit(file, datasync); |
---|
258 | | - if (!ret) |
---|
259 | | - ret = pnfs_sync_inode(inode, !!datasync); |
---|
| 237 | + if (ret != 0) |
---|
| 238 | + break; |
---|
| 239 | + ret = pnfs_sync_inode(inode, !!datasync); |
---|
| 240 | + if (ret != 0) |
---|
| 241 | + break; |
---|
| 242 | + if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) |
---|
| 243 | + break; |
---|
260 | 244 | /* |
---|
261 | 245 | * If nfs_file_fsync_commit detected a server reboot, then |
---|
262 | 246 | * resend all dirty pages that might have been covered by |
---|
.. | .. |
---|
264 | 248 | */ |
---|
265 | 249 | start = 0; |
---|
266 | 250 | end = LLONG_MAX; |
---|
267 | | - } while (ret == -EAGAIN); |
---|
| 251 | + } |
---|
268 | 252 | |
---|
269 | 253 | trace_nfs_fsync_exit(inode, ret); |
---|
270 | 254 | return ret; |
---|
.. | .. |
---|
275 | 259 | * Decide whether a read/modify/write cycle may be more efficient |
---|
276 | 260 | * than a modify/write/read cycle when writing to a page in the |
---|
277 | 261 | * page cache. |
---|
| 262 | + * |
---|
| 263 | + * Some pNFS layout drivers can only read/write at a certain block |
---|
| 264 | + * granularity like all block devices and therefore we must perform |
---|
| 265 | + * read/modify/write whenever a page hasn't been read yet and the data |
---|
| 266 | + * to be written there is not aligned to a block boundary and/or |
---|
| 267 | + * smaller than the block size. |
---|
278 | 268 | * |
---|
279 | 269 | * The modify/write/read cycle may occur if a page is read before |
---|
280 | 270 | * being completely filled by the writer. In this situation, the |
---|
.. | .. |
---|
291 | 281 | * and that the new data won't completely replace the old data in |
---|
292 | 282 | * that range of the file. |
---|
293 | 283 | */ |
---|
294 | | -static int nfs_want_read_modify_write(struct file *file, struct page *page, |
---|
295 | | - loff_t pos, unsigned len) |
---|
| 284 | +static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len) |
---|
296 | 285 | { |
---|
297 | 286 | unsigned int pglen = nfs_page_length(page); |
---|
298 | 287 | unsigned int offset = pos & (PAGE_SIZE - 1); |
---|
299 | 288 | unsigned int end = offset + len; |
---|
300 | 289 | |
---|
301 | | - if (pnfs_ld_read_whole_page(file->f_mapping->host)) { |
---|
302 | | - if (!PageUptodate(page)) |
---|
303 | | - return 1; |
---|
304 | | - return 0; |
---|
305 | | - } |
---|
| 290 | + return !pglen || (end >= pglen && !offset); |
---|
| 291 | +} |
---|
306 | 292 | |
---|
307 | | - if ((file->f_mode & FMODE_READ) && /* open for read? */ |
---|
308 | | - !PageUptodate(page) && /* Uptodate? */ |
---|
309 | | - !PagePrivate(page) && /* i/o request already? */ |
---|
310 | | - pglen && /* valid bytes of file? */ |
---|
311 | | - (end < pglen || offset)) /* replace all valid bytes? */ |
---|
312 | | - return 1; |
---|
313 | | - return 0; |
---|
| 293 | +static bool nfs_want_read_modify_write(struct file *file, struct page *page, |
---|
| 294 | + loff_t pos, unsigned int len) |
---|
| 295 | +{ |
---|
| 296 | + /* |
---|
| 297 | + * Up-to-date pages, and those with an ongoing or full-page write, |
---|
| 298 | + * don't need read/modify/write. |
---|
| 299 | + */ |
---|
| 300 | + if (PageUptodate(page) || PagePrivate(page) || |
---|
| 301 | + nfs_full_page_write(page, pos, len)) |
---|
| 302 | + return false; |
---|
| 303 | + |
---|
| 304 | + if (pnfs_ld_read_whole_page(file->f_mapping->host)) |
---|
| 305 | + return true; |
---|
| 306 | + /* Open for reading too? */ |
---|
| 307 | + if (file->f_mode & FMODE_READ) |
---|
| 308 | + return true; |
---|
| 309 | + return false; |
---|
314 | 310 | } |
---|
315 | 311 | |
---|
316 | 312 | /* |
---|
.. | .. |
---|
394 | 390 | return status; |
---|
395 | 391 | NFS_I(mapping->host)->write_io += copied; |
---|
396 | 392 | |
---|
397 | | - if (nfs_ctx_key_to_expire(ctx, mapping->host)) { |
---|
398 | | - status = nfs_wb_all(mapping->host); |
---|
399 | | - if (status < 0) |
---|
400 | | - return status; |
---|
401 | | - } |
---|
| 393 | + if (nfs_ctx_key_to_expire(ctx, mapping->host)) |
---|
| 394 | + nfs_wb_all(mapping->host); |
---|
402 | 395 | |
---|
403 | 396 | return copied; |
---|
404 | 397 | } |
---|
.. | .. |
---|
492 | 485 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
---|
493 | 486 | sector_t *span) |
---|
494 | 487 | { |
---|
495 | | - struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
---|
| 488 | + unsigned long blocks; |
---|
| 489 | + long long isize; |
---|
| 490 | + struct inode *inode = file_inode(file); |
---|
| 491 | + struct rpc_clnt *clnt = NFS_CLIENT(inode); |
---|
| 492 | + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; |
---|
| 493 | + |
---|
| 494 | + spin_lock(&inode->i_lock); |
---|
| 495 | + blocks = inode->i_blocks; |
---|
| 496 | + isize = inode->i_size; |
---|
| 497 | + spin_unlock(&inode->i_lock); |
---|
| 498 | + if (blocks*512 < isize) { |
---|
| 499 | + pr_warn("swap activate: swapfile has holes\n"); |
---|
| 500 | + return -EINVAL; |
---|
| 501 | + } |
---|
496 | 502 | |
---|
497 | 503 | *span = sis->pages; |
---|
| 504 | + |
---|
| 505 | + |
---|
| 506 | + if (cl->rpc_ops->enable_swap) |
---|
| 507 | + cl->rpc_ops->enable_swap(inode); |
---|
498 | 508 | |
---|
499 | 509 | return rpc_clnt_swap_activate(clnt); |
---|
500 | 510 | } |
---|
501 | 511 | |
---|
502 | 512 | static void nfs_swap_deactivate(struct file *file) |
---|
503 | 513 | { |
---|
504 | | - struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
---|
| 514 | + struct inode *inode = file_inode(file); |
---|
| 515 | + struct rpc_clnt *clnt = NFS_CLIENT(inode); |
---|
| 516 | + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; |
---|
505 | 517 | |
---|
506 | 518 | rpc_clnt_swap_deactivate(clnt); |
---|
| 519 | + if (cl->rpc_ops->disable_swap) |
---|
| 520 | + cl->rpc_ops->disable_swap(file_inode(file)); |
---|
507 | 521 | } |
---|
508 | 522 | |
---|
509 | 523 | const struct address_space_operations nfs_file_aops = { |
---|
.. | .. |
---|
583 | 597 | .page_mkwrite = nfs_vm_page_mkwrite, |
---|
584 | 598 | }; |
---|
585 | 599 | |
---|
586 | | -static int nfs_need_check_write(struct file *filp, struct inode *inode) |
---|
| 600 | +static int nfs_need_check_write(struct file *filp, struct inode *inode, |
---|
| 601 | + int error) |
---|
587 | 602 | { |
---|
588 | 603 | struct nfs_open_context *ctx; |
---|
589 | 604 | |
---|
590 | 605 | ctx = nfs_file_open_context(filp); |
---|
591 | | - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || |
---|
| 606 | + if (nfs_error_is_fatal_on_server(error) || |
---|
592 | 607 | nfs_ctx_key_to_expire(ctx, inode)) |
---|
593 | 608 | return 1; |
---|
594 | 609 | return 0; |
---|
.. | .. |
---|
600 | 615 | struct inode *inode = file_inode(file); |
---|
601 | 616 | unsigned long written = 0; |
---|
602 | 617 | ssize_t result; |
---|
| 618 | + errseq_t since; |
---|
| 619 | + int error; |
---|
603 | 620 | |
---|
604 | 621 | result = nfs_key_timeout_notify(file, inode); |
---|
605 | 622 | if (result) |
---|
606 | 623 | return result; |
---|
607 | 624 | |
---|
608 | 625 | if (iocb->ki_flags & IOCB_DIRECT) |
---|
609 | | - return nfs_file_direct_write(iocb, from); |
---|
| 626 | + return nfs_file_direct_write(iocb, from, false); |
---|
610 | 627 | |
---|
611 | 628 | dprintk("NFS: write(%pD2, %zu@%Ld)\n", |
---|
612 | 629 | file, iov_iter_count(from), (long long) iocb->ki_pos); |
---|
.. | .. |
---|
624 | 641 | if (iocb->ki_pos > i_size_read(inode)) |
---|
625 | 642 | nfs_revalidate_mapping(inode, file->f_mapping); |
---|
626 | 643 | |
---|
| 644 | + since = filemap_sample_wb_err(file->f_mapping); |
---|
627 | 645 | nfs_start_io_write(inode); |
---|
628 | 646 | result = generic_write_checks(iocb, from); |
---|
629 | 647 | if (result > 0) { |
---|
.. | .. |
---|
642 | 660 | goto out; |
---|
643 | 661 | |
---|
644 | 662 | /* Return error values */ |
---|
645 | | - if (nfs_need_check_write(file, inode)) { |
---|
646 | | - int err = vfs_fsync(file, 0); |
---|
| 663 | + error = filemap_check_wb_err(file->f_mapping, since); |
---|
| 664 | + if (nfs_need_check_write(file, inode, error)) { |
---|
| 665 | + int err = nfs_wb_all(inode); |
---|
647 | 666 | if (err < 0) |
---|
648 | 667 | result = err; |
---|
649 | 668 | } |
---|
.. | .. |
---|
653 | 672 | |
---|
654 | 673 | out_swapfile: |
---|
655 | 674 | printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); |
---|
656 | | - return -EBUSY; |
---|
| 675 | + return -ETXTBSY; |
---|
657 | 676 | } |
---|
658 | 677 | EXPORT_SYMBOL_GPL(nfs_file_write); |
---|
659 | 678 | |
---|
.. | .. |
---|
697 | 716 | * Flush all pending writes before doing anything |
---|
698 | 717 | * with locks.. |
---|
699 | 718 | */ |
---|
700 | | - vfs_fsync(filp, 0); |
---|
| 719 | + nfs_wb_all(inode); |
---|
701 | 720 | |
---|
702 | 721 | l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); |
---|
703 | 722 | if (!IS_ERR(l_ctx)) { |
---|