| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * linux/fs/nfs/direct.c |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 63 | 64 | |
|---|
| 64 | 65 | static struct kmem_cache *nfs_direct_cachep; |
|---|
| 65 | 66 | |
|---|
| 66 | | -/* |
|---|
| 67 | | - * This represents a set of asynchronous requests that we're waiting on |
|---|
| 68 | | - */ |
|---|
| 69 | | -struct nfs_direct_mirror { |
|---|
| 70 | | - ssize_t count; |
|---|
| 71 | | -}; |
|---|
| 72 | | - |
|---|
| 73 | 67 | struct nfs_direct_req { |
|---|
| 74 | 68 | struct kref kref; /* release manager */ |
|---|
| 75 | 69 | |
|---|
| .. | .. |
|---|
| 82 | 76 | /* completion state */ |
|---|
| 83 | 77 | atomic_t io_count; /* i/os we're waiting for */ |
|---|
| 84 | 78 | spinlock_t lock; /* protect completion state */ |
|---|
| 85 | | - |
|---|
| 86 | | - struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; |
|---|
| 87 | | - int mirror_count; |
|---|
| 88 | 79 | |
|---|
| 89 | 80 | loff_t io_start; /* Start offset for I/O */ |
|---|
| 90 | 81 | ssize_t count, /* bytes actually processed */ |
|---|
| .. | .. |
|---|
| 103 | 94 | #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ |
|---|
| 104 | 95 | /* for read */ |
|---|
| 105 | 96 | #define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ |
|---|
| 106 | | - struct nfs_writeverf verf; /* unstable write verifier */ |
|---|
| 97 | +#define NFS_ODIRECT_DONE INT_MAX /* request is finished (possibly with a fatal error): no commit or resend */ |
|---|
| 107 | 98 | }; |
|---|
| 108 | 99 | |
|---|
| 109 | 100 | static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; |
|---|
| .. | .. |
|---|
| 126 | 117 | const struct nfs_pgio_header *hdr, |
|---|
| 127 | 118 | ssize_t dreq_len) |
|---|
| 128 | 119 | { |
|---|
| 129 | | - struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx]; |
|---|
| 130 | | - |
|---|
| 131 | 120 | if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) || |
|---|
| 132 | 121 | test_bit(NFS_IOHDR_EOF, &hdr->flags))) |
|---|
| 133 | 122 | return; |
|---|
| .. | .. |
|---|
| 141 | 130 | else /* Clear outstanding error if this is EOF */ |
|---|
| 142 | 131 | dreq->error = 0; |
|---|
| 143 | 132 | } |
|---|
| 144 | | - if (mirror->count > dreq_len) |
|---|
| 145 | | - mirror->count = dreq_len; |
|---|
| 146 | 133 | } |
|---|
| 147 | 134 | |
|---|
| 148 | 135 | static void |
|---|
| 149 | 136 | nfs_direct_count_bytes(struct nfs_direct_req *dreq, |
|---|
| 150 | 137 | const struct nfs_pgio_header *hdr) |
|---|
| 151 | 138 | { |
|---|
| 152 | | - struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx]; |
|---|
| 153 | 139 | loff_t hdr_end = hdr->io_start + hdr->good_bytes; |
|---|
| 154 | 140 | ssize_t dreq_len = 0; |
|---|
| 155 | 141 | |
|---|
| .. | .. |
|---|
| 161 | 147 | if (dreq_len > dreq->max_count) |
|---|
| 162 | 148 | dreq_len = dreq->max_count; |
|---|
| 163 | 149 | |
|---|
| 164 | | - if (mirror->count < dreq_len) |
|---|
| 165 | | - mirror->count = dreq_len; |
|---|
| 166 | 150 | if (dreq->count < dreq_len) |
|---|
| 167 | 151 | dreq->count = dreq_len; |
|---|
| 168 | | -} |
|---|
| 169 | | - |
|---|
| 170 | | -/* |
|---|
| 171 | | - * nfs_direct_select_verf - select the right verifier |
|---|
| 172 | | - * @dreq - direct request possibly spanning multiple servers |
|---|
| 173 | | - * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs |
|---|
| 174 | | - * @commit_idx - commit bucket index for the DS |
|---|
| 175 | | - * |
|---|
| 176 | | - * returns the correct verifier to use given the role of the server |
|---|
| 177 | | - */ |
|---|
| 178 | | -static struct nfs_writeverf * |
|---|
| 179 | | -nfs_direct_select_verf(struct nfs_direct_req *dreq, |
|---|
| 180 | | - struct nfs_client *ds_clp, |
|---|
| 181 | | - int commit_idx) |
|---|
| 182 | | -{ |
|---|
| 183 | | - struct nfs_writeverf *verfp = &dreq->verf; |
|---|
| 184 | | - |
|---|
| 185 | | -#ifdef CONFIG_NFS_V4_1 |
|---|
| 186 | | - /* |
|---|
| 187 | | - * pNFS is in use, use the DS verf except commit_through_mds is set |
|---|
| 188 | | - * for layout segment where nbuckets is zero. |
|---|
| 189 | | - */ |
|---|
| 190 | | - if (ds_clp && dreq->ds_cinfo.nbuckets > 0) { |
|---|
| 191 | | - if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) |
|---|
| 192 | | - verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; |
|---|
| 193 | | - else |
|---|
| 194 | | - WARN_ON_ONCE(1); |
|---|
| 195 | | - } |
|---|
| 196 | | -#endif |
|---|
| 197 | | - return verfp; |
|---|
| 198 | | -} |
|---|
| 199 | | - |
|---|
| 200 | | - |
|---|
| 201 | | -/* |
|---|
| 202 | | - * nfs_direct_set_hdr_verf - set the write/commit verifier |
|---|
| 203 | | - * @dreq - direct request possibly spanning multiple servers |
|---|
| 204 | | - * @hdr - pageio header to validate against previously seen verfs |
|---|
| 205 | | - * |
|---|
| 206 | | - * Set the server's (MDS or DS) "seen" verifier |
|---|
| 207 | | - */ |
|---|
| 208 | | -static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, |
|---|
| 209 | | - struct nfs_pgio_header *hdr) |
|---|
| 210 | | -{ |
|---|
| 211 | | - struct nfs_writeverf *verfp; |
|---|
| 212 | | - |
|---|
| 213 | | - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); |
|---|
| 214 | | - WARN_ON_ONCE(verfp->committed >= 0); |
|---|
| 215 | | - memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); |
|---|
| 216 | | - WARN_ON_ONCE(verfp->committed < 0); |
|---|
| 217 | | -} |
|---|
| 218 | | - |
|---|
| 219 | | -static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1, |
|---|
| 220 | | - const struct nfs_writeverf *v2) |
|---|
| 221 | | -{ |
|---|
| 222 | | - return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier); |
|---|
| 223 | | -} |
|---|
| 224 | | - |
|---|
| 225 | | -/* |
|---|
| 226 | | - * nfs_direct_cmp_hdr_verf - compare verifier for pgio header |
|---|
| 227 | | - * @dreq - direct request possibly spanning multiple servers |
|---|
| 228 | | - * @hdr - pageio header to validate against previously seen verf |
|---|
| 229 | | - * |
|---|
| 230 | | - * set the server's "seen" verf if not initialized. |
|---|
| 231 | | - * returns result of comparison between @hdr->verf and the "seen" |
|---|
| 232 | | - * verf of the server used by @hdr (DS or MDS) |
|---|
| 233 | | - */ |
|---|
| 234 | | -static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, |
|---|
| 235 | | - struct nfs_pgio_header *hdr) |
|---|
| 236 | | -{ |
|---|
| 237 | | - struct nfs_writeverf *verfp; |
|---|
| 238 | | - |
|---|
| 239 | | - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); |
|---|
| 240 | | - if (verfp->committed < 0) { |
|---|
| 241 | | - nfs_direct_set_hdr_verf(dreq, hdr); |
|---|
| 242 | | - return 0; |
|---|
| 243 | | - } |
|---|
| 244 | | - return nfs_direct_cmp_verf(verfp, &hdr->verf); |
|---|
| 245 | | -} |
|---|
| 246 | | - |
|---|
| 247 | | -/* |
|---|
| 248 | | - * nfs_direct_cmp_commit_data_verf - compare verifier for commit data |
|---|
| 249 | | - * @dreq - direct request possibly spanning multiple servers |
|---|
| 250 | | - * @data - commit data to validate against previously seen verf |
|---|
| 251 | | - * |
|---|
| 252 | | - * returns result of comparison between @data->verf and the verf of |
|---|
| 253 | | - * the server used by @data (DS or MDS) |
|---|
| 254 | | - */ |
|---|
| 255 | | -static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, |
|---|
| 256 | | - struct nfs_commit_data *data) |
|---|
| 257 | | -{ |
|---|
| 258 | | - struct nfs_writeverf *verfp; |
|---|
| 259 | | - |
|---|
| 260 | | - verfp = nfs_direct_select_verf(dreq, data->ds_clp, |
|---|
| 261 | | - data->ds_commit_index); |
|---|
| 262 | | - |
|---|
| 263 | | - /* verifier not set so always fail */ |
|---|
| 264 | | - if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) |
|---|
| 265 | | - return 1; |
|---|
| 266 | | - |
|---|
| 267 | | - return nfs_direct_cmp_verf(verfp, data->res.verf); |
|---|
| 268 | 152 | } |
|---|
| 269 | 153 | |
|---|
| 270 | 154 | /** |
|---|
| .. | .. |
|---|
| 288 | 172 | VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); |
|---|
| 289 | 173 | |
|---|
| 290 | 174 | if (iov_iter_rw(iter) == READ) |
|---|
| 291 | | - return nfs_file_direct_read(iocb, iter); |
|---|
| 292 | | - return nfs_file_direct_write(iocb, iter); |
|---|
| 175 | + return nfs_file_direct_read(iocb, iter, true); |
|---|
| 176 | + return nfs_file_direct_write(iocb, iter, true); |
|---|
| 293 | 177 | } |
|---|
| 294 | 178 | |
|---|
| 295 | 179 | static void nfs_direct_release_pages(struct page **pages, unsigned int npages) |
|---|
| .. | .. |
|---|
| 309 | 193 | cinfo->completion_ops = &nfs_direct_commit_completion_ops; |
|---|
| 310 | 194 | } |
|---|
| 311 | 195 | |
|---|
| 312 | | -static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, |
|---|
| 313 | | - struct nfs_pageio_descriptor *pgio, |
|---|
| 314 | | - struct nfs_page *req) |
|---|
| 315 | | -{ |
|---|
| 316 | | - int mirror_count = 1; |
|---|
| 317 | | - |
|---|
| 318 | | - if (pgio->pg_ops->pg_get_mirror_count) |
|---|
| 319 | | - mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); |
|---|
| 320 | | - |
|---|
| 321 | | - dreq->mirror_count = mirror_count; |
|---|
| 322 | | -} |
|---|
| 323 | | - |
|---|
| 324 | 196 | static inline struct nfs_direct_req *nfs_direct_req_alloc(void) |
|---|
| 325 | 197 | { |
|---|
| 326 | 198 | struct nfs_direct_req *dreq; |
|---|
| .. | .. |
|---|
| 333 | 205 | kref_get(&dreq->kref); |
|---|
| 334 | 206 | init_completion(&dreq->completion); |
|---|
| 335 | 207 | INIT_LIST_HEAD(&dreq->mds_cinfo.list); |
|---|
| 336 | | - dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ |
|---|
| 208 | + pnfs_init_ds_commit_info(&dreq->ds_cinfo); |
|---|
| 337 | 209 | INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); |
|---|
| 338 | | - dreq->mirror_count = 1; |
|---|
| 339 | 210 | spin_lock_init(&dreq->lock); |
|---|
| 340 | 211 | |
|---|
| 341 | 212 | return dreq; |
|---|
| .. | .. |
|---|
| 345 | 216 | { |
|---|
| 346 | 217 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); |
|---|
| 347 | 218 | |
|---|
| 348 | | - nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); |
|---|
| 219 | + pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode); |
|---|
| 349 | 220 | if (dreq->l_ctx != NULL) |
|---|
| 350 | 221 | nfs_put_lock_context(dreq->l_ctx); |
|---|
| 351 | 222 | if (dreq->ctx != NULL) |
|---|
| .. | .. |
|---|
| 507 | 378 | struct nfs_page *req; |
|---|
| 508 | 379 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); |
|---|
| 509 | 380 | /* XXX do we need to do the eof zeroing found in async_filler? */ |
|---|
| 510 | | - req = nfs_create_request(dreq->ctx, pagevec[i], NULL, |
|---|
| 381 | + req = nfs_create_request(dreq->ctx, pagevec[i], |
|---|
| 511 | 382 | pgbase, req_len); |
|---|
| 512 | 383 | if (IS_ERR(req)) { |
|---|
| 513 | 384 | result = PTR_ERR(req); |
|---|
| .. | .. |
|---|
| 553 | 424 | * nfs_file_direct_read - file direct read operation for NFS files |
|---|
| 554 | 425 | * @iocb: target I/O control block |
|---|
| 555 | 426 | * @iter: vector of user buffers into which to read data |
|---|
| 427 | + * @swap: flag indicating this is swap IO, not O_DIRECT IO |
|---|
| 556 | 428 | * |
|---|
| 557 | 429 | * We use this function for direct reads instead of calling |
|---|
| 558 | 430 | * generic_file_aio_read() in order to avoid gfar's check to see if |
|---|
| .. | .. |
|---|
| 568 | 440 | * client must read the updated atime from the server back into its |
|---|
| 569 | 441 | * cache. |
|---|
| 570 | 442 | */ |
|---|
| 571 | | -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) |
|---|
| 443 | +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, |
|---|
| 444 | + bool swap) |
|---|
| 572 | 445 | { |
|---|
| 573 | 446 | struct file *file = iocb->ki_filp; |
|---|
| 574 | 447 | struct address_space *mapping = file->f_mapping; |
|---|
| 575 | 448 | struct inode *inode = mapping->host; |
|---|
| 576 | 449 | struct nfs_direct_req *dreq; |
|---|
| 577 | 450 | struct nfs_lock_context *l_ctx; |
|---|
| 578 | | - ssize_t result = -EINVAL, requested; |
|---|
| 451 | + ssize_t result, requested; |
|---|
| 579 | 452 | size_t count = iov_iter_count(iter); |
|---|
| 580 | 453 | nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); |
|---|
| 581 | 454 | |
|---|
| .. | .. |
|---|
| 610 | 483 | if (iter_is_iovec(iter)) |
|---|
| 611 | 484 | dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; |
|---|
| 612 | 485 | |
|---|
| 613 | | - nfs_start_io_direct(inode); |
|---|
| 486 | + if (!swap) |
|---|
| 487 | + nfs_start_io_direct(inode); |
|---|
| 614 | 488 | |
|---|
| 615 | 489 | NFS_I(inode)->read_io += count; |
|---|
| 616 | 490 | requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); |
|---|
| 617 | 491 | |
|---|
| 618 | | - nfs_end_io_direct(inode); |
|---|
| 492 | + if (!swap) |
|---|
| 493 | + nfs_end_io_direct(inode); |
|---|
| 619 | 494 | |
|---|
| 620 | 495 | if (requested > 0) { |
|---|
| 621 | 496 | result = nfs_direct_wait(dreq); |
|---|
| .. | .. |
|---|
| 635 | 510 | } |
|---|
| 636 | 511 | |
|---|
| 637 | 512 | static void |
|---|
| 513 | +nfs_direct_join_group(struct list_head *list, struct inode *inode) |
|---|
| 514 | +{ /* For each group-head request on @list: take its other group members off their lists, release them, then call nfs_join_page_group() on the head. */ |
|---|
| 515 | + struct nfs_page *req, *next; |
|---|
| 516 | + |
|---|
| 517 | + list_for_each_entry(req, list, wb_list) { |
|---|
| 518 | + if (req->wb_head != req || req->wb_this_page == req) /* only handle requests that are their own wb_head and whose wb_this_page points at another request */ |
|---|
| 519 | + continue; |
|---|
| 520 | + for (next = req->wb_this_page; /* walk the wb_this_page chain, starting after the head */ |
|---|
| 521 | + next != req->wb_head; /* stop once the chain is back at the head */ |
|---|
| 522 | + next = next->wb_this_page) { /* NOTE(review): this reads next after nfs_release_request(next) below - confirm another reference keeps it alive */ |
|---|
| 523 | + nfs_list_remove_request(next); /* NOTE(review): done unconditionally - confirm every group member is actually on a list here */ |
|---|
| 524 | + nfs_release_request(next); /* presumably drops the reference that went with the list membership - verify */ |
|---|
| 525 | + } |
|---|
| 526 | + nfs_join_page_group(req, inode); /* helper not shown; presumably folds the group back into the head request */ |
|---|
| 527 | + } |
|---|
| 528 | +} |
|---|
| 529 | + |
|---|
| 530 | +static void |
|---|
| 638 | 531 | nfs_direct_write_scan_commit_list(struct inode *inode, |
|---|
| 639 | 532 | struct list_head *list, |
|---|
| 640 | 533 | struct nfs_commit_info *cinfo) |
|---|
| 641 | 534 | { /* Collect requests awaiting commit into @list while holding the inode's commit_mutex. NOTE(review): with the removed rows gone, @inode is no longer referenced in this body. */ |
|---|
| 642 | 535 | mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); /* the lock is taken on cinfo->inode, not on the @inode argument */ |
|---|
| 643 | | -#ifdef CONFIG_NFS_V4_1 |
|---|
| 644 | | - if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) |
|---|
| 645 | | - NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); |
|---|
| 646 | | -#endif |
|---|
| 536 | + pnfs_recover_commit_reqs(list, cinfo); /* replaces the removed open-coded recover_commit_reqs call; helper not shown, presumably performs the equivalent checks - verify */ |
|---|
| 647 | 537 | nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); /* presumably moves requests from the MDS commit list onto @list; meaning of the final 0 is not visible here */ |
|---|
| 648 | 538 | mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); |
|---|
| 649 | 539 | } |
|---|
| .. | .. |
|---|
| 655 | 545 | LIST_HEAD(reqs); |
|---|
| 656 | 546 | struct nfs_commit_info cinfo; |
|---|
| 657 | 547 | LIST_HEAD(failed); |
|---|
| 658 | | - int i; |
|---|
| 659 | 548 | |
|---|
| 660 | 549 | nfs_init_cinfo_from_dreq(&cinfo, dreq); |
|---|
| 661 | 550 | nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); |
|---|
| 551 | + |
|---|
| 552 | + nfs_direct_join_group(&reqs, dreq->inode); |
|---|
| 662 | 553 | |
|---|
| 663 | 554 | dreq->count = 0; |
|---|
| 664 | 555 | dreq->max_count = 0; |
|---|
| 665 | 556 | list_for_each_entry(req, &reqs, wb_list) |
|---|
| 666 | 557 | dreq->max_count += req->wb_bytes; |
|---|
| 667 | | - dreq->verf.committed = NFS_INVALID_STABLE_HOW; |
|---|
| 668 | 558 | nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); |
|---|
| 669 | | - for (i = 0; i < dreq->mirror_count; i++) |
|---|
| 670 | | - dreq->mirrors[i].count = 0; |
|---|
| 671 | 559 | get_dreq(dreq); |
|---|
| 672 | 560 | |
|---|
| 673 | 561 | nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, |
|---|
| 674 | 562 | &nfs_direct_write_completion_ops); |
|---|
| 675 | 563 | desc.pg_dreq = dreq; |
|---|
| 676 | 564 | |
|---|
| 677 | | - req = nfs_list_entry(reqs.next); |
|---|
| 678 | | - nfs_direct_setup_mirroring(dreq, &desc, req); |
|---|
| 679 | | - if (desc.pg_error < 0) { |
|---|
| 680 | | - list_splice_init(&reqs, &failed); |
|---|
| 681 | | - goto out_failed; |
|---|
| 682 | | - } |
|---|
| 683 | | - |
|---|
| 684 | 565 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { |
|---|
| 566 | + /* Bump the transmission count */ |
|---|
| 567 | + req->wb_nio++; |
|---|
| 685 | 568 | if (!nfs_pageio_add_request(&desc, req)) { |
|---|
| 686 | 569 | nfs_list_move_request(req, &failed); |
|---|
| 687 | 570 | spin_lock(&cinfo.inode->i_lock); |
|---|
| .. | .. |
|---|
| 696 | 579 | } |
|---|
| 697 | 580 | nfs_pageio_complete(&desc); |
|---|
| 698 | 581 | |
|---|
| 699 | | -out_failed: |
|---|
| 700 | 582 | while (!list_empty(&failed)) { |
|---|
| 701 | 583 | req = nfs_list_entry(failed.next); |
|---|
| 702 | 584 | nfs_list_remove_request(req); |
|---|
| .. | .. |
|---|
| 709 | 591 | |
|---|
| 710 | 592 | static void nfs_direct_commit_complete(struct nfs_commit_data *data) |
|---|
| 711 | 593 | { /* Handle one finished commit: record a fatal error if it failed, then release or re-queue every request in data->pages, and complete the direct write when nfs_commit_end() returns true. */ |
|---|
| 594 | + const struct nfs_writeverf *verf = data->res.verf; |
|---|
| 712 | 595 | struct nfs_direct_req *dreq = data->dreq; |
|---|
| 713 | 596 | struct nfs_commit_info cinfo; |
|---|
| 714 | 597 | struct nfs_page *req; |
|---|
| 715 | 598 | int status = data->task.tk_status; |
|---|
| 716 | 599 | |
|---|
| 600 | + if (status < 0) { |
|---|
| 601 | + /* Errors in commit are fatal */ |
|---|
| 602 | + dreq->error = status; |
|---|
| 603 | + dreq->max_count = 0; |
|---|
| 604 | + dreq->count = 0; |
|---|
| 605 | + dreq->flags = NFS_ODIRECT_DONE; /* NOTE(review): flags and error are written here without dreq->lock, while other code in this file sets flags under that lock - confirm this is safe */ |
|---|
| 606 | + } else if (dreq->flags == NFS_ODIRECT_DONE) /* the request was already marked done earlier: reuse the error stored on it */ |
|---|
| 607 | + status = dreq->error; |
|---|
| 608 | + |
|---|
| 717 | 609 | nfs_init_cinfo_from_dreq(&cinfo, dreq); |
|---|
| 718 | | - if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data)) |
|---|
| 719 | | - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
|---|
| 720 | 610 | |
|---|
| 721 | 611 | while (!list_empty(&data->pages)) { /* drain data->pages one request at a time */ |
|---|
| 722 | 612 | req = nfs_list_entry(data->pages.next); |
|---|
| 723 | 613 | nfs_list_remove_request(req); |
|---|
| 724 | | - if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { |
|---|
| 725 | | - /* Note the rewrite will go through mds */ |
|---|
| 614 | + if (status >= 0 && !nfs_write_match_verf(verf, req)) { /* commit succeeded but the verifier check failed for this request; presumably compared against req->wb_verf - verify */ |
|---|
| 615 | + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
|---|
| 616 | + /* |
|---|
| 617 | + * Despite the reboot, the write was successful, |
|---|
| 618 | + * so reset wb_nio. |
|---|
| 619 | + */ |
|---|
| 620 | + req->wb_nio = 0; |
|---|
| 726 | 621 | nfs_mark_request_commit(req, NULL, &cinfo, 0); /* re-queue for commit with a NULL lseg and index 0 */ |
|---|
| 727 | | - } else |
|---|
| 622 | + } else /* Error or match */ |
|---|
| 728 | 623 | nfs_release_request(req); /* not re-queued: presumably drops the reference held for the commit - verify */ |
|---|
| 729 | 624 | nfs_unlock_and_release_request(req); /* runs on both paths */ |
|---|
| 730 | 625 | } |
|---|
| 731 | 626 | |
|---|
| 732 | | - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) |
|---|
| 627 | + if (nfs_commit_end(cinfo.mds)) /* replaces the removed atomic_dec_and_test on rpcs_out; presumably true when no commit RPCs remain outstanding - verify */ |
|---|
| 733 | 628 | nfs_direct_write_complete(dreq); |
|---|
| 734 | 629 | } |
|---|
| 735 | 630 | |
|---|
| .. | .. |
|---|
| 739 | 634 | struct nfs_direct_req *dreq = cinfo->dreq; |
|---|
| 740 | 635 | |
|---|
| 741 | 636 | spin_lock(&dreq->lock); |
|---|
| 742 | | - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
|---|
| 637 | + if (dreq->flags != NFS_ODIRECT_DONE) |
|---|
| 638 | + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
|---|
| 743 | 639 | spin_unlock(&dreq->lock); |
|---|
| 744 | 640 | nfs_mark_request_commit(req, NULL, cinfo, 0); |
|---|
| 745 | 641 | } |
|---|
| .. | .. |
|---|
| 762 | 658 | nfs_direct_write_reschedule(dreq); |
|---|
| 763 | 659 | } |
|---|
| 764 | 660 | |
|---|
| 661 | +static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) |
|---|
| 662 | +{ /* Gather every request still queued for commit on @dreq and release it without sending anything. */ |
|---|
| 663 | + struct nfs_commit_info cinfo; |
|---|
| 664 | + struct nfs_page *req; |
|---|
| 665 | + LIST_HEAD(reqs); /* local list that receives the scanned requests */ |
|---|
| 666 | + |
|---|
| 667 | + nfs_init_cinfo_from_dreq(&cinfo, dreq); |
|---|
| 668 | + nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); /* fills reqs under the commit_mutex */ |
|---|
| 669 | + |
|---|
| 670 | + while (!list_empty(&reqs)) { |
|---|
| 671 | + req = nfs_list_entry(reqs.next); |
|---|
| 672 | + nfs_list_remove_request(req); |
|---|
| 673 | + nfs_release_request(req); /* presumably drops the reference taken when the request was marked for commit - verify */ |
|---|
| 674 | + nfs_unlock_and_release_request(req); /* NOTE(review): assumes the scan returned the request locked - confirm */ |
|---|
| 675 | + } |
|---|
| 676 | +} |
|---|
| 677 | + |
|---|
| 765 | 678 | static void nfs_direct_write_schedule_work(struct work_struct *work) |
|---|
| 766 | 679 | { |
|---|
| 767 | 680 | struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); |
|---|
| .. | .. |
|---|
| 776 | 689 | nfs_direct_write_reschedule(dreq); |
|---|
| 777 | 690 | break; |
|---|
| 778 | 691 | default: |
|---|
| 692 | + nfs_direct_write_clear_reqs(dreq); |
|---|
| 779 | 693 | nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); |
|---|
| 780 | 694 | nfs_direct_complete(dreq); |
|---|
| 781 | 695 | } |
|---|
| .. | .. |
|---|
| 790 | 704 | { |
|---|
| 791 | 705 | struct nfs_direct_req *dreq = hdr->dreq; |
|---|
| 792 | 706 | struct nfs_commit_info cinfo; |
|---|
| 793 | | - bool request_commit = false; |
|---|
| 794 | 707 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); |
|---|
| 708 | + int flags = NFS_ODIRECT_DONE; |
|---|
| 795 | 709 | |
|---|
| 796 | 710 | nfs_init_cinfo_from_dreq(&cinfo, dreq); |
|---|
| 797 | 711 | |
|---|
| .. | .. |
|---|
| 802 | 716 | } |
|---|
| 803 | 717 | |
|---|
| 804 | 718 | nfs_direct_count_bytes(dreq, hdr); |
|---|
| 805 | | - if (hdr->good_bytes != 0) { |
|---|
| 806 | | - if (nfs_write_need_commit(hdr)) { |
|---|
| 807 | | - if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) |
|---|
| 808 | | - request_commit = true; |
|---|
| 809 | | - else if (dreq->flags == 0) { |
|---|
| 810 | | - nfs_direct_set_hdr_verf(dreq, hdr); |
|---|
| 811 | | - request_commit = true; |
|---|
| 812 | | - dreq->flags = NFS_ODIRECT_DO_COMMIT; |
|---|
| 813 | | - } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { |
|---|
| 814 | | - request_commit = true; |
|---|
| 815 | | - if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) |
|---|
| 816 | | - dreq->flags = |
|---|
| 817 | | - NFS_ODIRECT_RESCHED_WRITES; |
|---|
| 818 | | - } |
|---|
| 819 | | - } |
|---|
| 719 | + if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) { |
|---|
| 720 | + if (!dreq->flags) |
|---|
| 721 | + dreq->flags = NFS_ODIRECT_DO_COMMIT; |
|---|
| 722 | + flags = dreq->flags; |
|---|
| 820 | 723 | } |
|---|
| 821 | 724 | spin_unlock(&dreq->lock); |
|---|
| 822 | 725 | |
|---|
| .. | .. |
|---|
| 824 | 727 | |
|---|
| 825 | 728 | req = nfs_list_entry(hdr->pages.next); |
|---|
| 826 | 729 | nfs_list_remove_request(req); |
|---|
| 827 | | - if (request_commit) { |
|---|
| 730 | + if (flags == NFS_ODIRECT_DO_COMMIT) { |
|---|
| 828 | 731 | kref_get(&req->wb_kref); |
|---|
| 732 | + memcpy(&req->wb_verf, &hdr->verf.verifier, |
|---|
| 733 | + sizeof(req->wb_verf)); |
|---|
| 829 | 734 | nfs_mark_request_commit(req, hdr->lseg, &cinfo, |
|---|
| 830 | 735 | hdr->ds_commit_idx); |
|---|
| 736 | + } else if (flags == NFS_ODIRECT_RESCHED_WRITES) { |
|---|
| 737 | + kref_get(&req->wb_kref); |
|---|
| 738 | + nfs_mark_request_commit(req, NULL, &cinfo, 0); |
|---|
| 831 | 739 | } |
|---|
| 832 | 740 | nfs_unlock_and_release_request(req); |
|---|
| 833 | 741 | } |
|---|
| .. | .. |
|---|
| 858 | 766 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
|---|
| 859 | 767 | /* fake unstable write to let common nfs resend pages */ |
|---|
| 860 | 768 | hdr->verf.committed = NFS_UNSTABLE; |
|---|
| 861 | | - hdr->good_bytes = hdr->args.count; |
|---|
| 769 | + hdr->good_bytes = hdr->args.offset + hdr->args.count - |
|---|
| 770 | + hdr->io_start; |
|---|
| 862 | 771 | } |
|---|
| 863 | 772 | spin_unlock(&dreq->lock); |
|---|
| 864 | 773 | } |
|---|
| .. | .. |
|---|
| 884 | 793 | */ |
|---|
| 885 | 794 | static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, |
|---|
| 886 | 795 | struct iov_iter *iter, |
|---|
| 887 | | - loff_t pos) |
|---|
| 796 | + loff_t pos, int ioflags) |
|---|
| 888 | 797 | { |
|---|
| 889 | 798 | struct nfs_pageio_descriptor desc; |
|---|
| 890 | 799 | struct inode *inode = dreq->inode; |
|---|
| .. | .. |
|---|
| 892 | 801 | size_t requested_bytes = 0; |
|---|
| 893 | 802 | size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); |
|---|
| 894 | 803 | |
|---|
| 895 | | - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, |
|---|
| 804 | + nfs_pageio_init_write(&desc, inode, ioflags, false, |
|---|
| 896 | 805 | &nfs_direct_write_completion_ops); |
|---|
| 897 | 806 | desc.pg_dreq = dreq; |
|---|
| 898 | 807 | get_dreq(dreq); |
|---|
| .. | .. |
|---|
| 917 | 826 | struct nfs_page *req; |
|---|
| 918 | 827 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); |
|---|
| 919 | 828 | |
|---|
| 920 | | - req = nfs_create_request(dreq->ctx, pagevec[i], NULL, |
|---|
| 829 | + req = nfs_create_request(dreq->ctx, pagevec[i], |
|---|
| 921 | 830 | pgbase, req_len); |
|---|
| 922 | 831 | if (IS_ERR(req)) { |
|---|
| 923 | 832 | result = PTR_ERR(req); |
|---|
| 924 | 833 | break; |
|---|
| 925 | 834 | } |
|---|
| 926 | 835 | |
|---|
| 927 | | - nfs_direct_setup_mirroring(dreq, &desc, req); |
|---|
| 928 | 836 | if (desc.pg_error < 0) { |
|---|
| 929 | 837 | nfs_free_request(req); |
|---|
| 930 | 838 | result = desc.pg_error; |
|---|
| .. | .. |
|---|
| 971 | 879 | * nfs_file_direct_write - file direct write operation for NFS files |
|---|
| 972 | 880 | * @iocb: target I/O control block |
|---|
| 973 | 881 | * @iter: vector of user buffers from which to write data |
|---|
| 882 | + * @swap: flag indicating this is swap IO, not O_DIRECT IO |
|---|
| 974 | 883 | * |
|---|
| 975 | 884 | * We use this function for direct writes instead of calling |
|---|
| 976 | 885 | * generic_file_aio_write() in order to avoid taking the inode |
|---|
| .. | .. |
|---|
| 987 | 896 | * Note that O_APPEND is not supported for NFS direct writes, as there |
|---|
| 988 | 897 | * is no atomic O_APPEND write facility in the NFS protocol. |
|---|
| 989 | 898 | */ |
|---|
| 990 | | -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) |
|---|
| 899 | +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, |
|---|
| 900 | + bool swap) |
|---|
| 991 | 901 | { |
|---|
| 992 | | - ssize_t result = -EINVAL, requested; |
|---|
| 902 | + ssize_t result, requested; |
|---|
| 993 | 903 | size_t count; |
|---|
| 994 | 904 | struct file *file = iocb->ki_filp; |
|---|
| 995 | 905 | struct address_space *mapping = file->f_mapping; |
|---|
| .. | .. |
|---|
| 1001 | 911 | dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", |
|---|
| 1002 | 912 | file, iov_iter_count(iter), (long long) iocb->ki_pos); |
|---|
| 1003 | 913 | |
|---|
| 1004 | | - result = generic_write_checks(iocb, iter); |
|---|
| 914 | + if (swap) |
|---|
| 915 | + /* bypass generic checks */ |
|---|
| 916 | + result = iov_iter_count(iter); |
|---|
| 917 | + else |
|---|
| 918 | + result = generic_write_checks(iocb, iter); |
|---|
| 1005 | 919 | if (result <= 0) |
|---|
| 1006 | 920 | return result; |
|---|
| 1007 | 921 | count = result; |
|---|
| .. | .. |
|---|
| 1030 | 944 | dreq->l_ctx = l_ctx; |
|---|
| 1031 | 945 | if (!is_sync_kiocb(iocb)) |
|---|
| 1032 | 946 | dreq->iocb = iocb; |
|---|
| 947 | + pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); |
|---|
| 1033 | 948 | |
|---|
| 1034 | | - nfs_start_io_direct(inode); |
|---|
| 949 | + if (swap) { |
|---|
| 950 | + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, |
|---|
| 951 | + FLUSH_STABLE); |
|---|
| 952 | + } else { |
|---|
| 953 | + nfs_start_io_direct(inode); |
|---|
| 1035 | 954 | |
|---|
| 1036 | | - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); |
|---|
| 955 | + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, |
|---|
| 956 | + FLUSH_COND_STABLE); |
|---|
| 1037 | 957 | |
|---|
| 1038 | | - if (mapping->nrpages) { |
|---|
| 1039 | | - invalidate_inode_pages2_range(mapping, |
|---|
| 1040 | | - pos >> PAGE_SHIFT, end); |
|---|
| 958 | + if (mapping->nrpages) { |
|---|
| 959 | + invalidate_inode_pages2_range(mapping, |
|---|
| 960 | + pos >> PAGE_SHIFT, end); |
|---|
| 961 | + } |
|---|
| 962 | + |
|---|
| 963 | + nfs_end_io_direct(inode); |
|---|
| 1041 | 964 | } |
|---|
| 1042 | | - |
|---|
| 1043 | | - nfs_end_io_direct(inode); |
|---|
| 1044 | 965 | |
|---|
| 1045 | 966 | if (requested > 0) { |
|---|
| 1046 | 967 | result = nfs_direct_wait(dreq); |
|---|