--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mm/truncate.c - code for taking down pages from address_spaces
  *
@@ -33,15 +34,12 @@
 static inline void __clear_shadow_entry(struct address_space *mapping,
 				pgoff_t index, void *entry)
 {
-	struct radix_tree_node *node;
-	void **slot;
+	XA_STATE(xas, &mapping->i_pages, index);
 
-	if (!__radix_tree_lookup(&mapping->i_pages, index, &node, &slot))
+	xas_set_update(&xas, workingset_update_node);
+	if (xas_load(&xas) != entry)
 		return;
-	if (*slot != entry)
-		return;
-	__radix_tree_replace(&mapping->i_pages, node, slot, NULL,
-			     workingset_update_node);
+	xas_store(&xas, NULL);
 	mapping->nrexceptional--;
 }
 
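The hunk above is the idiomatic XArray replacement for an open-coded radix-tree lookup/compare/replace: XA_STATE() declares a cursor fixed at @index, xas_set_update() registers workingset_update_node so node occupancy changes are still reported to the workingset code, and storing NULL erases the entry. A minimal sketch of the same pattern follows; everything except the XArray API itself is an illustrative name, and the caller is assumed to hold the xa_lock, as __clear_shadow_entry()'s callers do via the i_pages lock.

```c
#include <linux/xarray.h>

/*
 * Illustrative helper, not part of the patch: erase the entry at
 * @index, but only if it still holds @expected. Assumes the caller
 * holds the xa_lock, mirroring __clear_shadow_entry() above.
 */
static void erase_entry_if_matches(struct xarray *xa, unsigned long index,
				   void *expected)
{
	XA_STATE(xas, xa, index);	/* cursor pinned at @index */

	if (xas_load(&xas) != expected)	/* entry changed or already gone */
		return;
	xas_store(&xas, NULL);		/* storing NULL erases the entry */
}
```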
@@ -70,7 +68,7 @@
 		return;
 
 	for (j = 0; j < pagevec_count(pvec); j++)
-		if (radix_tree_exceptional_entry(pvec->pages[j]))
+		if (xa_is_value(pvec->pages[j]))
 			break;
 
 	if (j == pagevec_count(pvec))
@@ -85,7 +83,7 @@
 		struct page *page = pvec->pages[i];
 		pgoff_t index = indices[i];
 
-		if (!radix_tree_exceptional_entry(page)) {
+		if (!xa_is_value(page)) {
 			pvec->pages[j++] = page;
 			continue;
 		}
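Every radix_tree_exceptional_entry() call site in this file becomes xa_is_value(); both predicates ask the same question, namely whether the slot holds a tagged value (a shadow or swap entry) rather than a struct page pointer. In the XArray a value entry is distinguished by its low bit; the real definition in include/linux/xarray.h is essentially:

```c
/* True if @entry is an XArray value entry rather than a pointer. */
static inline bool xa_is_value(const void *entry)
{
	return (unsigned long)entry & 1;
}
```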
@@ -170,7 +168,7 @@
  * becomes orphaned. It will be left on the LRU and may even be mapped into
  * user pagetables if we're racing with filemap_fault().
  *
- * We need to bale out if page->mapping is no longer equal to the original
+ * We need to bail out if page->mapping is no longer equal to the original
  * mapping. This happens a) when the VM reclaimed the page while we waited on
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
@@ -181,7 +179,7 @@
 	unmap_mapping_page(page);
 
 	if (page_has_private(page))
-		do_invalidatepage(page, 0, PAGE_SIZE);
+		do_invalidatepage(page, 0, thp_size(page));
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
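PAGE_SIZE was only correct for order-0 pages; once transparent huge pages can sit in the page cache, do_invalidatepage() has to be told about the full range the page covers, which is what thp_size() supplies. A sketch of its semantics, assuming a head page (the real helper lives in include/linux/huge_mm.h and degrades to PAGE_SIZE when THP support is compiled out):

```c
/* Sketch only: bytes spanned by @page, whether huge or order-0. */
static inline unsigned long thp_size_sketch(struct page *page)
{
	return PAGE_SIZE << compound_order(page);
}
```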
@@ -344,7 +342,7 @@
 			if (index >= end)
 				break;
 
-			if (radix_tree_exceptional_entry(page))
+			if (xa_is_value(page))
 				continue;
 
 			if (!trylock_page(page))
@@ -439,7 +437,7 @@
 				break;
 			}
 
-			if (radix_tree_exceptional_entry(page))
+			if (xa_is_value(page))
 				continue;
 
 			lock_page(page);
@@ -527,21 +525,8 @@
 }
 EXPORT_SYMBOL(truncate_inode_pages_final);
 
-/**
- * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
- * @mapping: the address_space which holds the pages to invalidate
- * @start: the offset 'from' which to invalidate
- * @end: the offset 'to' which to invalidate (inclusive)
- *
- * This function only removes the unlocked pages, if you want to
- * remove all the pages of one inode, you must call truncate_inode_pages.
- *
- * invalidate_mapping_pages() will not block on IO activity. It will not
- * invalidate pages which are dirty, locked, under writeback or mapped into
- * pagetables.
- */
-unsigned long invalidate_mapping_pages(struct address_space *mapping,
-		pgoff_t start, pgoff_t end)
+static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
+		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
 {
 	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
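The kernel-doc block and the EXPORT_SYMBOL stay with a thin wrapper added further down; what this hunk leaves behind is the body, renamed to a static __-prefixed helper with one extra out-parameter. This is the usual shape for extending an exported API without touching its callers, sketched below with hypothetical names:

```c
/* Hypothetical names; only the shape of the refactor is shown. */
static unsigned long __frob_range(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *opt_count)
{
	unsigned long ret = 0;

	/* ... walk the range; bump *opt_count only when it is non-NULL ... */
	return ret;
}

unsigned long frob_range(struct address_space *mapping,
		pgoff_t start, pgoff_t end)
{
	return __frob_range(mapping, start, end, NULL);	/* callers unchanged */
}
```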
@@ -562,7 +547,7 @@
 			if (index > end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				invalidate_exceptional_entry(mapping, index,
 							     page);
 				continue;
@@ -589,6 +574,16 @@
 					unlock_page(page);
 					continue;
 				}
+
+				/* Take a pin outside pagevec */
+				get_page(page);
+
+				/*
+				 * Drop extra pins before trying to invalidate
+				 * the huge page.
+				 */
+				pagevec_remove_exceptionals(&pvec);
+				pagevec_release(&pvec);
 			}
 
 			ret = invalidate_inode_page(page);
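The get_page()/pagevec_release() pair in the huge-page branch deserves a note: the pagevec holds a reference on every page it carries, and those extra references make a huge page look busy to the invalidation (and any page split it may attempt) that follows. The patch therefore trades the pagevec's many pins for a single private one; in isolation, and only as a sketch of the pattern:

```c
/*
 * Sketch, not a helper the patch adds: swap the pagevec's references
 * for one of our own so @page no longer looks busy. The caller must
 * put_page() afterwards; in the patch that is the later
 * "if (PageTransHuge(page)) put_page(page);" hunk.
 */
static void pin_page_outside_pagevec(struct page *page, struct pagevec *pvec)
{
	get_page(page);			/* our private reference */
	pagevec_remove_exceptionals(pvec);
	pagevec_release(pvec);		/* drop the pagevec's references */
}
```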
@@ -597,8 +592,15 @@
 			 * Invalidation is a hint that the page is no longer
 			 * of interest and try to speed up its reclaim.
 			 */
-			if (!ret)
+			if (!ret) {
 				deactivate_file_page(page);
+				/* It is likely on the pagevec of a remote CPU */
+				if (nr_pagevec)
+					(*nr_pagevec)++;
+			}
+
+			if (PageTransHuge(page))
+				put_page(page);
 			count += ret;
 		}
 		pagevec_remove_exceptionals(&pvec);
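Two details here: invalidate_inode_page() returns 0 when the page could not be dropped, and such a page, having just been referenced, is likely parked on some other CPU's LRU pagevec, which is what the new *nr_pagevec counter records. The conditional put_page() balances the get_page() taken earlier in the huge-page branch, guarded by the same PageTransHuge() test; schematically (not literal patch code):

```c
/* Schematic pairing of references across one pass of the loop body. */
if (PageTransHuge(page))
	get_page(page);		/* taken when the pagevec pins are dropped */
/* ... invalidate; on failure, deactivate and count the page ... */
if (PageTransHuge(page))
	put_page(page);		/* always balanced before the next page */
```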
@@ -608,7 +610,39 @@
 	}
 	return count;
 }
+
+/**
+ * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
+ * @mapping: the address_space which holds the pages to invalidate
+ * @start: the offset 'from' which to invalidate
+ * @end: the offset 'to' which to invalidate (inclusive)
+ *
+ * This function only removes the unlocked pages; if you want to
+ * remove all the pages of one inode, you must call truncate_inode_pages.
+ *
+ * invalidate_mapping_pages() will not block on IO activity. It will not
+ * invalidate pages which are dirty, locked, under writeback or mapped into
+ * pagetables.
+ *
+ * Return: the number of pages that were invalidated
+ */
+unsigned long invalidate_mapping_pages(struct address_space *mapping,
+		pgoff_t start, pgoff_t end)
+{
+	return __invalidate_mapping_pages(mapping, start, end, NULL);
+}
 EXPORT_SYMBOL(invalidate_mapping_pages);
+
+/**
+ * This helper is similar to invalidate_mapping_pages(), except that it
+ * accounts for pages that are likely on a pagevec and counts them in
+ * @nr_pagevec, which will be used by the caller.
+ */
+void invalidate_mapping_pagevec(struct address_space *mapping,
+		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
+{
+	__invalidate_mapping_pages(mapping, start, end, nr_pagevec);
+}
 
 /*
  * This is like invalidate_complete_page(), except it ignores the page's
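A sketch of how a caller might consume the new out-parameter; this mirrors the intent stated in the comment (pages stranded on remote CPUs' pagevecs) and is a hypothetical call site, not one added by this patch:

```c
/*
 * Hypothetical caller: if invalidation skipped pages because they are
 * probably sitting on per-CPU LRU pagevecs, drain those and retry once.
 */
static void drop_range_hard(struct address_space *mapping,
			    pgoff_t start, pgoff_t end)
{
	unsigned long nr_pagevec = 0;

	invalidate_mapping_pagevec(mapping, start, end, &nr_pagevec);
	if (nr_pagevec) {
		lru_add_drain_all();	/* flush every CPU's LRU pagevecs */
		invalidate_mapping_pages(mapping, start, end);
	}
}
```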
@@ -664,7 +698,7 @@
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
  *
- * Returns -EBUSY if any pages could not be invalidated.
+ * Return: -EBUSY if any pages could not be invalidated.
  */
 int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
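The "Returns" to "Return:" changes in this and the final hunk are not merely cosmetic: kernel-doc treats the literal "Return:" as a named section, while the old free-form "Returns ..." rendered as ordinary description text. The convention in miniature, for a hypothetical function:

```c
/**
 * example_func - one-line summary of a hypothetical function
 * @arg: what the argument means
 *
 * Free-form description goes here.
 *
 * Return: 0 on success, a negative errno on failure.
 */
```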
@@ -693,7 +727,7 @@
 			if (index > end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (!invalidate_exceptional_entry2(mapping,
 								   index, page))
 					ret = -EBUSY;
@@ -737,10 +771,10 @@
 		index++;
 	}
 	/*
-	 * For DAX we invalidate page tables after invalidating radix tree. We
+	 * For DAX we invalidate page tables after invalidating page cache. We
 	 * could invalidate page tables while invalidating each entry however
 	 * that would be expensive. And doing range unmapping before doesn't
-	 * work as we have no cheap way to find whether radix tree entry didn't
+	 * work as we have no cheap way to find whether page cache entry didn't
 	 * get remapped later.
 	 */
 	if (dax_mapping(mapping)) {
@@ -759,7 +793,7 @@
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
  *
- * Returns -EBUSY if any pages could not be invalidated.
+ * Return: -EBUSY if any pages could not be invalidated.
  */
 int invalidate_inode_pages2(struct address_space *mapping)
 {