.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 |  * mm/truncate.c - code for taking down pages from address_spaces
3 | 4 |  *
.. | ..
33 | 34 | static inline void __clear_shadow_entry(struct address_space *mapping,
34 | 35 | 					pgoff_t index, void *entry)
35 | 36 | {
36 | | -	struct radix_tree_node *node;
37 | | -	void **slot;
| 37 | +	XA_STATE(xas, &mapping->i_pages, index);
38 | 38 |
39 | | -	if (!__radix_tree_lookup(&mapping->i_pages, index, &node, &slot))
| 39 | +	xas_set_update(&xas, workingset_update_node);
| 40 | +	if (xas_load(&xas) != entry)
40 | 41 | 		return;
41 | | -	if (*slot != entry)
42 | | -		return;
43 | | -	__radix_tree_replace(&mapping->i_pages, node, slot, NULL,
44 | | -			workingset_update_node);
| 42 | +	xas_store(&xas, NULL);
45 | 43 | 	mapping->nrexceptional--;
46 | 44 | }
47 | 45 |
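A minimal sketch of the xas_load()/xas_store() pattern the hunk above converts to, assuming a plain XArray protected by its own xa_lock; the function name is illustrative and not part of the patch (the real caller already holds the i_pages lock and registers workingset_update_node so node accounting stays correct):

#include <linux/xarray.h>

/* Illustrative only: clear the entry at @index iff it still equals @entry. */
static void clear_entry_if_unchanged(struct xarray *xa, unsigned long index,
				     void *entry)
{
	XA_STATE(xas, xa, index);

	xa_lock(xa);
	if (xas_load(&xas) == entry)	/* someone may have replaced it already */
		xas_store(&xas, NULL);	/* erase in place; empty nodes can be freed */
	xa_unlock(xa);
}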
.. | ..
70 | 68 | 		return;
71 | 69 |
72 | 70 | 	for (j = 0; j < pagevec_count(pvec); j++)
73 | | -		if (radix_tree_exceptional_entry(pvec->pages[j]))
| 71 | +		if (xa_is_value(pvec->pages[j]))
74 | 72 | 			break;
75 | 73 |
76 | 74 | 	if (j == pagevec_count(pvec))
.. | ..
85 | 83 | 		struct page *page = pvec->pages[i];
86 | 84 | 		pgoff_t index = indices[i];
87 | 85 |
88 | | -		if (!radix_tree_exceptional_entry(page)) {
| 86 | +		if (!xa_is_value(page)) {
89 | 87 | 			pvec->pages[j++] = page;
90 | 88 | 			continue;
91 | 89 | 		}
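For context, a small sketch of the value-entry helpers this substitution relies on; both radix_tree_exceptional_entry() and xa_is_value() test the same low tag bit, and the function below is purely illustrative:

#include <linux/xarray.h>
#include <linux/bug.h>

/*
 * Shadow entries are stored as "value" entries: a small integer tagged in
 * the low bits of the pointer, so a lookup can never confuse them with a
 * real struct page pointer kept in the same tree.
 */
static void value_entry_demo(void)
{
	void *entry = xa_mk_value(42);

	WARN_ON(!xa_is_value(entry));		/* tagged, not a real pointer */
	WARN_ON(xa_to_value(entry) != 42);	/* round-trips the integer */
}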
.. | ..
170 | 168 |  * becomes orphaned. It will be left on the LRU and may even be mapped into
171 | 169 |  * user pagetables if we're racing with filemap_fault().
172 | 170 |  *
173 | | - * We need to bale out if page->mapping is no longer equal to the original
| 171 | + * We need to bail out if page->mapping is no longer equal to the original
174 | 172 |  * mapping. This happens a) when the VM reclaimed the page while we waited on
175 | 173 |  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
176 | 174 |  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
.. | ..
181 | 179 | 	unmap_mapping_page(page);
182 | 180 |
183 | 181 | 	if (page_has_private(page))
184 | | -		do_invalidatepage(page, 0, PAGE_SIZE);
| 182 | +		do_invalidatepage(page, 0, thp_size(page));
185 | 183 |
186 | 184 | 	/*
187 | 185 | 	 * Some filesystems seem to re-dirty the page even after
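A short illustration of what the new length argument evaluates to; the helper name is hypothetical and only meant to show that thp_size() covers the whole compound page rather than a single PAGE_SIZE:

#include <linux/huge_mm.h>
#include <linux/mm.h>

/* Hypothetical helper: byte range the truncate path now asks the
 * filesystem to invalidate for @page. */
static unsigned long invalidate_len(struct page *page)
{
	/* thp_size() == PAGE_SIZE << thp_order(page); PAGE_SIZE for order-0 */
	return thp_size(page);
}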
.. | ..
344 | 342 | 			if (index >= end)
345 | 343 | 				break;
346 | 344 |
347 | | -			if (radix_tree_exceptional_entry(page))
| 345 | +			if (xa_is_value(page))
348 | 346 | 				continue;
349 | 347 |
350 | 348 | 			if (!trylock_page(page))
.. | ..
439 | 437 | 				break;
440 | 438 | 			}
441 | 439 |
442 | | -			if (radix_tree_exceptional_entry(page))
| 440 | +			if (xa_is_value(page))
443 | 441 | 				continue;
444 | 442 |
445 | 443 | 			lock_page(page);
.. | ..
527 | 525 | }
528 | 526 | EXPORT_SYMBOL(truncate_inode_pages_final);
529 | 527 |
530 | | -/**
531 | | - * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
532 | | - * @mapping: the address_space which holds the pages to invalidate
533 | | - * @start: the offset 'from' which to invalidate
534 | | - * @end: the offset 'to' which to invalidate (inclusive)
535 | | - *
536 | | - * This function only removes the unlocked pages, if you want to
537 | | - * remove all the pages of one inode, you must call truncate_inode_pages.
538 | | - *
539 | | - * invalidate_mapping_pages() will not block on IO activity. It will not
540 | | - * invalidate pages which are dirty, locked, under writeback or mapped into
541 | | - * pagetables.
542 | | - */
543 | | -unsigned long invalidate_mapping_pages(struct address_space *mapping,
544 | | -		pgoff_t start, pgoff_t end)
| 528 | +static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
| 529 | +		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
545 | 530 | {
546 | 531 | 	pgoff_t indices[PAGEVEC_SIZE];
547 | 532 | 	struct pagevec pvec;
.. | ..
562 | 547 | 			if (index > end)
563 | 548 | 				break;
564 | 549 |
565 | | -			if (radix_tree_exceptional_entry(page)) {
| 550 | +			if (xa_is_value(page)) {
566 | 551 | 				invalidate_exceptional_entry(mapping, index,
567 | 552 | 							page);
568 | 553 | 				continue;
.. | ..
589 | 574 | 					unlock_page(page);
590 | 575 | 					continue;
591 | 576 | 				}
| 577 | +
| 578 | +				/* Take a pin outside pagevec */
| 579 | +				get_page(page);
| 580 | +
| 581 | +				/*
| 582 | +				 * Drop extra pins before trying to invalidate
| 583 | +				 * the huge page.
| 584 | +				 */
| 585 | +				pagevec_remove_exceptionals(&pvec);
| 586 | +				pagevec_release(&pvec);
592 | 587 | 			}
593 | 588 |
594 | 589 | 			ret = invalidate_inode_page(page);
.. | ..
597 | 592 | 			 * Invalidation is a hint that the page is no longer
598 | 593 | 			 * of interest and try to speed up its reclaim.
599 | 594 | 			 */
600 | | -			if (!ret)
| 595 | +			if (!ret) {
601 | 596 | 				deactivate_file_page(page);
| 597 | +				/* It is likely on the pagevec of a remote CPU */
| 598 | +				if (nr_pagevec)
| 599 | +					(*nr_pagevec)++;
| 600 | +			}
| 601 | +
| 602 | +			if (PageTransHuge(page))
| 603 | +				put_page(page);
602 | 604 | 			count += ret;
603 | 605 | 		}
604 | 606 | 		pagevec_remove_exceptionals(&pvec);
.. | ..
608 | 610 | 	}
609 | 611 | 	return count;
610 | 612 | }
| 613 | +
| 614 | +/**
| 615 | + * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
| 616 | + * @mapping: the address_space which holds the pages to invalidate
| 617 | + * @start: the offset 'from' which to invalidate
| 618 | + * @end: the offset 'to' which to invalidate (inclusive)
| 619 | + *
| 620 | + * This function only removes the unlocked pages, if you want to
| 621 | + * remove all the pages of one inode, you must call truncate_inode_pages.
| 622 | + *
| 623 | + * invalidate_mapping_pages() will not block on IO activity. It will not
| 624 | + * invalidate pages which are dirty, locked, under writeback or mapped into
| 625 | + * pagetables.
| 626 | + *
| 627 | + * Return: the number of the pages that were invalidated
| 628 | + */
| 629 | +unsigned long invalidate_mapping_pages(struct address_space *mapping,
| 630 | +		pgoff_t start, pgoff_t end)
| 631 | +{
| 632 | +	return __invalidate_mapping_pages(mapping, start, end, NULL);
| 633 | +}
611 | 634 | EXPORT_SYMBOL(invalidate_mapping_pages);
| 635 | +
| 636 | +/**
| 637 | + * This helper is similar with the above one, except that it accounts for pages
| 638 | + * that are likely on a pagevec and count them in @nr_pagevec, which will used by
| 639 | + * the caller.
| 640 | + */
| 641 | +void invalidate_mapping_pagevec(struct address_space *mapping,
| 642 | +		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
| 643 | +{
| 644 | +	__invalidate_mapping_pages(mapping, start, end, nr_pagevec);
| 645 | +}
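A rough sketch of how a caller might use the new helper, mirroring the intent of the comment above (counting pages skipped only because they sat on a remote CPU's pagevec). The wrapper function is hypothetical and not part of this patch; it assumes draining with lru_add_drain_all() and retrying is the desired follow-up:

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/swap.h>		/* lru_add_drain_all() */

/* Hypothetical caller (an fadvise(DONTNEED)-style path), not from the patch. */
static void drop_clean_pages(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
	unsigned long nr_pagevec = 0;

	invalidate_mapping_pagevec(mapping, start, end, &nr_pagevec);
	if (nr_pagevec) {
		/* Some pages were only pinned by remote pagevecs: drain, retry. */
		lru_add_drain_all();
		invalidate_mapping_pages(mapping, start, end);
	}
}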
612 | 646 |
613 | 647 | /*
614 | 648 |  * This is like invalidate_complete_page(), except it ignores the page's
.. | ..
664 | 698 |  * Any pages which are found to be mapped into pagetables are unmapped prior to
665 | 699 |  * invalidation.
666 | 700 |  *
667 | | - * Returns -EBUSY if any pages could not be invalidated.
| 701 | + * Return: -EBUSY if any pages could not be invalidated.
668 | 702 |  */
669 | 703 | int invalidate_inode_pages2_range(struct address_space *mapping,
670 | 704 | 				  pgoff_t start, pgoff_t end)
.. | ..
693 | 727 | 			if (index > end)
694 | 728 | 				break;
695 | 729 |
696 | | -			if (radix_tree_exceptional_entry(page)) {
| 730 | +			if (xa_is_value(page)) {
697 | 731 | 				if (!invalidate_exceptional_entry2(mapping,
698 | 732 | 						index, page))
699 | 733 | 					ret = -EBUSY;
.. | ..
737 | 771 | 		index++;
738 | 772 | 	}
739 | 773 | 	/*
740 | | -	 * For DAX we invalidate page tables after invalidating radix tree. We
| 774 | +	 * For DAX we invalidate page tables after invalidating page cache. We
741 | 775 | 	 * could invalidate page tables while invalidating each entry however
742 | 776 | 	 * that would be expensive. And doing range unmapping before doesn't
743 | | -	 * work as we have no cheap way to find whether radix tree entry didn't
| 777 | +	 * work as we have no cheap way to find whether page cache entry didn't
744 | 778 | 	 * get remapped later.
745 | 779 | 	 */
746 | 780 | 	if (dax_mapping(mapping)) {
.. | ..
759 | 793 |  * Any pages which are found to be mapped into pagetables are unmapped prior to
760 | 794 |  * invalidation.
761 | 795 |  *
762 | | - * Returns -EBUSY if any pages could not be invalidated.
| 796 | + * Return: -EBUSY if any pages could not be invalidated.
763 | 797 |  */
764 | 798 | int invalidate_inode_pages2(struct address_space *mapping)
765 | 799 | {