2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/mm/truncate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mm/truncate.c - code for taking down pages from address_spaces
  *
@@ -33,15 +34,12 @@
 static inline void __clear_shadow_entry(struct address_space *mapping,
                                 pgoff_t index, void *entry)
 {
-        struct radix_tree_node *node;
-        void **slot;
+        XA_STATE(xas, &mapping->i_pages, index);
 
-        if (!__radix_tree_lookup(&mapping->i_pages, index, &node, &slot))
+        xas_set_update(&xas, workingset_update_node);
+        if (xas_load(&xas) != entry)
                 return;
-        if (*slot != entry)
-                return;
-        __radix_tree_replace(&mapping->i_pages, node, slot, NULL,
-                        workingset_update_node);
+        xas_store(&xas, NULL);
         mapping->nrexceptional--;
 }
 
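[Editor's note] For readers newer to the XArray advanced API that replaces the radix-tree calls above, here is a minimal sketch of the same idiom. It assumes the caller already holds the mapping's xa_lock, as __clear_shadow_entry()'s callers do; the function name is illustrative, not part of this patch.

    #include <linux/xarray.h>

    static void clear_entry_if_matches(struct xarray *xa, unsigned long index,
                                       void *expected)
    {
            XA_STATE(xas, xa, index);       /* on-stack cursor for one index */

            if (xas_load(&xas) != expected) /* re-check the entry under lock */
                    return;
            xas_store(&xas, NULL);          /* erase without a second lookup */
    }
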
@@ -70,7 +68,7 @@
                 return;
 
         for (j = 0; j < pagevec_count(pvec); j++)
-                if (radix_tree_exceptional_entry(pvec->pages[j]))
+                if (xa_is_value(pvec->pages[j]))
                         break;
 
         if (j == pagevec_count(pvec))
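[Editor's note] xa_is_value() is the XArray spelling of the old radix_tree_exceptional_entry() test: both check the tag bit that marks a non-page "value" entry (shadow, swap or DAX entries in the page cache). A self-contained illustration of the value-entry helpers, assuming only <linux/xarray.h> and <linux/bug.h>:

    #include <linux/bug.h>
    #include <linux/xarray.h>

    static void value_entry_demo(void)
    {
            void *entry = xa_mk_value(0x1234);    /* tag an integer as an entry */

            BUG_ON(!xa_is_value(entry));          /* tagged, not a struct page * */
            BUG_ON(xa_to_value(entry) != 0x1234); /* decoding round-trips */
    }
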
@@ -85,7 +83,7 @@
                 struct page *page = pvec->pages[i];
                 pgoff_t index = indices[i];
 
-                if (!radix_tree_exceptional_entry(page)) {
+                if (!xa_is_value(page)) {
                         pvec->pages[j++] = page;
                         continue;
                 }
@@ -170,7 +168,7 @@
  * becomes orphaned. It will be left on the LRU and may even be mapped into
  * user pagetables if we're racing with filemap_fault().
  *
- * We need to bale out if page->mapping is no longer equal to the original
+ * We need to bail out if page->mapping is no longer equal to the original
  * mapping. This happens a) when the VM reclaimed the page while we waited on
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
@@ -181,7 +179,7 @@
                 unmap_mapping_page(page);
 
         if (page_has_private(page))
-                do_invalidatepage(page, 0, PAGE_SIZE);
+                do_invalidatepage(page, 0, thp_size(page));
 
         /*
          * Some filesystems seem to re-dirty the page even after
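[Editor's note] Passing thp_size(page) instead of PAGE_SIZE matters when the page being taken down is a transparent huge page: the filesystem's invalidatepage hook must see the full extent being invalidated, not just the first base page. A rough sketch of what thp_size() amounts to; this is an assumption for illustration, see include/linux/huge_mm.h for the exact definition:

    /* Roughly: the byte size of the (possibly compound) page. */
    static inline unsigned long thp_size_sketch(struct page *page)
    {
            /* PAGE_SIZE for a base page, PAGE_SIZE << order for a THP */
            return PAGE_SIZE << compound_order(compound_head(page));
    }
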
@@ -344,7 +342,7 @@
                         if (index >= end)
                                 break;
 
-                        if (radix_tree_exceptional_entry(page))
+                        if (xa_is_value(page))
                                 continue;
 
                         if (!trylock_page(page))
@@ -439,7 +437,7 @@
                                 break;
                         }
 
-                        if (radix_tree_exceptional_entry(page))
+                        if (xa_is_value(page))
                                 continue;
 
                         lock_page(page);
@@ -527,21 +525,8 @@
 }
 EXPORT_SYMBOL(truncate_inode_pages_final);
 
-/**
- * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
- * @mapping: the address_space which holds the pages to invalidate
- * @start: the offset 'from' which to invalidate
- * @end: the offset 'to' which to invalidate (inclusive)
- *
- * This function only removes the unlocked pages, if you want to
- * remove all the pages of one inode, you must call truncate_inode_pages.
- *
- * invalidate_mapping_pages() will not block on IO activity. It will not
- * invalidate pages which are dirty, locked, under writeback or mapped into
- * pagetables.
- */
-unsigned long invalidate_mapping_pages(struct address_space *mapping,
-                pgoff_t start, pgoff_t end)
+static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
+                pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
 {
         pgoff_t indices[PAGEVEC_SIZE];
         struct pagevec pvec;
@@ -562,7 +547,7 @@
                         if (index > end)
                                 break;
 
-                        if (radix_tree_exceptional_entry(page)) {
+                        if (xa_is_value(page)) {
                                 invalidate_exceptional_entry(mapping, index,
                                                              page);
                                 continue;
@@ -589,6 +574,16 @@
                                         unlock_page(page);
                                         continue;
                                 }
+
+                                /* Take a pin outside pagevec */
+                                get_page(page);
+
+                                /*
+                                 * Drop extra pins before trying to invalidate
+                                 * the huge page.
+                                 */
+                                pagevec_remove_exceptionals(&pvec);
+                                pagevec_release(&pvec);
                         }
 
                         ret = invalidate_inode_page(page);
@@ -597,8 +592,15 @@
                          * Invalidation is a hint that the page is no longer
                          * of interest and try to speed up its reclaim.
                          */
-                        if (!ret)
+                        if (!ret) {
                                 deactivate_file_page(page);
+                                /* It is likely on the pagevec of a remote CPU */
+                                if (nr_pagevec)
+                                        (*nr_pagevec)++;
+                        }
+
+                        if (PageTransHuge(page))
+                                put_page(page);
                         count += ret;
                 }
                 pagevec_remove_exceptionals(&pvec);
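[Editor's note] Taken together, the additions in the two hunks above form one refcount dance around huge-page invalidation. A condensed, illustrative excerpt (not compilable on its own; page, pvec and ret come from the surrounding loop):

    get_page(page);                    /* our own stable pin on the THP    */
    pagevec_remove_exceptionals(&pvec);
    pagevec_release(&pvec);            /* drop the pagevec's extra pins    */

    ret = invalidate_inode_page(page); /* can now succeed on the huge page */

    if (PageTransHuge(page))
            put_page(page);            /* balance the pin taken above      */
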
@@ -608,7 +610,39 @@
         }
         return count;
 }
+
+/**
+ * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
+ * @mapping: the address_space which holds the pages to invalidate
+ * @start: the offset 'from' which to invalidate
+ * @end: the offset 'to' which to invalidate (inclusive)
+ *
+ * This function only removes the unlocked pages; if you want to
+ * remove all the pages of one inode, you must call truncate_inode_pages.
+ *
+ * invalidate_mapping_pages() will not block on IO activity. It will not
+ * invalidate pages which are dirty, locked, under writeback or mapped into
+ * pagetables.
+ *
+ * Return: the number of pages that were invalidated
+ */
+unsigned long invalidate_mapping_pages(struct address_space *mapping,
+                pgoff_t start, pgoff_t end)
+{
+        return __invalidate_mapping_pages(mapping, start, end, NULL);
+}
 EXPORT_SYMBOL(invalidate_mapping_pages);
+
+/*
+ * This helper is similar to invalidate_mapping_pages(), except that it
+ * accounts for pages that are likely on a pagevec and counts them in
+ * @nr_pagevec, which will be used by the caller.
+ */
+void invalidate_mapping_pagevec(struct address_space *mapping,
+                pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
+{
+        __invalidate_mapping_pages(mapping, start, end, nr_pagevec);
+}
 
 /*
  * This is like invalidate_complete_page(), except it ignores the page's
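[Editor's note] The nr_pagevec out-parameter lets a caller distinguish "page was invalidated" from "page was merely deactivated because a remote CPU's pagevec still pins it". A hypothetical caller, modeled on the POSIX_FADV_DONTNEED path this helper was added for; the function name and retry policy are illustrative, not part of this patch:

    static void drop_range_hint(struct address_space *mapping,
                                pgoff_t start, pgoff_t end)
    {
            unsigned long nr_pagevec = 0;

            invalidate_mapping_pagevec(mapping, start, end, &nr_pagevec);

            /*
             * Some pages could not be freed because a remote CPU's
             * pagevec pins them: drain all pagevecs and retry once.
             */
            if (nr_pagevec) {
                    lru_add_drain_all();
                    invalidate_mapping_pages(mapping, start, end);
            }
    }
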
@@ -664,7 +698,7 @@
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
  *
- * Returns -EBUSY if any pages could not be invalidated.
+ * Return: -EBUSY if any pages could not be invalidated.
  */
 int invalidate_inode_pages2_range(struct address_space *mapping,
                 pgoff_t start, pgoff_t end)
@@ -693,7 +727,7 @@
                         if (index > end)
                                 break;
 
-                        if (radix_tree_exceptional_entry(page)) {
+                        if (xa_is_value(page)) {
                                 if (!invalidate_exceptional_entry2(mapping,
                                                 index, page))
                                         ret = -EBUSY;
@@ -737,10 +771,10 @@
                 index++;
         }
         /*
-         * For DAX we invalidate page tables after invalidating radix tree. We
+         * For DAX we invalidate page tables after invalidating page cache. We
          * could invalidate page tables while invalidating each entry however
          * that would be expensive. And doing range unmapping before doesn't
-         * work as we have no cheap way to find whether radix tree entry didn't
+         * work as we have no cheap way to find whether page cache entry didn't
          * get remapped later.
          */
         if (dax_mapping(mapping)) {
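[Editor's note] The body of the dax_mapping() branch falls outside this hunk; based on the comment above, it presumably issues one bulk range unmap after the page-cache walk, along these lines (the offset arithmetic is a sketch, not quoted from the patch):

    if (dax_mapping(mapping)) {
            /* one bulk unmap is cheaper than one unmap per entry */
            unmap_mapping_range(mapping,
                                (loff_t)start << PAGE_SHIFT,
                                (loff_t)(end - start + 1) << PAGE_SHIFT,
                                0); /* even_cows=0: leave private COWs mapped */
    }
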
@@ -759,7 +793,7 @@
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
  *
- * Returns -EBUSY if any pages could not be invalidated.
+ * Return: -EBUSY if any pages could not be invalidated.
  */
 int invalidate_inode_pages2(struct address_space *mapping)
 {