.. | ..
---|
36 | 36 | #include <linux/uio.h> |
---|
37 | 37 | #include <linux/khugepaged.h> |
---|
38 | 38 | #include <linux/hugetlb.h> |
---|
| 39 | +#include <linux/frontswap.h> |
---|
| 40 | +#include <linux/fs_parser.h> |
---|
| 41 | +#include <linux/mm_inline.h> |
---|
39 | 42 | |
---|
40 | 43 | #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */ |
---|
| 44 | + |
---|
| 45 | +#include "internal.h" |
---|
| 46 | + |
---|
| 47 | +#undef CREATE_TRACE_POINTS |
---|
| 48 | +#include <trace/hooks/shmem_fs.h> |
---|
| 49 | +#include <trace/hooks/mm.h> |
---|
41 | 50 | |
---|
42 | 51 | static struct vfsmount *shm_mnt; |
---|
43 | 52 | |
---|
.. | .. |
---|
80 | 89 | #include <linux/uuid.h> |
---|
81 | 90 | |
---|
82 | 91 | #include <linux/uaccess.h> |
---|
83 | | -#include <asm/pgtable.h> |
---|
84 | 92 | |
---|
85 | 93 | #include "internal.h" |
---|
86 | 94 | |
---|
.. | .. |
---|
106 | 114 | pgoff_t nr_unswapped; /* how often writepage refused to swap out */ |
---|
107 | 115 | }; |
---|
108 | 116 | |
---|
| 117 | +struct shmem_options { |
---|
| 118 | + unsigned long long blocks; |
---|
| 119 | + unsigned long long inodes; |
---|
| 120 | + struct mempolicy *mpol; |
---|
| 121 | + kuid_t uid; |
---|
| 122 | + kgid_t gid; |
---|
| 123 | + umode_t mode; |
---|
| 124 | + bool full_inums; |
---|
| 125 | + int huge; |
---|
| 126 | + int seen; |
---|
| 127 | +#define SHMEM_SEEN_BLOCKS 1 |
---|
| 128 | +#define SHMEM_SEEN_INODES 2 |
---|
| 129 | +#define SHMEM_SEEN_HUGE 4 |
---|
| 130 | +#define SHMEM_SEEN_INUMS 8 |
---|
| 131 | +}; |
---|
| 132 | + |
---|
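
The `seen` bitmask in `struct shmem_options` records which options a mount request actually supplied, so later code can tell "not given" apart from "given as 0". A minimal userspace sketch of the same pattern, with a hypothetical parser (names and defaults invented for illustration, not the kernel's fs_parser code):

```c
#include <stdio.h>

#define SEEN_BLOCKS 1
#define SEEN_INODES 2

struct opts {
	unsigned long long blocks;
	unsigned long long inodes;
	int seen;			/* which options the user supplied */
};

/* Hypothetical parser: accepts "blocks=<n>" and "inodes=<n>" tokens. */
static int parse_one(struct opts *o, const char *tok)
{
	unsigned long long v;

	if (sscanf(tok, "blocks=%llu", &v) == 1) {
		o->blocks = v;
		o->seen |= SEEN_BLOCKS;
		return 0;
	}
	if (sscanf(tok, "inodes=%llu", &v) == 1) {
		o->inodes = v;
		o->seen |= SEEN_INODES;
		return 0;
	}
	return -1;
}

int main(void)
{
	struct opts o = { .blocks = 1024, .inodes = 512 };	/* defaults */

	parse_one(&o, "blocks=0");	/* an explicit 0 is still "seen" */

	if (o.seen & SEEN_BLOCKS)
		printf("blocks explicitly set to %llu\n", o.blocks);
	if (!(o.seen & SEEN_INODES))
		printf("inodes left at default %llu\n", o.inodes);
	return 0;
}
```

On a remount, only fields whose SEEN bit is set would be applied; preserving that distinction is the whole point of the bitmask.
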
109 | 133 | #ifdef CONFIG_TMPFS |
---|
110 | 134 | static unsigned long shmem_default_max_blocks(void) |
---|
111 | 135 | { |
---|
112 | | - return totalram_pages / 2; |
---|
| 136 | + return totalram_pages() / 2; |
---|
113 | 137 | } |
---|
114 | 138 | |
---|
115 | 139 | static unsigned long shmem_default_max_inodes(void) |
---|
116 | 140 | { |
---|
117 | | - return min(totalram_pages - totalhigh_pages, totalram_pages / 2); |
---|
| 141 | + unsigned long nr_pages = totalram_pages(); |
---|
| 142 | + |
---|
| 143 | + return min(nr_pages - totalhigh_pages(), nr_pages / 2); |
---|
118 | 144 | } |
---|
119 | 145 | #endif |
---|
120 | 146 | |
---|
121 | 147 | static bool shmem_should_replace_page(struct page *page, gfp_t gfp); |
---|
122 | 148 | static int shmem_replace_page(struct page **pagep, gfp_t gfp, |
---|
123 | 149 | struct shmem_inode_info *info, pgoff_t index); |
---|
| 150 | +static int shmem_swapin_page(struct inode *inode, pgoff_t index, |
---|
| 151 | + struct page **pagep, enum sgp_type sgp, |
---|
| 152 | + gfp_t gfp, struct vm_area_struct *vma, |
---|
| 153 | + vm_fault_t *fault_type); |
---|
124 | 154 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
---|
125 | 155 | struct page **pagep, enum sgp_type sgp, |
---|
126 | 156 | gfp_t gfp, struct vm_area_struct *vma, |
---|
.. | .. |
---|
239 | 269 | static LIST_HEAD(shmem_swaplist); |
---|
240 | 270 | static DEFINE_MUTEX(shmem_swaplist_mutex); |
---|
241 | 271 | |
---|
242 | | -static int shmem_reserve_inode(struct super_block *sb) |
---|
| 272 | +/* |
---|
| 273 | + * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and |
---|
| 274 | + * produces a novel ino for the newly allocated inode. |
---|
| 275 | + * |
---|
| 276 | + * It may also be called when making a hard link to permit the space needed by |
---|
| 277 | + * each dentry. However, in that case, no new inode number is needed since that |
---|
| 278 | + * internally draws from another pool of inode numbers (currently global |
---|
| 279 | + * get_next_ino()). This case is indicated by passing NULL as inop. |
---|
| 280 | + */ |
---|
| 281 | +#define SHMEM_INO_BATCH 1024 |
---|
| 282 | +static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) |
---|
243 | 283 | { |
---|
244 | 284 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
245 | | - if (sbinfo->max_inodes) { |
---|
| 285 | + ino_t ino; |
---|
| 286 | + |
---|
| 287 | + if (!(sb->s_flags & SB_KERNMOUNT)) { |
---|
246 | 288 | spin_lock(&sbinfo->stat_lock); |
---|
247 | | - if (!sbinfo->free_inodes) { |
---|
248 | | - spin_unlock(&sbinfo->stat_lock); |
---|
249 | | - return -ENOSPC; |
---|
| 289 | + if (sbinfo->max_inodes) { |
---|
| 290 | + if (!sbinfo->free_inodes) { |
---|
| 291 | + spin_unlock(&sbinfo->stat_lock); |
---|
| 292 | + return -ENOSPC; |
---|
| 293 | + } |
---|
| 294 | + sbinfo->free_inodes--; |
---|
250 | 295 | } |
---|
251 | | - sbinfo->free_inodes--; |
---|
| 296 | + if (inop) { |
---|
| 297 | + ino = sbinfo->next_ino++; |
---|
| 298 | + if (unlikely(is_zero_ino(ino))) |
---|
| 299 | + ino = sbinfo->next_ino++; |
---|
| 300 | + if (unlikely(!sbinfo->full_inums && |
---|
| 301 | + ino > UINT_MAX)) { |
---|
| 302 | + /* |
---|
| 303 | + * Emulate get_next_ino uint wraparound for |
---|
| 304 | + * compatibility |
---|
| 305 | + */ |
---|
| 306 | + if (IS_ENABLED(CONFIG_64BIT)) |
---|
| 307 | + pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n", |
---|
| 308 | + __func__, MINOR(sb->s_dev)); |
---|
| 309 | + sbinfo->next_ino = 1; |
---|
| 310 | + ino = sbinfo->next_ino++; |
---|
| 311 | + } |
---|
| 312 | + *inop = ino; |
---|
| 313 | + } |
---|
252 | 314 | spin_unlock(&sbinfo->stat_lock); |
---|
| 315 | + } else if (inop) { |
---|
| 316 | + /* |
---|
| 317 | + * __shmem_file_setup, one of our callers, is lock-free: it |
---|
| 318 | + * doesn't hold stat_lock in shmem_reserve_inode since |
---|
| 319 | + * max_inodes is always 0, and is called from potentially |
---|
| 320 | + * unknown contexts. As such, use a per-cpu batched allocator |
---|
| 321 | + * which doesn't require the per-sb stat_lock unless we are at |
---|
| 322 | + * the batch boundary. |
---|
| 323 | + * |
---|
| 324 | + * We don't need to worry about inode{32,64} since SB_KERNMOUNT |
---|
| 325 | + * shmem mounts are not exposed to userspace, so we don't need |
---|
| 326 | + * to worry about things like glibc compatibility. |
---|
| 327 | + */ |
---|
| 328 | + ino_t *next_ino; |
---|
| 329 | + next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); |
---|
| 330 | + ino = *next_ino; |
---|
| 331 | + if (unlikely(ino % SHMEM_INO_BATCH == 0)) { |
---|
| 332 | + spin_lock(&sbinfo->stat_lock); |
---|
| 333 | + ino = sbinfo->next_ino; |
---|
| 334 | + sbinfo->next_ino += SHMEM_INO_BATCH; |
---|
| 335 | + spin_unlock(&sbinfo->stat_lock); |
---|
| 336 | + if (unlikely(is_zero_ino(ino))) |
---|
| 337 | + ino++; |
---|
| 338 | + } |
---|
| 339 | + *inop = ino; |
---|
| 340 | + *next_ino = ++ino; |
---|
| 341 | + put_cpu(); |
---|
253 | 342 | } |
---|
| 343 | + |
---|
254 | 344 | return 0; |
---|
255 | 345 | } |
---|
256 | 346 | |
---|
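
For SB_KERNMOUNT mounts, the hunk above hands out inode numbers from a per-cpu cache and only takes `stat_lock` once every `SHMEM_INO_BATCH` allocations. A rough userspace analogue of that batching scheme, using per-thread caches and a mutex in place of per-cpu data and `stat_lock` (all names here are illustrative, not kernel API):

```c
#include <pthread.h>
#include <stdio.h>

#define INO_BATCH 1024

static pthread_mutex_t stat_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long next_ino = 1;	/* global pool, protected by stat_lock */

/* Per-thread cache standing in for the per-cpu ino_batch pointer. */
static __thread unsigned long cache_next;
static __thread unsigned long cache_end;

static unsigned long alloc_ino(void)
{
	if (cache_next == cache_end) {	/* batch exhausted: refill under the lock */
		pthread_mutex_lock(&stat_lock);
		cache_next = next_ino;
		next_ino += INO_BATCH;
		pthread_mutex_unlock(&stat_lock);
		cache_end = cache_next + INO_BATCH;
		if (cache_next == 0)	/* never hand out ino 0 */
			cache_next++;
	}
	return cache_next++;
}

static void *worker(void *arg)
{
	unsigned long last = 0;

	for (int i = 0; i < 100000; i++)
		last = alloc_ino();
	printf("thread %ld: last ino %lu\n", (long)arg, last);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
```

The kernel version additionally skips ino 0 and, for user-visible mounts, emulates the old `get_next_ino()` wrap at UINT_MAX unless inode64 was requested, as the comment in the hunk explains.
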
.. | .. |
---|
326 | 416 | } |
---|
327 | 417 | |
---|
328 | 418 | /* |
---|
329 | | - * Replace item expected in radix tree by a new item, while holding tree lock. |
---|
| 419 | + * Replace item expected in xarray by a new item, while holding xa_lock. |
---|
330 | 420 | */ |
---|
331 | | -static int shmem_radix_tree_replace(struct address_space *mapping, |
---|
| 421 | +static int shmem_replace_entry(struct address_space *mapping, |
---|
332 | 422 | pgoff_t index, void *expected, void *replacement) |
---|
333 | 423 | { |
---|
334 | | - struct radix_tree_node *node; |
---|
335 | | - void __rcu **pslot; |
---|
| 424 | + XA_STATE(xas, &mapping->i_pages, index); |
---|
336 | 425 | void *item; |
---|
337 | 426 | |
---|
338 | 427 | VM_BUG_ON(!expected); |
---|
339 | 428 | VM_BUG_ON(!replacement); |
---|
340 | | - item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot); |
---|
341 | | - if (!item) |
---|
342 | | - return -ENOENT; |
---|
| 429 | + item = xas_load(&xas); |
---|
343 | 430 | if (item != expected) |
---|
344 | 431 | return -ENOENT; |
---|
345 | | - __radix_tree_replace(&mapping->i_pages, node, pslot, |
---|
346 | | - replacement, NULL); |
---|
| 432 | + xas_store(&xas, replacement); |
---|
347 | 433 | return 0; |
---|
348 | 434 | } |
---|
349 | 435 | |
---|
.. | .. |
---|
357 | 443 | static bool shmem_confirm_swap(struct address_space *mapping, |
---|
358 | 444 | pgoff_t index, swp_entry_t swap) |
---|
359 | 445 | { |
---|
360 | | - void *item; |
---|
361 | | - |
---|
362 | | - rcu_read_lock(); |
---|
363 | | - item = radix_tree_lookup(&mapping->i_pages, index); |
---|
364 | | - rcu_read_unlock(); |
---|
365 | | - return item == swp_to_radix_entry(swap); |
---|
| 446 | + return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); |
---|
366 | 447 | } |
---|
367 | 448 | |
---|
368 | 449 | /* |
---|
.. | .. |
---|
397 | 478 | #define SHMEM_HUGE_DENY (-1) |
---|
398 | 479 | #define SHMEM_HUGE_FORCE (-2) |
---|
399 | 480 | |
---|
400 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 481 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
401 | 482 | /* ifdef here to avoid bloating shmem.o when not necessary */ |
---|
402 | 483 | |
---|
403 | 484 | static int shmem_huge __read_mostly; |
---|
404 | 485 | |
---|
405 | | -#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) |
---|
| 486 | +#if defined(CONFIG_SYSFS) |
---|
406 | 487 | static int shmem_parse_huge(const char *str) |
---|
407 | 488 | { |
---|
408 | 489 | if (!strcmp(str, "never")) |
---|
.. | .. |
---|
419 | 500 | return SHMEM_HUGE_FORCE; |
---|
420 | 501 | return -EINVAL; |
---|
421 | 502 | } |
---|
| 503 | +#endif |
---|
422 | 504 | |
---|
| 505 | +#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) |
---|
423 | 506 | static const char *shmem_format_huge(int huge) |
---|
424 | 507 | { |
---|
425 | 508 | switch (huge) { |
---|
.. | .. |
---|
570 | 653 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
571 | 654 | return READ_ONCE(sbinfo->shrinklist_len); |
---|
572 | 655 | } |
---|
573 | | -#else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
---|
| 656 | +#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ |
---|
574 | 657 | |
---|
575 | 658 | #define shmem_huge SHMEM_HUGE_DENY |
---|
576 | 659 | |
---|
.. | .. |
---|
579 | 662 | { |
---|
580 | 663 | return 0; |
---|
581 | 664 | } |
---|
582 | | -#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
---|
| 665 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
---|
583 | 666 | |
---|
584 | 667 | static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo) |
---|
585 | 668 | { |
---|
586 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && |
---|
| 669 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
587 | 670 | (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) && |
---|
588 | 671 | shmem_huge != SHMEM_HUGE_DENY) |
---|
589 | 672 | return true; |
---|
.. | .. |
---|
595 | 678 | */ |
---|
596 | 679 | static int shmem_add_to_page_cache(struct page *page, |
---|
597 | 680 | struct address_space *mapping, |
---|
598 | | - pgoff_t index, void *expected) |
---|
| 681 | + pgoff_t index, void *expected, gfp_t gfp, |
---|
| 682 | + struct mm_struct *charge_mm) |
---|
599 | 683 | { |
---|
600 | | - int error, nr = hpage_nr_pages(page); |
---|
| 684 | + XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); |
---|
| 685 | + unsigned long i = 0; |
---|
| 686 | + unsigned long nr = compound_nr(page); |
---|
| 687 | + int error; |
---|
601 | 688 | |
---|
602 | 689 | VM_BUG_ON_PAGE(PageTail(page), page); |
---|
603 | 690 | VM_BUG_ON_PAGE(index != round_down(index, nr), page); |
---|
.. | .. |
---|
609 | 696 | page->mapping = mapping; |
---|
610 | 697 | page->index = index; |
---|
611 | 698 | |
---|
612 | | - xa_lock_irq(&mapping->i_pages); |
---|
613 | | - if (PageTransHuge(page)) { |
---|
614 | | - void __rcu **results; |
---|
615 | | - pgoff_t idx; |
---|
616 | | - int i; |
---|
617 | | - |
---|
618 | | - error = 0; |
---|
619 | | - if (radix_tree_gang_lookup_slot(&mapping->i_pages, |
---|
620 | | - &results, &idx, index, 1) && |
---|
621 | | - idx < index + HPAGE_PMD_NR) { |
---|
622 | | - error = -EEXIST; |
---|
623 | | - } |
---|
624 | | - |
---|
625 | | - if (!error) { |
---|
626 | | - for (i = 0; i < HPAGE_PMD_NR; i++) { |
---|
627 | | - error = radix_tree_insert(&mapping->i_pages, |
---|
628 | | - index + i, page + i); |
---|
629 | | - VM_BUG_ON(error); |
---|
| 699 | + if (!PageSwapCache(page)) { |
---|
| 700 | + error = mem_cgroup_charge(page, charge_mm, gfp); |
---|
| 701 | + if (error) { |
---|
| 702 | + if (PageTransHuge(page)) { |
---|
| 703 | + count_vm_event(THP_FILE_FALLBACK); |
---|
| 704 | + count_vm_event(THP_FILE_FALLBACK_CHARGE); |
---|
630 | 705 | } |
---|
631 | | - count_vm_event(THP_FILE_ALLOC); |
---|
| 706 | + goto error; |
---|
632 | 707 | } |
---|
633 | | - } else if (!expected) { |
---|
634 | | - error = radix_tree_insert(&mapping->i_pages, index, page); |
---|
635 | | - } else { |
---|
636 | | - error = shmem_radix_tree_replace(mapping, index, expected, |
---|
637 | | - page); |
---|
| 708 | + } |
---|
| 709 | + cgroup_throttle_swaprate(page, gfp); |
---|
| 710 | + |
---|
| 711 | + do { |
---|
| 712 | + void *entry; |
---|
| 713 | + xas_lock_irq(&xas); |
---|
| 714 | + entry = xas_find_conflict(&xas); |
---|
| 715 | + if (entry != expected) |
---|
| 716 | + xas_set_err(&xas, -EEXIST); |
---|
| 717 | + xas_create_range(&xas); |
---|
| 718 | + if (xas_error(&xas)) |
---|
| 719 | + goto unlock; |
---|
| 720 | +next: |
---|
| 721 | + xas_store(&xas, page); |
---|
| 722 | + if (++i < nr) { |
---|
| 723 | + xas_next(&xas); |
---|
| 724 | + goto next; |
---|
| 725 | + } |
---|
| 726 | + if (PageTransHuge(page)) { |
---|
| 727 | + count_vm_event(THP_FILE_ALLOC); |
---|
| 728 | + __inc_node_page_state(page, NR_SHMEM_THPS); |
---|
| 729 | + } |
---|
| 730 | + mapping->nrpages += nr; |
---|
| 731 | + __mod_lruvec_page_state(page, NR_FILE_PAGES, nr); |
---|
| 732 | + __mod_lruvec_page_state(page, NR_SHMEM, nr); |
---|
| 733 | +unlock: |
---|
| 734 | + xas_unlock_irq(&xas); |
---|
| 735 | + } while (xas_nomem(&xas, gfp)); |
---|
| 736 | + |
---|
| 737 | + if (xas_error(&xas)) { |
---|
| 738 | + error = xas_error(&xas); |
---|
| 739 | + goto error; |
---|
638 | 740 | } |
---|
639 | 741 | |
---|
640 | | - if (!error) { |
---|
641 | | - mapping->nrpages += nr; |
---|
642 | | - if (PageTransHuge(page)) |
---|
643 | | - __inc_node_page_state(page, NR_SHMEM_THPS); |
---|
644 | | - __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); |
---|
645 | | - __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr); |
---|
646 | | - xa_unlock_irq(&mapping->i_pages); |
---|
647 | | - } else { |
---|
648 | | - page->mapping = NULL; |
---|
649 | | - xa_unlock_irq(&mapping->i_pages); |
---|
650 | | - page_ref_sub(page, nr); |
---|
651 | | - } |
---|
| 742 | + return 0; |
---|
| 743 | +error: |
---|
| 744 | + page->mapping = NULL; |
---|
| 745 | + page_ref_sub(page, nr); |
---|
652 | 746 | return error; |
---|
653 | 747 | } |
---|
654 | 748 | |
---|
.. | .. |
---|
663 | 757 | VM_BUG_ON_PAGE(PageCompound(page), page); |
---|
664 | 758 | |
---|
665 | 759 | xa_lock_irq(&mapping->i_pages); |
---|
666 | | - error = shmem_radix_tree_replace(mapping, page->index, page, radswap); |
---|
| 760 | + error = shmem_replace_entry(mapping, page->index, page, radswap); |
---|
667 | 761 | page->mapping = NULL; |
---|
668 | 762 | mapping->nrpages--; |
---|
669 | | - __dec_node_page_state(page, NR_FILE_PAGES); |
---|
670 | | - __dec_node_page_state(page, NR_SHMEM); |
---|
| 763 | + __dec_lruvec_page_state(page, NR_FILE_PAGES); |
---|
| 764 | + __dec_lruvec_page_state(page, NR_SHMEM); |
---|
671 | 765 | xa_unlock_irq(&mapping->i_pages); |
---|
672 | 766 | put_page(page); |
---|
673 | 767 | BUG_ON(error); |
---|
674 | 768 | } |
---|
675 | 769 | |
---|
676 | 770 | /* |
---|
677 | | - * Remove swap entry from radix tree, free the swap and its page cache. |
---|
| 771 | + * Remove swap entry from page cache, free the swap and its page cache. |
---|
678 | 772 | */ |
---|
679 | 773 | static int shmem_free_swap(struct address_space *mapping, |
---|
680 | 774 | pgoff_t index, void *radswap) |
---|
681 | 775 | { |
---|
682 | 776 | void *old; |
---|
683 | 777 | |
---|
684 | | - xa_lock_irq(&mapping->i_pages); |
---|
685 | | - old = radix_tree_delete_item(&mapping->i_pages, index, radswap); |
---|
686 | | - xa_unlock_irq(&mapping->i_pages); |
---|
| 778 | + old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); |
---|
687 | 779 | if (old != radswap) |
---|
688 | 780 | return -ENOENT; |
---|
689 | 781 | free_swap_and_cache(radix_to_swp_entry(radswap)); |
---|
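
These hunks are part of the radix-tree to XArray conversion: the open-coded `__radix_tree_lookup()`/`__radix_tree_replace()` and `radix_tree_delete_item()` sequences become `xas_load()`/`xas_store()` and `xa_cmpxchg_irq()`. A minimal kernel-style sketch of the basic XArray calls, not specific to shmem (assumes a kernel build context; error handling omitted):

```c
#include <linux/xarray.h>
#include <linux/printk.h>

static DEFINE_XARRAY(demo_xa);

/* Illustrative only: stores an arbitrary pointer, as shmem stores pages. */
static void xarray_demo(void *item)
{
	void *old;

	/* Insert or overwrite the entry at index 5. */
	xa_store(&demo_xa, 5, item, GFP_KERNEL);

	/* Lock-free lookup under RCU, as shmem_confirm_swap() now does. */
	if (xa_load(&demo_xa, 5) != item)
		pr_info("lookup did not find our entry\n");

	/* Delete only if the entry is still what we expect (cf. shmem_free_swap). */
	old = xa_cmpxchg(&demo_xa, 5, item, NULL, 0);
	if (old != item)
		pr_info("entry changed under us\n");
}
```

The `_irq` variant used in `shmem_free_swap()` simply takes the xa_lock with interrupts disabled, matching how the page cache's `i_pages` lock is used elsewhere.
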
.. | .. |
---|
700 | 792 | unsigned long shmem_partial_swap_usage(struct address_space *mapping, |
---|
701 | 793 | pgoff_t start, pgoff_t end) |
---|
702 | 794 | { |
---|
703 | | - struct radix_tree_iter iter; |
---|
704 | | - void __rcu **slot; |
---|
| 795 | + XA_STATE(xas, &mapping->i_pages, start); |
---|
705 | 796 | struct page *page; |
---|
706 | 797 | unsigned long swapped = 0; |
---|
707 | 798 | |
---|
708 | 799 | rcu_read_lock(); |
---|
709 | | - |
---|
710 | | - radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { |
---|
711 | | - if (iter.index >= end) |
---|
712 | | - break; |
---|
713 | | - |
---|
714 | | - page = radix_tree_deref_slot(slot); |
---|
715 | | - |
---|
716 | | - if (radix_tree_deref_retry(page)) { |
---|
717 | | - slot = radix_tree_iter_retry(&iter); |
---|
| 800 | + xas_for_each(&xas, page, end - 1) { |
---|
| 801 | + if (xas_retry(&xas, page)) |
---|
718 | 802 | continue; |
---|
719 | | - } |
---|
720 | | - |
---|
721 | | - if (radix_tree_exceptional_entry(page)) |
---|
| 803 | + if (xa_is_value(page)) |
---|
722 | 804 | swapped++; |
---|
723 | 805 | |
---|
724 | 806 | if (need_resched()) { |
---|
725 | | - slot = radix_tree_iter_resume(slot, &iter); |
---|
| 807 | + xas_pause(&xas); |
---|
726 | 808 | cond_resched_rcu(); |
---|
727 | 809 | } |
---|
728 | 810 | } |
---|
.. | .. |
---|
797 | 879 | } |
---|
798 | 880 | |
---|
799 | 881 | /* |
---|
800 | | - * Remove range of pages and swap entries from radix tree, and free them. |
---|
| 882 | + * Check whether a hole-punch or truncation needs to split a huge page, |
---|
| 883 | + * returning true if no split was required, or the split has been successful. |
---|
| 884 | + * |
---|
| 885 | + * Eviction (or truncation to 0 size) should never need to split a huge page; |
---|
| 886 | + * but in rare cases might do so, if shmem_undo_range() failed to trylock on |
---|
| 887 | + * head, and then succeeded to trylock on tail. |
---|
| 888 | + * |
---|
| 889 | + * A split can only succeed when there are no additional references on the |
---|
| 890 | + * huge page: so the split below relies upon find_get_entries() having stopped |
---|
| 891 | + * when it found a subpage of the huge page, without getting further references. |
---|
| 892 | + */ |
---|
| 893 | +static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end) |
---|
| 894 | +{ |
---|
| 895 | + if (!PageTransCompound(page)) |
---|
| 896 | + return true; |
---|
| 897 | + |
---|
| 898 | + /* Just proceed to delete a huge page wholly within the range punched */ |
---|
| 899 | + if (PageHead(page) && |
---|
| 900 | + page->index >= start && page->index + HPAGE_PMD_NR <= end) |
---|
| 901 | + return true; |
---|
| 902 | + |
---|
| 903 | + /* Try to split huge page, so we can truly punch the hole or truncate */ |
---|
| 904 | + return split_huge_page(page) >= 0; |
---|
| 905 | +} |
---|
| 906 | + |
---|
| 907 | +/* |
---|
| 908 | + * Remove range of pages and swap entries from page cache, and free them. |
---|
801 | 909 | * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. |
---|
802 | 910 | */ |
---|
803 | 911 | static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, |
---|
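
`shmem_punch_compound()` above only lets a huge page pass through untouched when the whole compound page lies inside the punched range; otherwise it must be split first. A tiny standalone illustration of that containment test (HPAGE_PMD_NR hard-coded to 512 as on x86-64 with 4K pages; purely illustrative):

```c
#include <stdio.h>
#include <stdbool.h>

#define HPAGE_PMD_NR 512UL

/* Mirrors the check: does the compound page at 'head' lie within [start, end)? */
static bool whole_huge_page_punched(unsigned long head, unsigned long start,
				    unsigned long end)
{
	return head >= start && head + HPAGE_PMD_NR <= end;
}

int main(void)
{
	/* Hole [1024, 1536) covers the huge page at head index 1024: no split. */
	printf("%d\n", whole_huge_page_punched(1024, 1024, 1536));

	/* Hole [1000, 1300) only overlaps it: the page must be split first. */
	printf("%d\n", whole_huge_page_punched(1024, 1000, 1300));
	return 0;
}
```
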
.. | .. |
---|
833 | 941 | if (index >= end) |
---|
834 | 942 | break; |
---|
835 | 943 | |
---|
836 | | - if (radix_tree_exceptional_entry(page)) { |
---|
| 944 | + if (xa_is_value(page)) { |
---|
837 | 945 | if (unfalloc) |
---|
838 | 946 | continue; |
---|
839 | 947 | nr_swaps_freed += !shmem_free_swap(mapping, |
---|
.. | .. |
---|
846 | 954 | if (!trylock_page(page)) |
---|
847 | 955 | continue; |
---|
848 | 956 | |
---|
849 | | - if (PageTransTail(page)) { |
---|
850 | | - /* Middle of THP: zero out the page */ |
---|
851 | | - clear_highpage(page); |
---|
852 | | - unlock_page(page); |
---|
853 | | - continue; |
---|
854 | | - } else if (PageTransHuge(page)) { |
---|
855 | | - if (index == round_down(end, HPAGE_PMD_NR)) { |
---|
856 | | - /* |
---|
857 | | - * Range ends in the middle of THP: |
---|
858 | | - * zero out the page |
---|
859 | | - */ |
---|
860 | | - clear_highpage(page); |
---|
861 | | - unlock_page(page); |
---|
862 | | - continue; |
---|
863 | | - } |
---|
864 | | - index += HPAGE_PMD_NR - 1; |
---|
865 | | - i += HPAGE_PMD_NR - 1; |
---|
866 | | - } |
---|
867 | | - |
---|
868 | | - if (!unfalloc || !PageUptodate(page)) { |
---|
869 | | - VM_BUG_ON_PAGE(PageTail(page), page); |
---|
870 | | - if (page_mapping(page) == mapping) { |
---|
871 | | - VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
| 957 | + if ((!unfalloc || !PageUptodate(page)) && |
---|
| 958 | + page_mapping(page) == mapping) { |
---|
| 959 | + VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
| 960 | + if (shmem_punch_compound(page, start, end)) |
---|
872 | 961 | truncate_inode_page(mapping, page); |
---|
873 | | - } |
---|
874 | 962 | } |
---|
875 | 963 | unlock_page(page); |
---|
876 | 964 | } |
---|
.. | .. |
---|
930 | 1018 | if (index >= end) |
---|
931 | 1019 | break; |
---|
932 | 1020 | |
---|
933 | | - if (radix_tree_exceptional_entry(page)) { |
---|
| 1021 | + if (xa_is_value(page)) { |
---|
934 | 1022 | if (unfalloc) |
---|
935 | 1023 | continue; |
---|
936 | 1024 | if (shmem_free_swap(mapping, index, page)) { |
---|
.. | .. |
---|
944 | 1032 | |
---|
945 | 1033 | lock_page(page); |
---|
946 | 1034 | |
---|
947 | | - if (PageTransTail(page)) { |
---|
948 | | - /* Middle of THP: zero out the page */ |
---|
949 | | - clear_highpage(page); |
---|
950 | | - unlock_page(page); |
---|
951 | | - /* |
---|
952 | | - * Partial thp truncate due 'start' in middle |
---|
953 | | - * of THP: don't need to look on these pages |
---|
954 | | - * again on !pvec.nr restart. |
---|
955 | | - */ |
---|
956 | | - if (index != round_down(end, HPAGE_PMD_NR)) |
---|
957 | | - start++; |
---|
958 | | - continue; |
---|
959 | | - } else if (PageTransHuge(page)) { |
---|
960 | | - if (index == round_down(end, HPAGE_PMD_NR)) { |
---|
961 | | - /* |
---|
962 | | - * Range ends in the middle of THP: |
---|
963 | | - * zero out the page |
---|
964 | | - */ |
---|
965 | | - clear_highpage(page); |
---|
966 | | - unlock_page(page); |
---|
967 | | - continue; |
---|
968 | | - } |
---|
969 | | - index += HPAGE_PMD_NR - 1; |
---|
970 | | - i += HPAGE_PMD_NR - 1; |
---|
971 | | - } |
---|
972 | | - |
---|
973 | 1035 | if (!unfalloc || !PageUptodate(page)) { |
---|
974 | | - VM_BUG_ON_PAGE(PageTail(page), page); |
---|
975 | | - if (page_mapping(page) == mapping) { |
---|
976 | | - VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
977 | | - truncate_inode_page(mapping, page); |
---|
978 | | - } else { |
---|
| 1036 | + if (page_mapping(page) != mapping) { |
---|
979 | 1037 | /* Page was replaced by swap: retry */ |
---|
980 | 1038 | unlock_page(page); |
---|
981 | 1039 | index--; |
---|
982 | 1040 | break; |
---|
| 1041 | + } |
---|
| 1042 | + VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
| 1043 | + if (shmem_punch_compound(page, start, end)) |
---|
| 1044 | + truncate_inode_page(mapping, page); |
---|
| 1045 | + else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { |
---|
| 1046 | + /* Wipe the page and don't get stuck */ |
---|
| 1047 | + clear_highpage(page); |
---|
| 1048 | + flush_dcache_page(page); |
---|
| 1049 | + set_page_dirty(page); |
---|
| 1050 | + if (index < |
---|
| 1051 | + round_up(start, HPAGE_PMD_NR)) |
---|
| 1052 | + start = index + 1; |
---|
983 | 1053 | } |
---|
984 | 1054 | } |
---|
985 | 1055 | unlock_page(page); |
---|
.. | .. |
---|
1067 | 1137 | * Part of the huge page can be beyond i_size: subject |
---|
1068 | 1138 | * to shrink under memory pressure. |
---|
1069 | 1139 | */ |
---|
1070 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { |
---|
| 1140 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { |
---|
1071 | 1141 | spin_lock(&sbinfo->shrinklist_lock); |
---|
1072 | 1142 | /* |
---|
1073 | 1143 | * _careful to defend against unlocked access to |
---|
.. | .. |
---|
1106 | 1176 | } |
---|
1107 | 1177 | spin_unlock(&sbinfo->shrinklist_lock); |
---|
1108 | 1178 | } |
---|
1109 | | - if (!list_empty(&info->swaplist)) { |
---|
| 1179 | + while (!list_empty(&info->swaplist)) { |
---|
| 1180 | + /* Wait while shmem_unuse() is scanning this inode... */ |
---|
| 1181 | + wait_var_event(&info->stop_eviction, |
---|
| 1182 | + !atomic_read(&info->stop_eviction)); |
---|
1110 | 1183 | mutex_lock(&shmem_swaplist_mutex); |
---|
1111 | | - list_del_init(&info->swaplist); |
---|
| 1184 | + /* ...but beware of the race if we peeked too early */ |
---|
| 1185 | + if (!atomic_read(&info->stop_eviction)) |
---|
| 1186 | + list_del_init(&info->swaplist); |
---|
1112 | 1187 | mutex_unlock(&shmem_swaplist_mutex); |
---|
1113 | 1188 | } |
---|
1114 | 1189 | } |
---|
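
The eviction path above now waits for `info->stop_eviction` to drop to zero before touching the swaplist, while shmem_unuse() bumps it around its scan. A rough userspace analogue of that handshake, using an integer count plus a condition variable in place of `wait_var_event()`/`wake_up_var()` (names invented for illustration):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int stop_eviction;		/* how many scanners are inside */

static void scanner_enter(void)		/* cf. atomic_inc(&info->stop_eviction) */
{
	pthread_mutex_lock(&lock);
	stop_eviction++;
	pthread_mutex_unlock(&lock);
}

static void scanner_exit(void)		/* cf. atomic_dec_and_test() + wake_up_var() */
{
	pthread_mutex_lock(&lock);
	if (--stop_eviction == 0)
		pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

static void evictor_wait(void)		/* cf. wait_var_event(..., !atomic_read(...)) */
{
	pthread_mutex_lock(&lock);
	while (stop_eviction)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void *scanner(void *arg)
{
	scanner_enter();
	/* ... scan the inode for swap entries ... */
	scanner_exit();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, scanner, NULL);
	evictor_wait();			/* returns once no scanner is inside */
	pthread_join(t, NULL);
	printf("eviction may proceed\n");
	return 0;
}
```

As the hunk's own comment notes ("beware of the race if we peeked too early"), the kernel rechecks the counter under shmem_swaplist_mutex because the wait can complete just before a scanner gets in.
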
.. | .. |
---|
1119 | 1194 | clear_inode(inode); |
---|
1120 | 1195 | } |
---|
1121 | 1196 | |
---|
1122 | | -static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) |
---|
| 1197 | +extern struct swap_info_struct *swap_info[]; |
---|
| 1198 | + |
---|
| 1199 | +static int shmem_find_swap_entries(struct address_space *mapping, |
---|
| 1200 | + pgoff_t start, unsigned int nr_entries, |
---|
| 1201 | + struct page **entries, pgoff_t *indices, |
---|
| 1202 | + unsigned int type, bool frontswap) |
---|
1123 | 1203 | { |
---|
1124 | | - struct radix_tree_iter iter; |
---|
1125 | | - void __rcu **slot; |
---|
1126 | | - unsigned long found = -1; |
---|
1127 | | - unsigned int checked = 0; |
---|
| 1204 | + XA_STATE(xas, &mapping->i_pages, start); |
---|
| 1205 | + struct page *page; |
---|
| 1206 | + swp_entry_t entry; |
---|
| 1207 | + unsigned int ret = 0; |
---|
| 1208 | + |
---|
| 1209 | + if (!nr_entries) |
---|
| 1210 | + return 0; |
---|
1128 | 1211 | |
---|
1129 | 1212 | rcu_read_lock(); |
---|
1130 | | - radix_tree_for_each_slot(slot, root, &iter, 0) { |
---|
1131 | | - void *entry = radix_tree_deref_slot(slot); |
---|
1132 | | - |
---|
1133 | | - if (radix_tree_deref_retry(entry)) { |
---|
1134 | | - slot = radix_tree_iter_retry(&iter); |
---|
| 1213 | + xas_for_each(&xas, page, ULONG_MAX) { |
---|
| 1214 | + if (xas_retry(&xas, page)) |
---|
1135 | 1215 | continue; |
---|
| 1216 | + |
---|
| 1217 | + if (!xa_is_value(page)) |
---|
| 1218 | + continue; |
---|
| 1219 | + |
---|
| 1220 | + entry = radix_to_swp_entry(page); |
---|
| 1221 | + if (swp_type(entry) != type) |
---|
| 1222 | + continue; |
---|
| 1223 | + if (frontswap && |
---|
| 1224 | + !frontswap_test(swap_info[type], swp_offset(entry))) |
---|
| 1225 | + continue; |
---|
| 1226 | + |
---|
| 1227 | + indices[ret] = xas.xa_index; |
---|
| 1228 | + entries[ret] = page; |
---|
| 1229 | + |
---|
| 1230 | + if (need_resched()) { |
---|
| 1231 | + xas_pause(&xas); |
---|
| 1232 | + cond_resched_rcu(); |
---|
1136 | 1233 | } |
---|
1137 | | - if (entry == item) { |
---|
1138 | | - found = iter.index; |
---|
| 1234 | + if (++ret == nr_entries) |
---|
1139 | 1235 | break; |
---|
1140 | | - } |
---|
1141 | | - checked++; |
---|
1142 | | - if ((checked % 4096) != 0) |
---|
1143 | | - continue; |
---|
1144 | | - slot = radix_tree_iter_resume(slot, &iter); |
---|
1145 | | - cond_resched_rcu(); |
---|
1146 | 1236 | } |
---|
1147 | | - |
---|
1148 | 1237 | rcu_read_unlock(); |
---|
1149 | | - return found; |
---|
| 1238 | + |
---|
| 1239 | + return ret; |
---|
| 1240 | +} |
---|
| 1241 | + |
---|
| 1242 | +/* |
---|
| 1243 | + * Move the swapped pages for an inode to page cache. Returns the count |
---|
| 1244 | + * of pages swapped in, or the error in case of failure. |
---|
| 1245 | + */ |
---|
| 1246 | +static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, |
---|
| 1247 | + pgoff_t *indices) |
---|
| 1248 | +{ |
---|
| 1249 | + int i = 0; |
---|
| 1250 | + int ret = 0; |
---|
| 1251 | + int error = 0; |
---|
| 1252 | + struct address_space *mapping = inode->i_mapping; |
---|
| 1253 | + |
---|
| 1254 | + for (i = 0; i < pvec.nr; i++) { |
---|
| 1255 | + struct page *page = pvec.pages[i]; |
---|
| 1256 | + |
---|
| 1257 | + if (!xa_is_value(page)) |
---|
| 1258 | + continue; |
---|
| 1259 | + error = shmem_swapin_page(inode, indices[i], |
---|
| 1260 | + &page, SGP_CACHE, |
---|
| 1261 | + mapping_gfp_mask(mapping), |
---|
| 1262 | + NULL, NULL); |
---|
| 1263 | + if (error == 0) { |
---|
| 1264 | + unlock_page(page); |
---|
| 1265 | + put_page(page); |
---|
| 1266 | + ret++; |
---|
| 1267 | + } |
---|
| 1268 | + if (error == -ENOMEM) |
---|
| 1269 | + break; |
---|
| 1270 | + error = 0; |
---|
| 1271 | + } |
---|
| 1272 | + return error ? error : ret; |
---|
1150 | 1273 | } |
---|
1151 | 1274 | |
---|
1152 | 1275 | /* |
---|
1153 | 1276 | * If swap found in inode, free it and move page from swapcache to filecache. |
---|
1154 | 1277 | */ |
---|
1155 | | -static int shmem_unuse_inode(struct shmem_inode_info *info, |
---|
1156 | | - swp_entry_t swap, struct page **pagep) |
---|
| 1278 | +static int shmem_unuse_inode(struct inode *inode, unsigned int type, |
---|
| 1279 | + bool frontswap, unsigned long *fs_pages_to_unuse) |
---|
1157 | 1280 | { |
---|
1158 | | - struct address_space *mapping = info->vfs_inode.i_mapping; |
---|
1159 | | - void *radswap; |
---|
1160 | | - pgoff_t index; |
---|
1161 | | - gfp_t gfp; |
---|
1162 | | - int error = 0; |
---|
| 1281 | + struct address_space *mapping = inode->i_mapping; |
---|
| 1282 | + pgoff_t start = 0; |
---|
| 1283 | + struct pagevec pvec; |
---|
| 1284 | + pgoff_t indices[PAGEVEC_SIZE]; |
---|
| 1285 | + bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0); |
---|
| 1286 | + int ret = 0; |
---|
1163 | 1287 | |
---|
1164 | | - radswap = swp_to_radix_entry(swap); |
---|
1165 | | - index = find_swap_entry(&mapping->i_pages, radswap); |
---|
1166 | | - if (index == -1) |
---|
1167 | | - return -EAGAIN; /* tell shmem_unuse we found nothing */ |
---|
| 1288 | + pagevec_init(&pvec); |
---|
| 1289 | + do { |
---|
| 1290 | + unsigned int nr_entries = PAGEVEC_SIZE; |
---|
1168 | 1291 | |
---|
1169 | | - /* |
---|
1170 | | - * Move _head_ to start search for next from here. |
---|
1171 | | - * But be careful: shmem_evict_inode checks list_empty without taking |
---|
1172 | | - * mutex, and there's an instant in list_move_tail when info->swaplist |
---|
1173 | | - * would appear empty, if it were the only one on shmem_swaplist. |
---|
1174 | | - */ |
---|
1175 | | - if (shmem_swaplist.next != &info->swaplist) |
---|
1176 | | - list_move_tail(&shmem_swaplist, &info->swaplist); |
---|
| 1292 | + if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE) |
---|
| 1293 | + nr_entries = *fs_pages_to_unuse; |
---|
1177 | 1294 | |
---|
1178 | | - gfp = mapping_gfp_mask(mapping); |
---|
1179 | | - if (shmem_should_replace_page(*pagep, gfp)) { |
---|
1180 | | - mutex_unlock(&shmem_swaplist_mutex); |
---|
1181 | | - error = shmem_replace_page(pagep, gfp, info, index); |
---|
1182 | | - mutex_lock(&shmem_swaplist_mutex); |
---|
1183 | | - /* |
---|
1184 | | - * We needed to drop mutex to make that restrictive page |
---|
1185 | | - * allocation, but the inode might have been freed while we |
---|
1186 | | - * dropped it: although a racing shmem_evict_inode() cannot |
---|
1187 | | - * complete without emptying the radix_tree, our page lock |
---|
1188 | | - * on this swapcache page is not enough to prevent that - |
---|
1189 | | - * free_swap_and_cache() of our swap entry will only |
---|
1190 | | - * trylock_page(), removing swap from radix_tree whatever. |
---|
1191 | | - * |
---|
1192 | | - * We must not proceed to shmem_add_to_page_cache() if the |
---|
1193 | | - * inode has been freed, but of course we cannot rely on |
---|
1194 | | - * inode or mapping or info to check that. However, we can |
---|
1195 | | - * safely check if our swap entry is still in use (and here |
---|
1196 | | - * it can't have got reused for another page): if it's still |
---|
1197 | | - * in use, then the inode cannot have been freed yet, and we |
---|
1198 | | - * can safely proceed (if it's no longer in use, that tells |
---|
1199 | | - * nothing about the inode, but we don't need to unuse swap). |
---|
1200 | | - */ |
---|
1201 | | - if (!page_swapcount(*pagep)) |
---|
1202 | | - error = -ENOENT; |
---|
1203 | | - } |
---|
1204 | | - |
---|
1205 | | - /* |
---|
1206 | | - * We rely on shmem_swaplist_mutex, not only to protect the swaplist, |
---|
1207 | | - * but also to hold up shmem_evict_inode(): so inode cannot be freed |
---|
1208 | | - * beneath us (pagelock doesn't help until the page is in pagecache). |
---|
1209 | | - */ |
---|
1210 | | - if (!error) |
---|
1211 | | - error = shmem_add_to_page_cache(*pagep, mapping, index, |
---|
1212 | | - radswap); |
---|
1213 | | - if (error != -ENOMEM) { |
---|
1214 | | - /* |
---|
1215 | | - * Truncation and eviction use free_swap_and_cache(), which |
---|
1216 | | - * only does trylock page: if we raced, best clean up here. |
---|
1217 | | - */ |
---|
1218 | | - delete_from_swap_cache(*pagep); |
---|
1219 | | - set_page_dirty(*pagep); |
---|
1220 | | - if (!error) { |
---|
1221 | | - spin_lock_irq(&info->lock); |
---|
1222 | | - info->swapped--; |
---|
1223 | | - spin_unlock_irq(&info->lock); |
---|
1224 | | - swap_free(swap); |
---|
| 1295 | + pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, |
---|
| 1296 | + pvec.pages, indices, |
---|
| 1297 | + type, frontswap); |
---|
| 1298 | + if (pvec.nr == 0) { |
---|
| 1299 | + ret = 0; |
---|
| 1300 | + break; |
---|
1225 | 1301 | } |
---|
1226 | | - } |
---|
1227 | | - return error; |
---|
| 1302 | + |
---|
| 1303 | + ret = shmem_unuse_swap_entries(inode, pvec, indices); |
---|
| 1304 | + if (ret < 0) |
---|
| 1305 | + break; |
---|
| 1306 | + |
---|
| 1307 | + if (frontswap_partial) { |
---|
| 1308 | + *fs_pages_to_unuse -= ret; |
---|
| 1309 | + if (*fs_pages_to_unuse == 0) { |
---|
| 1310 | + ret = FRONTSWAP_PAGES_UNUSED; |
---|
| 1311 | + break; |
---|
| 1312 | + } |
---|
| 1313 | + } |
---|
| 1314 | + |
---|
| 1315 | + start = indices[pvec.nr - 1]; |
---|
| 1316 | + } while (true); |
---|
| 1317 | + |
---|
| 1318 | + return ret; |
---|
1228 | 1319 | } |
---|
1229 | 1320 | |
---|
1230 | 1321 | /* |
---|
1231 | | - * Search through swapped inodes to find and replace swap by page. |
---|
| 1322 | + * Read all the shared memory data that resides in the swap |
---|
| 1323 | + * device 'type' back into memory, so the swap device can be |
---|
| 1324 | + * unused. |
---|
1232 | 1325 | */ |
---|
1233 | | -int shmem_unuse(swp_entry_t swap, struct page *page) |
---|
| 1326 | +int shmem_unuse(unsigned int type, bool frontswap, |
---|
| 1327 | + unsigned long *fs_pages_to_unuse) |
---|
1234 | 1328 | { |
---|
1235 | | - struct list_head *this, *next; |
---|
1236 | | - struct shmem_inode_info *info; |
---|
1237 | | - struct mem_cgroup *memcg; |
---|
| 1329 | + struct shmem_inode_info *info, *next; |
---|
1238 | 1330 | int error = 0; |
---|
1239 | 1331 | |
---|
1240 | | - /* |
---|
1241 | | - * There's a faint possibility that swap page was replaced before |
---|
1242 | | - * caller locked it: caller will come back later with the right page. |
---|
1243 | | - */ |
---|
1244 | | - if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val)) |
---|
1245 | | - goto out; |
---|
1246 | | - |
---|
1247 | | - /* |
---|
1248 | | - * Charge page using GFP_KERNEL while we can wait, before taking |
---|
1249 | | - * the shmem_swaplist_mutex which might hold up shmem_writepage(). |
---|
1250 | | - * Charged back to the user (not to caller) when swap account is used. |
---|
1251 | | - */ |
---|
1252 | | - error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL, |
---|
1253 | | - &memcg, false); |
---|
1254 | | - if (error) |
---|
1255 | | - goto out; |
---|
1256 | | - /* No radix_tree_preload: swap entry keeps a place for page in tree */ |
---|
1257 | | - error = -EAGAIN; |
---|
| 1332 | + if (list_empty(&shmem_swaplist)) |
---|
| 1333 | + return 0; |
---|
1258 | 1334 | |
---|
1259 | 1335 | mutex_lock(&shmem_swaplist_mutex); |
---|
1260 | | - list_for_each_safe(this, next, &shmem_swaplist) { |
---|
1261 | | - info = list_entry(this, struct shmem_inode_info, swaplist); |
---|
1262 | | - if (info->swapped) |
---|
1263 | | - error = shmem_unuse_inode(info, swap, &page); |
---|
1264 | | - else |
---|
| 1336 | + list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { |
---|
| 1337 | + if (!info->swapped) { |
---|
1265 | 1338 | list_del_init(&info->swaplist); |
---|
| 1339 | + continue; |
---|
| 1340 | + } |
---|
| 1341 | + /* |
---|
| 1342 | + * Drop the swaplist mutex while searching the inode for swap; |
---|
| 1343 | + * but before doing so, make sure shmem_evict_inode() will not |
---|
| 1344 | + * remove placeholder inode from swaplist, nor let it be freed |
---|
| 1345 | + * (igrab() would protect from unlink, but not from unmount). |
---|
| 1346 | + */ |
---|
| 1347 | + atomic_inc(&info->stop_eviction); |
---|
| 1348 | + mutex_unlock(&shmem_swaplist_mutex); |
---|
| 1349 | + |
---|
| 1350 | + error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, |
---|
| 1351 | + fs_pages_to_unuse); |
---|
1266 | 1352 | cond_resched(); |
---|
1267 | | - if (error != -EAGAIN) |
---|
| 1353 | + |
---|
| 1354 | + mutex_lock(&shmem_swaplist_mutex); |
---|
| 1355 | + next = list_next_entry(info, swaplist); |
---|
| 1356 | + if (!info->swapped) |
---|
| 1357 | + list_del_init(&info->swaplist); |
---|
| 1358 | + if (atomic_dec_and_test(&info->stop_eviction)) |
---|
| 1359 | + wake_up_var(&info->stop_eviction); |
---|
| 1360 | + if (error) |
---|
1268 | 1361 | break; |
---|
1269 | | - /* found nothing in this: move on to search the next */ |
---|
1270 | 1362 | } |
---|
1271 | 1363 | mutex_unlock(&shmem_swaplist_mutex); |
---|
1272 | 1364 | |
---|
1273 | | - if (error) { |
---|
1274 | | - if (error != -ENOMEM) |
---|
1275 | | - error = 0; |
---|
1276 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
1277 | | - } else |
---|
1278 | | - mem_cgroup_commit_charge(page, memcg, true, false); |
---|
1279 | | -out: |
---|
1280 | | - unlock_page(page); |
---|
1281 | | - put_page(page); |
---|
1282 | 1365 | return error; |
---|
1283 | 1366 | } |
---|
1284 | 1367 | |
---|
.. | .. |
---|
1348 | 1431 | SetPageUptodate(page); |
---|
1349 | 1432 | } |
---|
1350 | 1433 | |
---|
| 1434 | + trace_android_vh_set_shmem_page_flag(page); |
---|
1351 | 1435 | swap = get_swap_page(page); |
---|
1352 | 1436 | if (!swap.val) |
---|
1353 | 1437 | goto redirty; |
---|
.. | .. |
---|
1362 | 1446 | */ |
---|
1363 | 1447 | mutex_lock(&shmem_swaplist_mutex); |
---|
1364 | 1448 | if (list_empty(&info->swaplist)) |
---|
1365 | | - list_add_tail(&info->swaplist, &shmem_swaplist); |
---|
| 1449 | + list_add(&info->swaplist, &shmem_swaplist); |
---|
1366 | 1450 | |
---|
1367 | | - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { |
---|
| 1451 | + if (add_to_swap_cache(page, swap, |
---|
| 1452 | + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, |
---|
| 1453 | + NULL) == 0) { |
---|
1368 | 1454 | spin_lock_irq(&info->lock); |
---|
1369 | 1455 | shmem_recalc_inode(inode); |
---|
1370 | 1456 | info->swapped++; |
---|
.. | .. |
---|
1447 | 1533 | { |
---|
1448 | 1534 | struct vm_area_struct pvma; |
---|
1449 | 1535 | struct page *page; |
---|
1450 | | - struct vm_fault vmf; |
---|
| 1536 | + struct vm_fault vmf = { |
---|
| 1537 | + .vma = &pvma, |
---|
| 1538 | + }; |
---|
1451 | 1539 | |
---|
1452 | 1540 | shmem_pseudo_vma_init(&pvma, info, index); |
---|
1453 | | - vmf.vma = &pvma; |
---|
1454 | | - vmf.address = 0; |
---|
1455 | 1541 | page = swap_cluster_readahead(swap, gfp, &vmf); |
---|
1456 | 1542 | shmem_pseudo_vma_destroy(&pvma); |
---|
1457 | 1543 | |
---|
.. | .. |
---|
1462 | 1548 | struct shmem_inode_info *info, pgoff_t index) |
---|
1463 | 1549 | { |
---|
1464 | 1550 | struct vm_area_struct pvma; |
---|
1465 | | - struct inode *inode = &info->vfs_inode; |
---|
1466 | | - struct address_space *mapping = inode->i_mapping; |
---|
1467 | | - pgoff_t idx, hindex; |
---|
1468 | | - void __rcu **results; |
---|
| 1551 | + struct address_space *mapping = info->vfs_inode.i_mapping; |
---|
| 1552 | + pgoff_t hindex; |
---|
1469 | 1553 | struct page *page; |
---|
1470 | 1554 | |
---|
1471 | | - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) |
---|
1472 | | - return NULL; |
---|
1473 | | - |
---|
1474 | 1555 | hindex = round_down(index, HPAGE_PMD_NR); |
---|
1475 | | - rcu_read_lock(); |
---|
1476 | | - if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx, |
---|
1477 | | - hindex, 1) && idx < hindex + HPAGE_PMD_NR) { |
---|
1478 | | - rcu_read_unlock(); |
---|
| 1556 | + if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, |
---|
| 1557 | + XA_PRESENT)) |
---|
1479 | 1558 | return NULL; |
---|
1480 | | - } |
---|
1481 | | - rcu_read_unlock(); |
---|
1482 | 1559 | |
---|
1483 | 1560 | shmem_pseudo_vma_init(&pvma, info, hindex); |
---|
1484 | 1561 | page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, |
---|
.. | .. |
---|
1486 | 1563 | shmem_pseudo_vma_destroy(&pvma); |
---|
1487 | 1564 | if (page) |
---|
1488 | 1565 | prep_transhuge_page(page); |
---|
| 1566 | + else |
---|
| 1567 | + count_vm_event(THP_FILE_FALLBACK); |
---|
1489 | 1568 | return page; |
---|
1490 | 1569 | } |
---|
1491 | 1570 | |
---|
.. | .. |
---|
1493 | 1572 | struct shmem_inode_info *info, pgoff_t index) |
---|
1494 | 1573 | { |
---|
1495 | 1574 | struct vm_area_struct pvma; |
---|
1496 | | - struct page *page; |
---|
| 1575 | + struct page *page = NULL; |
---|
| 1576 | + |
---|
| 1577 | + trace_android_vh_shmem_alloc_page(&page); |
---|
| 1578 | + if (page) |
---|
| 1579 | + return page; |
---|
1497 | 1580 | |
---|
1498 | 1581 | shmem_pseudo_vma_init(&pvma, info, index); |
---|
1499 | 1582 | page = alloc_page_vma(gfp, &pvma, 0); |
---|
.. | .. |
---|
1511 | 1594 | int nr; |
---|
1512 | 1595 | int err = -ENOSPC; |
---|
1513 | 1596 | |
---|
1514 | | - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) |
---|
| 1597 | + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) |
---|
1515 | 1598 | huge = false; |
---|
1516 | 1599 | nr = huge ? HPAGE_PMD_NR : 1; |
---|
1517 | 1600 | |
---|
.. | .. |
---|
1589 | 1672 | * a nice clean interface for us to replace oldpage by newpage there. |
---|
1590 | 1673 | */ |
---|
1591 | 1674 | xa_lock_irq(&swap_mapping->i_pages); |
---|
1592 | | - error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, |
---|
1593 | | - newpage); |
---|
| 1675 | + error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); |
---|
1594 | 1676 | if (!error) { |
---|
1595 | | - __inc_node_page_state(newpage, NR_FILE_PAGES); |
---|
1596 | | - __dec_node_page_state(oldpage, NR_FILE_PAGES); |
---|
| 1677 | + mem_cgroup_migrate(oldpage, newpage); |
---|
| 1678 | + __inc_lruvec_page_state(newpage, NR_FILE_PAGES); |
---|
| 1679 | + __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); |
---|
1597 | 1680 | } |
---|
1598 | 1681 | xa_unlock_irq(&swap_mapping->i_pages); |
---|
1599 | 1682 | |
---|
.. | .. |
---|
1605 | 1688 | */ |
---|
1606 | 1689 | oldpage = newpage; |
---|
1607 | 1690 | } else { |
---|
1608 | | - mem_cgroup_migrate(oldpage, newpage); |
---|
1609 | | - lru_cache_add_anon(newpage); |
---|
| 1691 | + lru_cache_add(newpage); |
---|
1610 | 1692 | *pagep = newpage; |
---|
1611 | 1693 | } |
---|
1612 | 1694 | |
---|
.. | .. |
---|
1620 | 1702 | } |
---|
1621 | 1703 | |
---|
1622 | 1704 | /* |
---|
| 1705 | + * Swap in the page pointed to by *pagep. |
---|
| 1706 | + * Caller has to make sure that *pagep contains a valid swapped page. |
---|
| 1707 | + * Returns 0 and the page in pagep if success. On failure, returns the |
---|
| 1708 | + * error code and NULL in *pagep. |
---|
| 1709 | + */ |
---|
| 1710 | +static int shmem_swapin_page(struct inode *inode, pgoff_t index, |
---|
| 1711 | + struct page **pagep, enum sgp_type sgp, |
---|
| 1712 | + gfp_t gfp, struct vm_area_struct *vma, |
---|
| 1713 | + vm_fault_t *fault_type) |
---|
| 1714 | +{ |
---|
| 1715 | + struct address_space *mapping = inode->i_mapping; |
---|
| 1716 | + struct shmem_inode_info *info = SHMEM_I(inode); |
---|
| 1717 | + struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; |
---|
| 1718 | + struct page *page; |
---|
| 1719 | + swp_entry_t swap; |
---|
| 1720 | + int error; |
---|
| 1721 | + |
---|
| 1722 | + VM_BUG_ON(!*pagep || !xa_is_value(*pagep)); |
---|
| 1723 | + swap = radix_to_swp_entry(*pagep); |
---|
| 1724 | + *pagep = NULL; |
---|
| 1725 | + |
---|
| 1726 | + /* Look it up and read it in.. */ |
---|
| 1727 | + page = lookup_swap_cache(swap, NULL, 0); |
---|
| 1728 | + if (!page) { |
---|
| 1729 | + /* Or update major stats only when swapin succeeds?? */ |
---|
| 1730 | + if (fault_type) { |
---|
| 1731 | + *fault_type |= VM_FAULT_MAJOR; |
---|
| 1732 | + count_vm_event(PGMAJFAULT); |
---|
| 1733 | + count_memcg_event_mm(charge_mm, PGMAJFAULT); |
---|
| 1734 | + } |
---|
| 1735 | + /* Here we actually start the io */ |
---|
| 1736 | + page = shmem_swapin(swap, gfp, info, index); |
---|
| 1737 | + if (!page) { |
---|
| 1738 | + error = -ENOMEM; |
---|
| 1739 | + goto failed; |
---|
| 1740 | + } |
---|
| 1741 | + } |
---|
| 1742 | + |
---|
| 1743 | + /* We have to do this with page locked to prevent races */ |
---|
| 1744 | + lock_page(page); |
---|
| 1745 | + if (!PageSwapCache(page) || page_private(page) != swap.val || |
---|
| 1746 | + !shmem_confirm_swap(mapping, index, swap)) { |
---|
| 1747 | + error = -EEXIST; |
---|
| 1748 | + goto unlock; |
---|
| 1749 | + } |
---|
| 1750 | + if (!PageUptodate(page)) { |
---|
| 1751 | + error = -EIO; |
---|
| 1752 | + goto failed; |
---|
| 1753 | + } |
---|
| 1754 | + wait_on_page_writeback(page); |
---|
| 1755 | + |
---|
| 1756 | + /* |
---|
| 1757 | + * Some architectures may have to restore extra metadata to the |
---|
| 1758 | + * physical page after reading from swap. |
---|
| 1759 | + */ |
---|
| 1760 | + arch_swap_restore(swap, page); |
---|
| 1761 | + |
---|
| 1762 | + if (shmem_should_replace_page(page, gfp)) { |
---|
| 1763 | + error = shmem_replace_page(&page, gfp, info, index); |
---|
| 1764 | + if (error) |
---|
| 1765 | + goto failed; |
---|
| 1766 | + } |
---|
| 1767 | + |
---|
| 1768 | + error = shmem_add_to_page_cache(page, mapping, index, |
---|
| 1769 | + swp_to_radix_entry(swap), gfp, |
---|
| 1770 | + charge_mm); |
---|
| 1771 | + if (error) |
---|
| 1772 | + goto failed; |
---|
| 1773 | + |
---|
| 1774 | + spin_lock_irq(&info->lock); |
---|
| 1775 | + info->swapped--; |
---|
| 1776 | + shmem_recalc_inode(inode); |
---|
| 1777 | + spin_unlock_irq(&info->lock); |
---|
| 1778 | + |
---|
| 1779 | + if (sgp == SGP_WRITE) |
---|
| 1780 | + mark_page_accessed(page); |
---|
| 1781 | + |
---|
| 1782 | + delete_from_swap_cache(page); |
---|
| 1783 | + set_page_dirty(page); |
---|
| 1784 | + swap_free(swap); |
---|
| 1785 | + |
---|
| 1786 | + *pagep = page; |
---|
| 1787 | + return 0; |
---|
| 1788 | +failed: |
---|
| 1789 | + if (!shmem_confirm_swap(mapping, index, swap)) |
---|
| 1790 | + error = -EEXIST; |
---|
| 1791 | +unlock: |
---|
| 1792 | + if (page) { |
---|
| 1793 | + unlock_page(page); |
---|
| 1794 | + put_page(page); |
---|
| 1795 | + } |
---|
| 1796 | + |
---|
| 1797 | + return error; |
---|
| 1798 | +} |
---|
| 1799 | + |
---|
| 1800 | +/* |
---|
1623 | 1801 | * shmem_getpage_gfp - find page in cache, or get from swap, or allocate |
---|
1624 | 1802 | * |
---|
1625 | 1803 | * If we allocate a new one we do not mark it dirty. That's up to the |
---|
1626 | 1804 | * vm. If we swap it in we mark it dirty since we also free the swap |
---|
1627 | 1805 | * entry since a page cannot live in both the swap and page cache. |
---|
1628 | 1806 | * |
---|
1629 | | - * fault_mm and fault_type are only supplied by shmem_fault: |
---|
| 1807 | + * vma, vmf, and fault_type are only supplied by shmem_fault: |
---|
1630 | 1808 | * otherwise they are NULL. |
---|
1631 | 1809 | */ |
---|
1632 | 1810 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
---|
.. | .. |
---|
1638 | 1816 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
1639 | 1817 | struct shmem_sb_info *sbinfo; |
---|
1640 | 1818 | struct mm_struct *charge_mm; |
---|
1641 | | - struct mem_cgroup *memcg; |
---|
1642 | 1819 | struct page *page; |
---|
1643 | | - swp_entry_t swap; |
---|
1644 | 1820 | enum sgp_type sgp_huge = sgp; |
---|
1645 | 1821 | pgoff_t hindex = index; |
---|
1646 | 1822 | int error; |
---|
.. | .. |
---|
1652 | 1828 | if (sgp == SGP_NOHUGE || sgp == SGP_HUGE) |
---|
1653 | 1829 | sgp = SGP_CACHE; |
---|
1654 | 1830 | repeat: |
---|
1655 | | - swap.val = 0; |
---|
1656 | | - page = find_lock_entry(mapping, index); |
---|
1657 | | - if (radix_tree_exceptional_entry(page)) { |
---|
1658 | | - swap = radix_to_swp_entry(page); |
---|
1659 | | - page = NULL; |
---|
1660 | | - } |
---|
1661 | | - |
---|
1662 | 1831 | if (sgp <= SGP_CACHE && |
---|
1663 | 1832 | ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { |
---|
1664 | | - error = -EINVAL; |
---|
1665 | | - goto unlock; |
---|
| 1833 | + return -EINVAL; |
---|
1666 | 1834 | } |
---|
1667 | 1835 | |
---|
| 1836 | + sbinfo = SHMEM_SB(inode->i_sb); |
---|
| 1837 | + charge_mm = vma ? vma->vm_mm : current->mm; |
---|
| 1838 | + |
---|
| 1839 | + page = find_lock_entry(mapping, index); |
---|
| 1840 | + |
---|
| 1841 | + if (page && vma && userfaultfd_minor(vma)) { |
---|
| 1842 | + if (!xa_is_value(page)) { |
---|
| 1843 | + unlock_page(page); |
---|
| 1844 | + put_page(page); |
---|
| 1845 | + } |
---|
| 1846 | + *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); |
---|
| 1847 | + return 0; |
---|
| 1848 | + } |
---|
| 1849 | + |
---|
| 1850 | + if (xa_is_value(page)) { |
---|
| 1851 | + error = shmem_swapin_page(inode, index, &page, |
---|
| 1852 | + sgp, gfp, vma, fault_type); |
---|
| 1853 | + if (error == -EEXIST) |
---|
| 1854 | + goto repeat; |
---|
| 1855 | + |
---|
| 1856 | + *pagep = page; |
---|
| 1857 | + return error; |
---|
| 1858 | + } |
---|
| 1859 | + |
---|
| 1860 | + if (page) |
---|
| 1861 | + hindex = page->index; |
---|
1668 | 1862 | if (page && sgp == SGP_WRITE) |
---|
1669 | 1863 | mark_page_accessed(page); |
---|
1670 | 1864 | |
---|
.. | .. |
---|
1675 | 1869 | unlock_page(page); |
---|
1676 | 1870 | put_page(page); |
---|
1677 | 1871 | page = NULL; |
---|
| 1872 | + hindex = index; |
---|
1678 | 1873 | } |
---|
1679 | | - if (page || (sgp == SGP_READ && !swap.val)) { |
---|
1680 | | - *pagep = page; |
---|
1681 | | - return 0; |
---|
1682 | | - } |
---|
| 1874 | + if (page || sgp == SGP_READ) |
---|
| 1875 | + goto out; |
---|
1683 | 1876 | |
---|
1684 | 1877 | /* |
---|
1685 | 1878 | * Fast cache lookup did not find it: |
---|
1686 | 1879 | * bring it back from swap or allocate. |
---|
1687 | 1880 | */ |
---|
1688 | | - sbinfo = SHMEM_SB(inode->i_sb); |
---|
1689 | | - charge_mm = vma ? vma->vm_mm : current->mm; |
---|
1690 | 1881 | |
---|
1691 | | - if (swap.val) { |
---|
1692 | | - /* Look it up and read it in.. */ |
---|
1693 | | - page = lookup_swap_cache(swap, NULL, 0); |
---|
1694 | | - if (!page) { |
---|
1695 | | - /* Or update major stats only when swapin succeeds?? */ |
---|
1696 | | - if (fault_type) { |
---|
1697 | | - *fault_type |= VM_FAULT_MAJOR; |
---|
1698 | | - count_vm_event(PGMAJFAULT); |
---|
1699 | | - count_memcg_event_mm(charge_mm, PGMAJFAULT); |
---|
1700 | | - } |
---|
1701 | | - /* Here we actually start the io */ |
---|
1702 | | - page = shmem_swapin(swap, gfp, info, index); |
---|
1703 | | - if (!page) { |
---|
1704 | | - error = -ENOMEM; |
---|
1705 | | - goto failed; |
---|
1706 | | - } |
---|
1707 | | - } |
---|
| 1882 | + if (vma && userfaultfd_missing(vma)) { |
---|
| 1883 | + *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); |
---|
| 1884 | + return 0; |
---|
| 1885 | + } |
---|
1708 | 1886 | |
---|
1709 | | - /* We have to do this with page locked to prevent races */ |
---|
1710 | | - lock_page(page); |
---|
1711 | | - if (!PageSwapCache(page) || page_private(page) != swap.val || |
---|
1712 | | - !shmem_confirm_swap(mapping, index, swap)) { |
---|
1713 | | - error = -EEXIST; /* try again */ |
---|
1714 | | - goto unlock; |
---|
1715 | | - } |
---|
1716 | | - if (!PageUptodate(page)) { |
---|
1717 | | - error = -EIO; |
---|
1718 | | - goto failed; |
---|
1719 | | - } |
---|
1720 | | - wait_on_page_writeback(page); |
---|
| 1887 | + /* shmem_symlink() */ |
---|
| 1888 | + if (mapping->a_ops != &shmem_aops) |
---|
| 1889 | + goto alloc_nohuge; |
---|
| 1890 | + if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE) |
---|
| 1891 | + goto alloc_nohuge; |
---|
| 1892 | + if (shmem_huge == SHMEM_HUGE_FORCE) |
---|
| 1893 | + goto alloc_huge; |
---|
| 1894 | + switch (sbinfo->huge) { |
---|
| 1895 | + case SHMEM_HUGE_NEVER: |
---|
| 1896 | + goto alloc_nohuge; |
---|
| 1897 | + case SHMEM_HUGE_WITHIN_SIZE: { |
---|
| 1898 | + loff_t i_size; |
---|
| 1899 | + pgoff_t off; |
---|
1721 | 1900 | |
---|
1722 | | - if (shmem_should_replace_page(page, gfp)) { |
---|
1723 | | - error = shmem_replace_page(&page, gfp, info, index); |
---|
1724 | | - if (error) |
---|
1725 | | - goto failed; |
---|
1726 | | - } |
---|
1727 | | - |
---|
1728 | | - error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, |
---|
1729 | | - false); |
---|
1730 | | - if (!error) { |
---|
1731 | | - error = shmem_add_to_page_cache(page, mapping, index, |
---|
1732 | | - swp_to_radix_entry(swap)); |
---|
1733 | | - /* |
---|
1734 | | - * We already confirmed swap under page lock, and make |
---|
1735 | | - * no memory allocation here, so usually no possibility |
---|
1736 | | - * of error; but free_swap_and_cache() only trylocks a |
---|
1737 | | - * page, so it is just possible that the entry has been |
---|
1738 | | - * truncated or holepunched since swap was confirmed. |
---|
1739 | | - * shmem_undo_range() will have done some of the |
---|
1740 | | - * unaccounting, now delete_from_swap_cache() will do |
---|
1741 | | - * the rest. |
---|
1742 | | - * Reset swap.val? No, leave it so "failed" goes back to |
---|
1743 | | - * "repeat": reading a hole and writing should succeed. |
---|
1744 | | - */ |
---|
1745 | | - if (error) { |
---|
1746 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
1747 | | - delete_from_swap_cache(page); |
---|
1748 | | - } |
---|
1749 | | - } |
---|
1750 | | - if (error) |
---|
1751 | | - goto failed; |
---|
1752 | | - |
---|
1753 | | - mem_cgroup_commit_charge(page, memcg, true, false); |
---|
1754 | | - |
---|
1755 | | - spin_lock_irq(&info->lock); |
---|
1756 | | - info->swapped--; |
---|
1757 | | - shmem_recalc_inode(inode); |
---|
1758 | | - spin_unlock_irq(&info->lock); |
---|
1759 | | - |
---|
1760 | | - if (sgp == SGP_WRITE) |
---|
1761 | | - mark_page_accessed(page); |
---|
1762 | | - |
---|
1763 | | - delete_from_swap_cache(page); |
---|
1764 | | - set_page_dirty(page); |
---|
1765 | | - swap_free(swap); |
---|
1766 | | - |
---|
1767 | | - } else { |
---|
1768 | | - if (vma && userfaultfd_missing(vma)) { |
---|
1769 | | - *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); |
---|
1770 | | - return 0; |
---|
1771 | | - } |
---|
1772 | | - |
---|
1773 | | - /* shmem_symlink() */ |
---|
1774 | | - if (mapping->a_ops != &shmem_aops) |
---|
1775 | | - goto alloc_nohuge; |
---|
1776 | | - if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE) |
---|
1777 | | - goto alloc_nohuge; |
---|
1778 | | - if (shmem_huge == SHMEM_HUGE_FORCE) |
---|
| 1901 | + off = round_up(index, HPAGE_PMD_NR); |
---|
| 1902 | + i_size = round_up(i_size_read(inode), PAGE_SIZE); |
---|
| 1903 | + if (i_size >= HPAGE_PMD_SIZE && |
---|
| 1904 | + i_size >> PAGE_SHIFT >= off) |
---|
1779 | 1905 | goto alloc_huge; |
---|
1780 | | - switch (sbinfo->huge) { |
---|
1781 | | - loff_t i_size; |
---|
1782 | | - pgoff_t off; |
---|
1783 | | - case SHMEM_HUGE_NEVER: |
---|
1784 | | - goto alloc_nohuge; |
---|
1785 | | - case SHMEM_HUGE_WITHIN_SIZE: |
---|
1786 | | - off = round_up(index, HPAGE_PMD_NR); |
---|
1787 | | - i_size = round_up(i_size_read(inode), PAGE_SIZE); |
---|
1788 | | - if (i_size >= HPAGE_PMD_SIZE && |
---|
1789 | | - i_size >> PAGE_SHIFT >= off) |
---|
1790 | | - goto alloc_huge; |
---|
1791 | | - /* fallthrough */ |
---|
1792 | | - case SHMEM_HUGE_ADVISE: |
---|
1793 | | - if (sgp_huge == SGP_HUGE) |
---|
1794 | | - goto alloc_huge; |
---|
1795 | | - /* TODO: implement fadvise() hints */ |
---|
1796 | | - goto alloc_nohuge; |
---|
1797 | | - } |
---|
| 1906 | + |
---|
| 1907 | + fallthrough; |
---|
| 1908 | + } |
---|
| 1909 | + case SHMEM_HUGE_ADVISE: |
---|
| 1910 | + if (sgp_huge == SGP_HUGE) |
---|
| 1911 | + goto alloc_huge; |
---|
| 1912 | + /* TODO: implement fadvise() hints */ |
---|
| 1913 | + goto alloc_nohuge; |
---|
| 1914 | + } |
---|
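A short worked example may make the SHMEM_HUGE_WITHIN_SIZE test above concrete; the numbers below are illustrative only (assuming 4 KiB base pages and 2 MiB PMD huge pages, i.e. HPAGE_PMD_NR == 512) and are not part of the patch.

/*
 * Worked example of the within_size heuristic:
 *
 *   index  = 600                         faulting page offset
 *   off    = round_up(600, 512)          = 1024
 *   i_size = 5 MiB (PAGE_SIZE aligned)   -> i_size >> PAGE_SHIFT = 1280
 *
 * 1280 >= 1024, so the huge page that would back index 600 lies entirely
 * within i_size and the fault may take the alloc_huge path.  For a 3 MiB
 * file (768 pages) the test fails and control falls through to
 * SHMEM_HUGE_ADVISE, typically ending in alloc_nohuge.
 */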
1798 | 1915 | |
---|
1799 | 1916 | alloc_huge: |
---|
1800 | | - page = shmem_alloc_and_acct_page(gfp, inode, index, true); |
---|
1801 | | - if (IS_ERR(page)) { |
---|
1802 | | -alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, |
---|
1803 | | - index, false); |
---|
1804 | | - } |
---|
1805 | | - if (IS_ERR(page)) { |
---|
1806 | | - int retry = 5; |
---|
1807 | | - error = PTR_ERR(page); |
---|
1808 | | - page = NULL; |
---|
1809 | | - if (error != -ENOSPC) |
---|
1810 | | - goto failed; |
---|
1811 | | - /* |
---|
1812 | | - * Try to reclaim some spece by splitting a huge page |
---|
1813 | | - * beyond i_size on the filesystem. |
---|
1814 | | - */ |
---|
1815 | | - while (retry--) { |
---|
1816 | | - int ret; |
---|
1817 | | - ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); |
---|
1818 | | - if (ret == SHRINK_STOP) |
---|
1819 | | - break; |
---|
1820 | | - if (ret) |
---|
1821 | | - goto alloc_nohuge; |
---|
1822 | | - } |
---|
1823 | | - goto failed; |
---|
1824 | | - } |
---|
| 1917 | + page = shmem_alloc_and_acct_page(gfp, inode, index, true); |
---|
| 1918 | + if (IS_ERR(page)) { |
---|
| 1919 | +alloc_nohuge: |
---|
| 1920 | + page = shmem_alloc_and_acct_page(gfp, inode, |
---|
| 1921 | + index, false); |
---|
| 1922 | + } |
---|
| 1923 | + if (IS_ERR(page)) { |
---|
| 1924 | + int retry = 5; |
---|
1825 | 1925 | |
---|
1826 | | - if (PageTransHuge(page)) |
---|
1827 | | - hindex = round_down(index, HPAGE_PMD_NR); |
---|
1828 | | - else |
---|
1829 | | - hindex = index; |
---|
1830 | | - |
---|
1831 | | - if (sgp == SGP_WRITE) |
---|
1832 | | - __SetPageReferenced(page); |
---|
1833 | | - |
---|
1834 | | - error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, |
---|
1835 | | - PageTransHuge(page)); |
---|
1836 | | - if (error) |
---|
1837 | | - goto unacct; |
---|
1838 | | - error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK, |
---|
1839 | | - compound_order(page)); |
---|
1840 | | - if (!error) { |
---|
1841 | | - error = shmem_add_to_page_cache(page, mapping, hindex, |
---|
1842 | | - NULL); |
---|
1843 | | - radix_tree_preload_end(); |
---|
1844 | | - } |
---|
1845 | | - if (error) { |
---|
1846 | | - mem_cgroup_cancel_charge(page, memcg, |
---|
1847 | | - PageTransHuge(page)); |
---|
1848 | | - goto unacct; |
---|
1849 | | - } |
---|
1850 | | - mem_cgroup_commit_charge(page, memcg, false, |
---|
1851 | | - PageTransHuge(page)); |
---|
1852 | | - lru_cache_add_anon(page); |
---|
1853 | | - |
---|
1854 | | - spin_lock_irq(&info->lock); |
---|
1855 | | - info->alloced += 1 << compound_order(page); |
---|
1856 | | - inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); |
---|
1857 | | - shmem_recalc_inode(inode); |
---|
1858 | | - spin_unlock_irq(&info->lock); |
---|
1859 | | - alloced = true; |
---|
1860 | | - |
---|
1861 | | - if (PageTransHuge(page) && |
---|
1862 | | - DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < |
---|
1863 | | - hindex + HPAGE_PMD_NR - 1) { |
---|
1864 | | - /* |
---|
1865 | | - * Part of the huge page is beyond i_size: subject |
---|
1866 | | - * to shrink under memory pressure. |
---|
1867 | | - */ |
---|
1868 | | - spin_lock(&sbinfo->shrinklist_lock); |
---|
1869 | | - /* |
---|
1870 | | - * _careful to defend against unlocked access to |
---|
1871 | | - * ->shrink_list in shmem_unused_huge_shrink() |
---|
1872 | | - */ |
---|
1873 | | - if (list_empty_careful(&info->shrinklist)) { |
---|
1874 | | - list_add_tail(&info->shrinklist, |
---|
1875 | | - &sbinfo->shrinklist); |
---|
1876 | | - sbinfo->shrinklist_len++; |
---|
1877 | | - } |
---|
1878 | | - spin_unlock(&sbinfo->shrinklist_lock); |
---|
1879 | | - } |
---|
1880 | | - |
---|
| 1926 | + error = PTR_ERR(page); |
---|
| 1927 | + page = NULL; |
---|
| 1928 | + if (error != -ENOSPC) |
---|
| 1929 | + goto unlock; |
---|
1881 | 1930 | /* |
---|
1882 | | - * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. |
---|
| 1931 | + * Try to reclaim some space by splitting a huge page |
---|
| 1932 | + * beyond i_size on the filesystem. |
---|
1883 | 1933 | */ |
---|
1884 | | - if (sgp == SGP_FALLOC) |
---|
1885 | | - sgp = SGP_WRITE; |
---|
| 1934 | + while (retry--) { |
---|
| 1935 | + int ret; |
---|
| 1936 | + |
---|
| 1937 | + ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); |
---|
| 1938 | + if (ret == SHRINK_STOP) |
---|
| 1939 | + break; |
---|
| 1940 | + if (ret) |
---|
| 1941 | + goto alloc_nohuge; |
---|
| 1942 | + } |
---|
| 1943 | + goto unlock; |
---|
| 1944 | + } |
---|
| 1945 | + |
---|
| 1946 | + if (PageTransHuge(page)) |
---|
| 1947 | + hindex = round_down(index, HPAGE_PMD_NR); |
---|
| 1948 | + else |
---|
| 1949 | + hindex = index; |
---|
| 1950 | + |
---|
| 1951 | + if (sgp == SGP_WRITE) |
---|
| 1952 | + __SetPageReferenced(page); |
---|
| 1953 | + |
---|
| 1954 | + error = shmem_add_to_page_cache(page, mapping, hindex, |
---|
| 1955 | + NULL, gfp & GFP_RECLAIM_MASK, |
---|
| 1956 | + charge_mm); |
---|
| 1957 | + if (error) |
---|
| 1958 | + goto unacct; |
---|
| 1959 | + lru_cache_add(page); |
---|
| 1960 | + |
---|
| 1961 | + spin_lock_irq(&info->lock); |
---|
| 1962 | + info->alloced += compound_nr(page); |
---|
| 1963 | + inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); |
---|
| 1964 | + shmem_recalc_inode(inode); |
---|
| 1965 | + spin_unlock_irq(&info->lock); |
---|
| 1966 | + alloced = true; |
---|
| 1967 | + |
---|
| 1968 | + if (PageTransHuge(page) && |
---|
| 1969 | + DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < |
---|
| 1970 | + hindex + HPAGE_PMD_NR - 1) { |
---|
| 1971 | + /* |
---|
| 1972 | + * Part of the huge page is beyond i_size: subject |
---|
| 1973 | + * to shrink under memory pressure. |
---|
| 1974 | + */ |
---|
| 1975 | + spin_lock(&sbinfo->shrinklist_lock); |
---|
| 1976 | + /* |
---|
| 1977 | + * _careful to defend against unlocked access to |
---|
| 1978 | + * ->shrink_list in shmem_unused_huge_shrink() |
---|
| 1979 | + */ |
---|
| 1980 | + if (list_empty_careful(&info->shrinklist)) { |
---|
| 1981 | + list_add_tail(&info->shrinklist, |
---|
| 1982 | + &sbinfo->shrinklist); |
---|
| 1983 | + sbinfo->shrinklist_len++; |
---|
| 1984 | + } |
---|
| 1985 | + spin_unlock(&sbinfo->shrinklist_lock); |
---|
| 1986 | + } |
---|
| 1987 | + |
---|
| 1988 | + /* |
---|
| 1989 | + * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. |
---|
| 1990 | + */ |
---|
| 1991 | + if (sgp == SGP_FALLOC) |
---|
| 1992 | + sgp = SGP_WRITE; |
---|
1886 | 1993 | clear: |
---|
1887 | | - /* |
---|
1888 | | - * Let SGP_WRITE caller clear ends if write does not fill page; |
---|
1889 | | - * but SGP_FALLOC on a page fallocated earlier must initialize |
---|
1890 | | - * it now, lest undo on failure cancel our earlier guarantee. |
---|
1891 | | - */ |
---|
1892 | | - if (sgp != SGP_WRITE && !PageUptodate(page)) { |
---|
1893 | | - struct page *head = compound_head(page); |
---|
1894 | | - int i; |
---|
| 1994 | + /* |
---|
| 1995 | + * Let SGP_WRITE caller clear ends if write does not fill page; |
---|
| 1996 | + * but SGP_FALLOC on a page fallocated earlier must initialize |
---|
| 1997 | + * it now, lest undo on failure cancel our earlier guarantee. |
---|
| 1998 | + */ |
---|
| 1999 | + if (sgp != SGP_WRITE && !PageUptodate(page)) { |
---|
| 2000 | + int i; |
---|
1895 | 2001 | |
---|
1896 | | - for (i = 0; i < (1 << compound_order(head)); i++) { |
---|
1897 | | - clear_highpage(head + i); |
---|
1898 | | - flush_dcache_page(head + i); |
---|
1899 | | - } |
---|
1900 | | - SetPageUptodate(head); |
---|
| 2002 | + for (i = 0; i < compound_nr(page); i++) { |
---|
| 2003 | + clear_highpage(page + i); |
---|
| 2004 | + flush_dcache_page(page + i); |
---|
1901 | 2005 | } |
---|
| 2006 | + SetPageUptodate(page); |
---|
1902 | 2007 | } |
---|
1903 | 2008 | |
---|
1904 | 2009 | /* Perhaps the file has been truncated since we checked */ |
---|
.. | .. |
---|
1914 | 2019 | error = -EINVAL; |
---|
1915 | 2020 | goto unlock; |
---|
1916 | 2021 | } |
---|
| 2022 | +out: |
---|
1917 | 2023 | *pagep = page + index - hindex; |
---|
1918 | 2024 | return 0; |
---|
1919 | 2025 | |
---|
.. | .. |
---|
1921 | 2027 | * Error recovery. |
---|
1922 | 2028 | */ |
---|
1923 | 2029 | unacct: |
---|
1924 | | - shmem_inode_unacct_blocks(inode, 1 << compound_order(page)); |
---|
| 2030 | + shmem_inode_unacct_blocks(inode, compound_nr(page)); |
---|
1925 | 2031 | |
---|
1926 | 2032 | if (PageTransHuge(page)) { |
---|
1927 | 2033 | unlock_page(page); |
---|
1928 | 2034 | put_page(page); |
---|
1929 | 2035 | goto alloc_nohuge; |
---|
1930 | 2036 | } |
---|
1931 | | -failed: |
---|
1932 | | - if (swap.val && !shmem_confirm_swap(mapping, index, swap)) |
---|
1933 | | - error = -EEXIST; |
---|
1934 | 2037 | unlock: |
---|
1935 | 2038 | if (page) { |
---|
1936 | 2039 | unlock_page(page); |
---|
.. | .. |
---|
1942 | 2045 | spin_unlock_irq(&info->lock); |
---|
1943 | 2046 | goto repeat; |
---|
1944 | 2047 | } |
---|
1945 | | - if (error == -EEXIST) /* from above or from radix_tree_insert */ |
---|
| 2048 | + if (error == -EEXIST) |
---|
1946 | 2049 | goto repeat; |
---|
1947 | 2050 | return error; |
---|
1948 | 2051 | } |
---|
.. | .. |
---|
1994 | 2097 | shmem_falloc->waitq && |
---|
1995 | 2098 | vmf->pgoff >= shmem_falloc->start && |
---|
1996 | 2099 | vmf->pgoff < shmem_falloc->next) { |
---|
| 2100 | + struct file *fpin; |
---|
1997 | 2101 | wait_queue_head_t *shmem_falloc_waitq; |
---|
1998 | 2102 | DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); |
---|
1999 | 2103 | |
---|
2000 | 2104 | ret = VM_FAULT_NOPAGE; |
---|
2001 | | - if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && |
---|
2002 | | - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { |
---|
2003 | | - /* It's polite to up mmap_sem if we can */ |
---|
2004 | | - up_read(&vma->vm_mm->mmap_sem); |
---|
| 2105 | + fpin = maybe_unlock_mmap_for_io(vmf, NULL); |
---|
| 2106 | + if (fpin) |
---|
2005 | 2107 | ret = VM_FAULT_RETRY; |
---|
2006 | | - } |
---|
2007 | 2108 | |
---|
2008 | 2109 | shmem_falloc_waitq = shmem_falloc->waitq; |
---|
2009 | 2110 | prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, |
---|
.. | .. |
---|
2021 | 2122 | spin_lock(&inode->i_lock); |
---|
2022 | 2123 | finish_wait(shmem_falloc_waitq, &shmem_fault_wait); |
---|
2023 | 2124 | spin_unlock(&inode->i_lock); |
---|
| 2125 | + |
---|
| 2126 | + if (fpin) |
---|
| 2127 | + fput(fpin); |
---|
2024 | 2128 | return ret; |
---|
2025 | 2129 | } |
---|
2026 | 2130 | spin_unlock(&inode->i_lock); |
---|
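The hunk above replaces the open-coded mmap_sem drop with maybe_unlock_mmap_for_io(); a minimal sketch of that idiom, separate from the patch itself, is below. The helper pins vmf->vma->vm_file when it is allowed to drop mmap_lock, so the caller must fput() the pin and report VM_FAULT_RETRY; the function name and wait queue here are made up for illustration.

static vm_fault_t example_wait_outside_mmap_lock(struct vm_fault *vmf,
						 wait_queue_head_t *wq)
{
	struct file *fpin = maybe_unlock_mmap_for_io(vmf, NULL);
	vm_fault_t ret = fpin ? VM_FAULT_RETRY : VM_FAULT_NOPAGE;

	/* ... sleep on wq here; mmap_lock is already dropped if fpin ... */

	if (fpin)
		fput(fpin);	/* release the reference the helper took */
	return ret;
}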
.. | .. |
---|
2059 | 2163 | get_area = current->mm->get_unmapped_area; |
---|
2060 | 2164 | addr = get_area(file, uaddr, len, pgoff, flags); |
---|
2061 | 2165 | |
---|
2062 | | - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) |
---|
| 2166 | + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) |
---|
2063 | 2167 | return addr; |
---|
2064 | 2168 | if (IS_ERR_VALUE(addr)) |
---|
2065 | 2169 | return addr; |
---|
.. | .. |
---|
2179 | 2283 | static int shmem_mmap(struct file *file, struct vm_area_struct *vma) |
---|
2180 | 2284 | { |
---|
2181 | 2285 | struct shmem_inode_info *info = SHMEM_I(file_inode(file)); |
---|
| 2286 | + int ret; |
---|
2182 | 2287 | |
---|
2183 | | - if (info->seals & F_SEAL_FUTURE_WRITE) { |
---|
2184 | | - /* |
---|
2185 | | - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when |
---|
2186 | | - * "future write" seal active. |
---|
2187 | | - */ |
---|
2188 | | - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) |
---|
2189 | | - return -EPERM; |
---|
| 2288 | + ret = seal_check_future_write(info->seals, vma); |
---|
| 2289 | + if (ret) |
---|
| 2290 | + return ret; |
---|
2190 | 2291 | |
---|
2191 | | - /* |
---|
2192 | | - * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED |
---|
2193 | | - * read-only mapping, take care to not allow mprotect to revert |
---|
2194 | | - * protections. |
---|
2195 | | - */ |
---|
2196 | | - vma->vm_flags &= ~(VM_MAYWRITE); |
---|
2197 | | - } |
---|
| 2292 | + /* arm64 - allow memory tagging on RAM-based files */ |
---|
| 2293 | + vma->vm_flags |= VM_MTE_ALLOWED; |
---|
2198 | 2294 | |
---|
2199 | 2295 | file_accessed(file); |
---|
2200 | 2296 | vma->vm_ops = &shmem_vm_ops; |
---|
2201 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && |
---|
| 2297 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
2202 | 2298 | ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < |
---|
2203 | 2299 | (vma->vm_end & HPAGE_PMD_MASK)) { |
---|
2204 | 2300 | khugepaged_enter(vma, vma->vm_flags); |
---|
.. | .. |
---|
2212 | 2308 | struct inode *inode; |
---|
2213 | 2309 | struct shmem_inode_info *info; |
---|
2214 | 2310 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
| 2311 | + ino_t ino; |
---|
2215 | 2312 | |
---|
2216 | | - if (shmem_reserve_inode(sb)) |
---|
| 2313 | + if (shmem_reserve_inode(sb, &ino)) |
---|
2217 | 2314 | return NULL; |
---|
2218 | 2315 | |
---|
2219 | 2316 | inode = new_inode(sb); |
---|
2220 | 2317 | if (inode) { |
---|
2221 | | - inode->i_ino = get_next_ino(); |
---|
| 2318 | + inode->i_ino = ino; |
---|
2222 | 2319 | inode_init_owner(inode, dir, mode); |
---|
2223 | 2320 | inode->i_blocks = 0; |
---|
2224 | 2321 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); |
---|
.. | .. |
---|
2226 | 2323 | info = SHMEM_I(inode); |
---|
2227 | 2324 | memset(info, 0, (char *)inode - (char *)info); |
---|
2228 | 2325 | spin_lock_init(&info->lock); |
---|
| 2326 | + atomic_set(&info->stop_eviction, 0); |
---|
2229 | 2327 | info->seals = F_SEAL_SEAL; |
---|
2230 | 2328 | info->flags = flags & VM_NORESERVE; |
---|
2231 | 2329 | INIT_LIST_HEAD(&info->shrinklist); |
---|
.. | .. |
---|
2272 | 2370 | return mapping->a_ops == &shmem_aops; |
---|
2273 | 2371 | } |
---|
2274 | 2372 | |
---|
2275 | | -static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, |
---|
2276 | | - pmd_t *dst_pmd, |
---|
2277 | | - struct vm_area_struct *dst_vma, |
---|
2278 | | - unsigned long dst_addr, |
---|
2279 | | - unsigned long src_addr, |
---|
2280 | | - bool zeropage, |
---|
2281 | | - struct page **pagep) |
---|
| 2373 | +#ifdef CONFIG_USERFAULTFD |
---|
| 2374 | +int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, |
---|
| 2375 | + pmd_t *dst_pmd, |
---|
| 2376 | + struct vm_area_struct *dst_vma, |
---|
| 2377 | + unsigned long dst_addr, |
---|
| 2378 | + unsigned long src_addr, |
---|
| 2379 | + bool zeropage, |
---|
| 2380 | + struct page **pagep) |
---|
2282 | 2381 | { |
---|
2283 | 2382 | struct inode *inode = file_inode(dst_vma->vm_file); |
---|
2284 | 2383 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
2285 | 2384 | struct address_space *mapping = inode->i_mapping; |
---|
2286 | 2385 | gfp_t gfp = mapping_gfp_mask(mapping); |
---|
2287 | 2386 | pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); |
---|
2288 | | - struct mem_cgroup *memcg; |
---|
2289 | | - spinlock_t *ptl; |
---|
2290 | 2387 | void *page_kaddr; |
---|
2291 | 2388 | struct page *page; |
---|
2292 | | - pte_t _dst_pte, *dst_pte; |
---|
2293 | 2389 | int ret; |
---|
2294 | | - pgoff_t offset, max_off; |
---|
| 2390 | + pgoff_t max_off; |
---|
2295 | 2391 | |
---|
2296 | | - ret = -ENOMEM; |
---|
2297 | 2392 | if (!shmem_inode_acct_block(inode, 1)) { |
---|
2298 | 2393 | /* |
---|
2299 | 2394 | * We may have got a page, returned -ENOENT triggering a retry, |
---|
.. | .. |
---|
2304 | 2399 | put_page(*pagep); |
---|
2305 | 2400 | *pagep = NULL; |
---|
2306 | 2401 | } |
---|
2307 | | - goto out; |
---|
| 2402 | + return -ENOMEM; |
---|
2308 | 2403 | } |
---|
2309 | 2404 | |
---|
2310 | 2405 | if (!*pagep) { |
---|
| 2406 | + ret = -ENOMEM; |
---|
2311 | 2407 | page = shmem_alloc_page(gfp, info, pgoff); |
---|
2312 | 2408 | if (!page) |
---|
2313 | 2409 | goto out_unacct_blocks; |
---|
2314 | 2410 | |
---|
2315 | | - if (!zeropage) { /* mcopy_atomic */ |
---|
| 2411 | + if (!zeropage) { /* COPY */ |
---|
2316 | 2412 | page_kaddr = kmap_atomic(page); |
---|
2317 | 2413 | ret = copy_from_user(page_kaddr, |
---|
2318 | 2414 | (const void __user *)src_addr, |
---|
2319 | 2415 | PAGE_SIZE); |
---|
2320 | 2416 | kunmap_atomic(page_kaddr); |
---|
2321 | 2417 | |
---|
2322 | | - /* fallback to copy_from_user outside mmap_sem */ |
---|
| 2418 | + /* fallback to copy_from_user outside mmap_lock */ |
---|
2323 | 2419 | if (unlikely(ret)) { |
---|
2324 | 2420 | *pagep = page; |
---|
2325 | | - shmem_inode_unacct_blocks(inode, 1); |
---|
| 2421 | + ret = -ENOENT; |
---|
2326 | 2422 | /* don't free the page */ |
---|
2327 | | - return -ENOENT; |
---|
| 2423 | + goto out_unacct_blocks; |
---|
2328 | 2424 | } |
---|
2329 | | - } else { /* mfill_zeropage_atomic */ |
---|
| 2425 | + } else { /* ZEROPAGE */ |
---|
2330 | 2426 | clear_highpage(page); |
---|
2331 | 2427 | } |
---|
2332 | 2428 | } else { |
---|
.. | .. |
---|
2334 | 2430 | *pagep = NULL; |
---|
2335 | 2431 | } |
---|
2336 | 2432 | |
---|
2337 | | - VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); |
---|
| 2433 | + VM_BUG_ON(PageLocked(page)); |
---|
| 2434 | + VM_BUG_ON(PageSwapBacked(page)); |
---|
2338 | 2435 | __SetPageLocked(page); |
---|
2339 | 2436 | __SetPageSwapBacked(page); |
---|
2340 | 2437 | __SetPageUptodate(page); |
---|
2341 | 2438 | |
---|
2342 | 2439 | ret = -EFAULT; |
---|
2343 | | - offset = linear_page_index(dst_vma, dst_addr); |
---|
2344 | 2440 | max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
---|
2345 | | - if (unlikely(offset >= max_off)) |
---|
| 2441 | + if (unlikely(pgoff >= max_off)) |
---|
2346 | 2442 | goto out_release; |
---|
2347 | 2443 | |
---|
2348 | | - ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false); |
---|
| 2444 | + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, |
---|
| 2445 | + gfp & GFP_RECLAIM_MASK, dst_mm); |
---|
2349 | 2446 | if (ret) |
---|
2350 | 2447 | goto out_release; |
---|
2351 | 2448 | |
---|
2352 | | - ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); |
---|
2353 | | - if (!ret) { |
---|
2354 | | - ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL); |
---|
2355 | | - radix_tree_preload_end(); |
---|
2356 | | - } |
---|
| 2449 | + ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, |
---|
| 2450 | + page, true, false); |
---|
2357 | 2451 | if (ret) |
---|
2358 | | - goto out_release_uncharge; |
---|
2359 | | - |
---|
2360 | | - mem_cgroup_commit_charge(page, memcg, false, false); |
---|
2361 | | - |
---|
2362 | | - _dst_pte = mk_pte(page, dst_vma->vm_page_prot); |
---|
2363 | | - if (dst_vma->vm_flags & VM_WRITE) |
---|
2364 | | - _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); |
---|
2365 | | - else { |
---|
2366 | | - /* |
---|
2367 | | - * We don't set the pte dirty if the vma has no |
---|
2368 | | - * VM_WRITE permission, so mark the page dirty or it |
---|
2369 | | - * could be freed from under us. We could do it |
---|
2370 | | - * unconditionally before unlock_page(), but doing it |
---|
2371 | | - * only if VM_WRITE is not set is faster. |
---|
2372 | | - */ |
---|
2373 | | - set_page_dirty(page); |
---|
2374 | | - } |
---|
2375 | | - |
---|
2376 | | - dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); |
---|
2377 | | - |
---|
2378 | | - ret = -EFAULT; |
---|
2379 | | - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
---|
2380 | | - if (unlikely(offset >= max_off)) |
---|
2381 | | - goto out_release_uncharge_unlock; |
---|
2382 | | - |
---|
2383 | | - ret = -EEXIST; |
---|
2384 | | - if (!pte_none(*dst_pte)) |
---|
2385 | | - goto out_release_uncharge_unlock; |
---|
2386 | | - |
---|
2387 | | - lru_cache_add_anon(page); |
---|
| 2452 | + goto out_delete_from_cache; |
---|
2388 | 2453 | |
---|
2389 | 2454 | spin_lock_irq(&info->lock); |
---|
2390 | 2455 | info->alloced++; |
---|
.. | .. |
---|
2392 | 2457 | shmem_recalc_inode(inode); |
---|
2393 | 2458 | spin_unlock_irq(&info->lock); |
---|
2394 | 2459 | |
---|
2395 | | - inc_mm_counter(dst_mm, mm_counter_file(page)); |
---|
2396 | | - page_add_file_rmap(page, false); |
---|
2397 | | - set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); |
---|
2398 | | - |
---|
2399 | | - /* No need to invalidate - it was non-present before */ |
---|
2400 | | - update_mmu_cache(dst_vma, dst_addr, dst_pte); |
---|
2401 | | - pte_unmap_unlock(dst_pte, ptl); |
---|
| 2460 | + SetPageDirty(page); |
---|
2402 | 2461 | unlock_page(page); |
---|
2403 | | - ret = 0; |
---|
2404 | | -out: |
---|
2405 | | - return ret; |
---|
2406 | | -out_release_uncharge_unlock: |
---|
2407 | | - pte_unmap_unlock(dst_pte, ptl); |
---|
2408 | | - ClearPageDirty(page); |
---|
| 2462 | + return 0; |
---|
| 2463 | +out_delete_from_cache: |
---|
2409 | 2464 | delete_from_page_cache(page); |
---|
2410 | | -out_release_uncharge: |
---|
2411 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
2412 | 2465 | out_release: |
---|
2413 | 2466 | unlock_page(page); |
---|
2414 | 2467 | put_page(page); |
---|
2415 | 2468 | out_unacct_blocks: |
---|
2416 | 2469 | shmem_inode_unacct_blocks(inode, 1); |
---|
2417 | | - goto out; |
---|
| 2470 | + return ret; |
---|
2418 | 2471 | } |
---|
2419 | | - |
---|
2420 | | -int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, |
---|
2421 | | - pmd_t *dst_pmd, |
---|
2422 | | - struct vm_area_struct *dst_vma, |
---|
2423 | | - unsigned long dst_addr, |
---|
2424 | | - unsigned long src_addr, |
---|
2425 | | - struct page **pagep) |
---|
2426 | | -{ |
---|
2427 | | - return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, |
---|
2428 | | - dst_addr, src_addr, false, pagep); |
---|
2429 | | -} |
---|
2430 | | - |
---|
2431 | | -int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, |
---|
2432 | | - pmd_t *dst_pmd, |
---|
2433 | | - struct vm_area_struct *dst_vma, |
---|
2434 | | - unsigned long dst_addr) |
---|
2435 | | -{ |
---|
2436 | | - struct page *page = NULL; |
---|
2437 | | - |
---|
2438 | | - return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, |
---|
2439 | | - dst_addr, 0, true, &page); |
---|
2440 | | -} |
---|
| 2472 | +#endif /* CONFIG_USERFAULTFD */ |
---|
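With the two thin wrappers deleted above, callers are expected to invoke shmem_mfill_atomic_pte() directly and pick the mode via the zeropage argument; the sketch below simply mirrors the removed wrappers (the example_* names are hypothetical, not part of the patch).

/* illustrative only: how a caller selects copy vs. zeropage */
static int example_uffd_copy(struct mm_struct *dst_mm, pmd_t *dst_pmd,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr, unsigned long src_addr,
			     struct page **pagep)
{
	/* zeropage == false: copy PAGE_SIZE bytes from src_addr */
	return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				      src_addr, false, pagep);
}

static int example_uffd_zeropage(struct mm_struct *dst_mm, pmd_t *dst_pmd,
				 struct vm_area_struct *dst_vma,
				 unsigned long dst_addr)
{
	struct page *page = NULL;

	/* zeropage == true: src_addr is ignored, the page is cleared */
	return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				      0, true, &page);
}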
2441 | 2473 | |
---|
2442 | 2474 | #ifdef CONFIG_TMPFS |
---|
2443 | 2475 | static const struct inode_operations shmem_symlink_inode_operations; |
---|
.. | .. |
---|
2617 | 2649 | } |
---|
2618 | 2650 | |
---|
2619 | 2651 | /* |
---|
2620 | | - * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. |
---|
| 2652 | + * llseek SEEK_DATA or SEEK_HOLE through the page cache. |
---|
2621 | 2653 | */ |
---|
2622 | 2654 | static pgoff_t shmem_seek_hole_data(struct address_space *mapping, |
---|
2623 | 2655 | pgoff_t index, pgoff_t end, int whence) |
---|
.. | .. |
---|
2647 | 2679 | index = indices[i]; |
---|
2648 | 2680 | } |
---|
2649 | 2681 | page = pvec.pages[i]; |
---|
2650 | | - if (page && !radix_tree_exceptional_entry(page)) { |
---|
| 2682 | + if (page && !xa_is_value(page)) { |
---|
2651 | 2683 | if (!PageUptodate(page)) |
---|
2652 | 2684 | page = NULL; |
---|
2653 | 2685 | } |
---|
.. | .. |
---|
2943 | 2975 | * first link must skip that, to get the accounting right. |
---|
2944 | 2976 | */ |
---|
2945 | 2977 | if (inode->i_nlink) { |
---|
2946 | | - ret = shmem_reserve_inode(inode->i_sb); |
---|
| 2978 | + ret = shmem_reserve_inode(inode->i_sb, NULL); |
---|
2947 | 2979 | if (ret) |
---|
2948 | 2980 | goto out; |
---|
2949 | 2981 | } |
---|
.. | .. |
---|
3095 | 3127 | |
---|
3096 | 3128 | error = security_inode_init_security(inode, dir, &dentry->d_name, |
---|
3097 | 3129 | shmem_initxattrs, NULL); |
---|
3098 | | - if (error) { |
---|
3099 | | - if (error != -EOPNOTSUPP) { |
---|
3100 | | - iput(inode); |
---|
3101 | | - return error; |
---|
3102 | | - } |
---|
3103 | | - error = 0; |
---|
| 3130 | + if (error && error != -EOPNOTSUPP) { |
---|
| 3131 | + iput(inode); |
---|
| 3132 | + return error; |
---|
3104 | 3133 | } |
---|
3105 | 3134 | |
---|
3106 | 3135 | inode->i_size = len-1; |
---|
.. | .. |
---|
3192 | 3221 | new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, |
---|
3193 | 3222 | GFP_KERNEL); |
---|
3194 | 3223 | if (!new_xattr->name) { |
---|
3195 | | - kfree(new_xattr); |
---|
| 3224 | + kvfree(new_xattr); |
---|
3196 | 3225 | return -ENOMEM; |
---|
3197 | 3226 | } |
---|
3198 | 3227 | |
---|
.. | .. |
---|
3209 | 3238 | |
---|
3210 | 3239 | static int shmem_xattr_handler_get(const struct xattr_handler *handler, |
---|
3211 | 3240 | struct dentry *unused, struct inode *inode, |
---|
3212 | | - const char *name, void *buffer, size_t size) |
---|
| 3241 | + const char *name, void *buffer, size_t size, |
---|
| 3242 | + int flags) |
---|
3213 | 3243 | { |
---|
3214 | 3244 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
3215 | 3245 | |
---|
.. | .. |
---|
3225 | 3255 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
3226 | 3256 | |
---|
3227 | 3257 | name = xattr_full_name(handler, name); |
---|
3228 | | - return simple_xattr_set(&info->xattrs, name, value, size, flags); |
---|
| 3258 | + return simple_xattr_set(&info->xattrs, name, value, size, flags, NULL); |
---|
3229 | 3259 | } |
---|
3230 | 3260 | |
---|
3231 | 3261 | static const struct xattr_handler shmem_security_xattr_handler = { |
---|
.. | .. |
---|
3352 | 3382 | .fh_to_dentry = shmem_fh_to_dentry, |
---|
3353 | 3383 | }; |
---|
3354 | 3384 | |
---|
3355 | | -static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, |
---|
3356 | | - bool remount) |
---|
| 3385 | +enum shmem_param { |
---|
| 3386 | + Opt_gid, |
---|
| 3387 | + Opt_huge, |
---|
| 3388 | + Opt_mode, |
---|
| 3389 | + Opt_mpol, |
---|
| 3390 | + Opt_nr_blocks, |
---|
| 3391 | + Opt_nr_inodes, |
---|
| 3392 | + Opt_size, |
---|
| 3393 | + Opt_uid, |
---|
| 3394 | + Opt_inode32, |
---|
| 3395 | + Opt_inode64, |
---|
| 3396 | +}; |
---|
| 3397 | + |
---|
| 3398 | +static const struct constant_table shmem_param_enums_huge[] = { |
---|
| 3399 | + {"never", SHMEM_HUGE_NEVER }, |
---|
| 3400 | + {"always", SHMEM_HUGE_ALWAYS }, |
---|
| 3401 | + {"within_size", SHMEM_HUGE_WITHIN_SIZE }, |
---|
| 3402 | + {"advise", SHMEM_HUGE_ADVISE }, |
---|
| 3403 | + {} |
---|
| 3404 | +}; |
---|
| 3405 | + |
---|
| 3406 | +const struct fs_parameter_spec shmem_fs_parameters[] = { |
---|
| 3407 | + fsparam_u32 ("gid", Opt_gid), |
---|
| 3408 | + fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), |
---|
| 3409 | + fsparam_u32oct("mode", Opt_mode), |
---|
| 3410 | + fsparam_string("mpol", Opt_mpol), |
---|
| 3411 | + fsparam_string("nr_blocks", Opt_nr_blocks), |
---|
| 3412 | + fsparam_string("nr_inodes", Opt_nr_inodes), |
---|
| 3413 | + fsparam_string("size", Opt_size), |
---|
| 3414 | + fsparam_u32 ("uid", Opt_uid), |
---|
| 3415 | + fsparam_flag ("inode32", Opt_inode32), |
---|
| 3416 | + fsparam_flag ("inode64", Opt_inode64), |
---|
| 3417 | + {} |
---|
| 3418 | +}; |
---|
| 3419 | + |
---|
| 3420 | +static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) |
---|
3357 | 3421 | { |
---|
3358 | | - char *this_char, *value, *rest; |
---|
3359 | | - struct mempolicy *mpol = NULL; |
---|
3360 | | - uid_t uid; |
---|
3361 | | - gid_t gid; |
---|
| 3422 | + struct shmem_options *ctx = fc->fs_private; |
---|
| 3423 | + struct fs_parse_result result; |
---|
| 3424 | + unsigned long long size; |
---|
| 3425 | + char *rest; |
---|
| 3426 | + int opt; |
---|
| 3427 | + kuid_t kuid; |
---|
| 3428 | + kgid_t kgid; |
---|
| 3429 | + |
---|
| 3430 | + opt = fs_parse(fc, shmem_fs_parameters, param, &result); |
---|
| 3431 | + if (opt < 0) |
---|
| 3432 | + return opt; |
---|
| 3433 | + |
---|
| 3434 | + switch (opt) { |
---|
| 3435 | + case Opt_size: |
---|
| 3436 | + size = memparse(param->string, &rest); |
---|
| 3437 | + if (*rest == '%') { |
---|
| 3438 | + size <<= PAGE_SHIFT; |
---|
| 3439 | + size *= totalram_pages(); |
---|
| 3440 | + do_div(size, 100); |
---|
| 3441 | + rest++; |
---|
| 3442 | + } |
---|
| 3443 | + if (*rest) |
---|
| 3444 | + goto bad_value; |
---|
| 3445 | + ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); |
---|
| 3446 | + ctx->seen |= SHMEM_SEEN_BLOCKS; |
---|
| 3447 | + break; |
---|
| 3448 | + case Opt_nr_blocks: |
---|
| 3449 | + ctx->blocks = memparse(param->string, &rest); |
---|
| 3450 | + if (*rest) |
---|
| 3451 | + goto bad_value; |
---|
| 3452 | + ctx->seen |= SHMEM_SEEN_BLOCKS; |
---|
| 3453 | + break; |
---|
| 3454 | + case Opt_nr_inodes: |
---|
| 3455 | + ctx->inodes = memparse(param->string, &rest); |
---|
| 3456 | + if (*rest) |
---|
| 3457 | + goto bad_value; |
---|
| 3458 | + ctx->seen |= SHMEM_SEEN_INODES; |
---|
| 3459 | + break; |
---|
| 3460 | + case Opt_mode: |
---|
| 3461 | + ctx->mode = result.uint_32 & 07777; |
---|
| 3462 | + break; |
---|
| 3463 | + case Opt_uid: |
---|
| 3464 | + kuid = make_kuid(current_user_ns(), result.uint_32); |
---|
| 3465 | + if (!uid_valid(kuid)) |
---|
| 3466 | + goto bad_value; |
---|
| 3467 | + |
---|
| 3468 | + /* |
---|
| 3469 | + * The requested uid must be representable in the |
---|
| 3470 | + * filesystem's idmapping. |
---|
| 3471 | + */ |
---|
| 3472 | + if (!kuid_has_mapping(fc->user_ns, kuid)) |
---|
| 3473 | + goto bad_value; |
---|
| 3474 | + |
---|
| 3475 | + ctx->uid = kuid; |
---|
| 3476 | + break; |
---|
| 3477 | + case Opt_gid: |
---|
| 3478 | + kgid = make_kgid(current_user_ns(), result.uint_32); |
---|
| 3479 | + if (!gid_valid(kgid)) |
---|
| 3480 | + goto bad_value; |
---|
| 3481 | + |
---|
| 3482 | + /* |
---|
| 3483 | + * The requested gid must be representable in the |
---|
| 3484 | + * filesystem's idmapping. |
---|
| 3485 | + */ |
---|
| 3486 | + if (!kgid_has_mapping(fc->user_ns, kgid)) |
---|
| 3487 | + goto bad_value; |
---|
| 3488 | + |
---|
| 3489 | + ctx->gid = kgid; |
---|
| 3490 | + break; |
---|
| 3491 | + case Opt_huge: |
---|
| 3492 | + ctx->huge = result.uint_32; |
---|
| 3493 | + if (ctx->huge != SHMEM_HUGE_NEVER && |
---|
| 3494 | + !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
| 3495 | + has_transparent_hugepage())) |
---|
| 3496 | + goto unsupported_parameter; |
---|
| 3497 | + ctx->seen |= SHMEM_SEEN_HUGE; |
---|
| 3498 | + break; |
---|
| 3499 | + case Opt_mpol: |
---|
| 3500 | + if (IS_ENABLED(CONFIG_NUMA)) { |
---|
| 3501 | + mpol_put(ctx->mpol); |
---|
| 3502 | + ctx->mpol = NULL; |
---|
| 3503 | + if (mpol_parse_str(param->string, &ctx->mpol)) |
---|
| 3504 | + goto bad_value; |
---|
| 3505 | + break; |
---|
| 3506 | + } |
---|
| 3507 | + goto unsupported_parameter; |
---|
| 3508 | + case Opt_inode32: |
---|
| 3509 | + ctx->full_inums = false; |
---|
| 3510 | + ctx->seen |= SHMEM_SEEN_INUMS; |
---|
| 3511 | + break; |
---|
| 3512 | + case Opt_inode64: |
---|
| 3513 | + if (sizeof(ino_t) < 8) { |
---|
| 3514 | + return invalfc(fc, |
---|
| 3515 | + "Cannot use inode64 with <64bit inums in kernel\n"); |
---|
| 3516 | + } |
---|
| 3517 | + ctx->full_inums = true; |
---|
| 3518 | + ctx->seen |= SHMEM_SEEN_INUMS; |
---|
| 3519 | + break; |
---|
| 3520 | + } |
---|
| 3521 | + return 0; |
---|
| 3522 | + |
---|
| 3523 | +unsupported_parameter: |
---|
| 3524 | + return invalfc(fc, "Unsupported parameter '%s'", param->key); |
---|
| 3525 | +bad_value: |
---|
| 3526 | + return invalfc(fc, "Bad value for '%s'", param->key); |
---|
| 3527 | +} |
---|
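As a usage illustration (not part of the patch), the option keys handled by shmem_parse_one() above are exactly what a tmpfs mount passes in its data string; a hypothetical userspace caller could exercise several of them at once, with the mount point and values chosen arbitrarily.

#include <sys/mount.h>

/* hypothetical example: path and limits are arbitrary */
static int example_mount_tmpfs(void)
{
	return mount("tmpfs", "/mnt/scratch", "tmpfs", 0,
		     "size=50%,nr_inodes=1m,mode=1777,huge=within_size,inode64");
}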
| 3528 | + |
---|
| 3529 | +static int shmem_parse_options(struct fs_context *fc, void *data) |
---|
| 3530 | +{ |
---|
| 3531 | + char *options = data; |
---|
| 3532 | + |
---|
| 3533 | + if (options) { |
---|
| 3534 | + int err = security_sb_eat_lsm_opts(options, &fc->security); |
---|
| 3535 | + if (err) |
---|
| 3536 | + return err; |
---|
| 3537 | + } |
---|
3362 | 3538 | |
---|
3363 | 3539 | while (options != NULL) { |
---|
3364 | | - this_char = options; |
---|
| 3540 | + char *this_char = options; |
---|
3365 | 3541 | for (;;) { |
---|
3366 | 3542 | /* |
---|
3367 | 3543 | * NUL-terminate this option: unfortunately, |
---|
.. | .. |
---|
3377 | 3553 | break; |
---|
3378 | 3554 | } |
---|
3379 | 3555 | } |
---|
3380 | | - if (!*this_char) |
---|
3381 | | - continue; |
---|
3382 | | - if ((value = strchr(this_char,'=')) != NULL) { |
---|
3383 | | - *value++ = 0; |
---|
3384 | | - } else { |
---|
3385 | | - pr_err("tmpfs: No value for mount option '%s'\n", |
---|
3386 | | - this_char); |
---|
3387 | | - goto error; |
---|
3388 | | - } |
---|
| 3556 | + if (*this_char) { |
---|
| 3557 | + char *value = strchr(this_char,'='); |
---|
| 3558 | + size_t len = 0; |
---|
| 3559 | + int err; |
---|
3389 | 3560 | |
---|
3390 | | - if (!strcmp(this_char,"size")) { |
---|
3391 | | - unsigned long long size; |
---|
3392 | | - size = memparse(value,&rest); |
---|
3393 | | - if (*rest == '%') { |
---|
3394 | | - size <<= PAGE_SHIFT; |
---|
3395 | | - size *= totalram_pages; |
---|
3396 | | - do_div(size, 100); |
---|
3397 | | - rest++; |
---|
| 3561 | + if (value) { |
---|
| 3562 | + *value++ = '\0'; |
---|
| 3563 | + len = strlen(value); |
---|
3398 | 3564 | } |
---|
3399 | | - if (*rest) |
---|
3400 | | - goto bad_val; |
---|
3401 | | - sbinfo->max_blocks = |
---|
3402 | | - DIV_ROUND_UP(size, PAGE_SIZE); |
---|
3403 | | - } else if (!strcmp(this_char,"nr_blocks")) { |
---|
3404 | | - sbinfo->max_blocks = memparse(value, &rest); |
---|
3405 | | - if (*rest) |
---|
3406 | | - goto bad_val; |
---|
3407 | | - } else if (!strcmp(this_char,"nr_inodes")) { |
---|
3408 | | - sbinfo->max_inodes = memparse(value, &rest); |
---|
3409 | | - if (*rest) |
---|
3410 | | - goto bad_val; |
---|
3411 | | - } else if (!strcmp(this_char,"mode")) { |
---|
3412 | | - if (remount) |
---|
3413 | | - continue; |
---|
3414 | | - sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777; |
---|
3415 | | - if (*rest) |
---|
3416 | | - goto bad_val; |
---|
3417 | | - } else if (!strcmp(this_char,"uid")) { |
---|
3418 | | - if (remount) |
---|
3419 | | - continue; |
---|
3420 | | - uid = simple_strtoul(value, &rest, 0); |
---|
3421 | | - if (*rest) |
---|
3422 | | - goto bad_val; |
---|
3423 | | - sbinfo->uid = make_kuid(current_user_ns(), uid); |
---|
3424 | | - if (!uid_valid(sbinfo->uid)) |
---|
3425 | | - goto bad_val; |
---|
3426 | | - } else if (!strcmp(this_char,"gid")) { |
---|
3427 | | - if (remount) |
---|
3428 | | - continue; |
---|
3429 | | - gid = simple_strtoul(value, &rest, 0); |
---|
3430 | | - if (*rest) |
---|
3431 | | - goto bad_val; |
---|
3432 | | - sbinfo->gid = make_kgid(current_user_ns(), gid); |
---|
3433 | | - if (!gid_valid(sbinfo->gid)) |
---|
3434 | | - goto bad_val; |
---|
3435 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
3436 | | - } else if (!strcmp(this_char, "huge")) { |
---|
3437 | | - int huge; |
---|
3438 | | - huge = shmem_parse_huge(value); |
---|
3439 | | - if (huge < 0) |
---|
3440 | | - goto bad_val; |
---|
3441 | | - if (!has_transparent_hugepage() && |
---|
3442 | | - huge != SHMEM_HUGE_NEVER) |
---|
3443 | | - goto bad_val; |
---|
3444 | | - sbinfo->huge = huge; |
---|
3445 | | -#endif |
---|
3446 | | -#ifdef CONFIG_NUMA |
---|
3447 | | - } else if (!strcmp(this_char,"mpol")) { |
---|
3448 | | - mpol_put(mpol); |
---|
3449 | | - mpol = NULL; |
---|
3450 | | - if (mpol_parse_str(value, &mpol)) |
---|
3451 | | - goto bad_val; |
---|
3452 | | -#endif |
---|
3453 | | - } else { |
---|
3454 | | - pr_err("tmpfs: Bad mount option %s\n", this_char); |
---|
3455 | | - goto error; |
---|
| 3565 | + err = vfs_parse_fs_string(fc, this_char, value, len); |
---|
| 3566 | + if (err < 0) |
---|
| 3567 | + return err; |
---|
3456 | 3568 | } |
---|
3457 | 3569 | } |
---|
3458 | | - sbinfo->mpol = mpol; |
---|
3459 | 3570 | return 0; |
---|
3460 | | - |
---|
3461 | | -bad_val: |
---|
3462 | | - pr_err("tmpfs: Bad value '%s' for mount option '%s'\n", |
---|
3463 | | - value, this_char); |
---|
3464 | | -error: |
---|
3465 | | - mpol_put(mpol); |
---|
3466 | | - return 1; |
---|
3467 | | - |
---|
3468 | 3571 | } |
---|
3469 | 3572 | |
---|
3470 | | -static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) |
---|
| 3573 | +/* |
---|
| 3574 | + * Reconfigure a shmem filesystem. |
---|
| 3575 | + * |
---|
| 3576 | + * Note that we disallow change from limited->unlimited blocks/inodes while any |
---|
| 3577 | + * are in use; but we must separately disallow unlimited->limited, because in |
---|
| 3578 | + * that case we have no record of how much is already in use. |
---|
| 3579 | + */ |
---|
| 3580 | +static int shmem_reconfigure(struct fs_context *fc) |
---|
3471 | 3581 | { |
---|
3472 | | - struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
3473 | | - struct shmem_sb_info config = *sbinfo; |
---|
| 3582 | + struct shmem_options *ctx = fc->fs_private; |
---|
| 3583 | + struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); |
---|
3474 | 3584 | unsigned long inodes; |
---|
3475 | | - int error = -EINVAL; |
---|
3476 | | - |
---|
3477 | | - config.mpol = NULL; |
---|
3478 | | - if (shmem_parse_options(data, &config, true)) |
---|
3479 | | - return error; |
---|
| 3585 | + const char *err; |
---|
3480 | 3586 | |
---|
3481 | 3587 | spin_lock(&sbinfo->stat_lock); |
---|
3482 | 3588 | inodes = sbinfo->max_inodes - sbinfo->free_inodes; |
---|
3483 | | - if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0) |
---|
3484 | | - goto out; |
---|
3485 | | - if (config.max_inodes < inodes) |
---|
3486 | | - goto out; |
---|
3487 | | - /* |
---|
3488 | | - * Those tests disallow limited->unlimited while any are in use; |
---|
3489 | | - * but we must separately disallow unlimited->limited, because |
---|
3490 | | - * in that case we have no record of how much is already in use. |
---|
3491 | | - */ |
---|
3492 | | - if (config.max_blocks && !sbinfo->max_blocks) |
---|
3493 | | - goto out; |
---|
3494 | | - if (config.max_inodes && !sbinfo->max_inodes) |
---|
3495 | | - goto out; |
---|
| 3589 | + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { |
---|
| 3590 | + if (!sbinfo->max_blocks) { |
---|
| 3591 | + err = "Cannot retroactively limit size"; |
---|
| 3592 | + goto out; |
---|
| 3593 | + } |
---|
| 3594 | + if (percpu_counter_compare(&sbinfo->used_blocks, |
---|
| 3595 | + ctx->blocks) > 0) { |
---|
| 3596 | + err = "Too small a size for current use"; |
---|
| 3597 | + goto out; |
---|
| 3598 | + } |
---|
| 3599 | + } |
---|
| 3600 | + if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { |
---|
| 3601 | + if (!sbinfo->max_inodes) { |
---|
| 3602 | + err = "Cannot retroactively limit inodes"; |
---|
| 3603 | + goto out; |
---|
| 3604 | + } |
---|
| 3605 | + if (ctx->inodes < inodes) { |
---|
| 3606 | + err = "Too few inodes for current use"; |
---|
| 3607 | + goto out; |
---|
| 3608 | + } |
---|
| 3609 | + } |
---|
3496 | 3610 | |
---|
3497 | | - error = 0; |
---|
3498 | | - sbinfo->huge = config.huge; |
---|
3499 | | - sbinfo->max_blocks = config.max_blocks; |
---|
3500 | | - sbinfo->max_inodes = config.max_inodes; |
---|
3501 | | - sbinfo->free_inodes = config.max_inodes - inodes; |
---|
| 3611 | + if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && |
---|
| 3612 | + sbinfo->next_ino > UINT_MAX) { |
---|
| 3613 | + err = "Current inum too high to switch to 32-bit inums"; |
---|
| 3614 | + goto out; |
---|
| 3615 | + } |
---|
| 3616 | + |
---|
| 3617 | + if (ctx->seen & SHMEM_SEEN_HUGE) |
---|
| 3618 | + sbinfo->huge = ctx->huge; |
---|
| 3619 | + if (ctx->seen & SHMEM_SEEN_INUMS) |
---|
| 3620 | + sbinfo->full_inums = ctx->full_inums; |
---|
| 3621 | + if (ctx->seen & SHMEM_SEEN_BLOCKS) |
---|
| 3622 | + sbinfo->max_blocks = ctx->blocks; |
---|
| 3623 | + if (ctx->seen & SHMEM_SEEN_INODES) { |
---|
| 3624 | + sbinfo->max_inodes = ctx->inodes; |
---|
| 3625 | + sbinfo->free_inodes = ctx->inodes - inodes; |
---|
| 3626 | + } |
---|
3502 | 3627 | |
---|
3503 | 3628 | /* |
---|
3504 | 3629 | * Preserve previous mempolicy unless mpol remount option was specified. |
---|
3505 | 3630 | */ |
---|
3506 | | - if (config.mpol) { |
---|
| 3631 | + if (ctx->mpol) { |
---|
3507 | 3632 | mpol_put(sbinfo->mpol); |
---|
3508 | | - sbinfo->mpol = config.mpol; /* transfers initial ref */ |
---|
| 3633 | + sbinfo->mpol = ctx->mpol; /* transfers initial ref */ |
---|
| 3634 | + ctx->mpol = NULL; |
---|
3509 | 3635 | } |
---|
| 3636 | + spin_unlock(&sbinfo->stat_lock); |
---|
| 3637 | + return 0; |
---|
3510 | 3638 | out: |
---|
3511 | 3639 | spin_unlock(&sbinfo->stat_lock); |
---|
3512 | | - return error; |
---|
| 3640 | + return invalfc(fc, "%s", err); |
---|
3513 | 3641 | } |
---|
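shmem_reconfigure() above is what a remount now lands in; a sketch of the corresponding userspace call (illustrative only, with an arbitrary size) could look like this.

#include <sys/mount.h>

static int example_grow_dev_shm(void)
{
	/*
	 * Only the named options change; an unlimited->limited switch of
	 * blocks or inodes would be rejected as described above.
	 */
	return mount(NULL, "/dev/shm", NULL, MS_REMOUNT, "size=2G");
}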
3514 | 3642 | |
---|
3515 | 3643 | static int shmem_show_options(struct seq_file *seq, struct dentry *root) |
---|
.. | .. |
---|
3529 | 3657 | if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) |
---|
3530 | 3658 | seq_printf(seq, ",gid=%u", |
---|
3531 | 3659 | from_kgid_munged(&init_user_ns, sbinfo->gid)); |
---|
3532 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 3660 | + |
---|
| 3661 | + /* |
---|
| 3662 | + * Showing inode{64,32} might be useful even if it's the system default, |
---|
| 3663 | + * since then people don't have to resort to checking both here and |
---|
| 3664 | + * /proc/config.gz to confirm 64-bit inums were successfully applied |
---|
| 3665 | + * (which may not even exist if IKCONFIG_PROC isn't enabled). |
---|
| 3666 | + * |
---|
| 3667 | + * We hide it when inode64 isn't the default and we are using 32-bit |
---|
| 3668 | + * inodes, since that probably just means the feature isn't even under |
---|
| 3669 | + * consideration. |
---|
| 3670 | + * |
---|
| 3671 | + * As such: |
---|
| 3672 | + * |
---|
| 3673 | + * +-----------------+-----------------+ |
---|
| 3674 | + * | TMPFS_INODE64=y | TMPFS_INODE64=n | |
---|
| 3675 | + * +------------------+-----------------+-----------------+ |
---|
| 3676 | + * | full_inums=true | show | show | |
---|
| 3677 | + * | full_inums=false | show | hide | |
---|
| 3678 | + * +------------------+-----------------+-----------------+ |
---|
| 3679 | + * |
---|
| 3680 | + */ |
---|
| 3681 | + if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) |
---|
| 3682 | + seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32)); |
---|
| 3683 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3533 | 3684 | /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ |
---|
3534 | 3685 | if (sbinfo->huge) |
---|
3535 | 3686 | seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); |
---|
.. | .. |
---|
3544 | 3695 | { |
---|
3545 | 3696 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
3546 | 3697 | |
---|
| 3698 | + free_percpu(sbinfo->ino_batch); |
---|
3547 | 3699 | percpu_counter_destroy(&sbinfo->used_blocks); |
---|
3548 | 3700 | mpol_put(sbinfo->mpol); |
---|
3549 | 3701 | kfree(sbinfo); |
---|
3550 | 3702 | sb->s_fs_info = NULL; |
---|
3551 | 3703 | } |
---|
3552 | 3704 | |
---|
3553 | | -int shmem_fill_super(struct super_block *sb, void *data, int silent) |
---|
| 3705 | +static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) |
---|
3554 | 3706 | { |
---|
| 3707 | + struct shmem_options *ctx = fc->fs_private; |
---|
3555 | 3708 | struct inode *inode; |
---|
3556 | 3709 | struct shmem_sb_info *sbinfo; |
---|
3557 | 3710 | int err = -ENOMEM; |
---|
.. | .. |
---|
3562 | 3715 | if (!sbinfo) |
---|
3563 | 3716 | return -ENOMEM; |
---|
3564 | 3717 | |
---|
3565 | | - sbinfo->mode = 0777 | S_ISVTX; |
---|
3566 | | - sbinfo->uid = current_fsuid(); |
---|
3567 | | - sbinfo->gid = current_fsgid(); |
---|
3568 | 3718 | sb->s_fs_info = sbinfo; |
---|
3569 | 3719 | |
---|
3570 | 3720 | #ifdef CONFIG_TMPFS |
---|
.. | .. |
---|
3574 | 3724 | * but the internal instance is left unlimited. |
---|
3575 | 3725 | */ |
---|
3576 | 3726 | if (!(sb->s_flags & SB_KERNMOUNT)) { |
---|
3577 | | - sbinfo->max_blocks = shmem_default_max_blocks(); |
---|
3578 | | - sbinfo->max_inodes = shmem_default_max_inodes(); |
---|
3579 | | - if (shmem_parse_options(data, sbinfo, false)) { |
---|
3580 | | - err = -EINVAL; |
---|
3581 | | - goto failed; |
---|
3582 | | - } |
---|
| 3727 | + if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) |
---|
| 3728 | + ctx->blocks = shmem_default_max_blocks(); |
---|
| 3729 | + if (!(ctx->seen & SHMEM_SEEN_INODES)) |
---|
| 3730 | + ctx->inodes = shmem_default_max_inodes(); |
---|
| 3731 | + if (!(ctx->seen & SHMEM_SEEN_INUMS)) |
---|
| 3732 | + ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); |
---|
3583 | 3733 | } else { |
---|
3584 | 3734 | sb->s_flags |= SB_NOUSER; |
---|
3585 | 3735 | } |
---|
.. | .. |
---|
3588 | 3738 | #else |
---|
3589 | 3739 | sb->s_flags |= SB_NOUSER; |
---|
3590 | 3740 | #endif |
---|
| 3741 | + sbinfo->max_blocks = ctx->blocks; |
---|
| 3742 | + sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes; |
---|
| 3743 | + if (sb->s_flags & SB_KERNMOUNT) { |
---|
| 3744 | + sbinfo->ino_batch = alloc_percpu(ino_t); |
---|
| 3745 | + if (!sbinfo->ino_batch) |
---|
| 3746 | + goto failed; |
---|
| 3747 | + } |
---|
| 3748 | + sbinfo->uid = ctx->uid; |
---|
| 3749 | + sbinfo->gid = ctx->gid; |
---|
| 3750 | + sbinfo->full_inums = ctx->full_inums; |
---|
| 3751 | + sbinfo->mode = ctx->mode; |
---|
| 3752 | + sbinfo->huge = ctx->huge; |
---|
| 3753 | + sbinfo->mpol = ctx->mpol; |
---|
| 3754 | + ctx->mpol = NULL; |
---|
3591 | 3755 | |
---|
3592 | 3756 | spin_lock_init(&sbinfo->stat_lock); |
---|
3593 | 3757 | if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) |
---|
3594 | 3758 | goto failed; |
---|
3595 | | - sbinfo->free_inodes = sbinfo->max_inodes; |
---|
3596 | 3759 | spin_lock_init(&sbinfo->shrinklist_lock); |
---|
3597 | 3760 | INIT_LIST_HEAD(&sbinfo->shrinklist); |
---|
3598 | 3761 | |
---|
.. | .. |
---|
3625 | 3788 | return err; |
---|
3626 | 3789 | } |
---|
3627 | 3790 | |
---|
| 3791 | +static int shmem_get_tree(struct fs_context *fc) |
---|
| 3792 | +{ |
---|
| 3793 | + return get_tree_nodev(fc, shmem_fill_super); |
---|
| 3794 | +} |
---|
| 3795 | + |
---|
| 3796 | +static void shmem_free_fc(struct fs_context *fc) |
---|
| 3797 | +{ |
---|
| 3798 | + struct shmem_options *ctx = fc->fs_private; |
---|
| 3799 | + |
---|
| 3800 | + if (ctx) { |
---|
| 3801 | + mpol_put(ctx->mpol); |
---|
| 3802 | + kfree(ctx); |
---|
| 3803 | + } |
---|
| 3804 | +} |
---|
| 3805 | + |
---|
| 3806 | +static const struct fs_context_operations shmem_fs_context_ops = { |
---|
| 3807 | + .free = shmem_free_fc, |
---|
| 3808 | + .get_tree = shmem_get_tree, |
---|
| 3809 | +#ifdef CONFIG_TMPFS |
---|
| 3810 | + .parse_monolithic = shmem_parse_options, |
---|
| 3811 | + .parse_param = shmem_parse_one, |
---|
| 3812 | + .reconfigure = shmem_reconfigure, |
---|
| 3813 | +#endif |
---|
| 3814 | +}; |
---|
| 3815 | + |
---|
3628 | 3816 | static struct kmem_cache *shmem_inode_cachep; |
---|
3629 | 3817 | |
---|
3630 | 3818 | static struct inode *shmem_alloc_inode(struct super_block *sb) |
---|
.. | .. |
---|
3636 | 3824 | return &info->vfs_inode; |
---|
3637 | 3825 | } |
---|
3638 | 3826 | |
---|
3639 | | -static void shmem_destroy_callback(struct rcu_head *head) |
---|
| 3827 | +static void shmem_free_in_core_inode(struct inode *inode) |
---|
3640 | 3828 | { |
---|
3641 | | - struct inode *inode = container_of(head, struct inode, i_rcu); |
---|
3642 | 3829 | if (S_ISLNK(inode->i_mode)) |
---|
3643 | 3830 | kfree(inode->i_link); |
---|
3644 | 3831 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); |
---|
.. | .. |
---|
3648 | 3835 | { |
---|
3649 | 3836 | if (S_ISREG(inode->i_mode)) |
---|
3650 | 3837 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); |
---|
3651 | | - call_rcu(&inode->i_rcu, shmem_destroy_callback); |
---|
3652 | 3838 | } |
---|
3653 | 3839 | |
---|
3654 | 3840 | static void shmem_init_inode(void *foo) |
---|
.. | .. |
---|
3739 | 3925 | |
---|
3740 | 3926 | static const struct super_operations shmem_ops = { |
---|
3741 | 3927 | .alloc_inode = shmem_alloc_inode, |
---|
| 3928 | + .free_inode = shmem_free_in_core_inode, |
---|
3742 | 3929 | .destroy_inode = shmem_destroy_inode, |
---|
3743 | 3930 | #ifdef CONFIG_TMPFS |
---|
3744 | 3931 | .statfs = shmem_statfs, |
---|
3745 | | - .remount_fs = shmem_remount_fs, |
---|
3746 | 3932 | .show_options = shmem_show_options, |
---|
3747 | 3933 | #endif |
---|
3748 | 3934 | .evict_inode = shmem_evict_inode, |
---|
3749 | 3935 | .drop_inode = generic_delete_inode, |
---|
3750 | 3936 | .put_super = shmem_put_super, |
---|
3751 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 3937 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3752 | 3938 | .nr_cached_objects = shmem_unused_huge_count, |
---|
3753 | 3939 | .free_cached_objects = shmem_unused_huge_scan, |
---|
3754 | 3940 | #endif |
---|
.. | .. |
---|
3761 | 3947 | .set_policy = shmem_set_policy, |
---|
3762 | 3948 | .get_policy = shmem_get_policy, |
---|
3763 | 3949 | #endif |
---|
| 3950 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
---|
| 3951 | + .allow_speculation = filemap_allow_speculation, |
---|
| 3952 | +#endif |
---|
3764 | 3953 | }; |
---|
3765 | 3954 | |
---|
3766 | | -static struct dentry *shmem_mount(struct file_system_type *fs_type, |
---|
3767 | | - int flags, const char *dev_name, void *data) |
---|
| 3955 | +int shmem_init_fs_context(struct fs_context *fc) |
---|
3768 | 3956 | { |
---|
3769 | | - return mount_nodev(fs_type, flags, data, shmem_fill_super); |
---|
| 3957 | + struct shmem_options *ctx; |
---|
| 3958 | + |
---|
| 3959 | + ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL); |
---|
| 3960 | + if (!ctx) |
---|
| 3961 | + return -ENOMEM; |
---|
| 3962 | + |
---|
| 3963 | + ctx->mode = 0777 | S_ISVTX; |
---|
| 3964 | + ctx->uid = current_fsuid(); |
---|
| 3965 | + ctx->gid = current_fsgid(); |
---|
| 3966 | + |
---|
| 3967 | + fc->fs_private = ctx; |
---|
| 3968 | + fc->ops = &shmem_fs_context_ops; |
---|
| 3969 | + return 0; |
---|
3770 | 3970 | } |
---|
3771 | 3971 | |
---|
3772 | 3972 | static struct file_system_type shmem_fs_type = { |
---|
3773 | 3973 | .owner = THIS_MODULE, |
---|
3774 | 3974 | .name = "tmpfs", |
---|
3775 | | - .mount = shmem_mount, |
---|
| 3975 | + .init_fs_context = shmem_init_fs_context, |
---|
| 3976 | +#ifdef CONFIG_TMPFS |
---|
| 3977 | + .parameters = shmem_fs_parameters, |
---|
| 3978 | +#endif |
---|
3776 | 3979 | .kill_sb = kill_litter_super, |
---|
3777 | | - .fs_flags = FS_USERNS_MOUNT, |
---|
| 3980 | + .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT, |
---|
3778 | 3981 | }; |
---|
3779 | 3982 | |
---|
3780 | 3983 | int __init shmem_init(void) |
---|
3781 | 3984 | { |
---|
3782 | 3985 | int error; |
---|
3783 | | - |
---|
3784 | | - /* If rootfs called this, don't re-init */ |
---|
3785 | | - if (shmem_inode_cachep) |
---|
3786 | | - return 0; |
---|
3787 | 3986 | |
---|
3788 | 3987 | shmem_init_inodecache(); |
---|
3789 | 3988 | |
---|
.. | .. |
---|
3800 | 3999 | goto out1; |
---|
3801 | 4000 | } |
---|
3802 | 4001 | |
---|
3803 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 4002 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3804 | 4003 | if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) |
---|
3805 | 4004 | SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; |
---|
3806 | 4005 | else |
---|
.. | .. |
---|
3816 | 4015 | return error; |
---|
3817 | 4016 | } |
---|
3818 | 4017 | |
---|
3819 | | -#if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS) |
---|
| 4018 | +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) |
---|
3820 | 4019 | static ssize_t shmem_enabled_show(struct kobject *kobj, |
---|
3821 | 4020 | struct kobj_attribute *attr, char *buf) |
---|
3822 | 4021 | { |
---|
3823 | | - int values[] = { |
---|
| 4022 | + static const int values[] = { |
---|
3824 | 4023 | SHMEM_HUGE_ALWAYS, |
---|
3825 | 4024 | SHMEM_HUGE_WITHIN_SIZE, |
---|
3826 | 4025 | SHMEM_HUGE_ADVISE, |
---|
.. | .. |
---|
3868 | 4067 | |
---|
3869 | 4068 | struct kobj_attribute shmem_enabled_attr = |
---|
3870 | 4069 | __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store); |
---|
3871 | | -#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */ |
---|
| 4070 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ |
---|
3872 | 4071 | |
---|
3873 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 4072 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3874 | 4073 | bool shmem_huge_enabled(struct vm_area_struct *vma) |
---|
3875 | 4074 | { |
---|
3876 | 4075 | struct inode *inode = file_inode(vma->vm_file); |
---|
.. | .. |
---|
3878 | 4077 | loff_t i_size; |
---|
3879 | 4078 | pgoff_t off; |
---|
3880 | 4079 | |
---|
| 4080 | + if (!transhuge_vma_enabled(vma, vma->vm_flags)) |
---|
| 4081 | + return false; |
---|
3881 | 4082 | if (shmem_huge == SHMEM_HUGE_FORCE) |
---|
3882 | 4083 | return true; |
---|
3883 | 4084 | if (shmem_huge == SHMEM_HUGE_DENY) |
---|
.. | .. |
---|
3893 | 4094 | if (i_size >= HPAGE_PMD_SIZE && |
---|
3894 | 4095 | i_size >> PAGE_SHIFT >= off) |
---|
3895 | 4096 | return true; |
---|
3896 | | - /* fall through */ |
---|
| 4097 | + fallthrough; |
---|
3897 | 4098 | case SHMEM_HUGE_ADVISE: |
---|
3898 | 4099 | /* TODO: implement fadvise() hints */ |
---|
3899 | 4100 | return (vma->vm_flags & VM_HUGEPAGE); |
---|
.. | .. |
---|
3902 | 4103 | return false; |
---|
3903 | 4104 | } |
---|
3904 | 4105 | } |
---|
3905 | | -#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
---|
| 4106 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
---|
3906 | 4107 | |
---|
3907 | 4108 | #else /* !CONFIG_SHMEM */ |
---|
3908 | 4109 | |
---|
.. | .. |
---|
3917 | 4118 | |
---|
3918 | 4119 | static struct file_system_type shmem_fs_type = { |
---|
3919 | 4120 | .name = "tmpfs", |
---|
3920 | | - .mount = ramfs_mount, |
---|
3921 | | - .kill_sb = kill_litter_super, |
---|
| 4121 | + .init_fs_context = ramfs_init_fs_context, |
---|
| 4122 | + .parameters = ramfs_fs_parameters, |
---|
| 4123 | + .kill_sb = ramfs_kill_sb, |
---|
3922 | 4124 | .fs_flags = FS_USERNS_MOUNT, |
---|
3923 | 4125 | }; |
---|
3924 | 4126 | |
---|
.. | .. |
---|
3932 | 4134 | return 0; |
---|
3933 | 4135 | } |
---|
3934 | 4136 | |
---|
3935 | | -int shmem_unuse(swp_entry_t swap, struct page *page) |
---|
| 4137 | +int shmem_unuse(unsigned int type, bool frontswap, |
---|
| 4138 | + unsigned long *fs_pages_to_unuse) |
---|
3936 | 4139 | { |
---|
3937 | 4140 | return 0; |
---|
3938 | 4141 | } |
---|
.. | .. |
---|
4047 | 4250 | |
---|
4048 | 4251 | /** |
---|
4049 | 4252 | * shmem_zero_setup - setup a shared anonymous mapping |
---|
4050 | | - * @vma: the vma to be mmapped is prepared by do_mmap_pgoff |
---|
| 4253 | + * @vma: the vma to be mmapped is prepared by do_mmap |
---|
4051 | 4254 | */ |
---|
4052 | 4255 | int shmem_zero_setup(struct vm_area_struct *vma) |
---|
4053 | 4256 | { |
---|
.. | .. |
---|
4055 | 4258 | loff_t size = vma->vm_end - vma->vm_start; |
---|
4056 | 4259 | |
---|
4057 | 4260 | /* |
---|
4058 | | - * Cloning a new file under mmap_sem leads to a lock ordering conflict |
---|
| 4261 | + * Cloning a new file under mmap_lock leads to a lock ordering conflict |
---|
4059 | 4262 | * between XFS directory reading and selinux: since this file is only |
---|
4060 | 4263 | * accessible to the user through its mapping, use S_PRIVATE flag to |
---|
4061 | 4264 | * bypass file security, in the same way as shmem_kernel_file_setup(). |
---|
.. | .. |
---|
4069 | 4272 | vma->vm_file = file; |
---|
4070 | 4273 | vma->vm_ops = &shmem_vm_ops; |
---|
4071 | 4274 | |
---|
4072 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && |
---|
| 4275 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
4073 | 4276 | ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < |
---|
4074 | 4277 | (vma->vm_end & HPAGE_PMD_MASK)) { |
---|
4075 | 4278 | khugepaged_enter(vma, vma->vm_flags); |
---|
.. | .. |
---|
4117 | 4320 | #endif |
---|
4118 | 4321 | } |
---|
4119 | 4322 | EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); |
---|
| 4323 | + |
---|
| 4324 | +void shmem_mark_page_lazyfree(struct page *page, bool tail) |
---|
| 4325 | +{ |
---|
| 4326 | + mark_page_lazyfree_movetail(page, tail); |
---|
| 4327 | +} |
---|
| 4328 | +EXPORT_SYMBOL_GPL(shmem_mark_page_lazyfree); |
---|
| 4329 | + |
---|
| 4330 | +int reclaim_shmem_address_space(struct address_space *mapping) |
---|
| 4331 | +{ |
---|
| 4332 | +#ifdef CONFIG_SHMEM |
---|
| 4333 | + pgoff_t start = 0; |
---|
| 4334 | + struct page *page; |
---|
| 4335 | + LIST_HEAD(page_list); |
---|
| 4336 | + XA_STATE(xas, &mapping->i_pages, start); |
---|
| 4337 | + |
---|
| 4338 | + if (!shmem_mapping(mapping)) |
---|
| 4339 | + return -EINVAL; |
---|
| 4340 | + |
---|
| 4341 | + lru_add_drain(); |
---|
| 4342 | + |
---|
| 4343 | + rcu_read_lock(); |
---|
| 4344 | + xas_for_each(&xas, page, ULONG_MAX) { |
---|
| 4345 | + if (xas_retry(&xas, page)) |
---|
| 4346 | + continue; |
---|
| 4347 | + if (xa_is_value(page)) |
---|
| 4348 | + continue; |
---|
| 4349 | + if (isolate_lru_page(page)) |
---|
| 4350 | + continue; |
---|
| 4351 | + |
---|
| 4352 | + list_add(&page->lru, &page_list); |
---|
| 4353 | + |
---|
| 4354 | + if (need_resched()) { |
---|
| 4355 | + xas_pause(&xas); |
---|
| 4356 | + cond_resched_rcu(); |
---|
| 4357 | + } |
---|
| 4358 | + } |
---|
| 4359 | + rcu_read_unlock(); |
---|
| 4360 | + |
---|
| 4361 | + return reclaim_pages(&page_list); |
---|
| 4362 | +#else |
---|
| 4363 | + return 0; |
---|
| 4364 | +#endif |
---|
| 4365 | +} |
---|
| 4366 | +EXPORT_SYMBOL_GPL(reclaim_shmem_address_space); |
---|