.. | .. |
---|
36 | 36 | #include <linux/uio.h> |
---|
37 | 37 | #include <linux/khugepaged.h> |
---|
38 | 38 | #include <linux/hugetlb.h> |
---|
| 39 | +#include <linux/frontswap.h> |
---|
| 40 | +#include <linux/fs_parser.h> |
---|
| 41 | +#include <linux/mm_inline.h> |
---|
39 | 42 | |
---|
40 | 43 | #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */ |
---|
| 44 | + |
---|
| 45 | +#include "internal.h" |
---|
| 46 | + |
---|
| 47 | +#undef CREATE_TRACE_POINTS |
---|
| 48 | +#include <trace/hooks/shmem_fs.h> |
---|
| 49 | +#include <trace/hooks/mm.h> |
---|
41 | 50 | |
---|
42 | 51 | static struct vfsmount *shm_mnt; |
---|
43 | 52 | |
---|
.. | .. |
---|
80 | 89 | #include <linux/uuid.h> |
---|
81 | 90 | |
---|
82 | 91 | #include <linux/uaccess.h> |
---|
83 | | -#include <asm/pgtable.h> |
---|
84 | 92 | |
---|
85 | 93 | #include "internal.h" |
---|
86 | 94 | |
---|
.. | .. |
---|
106 | 114 | pgoff_t nr_unswapped; /* how often writepage refused to swap out */ |
---|
107 | 115 | }; |
---|
108 | 116 | |
---|
| 117 | +struct shmem_options { |
---|
| 118 | + unsigned long long blocks; |
---|
| 119 | + unsigned long long inodes; |
---|
| 120 | + struct mempolicy *mpol; |
---|
| 121 | + kuid_t uid; |
---|
| 122 | + kgid_t gid; |
---|
| 123 | + umode_t mode; |
---|
| 124 | + bool full_inums; |
---|
| 125 | + int huge; |
---|
| 126 | + int seen; |
---|
| 127 | +#define SHMEM_SEEN_BLOCKS 1 |
---|
| 128 | +#define SHMEM_SEEN_INODES 2 |
---|
| 129 | +#define SHMEM_SEEN_HUGE 4 |
---|
| 130 | +#define SHMEM_SEEN_INUMS 8 |
---|
| 131 | +}; |
---|
| 132 | + |
---|
109 | 133 | #ifdef CONFIG_TMPFS |
---|
110 | 134 | static unsigned long shmem_default_max_blocks(void) |
---|
111 | 135 | { |
---|
112 | | - return totalram_pages / 2; |
---|
| 136 | + return totalram_pages() / 2; |
---|
113 | 137 | } |
---|
114 | 138 | |
---|
115 | 139 | static unsigned long shmem_default_max_inodes(void) |
---|
116 | 140 | { |
---|
117 | | - return min(totalram_pages - totalhigh_pages, totalram_pages / 2); |
---|
| 141 | + unsigned long nr_pages = totalram_pages(); |
---|
| 142 | + |
---|
| 143 | + return min(nr_pages - totalhigh_pages(), nr_pages / 2); |
---|
118 | 144 | } |
---|
119 | 145 | #endif |
---|
120 | 146 | |
---|
121 | 147 | static bool shmem_should_replace_page(struct page *page, gfp_t gfp); |
---|
122 | 148 | static int shmem_replace_page(struct page **pagep, gfp_t gfp, |
---|
123 | 149 | struct shmem_inode_info *info, pgoff_t index); |
---|
| 150 | +static int shmem_swapin_page(struct inode *inode, pgoff_t index, |
---|
| 151 | + struct page **pagep, enum sgp_type sgp, |
---|
| 152 | + gfp_t gfp, struct vm_area_struct *vma, |
---|
| 153 | + vm_fault_t *fault_type); |
---|
124 | 154 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
---|
125 | 155 | struct page **pagep, enum sgp_type sgp, |
---|
126 | 156 | gfp_t gfp, struct vm_area_struct *vma, |
---|
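
The shmem_default_max_blocks()/shmem_default_max_inodes() changes in the hunk above only swap the totalram_pages/totalhigh_pages globals for their accessor functions; the formulas themselves are unchanged. A worked example of what those defaults come out to, assuming a 64-bit machine with 8 GiB of RAM, 4 KiB pages and no highmem (illustrative numbers only, not part of the patch):

```c
/* Worked example of the tmpfs defaults above: 8 GiB RAM, 4 KiB pages,
 * no highmem.  Purely illustrative, not kernel code. */
#include <stdio.h>

int main(void)
{
	unsigned long long page_size = 4096;
	unsigned long long totalram  = (8ULL << 30) / page_size; /* 2097152 pages */
	unsigned long long totalhigh = 0;                         /* 64-bit: no highmem */

	unsigned long long max_blocks = totalram / 2;
	unsigned long long max_inodes = (totalram - totalhigh) < totalram / 2 ?
					(totalram - totalhigh) : totalram / 2;

	printf("default size  : %llu pages (%llu MiB)\n",
	       max_blocks, max_blocks * page_size >> 20);
	printf("default inodes: %llu\n", max_inodes);
	return 0;
}
```

So an unconfigured tmpfs mount on such a machine is capped at 4 GiB and roughly a million inodes.
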
.. | .. |
---|
239 | 269 | static LIST_HEAD(shmem_swaplist); |
---|
240 | 270 | static DEFINE_MUTEX(shmem_swaplist_mutex); |
---|
241 | 271 | |
---|
242 | | -static int shmem_reserve_inode(struct super_block *sb) |
---|
| 272 | +/* |
---|
| 273 | + * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and |
---|
| 274 | + * produces a novel ino for the newly allocated inode. |
---|
| 275 | + * |
---|
| 276 | + * It may also be called when making a hard link to permit the space needed by |
---|
| 277 | + * each dentry. However, in that case, no new inode number is needed since that |
---|
| 278 | + * internally draws from another pool of inode numbers (currently global |
---|
| 279 | + * get_next_ino()). This case is indicated by passing NULL as inop. |
---|
| 280 | + */ |
---|
| 281 | +#define SHMEM_INO_BATCH 1024 |
---|
| 282 | +static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) |
---|
243 | 283 | { |
---|
244 | 284 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
245 | | - if (sbinfo->max_inodes) { |
---|
246 | | - spin_lock(&sbinfo->stat_lock); |
---|
247 | | - if (!sbinfo->free_inodes) { |
---|
248 | | - spin_unlock(&sbinfo->stat_lock); |
---|
249 | | - return -ENOSPC; |
---|
| 285 | + ino_t ino; |
---|
| 286 | + |
---|
| 287 | + if (!(sb->s_flags & SB_KERNMOUNT)) { |
---|
| 288 | + raw_spin_lock(&sbinfo->stat_lock); |
---|
| 289 | + if (sbinfo->max_inodes) { |
---|
| 290 | + if (!sbinfo->free_inodes) { |
---|
| 291 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
| 292 | + return -ENOSPC; |
---|
| 293 | + } |
---|
| 294 | + sbinfo->free_inodes--; |
---|
250 | 295 | } |
---|
251 | | - sbinfo->free_inodes--; |
---|
252 | | - spin_unlock(&sbinfo->stat_lock); |
---|
| 296 | + if (inop) { |
---|
| 297 | + ino = sbinfo->next_ino++; |
---|
| 298 | + if (unlikely(is_zero_ino(ino))) |
---|
| 299 | + ino = sbinfo->next_ino++; |
---|
| 300 | + if (unlikely(!sbinfo->full_inums && |
---|
| 301 | + ino > UINT_MAX)) { |
---|
| 302 | + /* |
---|
| 303 | + * Emulate get_next_ino uint wraparound for |
---|
| 304 | + * compatibility |
---|
| 305 | + */ |
---|
| 306 | + if (IS_ENABLED(CONFIG_64BIT)) |
---|
| 307 | + pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n", |
---|
| 308 | + __func__, MINOR(sb->s_dev)); |
---|
| 309 | + sbinfo->next_ino = 1; |
---|
| 310 | + ino = sbinfo->next_ino++; |
---|
| 311 | + } |
---|
| 312 | + *inop = ino; |
---|
| 313 | + } |
---|
| 314 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
| 315 | + } else if (inop) { |
---|
| 316 | + /* |
---|
| 317 | + * __shmem_file_setup, one of our callers, is lock-free: it |
---|
| 318 | + * doesn't hold stat_lock in shmem_reserve_inode since |
---|
| 319 | + * max_inodes is always 0, and is called from potentially |
---|
| 320 | + * unknown contexts. As such, use a per-cpu batched allocator |
---|
| 321 | + * which doesn't require the per-sb stat_lock unless we are at |
---|
| 322 | + * the batch boundary. |
---|
| 323 | + * |
---|
| 324 | + * We don't need to worry about inode{32,64} since SB_KERNMOUNT |
---|
| 325 | + * shmem mounts are not exposed to userspace, so we don't need |
---|
| 326 | + * to worry about things like glibc compatibility. |
---|
| 327 | + */ |
---|
| 328 | + ino_t *next_ino; |
---|
| 329 | + |
---|
| 330 | + next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); |
---|
| 331 | + ino = *next_ino; |
---|
| 332 | + if (unlikely(ino % SHMEM_INO_BATCH == 0)) { |
---|
| 333 | + raw_spin_lock(&sbinfo->stat_lock); |
---|
| 334 | + ino = sbinfo->next_ino; |
---|
| 335 | + sbinfo->next_ino += SHMEM_INO_BATCH; |
---|
| 336 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
| 337 | + if (unlikely(is_zero_ino(ino))) |
---|
| 338 | + ino++; |
---|
| 339 | + } |
---|
| 340 | + *inop = ino; |
---|
| 341 | + *next_ino = ++ino; |
---|
| 342 | + put_cpu(); |
---|
253 | 343 | } |
---|
| 344 | + |
---|
254 | 345 | return 0; |
---|
255 | 346 | } |
---|
256 | 347 | |
---|
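
The rewritten shmem_reserve_inode() above has two paths: user-visible mounts take stat_lock and emulate the old 32-bit wraparound unless inode64 is in effect, while SB_KERNMOUNT mounts use a per-cpu cursor that only touches the shared counter once per SHMEM_INO_BATCH allocations. A standalone C sketch of the batching scheme (plain globals stand in for the per-cpu data, and raw_spin_lock/get_cpu are omitted; this is a model, not part of the patch):

```c
/* Userspace sketch of the SHMEM_INO_BATCH idea: a local cursor hands
 * out numbers until its batch is exhausted, then refills from the
 * shared counter; ino 0 is never returned. */
#include <stdint.h>
#include <stdio.h>

#define SHMEM_INO_BATCH 1024

static uint64_t shared_next_ino = 1;	/* models sbinfo->next_ino */

static uint64_t reserve_ino_batched(uint64_t *cpu_next)
{
	uint64_t ino = *cpu_next;

	if (ino % SHMEM_INO_BATCH == 0) {
		/* Batch boundary: the only point that would need
		 * sbinfo->stat_lock in the kernel version. */
		ino = shared_next_ino;
		shared_next_ino += SHMEM_INO_BATCH;
		if (ino == 0)
			ino++;
	}
	*cpu_next = ino + 1;
	return ino;
}

int main(void)
{
	uint64_t cpu0 = 0, cpu1 = 0;
	uint64_t a = reserve_ino_batched(&cpu0);
	uint64_t b = reserve_ino_batched(&cpu0);
	uint64_t c = reserve_ino_batched(&cpu1);

	printf("cpu0: %llu %llu\n", (unsigned long long)a, (unsigned long long)b);
	printf("cpu1: %llu\n", (unsigned long long)c);
	return 0;
}
```

The point of the batch is that the common case is a plain local increment; contention on stat_lock is limited to one refill every 1024 inodes per CPU.
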
.. | .. |
---|
258 | 349 | { |
---|
259 | 350 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
260 | 351 | if (sbinfo->max_inodes) { |
---|
261 | | - spin_lock(&sbinfo->stat_lock); |
---|
| 352 | + raw_spin_lock(&sbinfo->stat_lock); |
---|
262 | 353 | sbinfo->free_inodes++; |
---|
263 | | - spin_unlock(&sbinfo->stat_lock); |
---|
| 354 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
264 | 355 | } |
---|
265 | 356 | } |
---|
266 | 357 | |
---|
.. | .. |
---|
326 | 417 | } |
---|
327 | 418 | |
---|
328 | 419 | /* |
---|
329 | | - * Replace item expected in radix tree by a new item, while holding tree lock. |
---|
| 420 | + * Replace item expected in xarray by a new item, while holding xa_lock. |
---|
330 | 421 | */ |
---|
331 | | -static int shmem_radix_tree_replace(struct address_space *mapping, |
---|
| 422 | +static int shmem_replace_entry(struct address_space *mapping, |
---|
332 | 423 | pgoff_t index, void *expected, void *replacement) |
---|
333 | 424 | { |
---|
334 | | - struct radix_tree_node *node; |
---|
335 | | - void __rcu **pslot; |
---|
| 425 | + XA_STATE(xas, &mapping->i_pages, index); |
---|
336 | 426 | void *item; |
---|
337 | 427 | |
---|
338 | 428 | VM_BUG_ON(!expected); |
---|
339 | 429 | VM_BUG_ON(!replacement); |
---|
340 | | - item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot); |
---|
341 | | - if (!item) |
---|
342 | | - return -ENOENT; |
---|
| 430 | + item = xas_load(&xas); |
---|
343 | 431 | if (item != expected) |
---|
344 | 432 | return -ENOENT; |
---|
345 | | - __radix_tree_replace(&mapping->i_pages, node, pslot, |
---|
346 | | - replacement, NULL); |
---|
| 433 | + xas_store(&xas, replacement); |
---|
347 | 434 | return 0; |
---|
348 | 435 | } |
---|
349 | 436 | |
---|
.. | .. |
---|
357 | 444 | static bool shmem_confirm_swap(struct address_space *mapping, |
---|
358 | 445 | pgoff_t index, swp_entry_t swap) |
---|
359 | 446 | { |
---|
360 | | - void *item; |
---|
361 | | - |
---|
362 | | - rcu_read_lock(); |
---|
363 | | - item = radix_tree_lookup(&mapping->i_pages, index); |
---|
364 | | - rcu_read_unlock(); |
---|
365 | | - return item == swp_to_radix_entry(swap); |
---|
| 447 | + return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); |
---|
366 | 448 | } |
---|
367 | 449 | |
---|
368 | 450 | /* |
---|
.. | .. |
---|
397 | 479 | #define SHMEM_HUGE_DENY (-1) |
---|
398 | 480 | #define SHMEM_HUGE_FORCE (-2) |
---|
399 | 481 | |
---|
400 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 482 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
401 | 483 | /* ifdef here to avoid bloating shmem.o when not necessary */ |
---|
402 | 484 | |
---|
403 | 485 | static int shmem_huge __read_mostly; |
---|
404 | 486 | |
---|
405 | | -#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) |
---|
| 487 | +#if defined(CONFIG_SYSFS) |
---|
406 | 488 | static int shmem_parse_huge(const char *str) |
---|
407 | 489 | { |
---|
408 | 490 | if (!strcmp(str, "never")) |
---|
.. | .. |
---|
419 | 501 | return SHMEM_HUGE_FORCE; |
---|
420 | 502 | return -EINVAL; |
---|
421 | 503 | } |
---|
| 504 | +#endif |
---|
422 | 505 | |
---|
| 506 | +#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) |
---|
423 | 507 | static const char *shmem_format_huge(int huge) |
---|
424 | 508 | { |
---|
425 | 509 | switch (huge) { |
---|
.. | .. |
---|
570 | 654 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
571 | 655 | return READ_ONCE(sbinfo->shrinklist_len); |
---|
572 | 656 | } |
---|
573 | | -#else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
---|
| 657 | +#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ |
---|
574 | 658 | |
---|
575 | 659 | #define shmem_huge SHMEM_HUGE_DENY |
---|
576 | 660 | |
---|
.. | .. |
---|
579 | 663 | { |
---|
580 | 664 | return 0; |
---|
581 | 665 | } |
---|
582 | | -#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
---|
| 666 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
---|
583 | 667 | |
---|
584 | 668 | static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo) |
---|
585 | 669 | { |
---|
586 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && |
---|
| 670 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
587 | 671 | (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) && |
---|
588 | 672 | shmem_huge != SHMEM_HUGE_DENY) |
---|
589 | 673 | return true; |
---|
.. | .. |
---|
595 | 679 | */ |
---|
596 | 680 | static int shmem_add_to_page_cache(struct page *page, |
---|
597 | 681 | struct address_space *mapping, |
---|
598 | | - pgoff_t index, void *expected) |
---|
| 682 | + pgoff_t index, void *expected, gfp_t gfp, |
---|
| 683 | + struct mm_struct *charge_mm) |
---|
599 | 684 | { |
---|
600 | | - int error, nr = hpage_nr_pages(page); |
---|
| 685 | + XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); |
---|
| 686 | + unsigned long i = 0; |
---|
| 687 | + unsigned long nr = compound_nr(page); |
---|
| 688 | + int error; |
---|
601 | 689 | |
---|
602 | 690 | VM_BUG_ON_PAGE(PageTail(page), page); |
---|
603 | 691 | VM_BUG_ON_PAGE(index != round_down(index, nr), page); |
---|
.. | .. |
---|
609 | 697 | page->mapping = mapping; |
---|
610 | 698 | page->index = index; |
---|
611 | 699 | |
---|
612 | | - xa_lock_irq(&mapping->i_pages); |
---|
613 | | - if (PageTransHuge(page)) { |
---|
614 | | - void __rcu **results; |
---|
615 | | - pgoff_t idx; |
---|
616 | | - int i; |
---|
617 | | - |
---|
618 | | - error = 0; |
---|
619 | | - if (radix_tree_gang_lookup_slot(&mapping->i_pages, |
---|
620 | | - &results, &idx, index, 1) && |
---|
621 | | - idx < index + HPAGE_PMD_NR) { |
---|
622 | | - error = -EEXIST; |
---|
623 | | - } |
---|
624 | | - |
---|
625 | | - if (!error) { |
---|
626 | | - for (i = 0; i < HPAGE_PMD_NR; i++) { |
---|
627 | | - error = radix_tree_insert(&mapping->i_pages, |
---|
628 | | - index + i, page + i); |
---|
629 | | - VM_BUG_ON(error); |
---|
| 700 | + if (!PageSwapCache(page)) { |
---|
| 701 | + error = mem_cgroup_charge(page, charge_mm, gfp); |
---|
| 702 | + if (error) { |
---|
| 703 | + if (PageTransHuge(page)) { |
---|
| 704 | + count_vm_event(THP_FILE_FALLBACK); |
---|
| 705 | + count_vm_event(THP_FILE_FALLBACK_CHARGE); |
---|
630 | 706 | } |
---|
631 | | - count_vm_event(THP_FILE_ALLOC); |
---|
| 707 | + goto error; |
---|
632 | 708 | } |
---|
633 | | - } else if (!expected) { |
---|
634 | | - error = radix_tree_insert(&mapping->i_pages, index, page); |
---|
635 | | - } else { |
---|
636 | | - error = shmem_radix_tree_replace(mapping, index, expected, |
---|
637 | | - page); |
---|
| 709 | + } |
---|
| 710 | + cgroup_throttle_swaprate(page, gfp); |
---|
| 711 | + |
---|
| 712 | + do { |
---|
| 713 | + void *entry; |
---|
| 714 | + xas_lock_irq(&xas); |
---|
| 715 | + entry = xas_find_conflict(&xas); |
---|
| 716 | + if (entry != expected) |
---|
| 717 | + xas_set_err(&xas, -EEXIST); |
---|
| 718 | + xas_create_range(&xas); |
---|
| 719 | + if (xas_error(&xas)) |
---|
| 720 | + goto unlock; |
---|
| 721 | +next: |
---|
| 722 | + xas_store(&xas, page); |
---|
| 723 | + if (++i < nr) { |
---|
| 724 | + xas_next(&xas); |
---|
| 725 | + goto next; |
---|
| 726 | + } |
---|
| 727 | + if (PageTransHuge(page)) { |
---|
| 728 | + count_vm_event(THP_FILE_ALLOC); |
---|
| 729 | + __inc_node_page_state(page, NR_SHMEM_THPS); |
---|
| 730 | + } |
---|
| 731 | + mapping->nrpages += nr; |
---|
| 732 | + __mod_lruvec_page_state(page, NR_FILE_PAGES, nr); |
---|
| 733 | + __mod_lruvec_page_state(page, NR_SHMEM, nr); |
---|
| 734 | +unlock: |
---|
| 735 | + xas_unlock_irq(&xas); |
---|
| 736 | + } while (xas_nomem(&xas, gfp)); |
---|
| 737 | + |
---|
| 738 | + if (xas_error(&xas)) { |
---|
| 739 | + error = xas_error(&xas); |
---|
| 740 | + goto error; |
---|
638 | 741 | } |
---|
639 | 742 | |
---|
640 | | - if (!error) { |
---|
641 | | - mapping->nrpages += nr; |
---|
642 | | - if (PageTransHuge(page)) |
---|
643 | | - __inc_node_page_state(page, NR_SHMEM_THPS); |
---|
644 | | - __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); |
---|
645 | | - __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr); |
---|
646 | | - xa_unlock_irq(&mapping->i_pages); |
---|
647 | | - } else { |
---|
648 | | - page->mapping = NULL; |
---|
649 | | - xa_unlock_irq(&mapping->i_pages); |
---|
650 | | - page_ref_sub(page, nr); |
---|
651 | | - } |
---|
| 743 | + return 0; |
---|
| 744 | +error: |
---|
| 745 | + page->mapping = NULL; |
---|
| 746 | + page_ref_sub(page, nr); |
---|
652 | 747 | return error; |
---|
653 | 748 | } |
---|
654 | 749 | |
---|
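
shmem_add_to_page_cache() now charges the memcg itself and inserts all compound_nr(page) subpages under one XA_STATE_ORDER cursor: it first checks that nothing unexpected occupies the range, then stores the page into every slot, retrying the whole transaction if xas_nomem() has to allocate. A userspace model of that check-then-fill step over a plain array (the add_to_cache() helper is hypothetical, not the kernel XArray API):

```c
/* Model of the new insertion loop: verify that every slot in
 * [index, index + nr) holds only the expected entry, then publish the
 * page in all nr slots. */
#include <stddef.h>
#include <stdio.h>

#define CACHE_SLOTS 16

static void *page_cache[CACHE_SLOTS];	/* models mapping->i_pages */

static int add_to_cache(void *page, size_t index, size_t nr, void *expected)
{
	/* xas_find_conflict() analogue: any unexpected entry aborts. */
	for (size_t i = index; i < index + nr; i++)
		if (page_cache[i] && page_cache[i] != expected)
			return -1;		/* -EEXIST in the kernel */

	/* xas_store() loop analogue: fill every subpage slot. */
	for (size_t i = index; i < index + nr; i++)
		page_cache[i] = page;
	return 0;
}

int main(void)
{
	int dummy_page;

	/* A "huge" page of order 2 covers 4 consecutive indices. */
	if (add_to_cache(&dummy_page, 4, 1 << 2, NULL) == 0)
		printf("inserted 4 slots starting at index 4\n");
	return 0;
}
```

In the kernel, "expected" is either NULL or the swap placeholder being replaced, and a conflicting entry makes the whole insertion fail with -EEXIST before any slot is written.
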
.. | .. |
---|
663 | 758 | VM_BUG_ON_PAGE(PageCompound(page), page); |
---|
664 | 759 | |
---|
665 | 760 | xa_lock_irq(&mapping->i_pages); |
---|
666 | | - error = shmem_radix_tree_replace(mapping, page->index, page, radswap); |
---|
| 761 | + error = shmem_replace_entry(mapping, page->index, page, radswap); |
---|
667 | 762 | page->mapping = NULL; |
---|
668 | 763 | mapping->nrpages--; |
---|
669 | | - __dec_node_page_state(page, NR_FILE_PAGES); |
---|
670 | | - __dec_node_page_state(page, NR_SHMEM); |
---|
| 764 | + __dec_lruvec_page_state(page, NR_FILE_PAGES); |
---|
| 765 | + __dec_lruvec_page_state(page, NR_SHMEM); |
---|
671 | 766 | xa_unlock_irq(&mapping->i_pages); |
---|
672 | 767 | put_page(page); |
---|
673 | 768 | BUG_ON(error); |
---|
674 | 769 | } |
---|
675 | 770 | |
---|
676 | 771 | /* |
---|
677 | | - * Remove swap entry from radix tree, free the swap and its page cache. |
---|
| 772 | + * Remove swap entry from page cache, free the swap and its page cache. |
---|
678 | 773 | */ |
---|
679 | 774 | static int shmem_free_swap(struct address_space *mapping, |
---|
680 | 775 | pgoff_t index, void *radswap) |
---|
681 | 776 | { |
---|
682 | 777 | void *old; |
---|
683 | 778 | |
---|
684 | | - xa_lock_irq(&mapping->i_pages); |
---|
685 | | - old = radix_tree_delete_item(&mapping->i_pages, index, radswap); |
---|
686 | | - xa_unlock_irq(&mapping->i_pages); |
---|
| 779 | + old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); |
---|
687 | 780 | if (old != radswap) |
---|
688 | 781 | return -ENOENT; |
---|
689 | 782 | free_swap_and_cache(radix_to_swp_entry(radswap)); |
---|
.. | .. |
---|
700 | 793 | unsigned long shmem_partial_swap_usage(struct address_space *mapping, |
---|
701 | 794 | pgoff_t start, pgoff_t end) |
---|
702 | 795 | { |
---|
703 | | - struct radix_tree_iter iter; |
---|
704 | | - void __rcu **slot; |
---|
| 796 | + XA_STATE(xas, &mapping->i_pages, start); |
---|
705 | 797 | struct page *page; |
---|
706 | 798 | unsigned long swapped = 0; |
---|
707 | 799 | |
---|
708 | 800 | rcu_read_lock(); |
---|
709 | | - |
---|
710 | | - radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { |
---|
711 | | - if (iter.index >= end) |
---|
712 | | - break; |
---|
713 | | - |
---|
714 | | - page = radix_tree_deref_slot(slot); |
---|
715 | | - |
---|
716 | | - if (radix_tree_deref_retry(page)) { |
---|
717 | | - slot = radix_tree_iter_retry(&iter); |
---|
| 801 | + xas_for_each(&xas, page, end - 1) { |
---|
| 802 | + if (xas_retry(&xas, page)) |
---|
718 | 803 | continue; |
---|
719 | | - } |
---|
720 | | - |
---|
721 | | - if (radix_tree_exceptional_entry(page)) |
---|
| 804 | + if (xa_is_value(page)) |
---|
722 | 805 | swapped++; |
---|
723 | 806 | |
---|
724 | 807 | if (need_resched()) { |
---|
725 | | - slot = radix_tree_iter_resume(slot, &iter); |
---|
| 808 | + xas_pause(&xas); |
---|
726 | 809 | cond_resched_rcu(); |
---|
727 | 810 | } |
---|
728 | 811 | } |
---|
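
The conversions above lean on one property of the XArray: swap placeholders are stored as "value entries", which xa_is_value() recognizes by bit 0 being set, while real struct page pointers are word-aligned and always have bit 0 clear. A small userspace illustration of that tagging trick (mk_value()/is_value()/to_value() are stand-ins for the kernel helpers, not the helpers themselves):

```c
/* How a swap placeholder is told apart from a page pointer: value
 * entries keep bit 0 set, pointers never do. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void *mk_value(unsigned long v)		/* like xa_mk_value() */
{
	return (void *)((v << 1) | 1);
}

static bool is_value(const void *entry)		/* like xa_is_value() */
{
	return (uintptr_t)entry & 1;
}

static unsigned long to_value(const void *entry)	/* like xa_to_value() */
{
	return (uintptr_t)entry >> 1;
}

int main(void)
{
	int page;	/* stands in for a struct page */
	void *slots[] = { &page, mk_value(42), mk_value(7), &page };
	unsigned long swapped = 0;

	/* Same counting idea as shmem_partial_swap_usage() above. */
	for (size_t i = 0; i < sizeof(slots) / sizeof(slots[0]); i++)
		if (is_value(slots[i]))
			swapped++;

	printf("swapped entries: %lu (first value = %lu)\n",
	       swapped, to_value(slots[1]));
	return 0;
}
```

This is why shmem_partial_swap_usage() can count swapped-out pages without dereferencing anything: it only inspects the tag bit.
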
.. | .. |
---|
797 | 880 | } |
---|
798 | 881 | |
---|
799 | 882 | /* |
---|
800 | | - * Remove range of pages and swap entries from radix tree, and free them. |
---|
| 883 | + * Check whether a hole-punch or truncation needs to split a huge page, |
---|
| 884 | + * returning true if no split was required, or the split has been successful. |
---|
| 885 | + * |
---|
| 886 | + * Eviction (or truncation to 0 size) should never need to split a huge page; |
---|
| 887 | + * but in rare cases might do so, if shmem_undo_range() failed to trylock on |
---|
| 888 | + * head, and then succeeded to trylock on tail. |
---|
| 889 | + * |
---|
| 890 | + * A split can only succeed when there are no additional references on the |
---|
| 891 | + * huge page: so the split below relies upon find_get_entries() having stopped |
---|
| 892 | + * when it found a subpage of the huge page, without getting further references. |
---|
| 893 | + */ |
---|
| 894 | +static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end) |
---|
| 895 | +{ |
---|
| 896 | + if (!PageTransCompound(page)) |
---|
| 897 | + return true; |
---|
| 898 | + |
---|
| 899 | + /* Just proceed to delete a huge page wholly within the range punched */ |
---|
| 900 | + if (PageHead(page) && |
---|
| 901 | + page->index >= start && page->index + HPAGE_PMD_NR <= end) |
---|
| 902 | + return true; |
---|
| 903 | + |
---|
| 904 | + /* Try to split huge page, so we can truly punch the hole or truncate */ |
---|
| 905 | + return split_huge_page(page) >= 0; |
---|
| 906 | +} |
---|
| 907 | + |
---|
| 908 | +/* |
---|
| 909 | + * Remove range of pages and swap entries from page cache, and free them. |
---|
801 | 910 | * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. |
---|
802 | 911 | */ |
---|
803 | 912 | static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, |
---|
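
shmem_punch_compound(), added above, only lets truncation delete a huge page outright when the whole of it lies inside the punched range; otherwise it falls back to split_huge_page(). The range test restated as plain C (HPAGE_PMD_NR value assumed for x86-64 with 4 KiB pages; illustrative only, not part of the patch):

```c
/* A head page can be deleted wholesale only if all HPAGE_PMD_NR
 * subpages fall inside the punched range [start, end). */
#include <stdbool.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* subpages per PMD-sized huge page, x86-64/4K */

static bool huge_page_within_range(unsigned long head_index,
				   unsigned long start, unsigned long end)
{
	return head_index >= start && head_index + HPAGE_PMD_NR <= end;
}

int main(void)
{
	/* Punching [0, 1024) fully covers a huge page headed at 512 ... */
	printf("%d\n", huge_page_within_range(512, 0, 1024));	/* 1 */
	/* ... but not one headed at 768, which would need splitting. */
	printf("%d\n", huge_page_within_range(768, 0, 1024));	/* 0 */
	return 0;
}
```
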
.. | .. |
---|
833 | 942 | if (index >= end) |
---|
834 | 943 | break; |
---|
835 | 944 | |
---|
836 | | - if (radix_tree_exceptional_entry(page)) { |
---|
| 945 | + if (xa_is_value(page)) { |
---|
837 | 946 | if (unfalloc) |
---|
838 | 947 | continue; |
---|
839 | 948 | nr_swaps_freed += !shmem_free_swap(mapping, |
---|
.. | .. |
---|
846 | 955 | if (!trylock_page(page)) |
---|
847 | 956 | continue; |
---|
848 | 957 | |
---|
849 | | - if (PageTransTail(page)) { |
---|
850 | | - /* Middle of THP: zero out the page */ |
---|
851 | | - clear_highpage(page); |
---|
852 | | - unlock_page(page); |
---|
853 | | - continue; |
---|
854 | | - } else if (PageTransHuge(page)) { |
---|
855 | | - if (index == round_down(end, HPAGE_PMD_NR)) { |
---|
856 | | - /* |
---|
857 | | - * Range ends in the middle of THP: |
---|
858 | | - * zero out the page |
---|
859 | | - */ |
---|
860 | | - clear_highpage(page); |
---|
861 | | - unlock_page(page); |
---|
862 | | - continue; |
---|
863 | | - } |
---|
864 | | - index += HPAGE_PMD_NR - 1; |
---|
865 | | - i += HPAGE_PMD_NR - 1; |
---|
866 | | - } |
---|
867 | | - |
---|
868 | | - if (!unfalloc || !PageUptodate(page)) { |
---|
869 | | - VM_BUG_ON_PAGE(PageTail(page), page); |
---|
870 | | - if (page_mapping(page) == mapping) { |
---|
871 | | - VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
| 958 | + if ((!unfalloc || !PageUptodate(page)) && |
---|
| 959 | + page_mapping(page) == mapping) { |
---|
| 960 | + VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
| 961 | + if (shmem_punch_compound(page, start, end)) |
---|
872 | 962 | truncate_inode_page(mapping, page); |
---|
873 | | - } |
---|
874 | 963 | } |
---|
875 | 964 | unlock_page(page); |
---|
876 | 965 | } |
---|
.. | .. |
---|
930 | 1019 | if (index >= end) |
---|
931 | 1020 | break; |
---|
932 | 1021 | |
---|
933 | | - if (radix_tree_exceptional_entry(page)) { |
---|
| 1022 | + if (xa_is_value(page)) { |
---|
934 | 1023 | if (unfalloc) |
---|
935 | 1024 | continue; |
---|
936 | 1025 | if (shmem_free_swap(mapping, index, page)) { |
---|
.. | .. |
---|
944 | 1033 | |
---|
945 | 1034 | lock_page(page); |
---|
946 | 1035 | |
---|
947 | | - if (PageTransTail(page)) { |
---|
948 | | - /* Middle of THP: zero out the page */ |
---|
949 | | - clear_highpage(page); |
---|
950 | | - unlock_page(page); |
---|
951 | | - /* |
---|
952 | | - * Partial thp truncate due 'start' in middle |
---|
953 | | - * of THP: don't need to look on these pages |
---|
954 | | - * again on !pvec.nr restart. |
---|
955 | | - */ |
---|
956 | | - if (index != round_down(end, HPAGE_PMD_NR)) |
---|
957 | | - start++; |
---|
958 | | - continue; |
---|
959 | | - } else if (PageTransHuge(page)) { |
---|
960 | | - if (index == round_down(end, HPAGE_PMD_NR)) { |
---|
961 | | - /* |
---|
962 | | - * Range ends in the middle of THP: |
---|
963 | | - * zero out the page |
---|
964 | | - */ |
---|
965 | | - clear_highpage(page); |
---|
966 | | - unlock_page(page); |
---|
967 | | - continue; |
---|
968 | | - } |
---|
969 | | - index += HPAGE_PMD_NR - 1; |
---|
970 | | - i += HPAGE_PMD_NR - 1; |
---|
971 | | - } |
---|
972 | | - |
---|
973 | 1036 | if (!unfalloc || !PageUptodate(page)) { |
---|
974 | | - VM_BUG_ON_PAGE(PageTail(page), page); |
---|
975 | | - if (page_mapping(page) == mapping) { |
---|
976 | | - VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
977 | | - truncate_inode_page(mapping, page); |
---|
978 | | - } else { |
---|
| 1037 | + if (page_mapping(page) != mapping) { |
---|
979 | 1038 | /* Page was replaced by swap: retry */ |
---|
980 | 1039 | unlock_page(page); |
---|
981 | 1040 | index--; |
---|
982 | 1041 | break; |
---|
| 1042 | + } |
---|
| 1043 | + VM_BUG_ON_PAGE(PageWriteback(page), page); |
---|
| 1044 | + if (shmem_punch_compound(page, start, end)) |
---|
| 1045 | + truncate_inode_page(mapping, page); |
---|
| 1046 | + else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { |
---|
| 1047 | + /* Wipe the page and don't get stuck */ |
---|
| 1048 | + clear_highpage(page); |
---|
| 1049 | + flush_dcache_page(page); |
---|
| 1050 | + set_page_dirty(page); |
---|
| 1051 | + if (index < |
---|
| 1052 | + round_up(start, HPAGE_PMD_NR)) |
---|
| 1053 | + start = index + 1; |
---|
983 | 1054 | } |
---|
984 | 1055 | } |
---|
985 | 1056 | unlock_page(page); |
---|
.. | .. |
---|
1067 | 1138 | * Part of the huge page can be beyond i_size: subject |
---|
1068 | 1139 | * to shrink under memory pressure. |
---|
1069 | 1140 | */ |
---|
1070 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { |
---|
| 1141 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { |
---|
1071 | 1142 | spin_lock(&sbinfo->shrinklist_lock); |
---|
1072 | 1143 | /* |
---|
1073 | 1144 | * _careful to defend against unlocked access to |
---|
.. | .. |
---|
1106 | 1177 | } |
---|
1107 | 1178 | spin_unlock(&sbinfo->shrinklist_lock); |
---|
1108 | 1179 | } |
---|
1109 | | - if (!list_empty(&info->swaplist)) { |
---|
| 1180 | + while (!list_empty(&info->swaplist)) { |
---|
| 1181 | + /* Wait while shmem_unuse() is scanning this inode... */ |
---|
| 1182 | + wait_var_event(&info->stop_eviction, |
---|
| 1183 | + !atomic_read(&info->stop_eviction)); |
---|
1110 | 1184 | mutex_lock(&shmem_swaplist_mutex); |
---|
1111 | | - list_del_init(&info->swaplist); |
---|
| 1185 | + /* ...but beware of the race if we peeked too early */ |
---|
| 1186 | + if (!atomic_read(&info->stop_eviction)) |
---|
| 1187 | + list_del_init(&info->swaplist); |
---|
1112 | 1188 | mutex_unlock(&shmem_swaplist_mutex); |
---|
1113 | 1189 | } |
---|
1114 | 1190 | } |
---|
.. | .. |
---|
1119 | 1195 | clear_inode(inode); |
---|
1120 | 1196 | } |
---|
1121 | 1197 | |
---|
1122 | | -static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) |
---|
| 1198 | +extern struct swap_info_struct *swap_info[]; |
---|
| 1199 | + |
---|
| 1200 | +static int shmem_find_swap_entries(struct address_space *mapping, |
---|
| 1201 | + pgoff_t start, unsigned int nr_entries, |
---|
| 1202 | + struct page **entries, pgoff_t *indices, |
---|
| 1203 | + unsigned int type, bool frontswap) |
---|
1123 | 1204 | { |
---|
1124 | | - struct radix_tree_iter iter; |
---|
1125 | | - void __rcu **slot; |
---|
1126 | | - unsigned long found = -1; |
---|
1127 | | - unsigned int checked = 0; |
---|
| 1205 | + XA_STATE(xas, &mapping->i_pages, start); |
---|
| 1206 | + struct page *page; |
---|
| 1207 | + swp_entry_t entry; |
---|
| 1208 | + unsigned int ret = 0; |
---|
| 1209 | + |
---|
| 1210 | + if (!nr_entries) |
---|
| 1211 | + return 0; |
---|
1128 | 1212 | |
---|
1129 | 1213 | rcu_read_lock(); |
---|
1130 | | - radix_tree_for_each_slot(slot, root, &iter, 0) { |
---|
1131 | | - void *entry = radix_tree_deref_slot(slot); |
---|
1132 | | - |
---|
1133 | | - if (radix_tree_deref_retry(entry)) { |
---|
1134 | | - slot = radix_tree_iter_retry(&iter); |
---|
| 1214 | + xas_for_each(&xas, page, ULONG_MAX) { |
---|
| 1215 | + if (xas_retry(&xas, page)) |
---|
1135 | 1216 | continue; |
---|
| 1217 | + |
---|
| 1218 | + if (!xa_is_value(page)) |
---|
| 1219 | + continue; |
---|
| 1220 | + |
---|
| 1221 | + entry = radix_to_swp_entry(page); |
---|
| 1222 | + if (swp_type(entry) != type) |
---|
| 1223 | + continue; |
---|
| 1224 | + if (frontswap && |
---|
| 1225 | + !frontswap_test(swap_info[type], swp_offset(entry))) |
---|
| 1226 | + continue; |
---|
| 1227 | + |
---|
| 1228 | + indices[ret] = xas.xa_index; |
---|
| 1229 | + entries[ret] = page; |
---|
| 1230 | + |
---|
| 1231 | + if (need_resched()) { |
---|
| 1232 | + xas_pause(&xas); |
---|
| 1233 | + cond_resched_rcu(); |
---|
1136 | 1234 | } |
---|
1137 | | - if (entry == item) { |
---|
1138 | | - found = iter.index; |
---|
| 1235 | + if (++ret == nr_entries) |
---|
1139 | 1236 | break; |
---|
1140 | | - } |
---|
1141 | | - checked++; |
---|
1142 | | - if ((checked % 4096) != 0) |
---|
1143 | | - continue; |
---|
1144 | | - slot = radix_tree_iter_resume(slot, &iter); |
---|
1145 | | - cond_resched_rcu(); |
---|
1146 | 1237 | } |
---|
1147 | | - |
---|
1148 | 1238 | rcu_read_unlock(); |
---|
1149 | | - return found; |
---|
| 1239 | + |
---|
| 1240 | + return ret; |
---|
| 1241 | +} |
---|
| 1242 | + |
---|
| 1243 | +/* |
---|
| 1244 | + * Move the swapped pages for an inode to page cache. Returns the count |
---|
| 1245 | + * of pages swapped in, or the error in case of failure. |
---|
| 1246 | + */ |
---|
| 1247 | +static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, |
---|
| 1248 | + pgoff_t *indices) |
---|
| 1249 | +{ |
---|
| 1250 | + int i = 0; |
---|
| 1251 | + int ret = 0; |
---|
| 1252 | + int error = 0; |
---|
| 1253 | + struct address_space *mapping = inode->i_mapping; |
---|
| 1254 | + |
---|
| 1255 | + for (i = 0; i < pvec.nr; i++) { |
---|
| 1256 | + struct page *page = pvec.pages[i]; |
---|
| 1257 | + |
---|
| 1258 | + if (!xa_is_value(page)) |
---|
| 1259 | + continue; |
---|
| 1260 | + error = shmem_swapin_page(inode, indices[i], |
---|
| 1261 | + &page, SGP_CACHE, |
---|
| 1262 | + mapping_gfp_mask(mapping), |
---|
| 1263 | + NULL, NULL); |
---|
| 1264 | + if (error == 0) { |
---|
| 1265 | + unlock_page(page); |
---|
| 1266 | + put_page(page); |
---|
| 1267 | + ret++; |
---|
| 1268 | + } |
---|
| 1269 | + if (error == -ENOMEM) |
---|
| 1270 | + break; |
---|
| 1271 | + error = 0; |
---|
| 1272 | + } |
---|
| 1273 | + return error ? error : ret; |
---|
1150 | 1274 | } |
---|
1151 | 1275 | |
---|
1152 | 1276 | /* |
---|
1153 | 1277 | * If swap found in inode, free it and move page from swapcache to filecache. |
---|
1154 | 1278 | */ |
---|
1155 | | -static int shmem_unuse_inode(struct shmem_inode_info *info, |
---|
1156 | | - swp_entry_t swap, struct page **pagep) |
---|
| 1279 | +static int shmem_unuse_inode(struct inode *inode, unsigned int type, |
---|
| 1280 | + bool frontswap, unsigned long *fs_pages_to_unuse) |
---|
1157 | 1281 | { |
---|
1158 | | - struct address_space *mapping = info->vfs_inode.i_mapping; |
---|
1159 | | - void *radswap; |
---|
1160 | | - pgoff_t index; |
---|
1161 | | - gfp_t gfp; |
---|
1162 | | - int error = 0; |
---|
| 1282 | + struct address_space *mapping = inode->i_mapping; |
---|
| 1283 | + pgoff_t start = 0; |
---|
| 1284 | + struct pagevec pvec; |
---|
| 1285 | + pgoff_t indices[PAGEVEC_SIZE]; |
---|
| 1286 | + bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0); |
---|
| 1287 | + int ret = 0; |
---|
1163 | 1288 | |
---|
1164 | | - radswap = swp_to_radix_entry(swap); |
---|
1165 | | - index = find_swap_entry(&mapping->i_pages, radswap); |
---|
1166 | | - if (index == -1) |
---|
1167 | | - return -EAGAIN; /* tell shmem_unuse we found nothing */ |
---|
| 1289 | + pagevec_init(&pvec); |
---|
| 1290 | + do { |
---|
| 1291 | + unsigned int nr_entries = PAGEVEC_SIZE; |
---|
1168 | 1292 | |
---|
1169 | | - /* |
---|
1170 | | - * Move _head_ to start search for next from here. |
---|
1171 | | - * But be careful: shmem_evict_inode checks list_empty without taking |
---|
1172 | | - * mutex, and there's an instant in list_move_tail when info->swaplist |
---|
1173 | | - * would appear empty, if it were the only one on shmem_swaplist. |
---|
1174 | | - */ |
---|
1175 | | - if (shmem_swaplist.next != &info->swaplist) |
---|
1176 | | - list_move_tail(&shmem_swaplist, &info->swaplist); |
---|
| 1293 | + if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE) |
---|
| 1294 | + nr_entries = *fs_pages_to_unuse; |
---|
1177 | 1295 | |
---|
1178 | | - gfp = mapping_gfp_mask(mapping); |
---|
1179 | | - if (shmem_should_replace_page(*pagep, gfp)) { |
---|
1180 | | - mutex_unlock(&shmem_swaplist_mutex); |
---|
1181 | | - error = shmem_replace_page(pagep, gfp, info, index); |
---|
1182 | | - mutex_lock(&shmem_swaplist_mutex); |
---|
1183 | | - /* |
---|
1184 | | - * We needed to drop mutex to make that restrictive page |
---|
1185 | | - * allocation, but the inode might have been freed while we |
---|
1186 | | - * dropped it: although a racing shmem_evict_inode() cannot |
---|
1187 | | - * complete without emptying the radix_tree, our page lock |
---|
1188 | | - * on this swapcache page is not enough to prevent that - |
---|
1189 | | - * free_swap_and_cache() of our swap entry will only |
---|
1190 | | - * trylock_page(), removing swap from radix_tree whatever. |
---|
1191 | | - * |
---|
1192 | | - * We must not proceed to shmem_add_to_page_cache() if the |
---|
1193 | | - * inode has been freed, but of course we cannot rely on |
---|
1194 | | - * inode or mapping or info to check that. However, we can |
---|
1195 | | - * safely check if our swap entry is still in use (and here |
---|
1196 | | - * it can't have got reused for another page): if it's still |
---|
1197 | | - * in use, then the inode cannot have been freed yet, and we |
---|
1198 | | - * can safely proceed (if it's no longer in use, that tells |
---|
1199 | | - * nothing about the inode, but we don't need to unuse swap). |
---|
1200 | | - */ |
---|
1201 | | - if (!page_swapcount(*pagep)) |
---|
1202 | | - error = -ENOENT; |
---|
1203 | | - } |
---|
1204 | | - |
---|
1205 | | - /* |
---|
1206 | | - * We rely on shmem_swaplist_mutex, not only to protect the swaplist, |
---|
1207 | | - * but also to hold up shmem_evict_inode(): so inode cannot be freed |
---|
1208 | | - * beneath us (pagelock doesn't help until the page is in pagecache). |
---|
1209 | | - */ |
---|
1210 | | - if (!error) |
---|
1211 | | - error = shmem_add_to_page_cache(*pagep, mapping, index, |
---|
1212 | | - radswap); |
---|
1213 | | - if (error != -ENOMEM) { |
---|
1214 | | - /* |
---|
1215 | | - * Truncation and eviction use free_swap_and_cache(), which |
---|
1216 | | - * only does trylock page: if we raced, best clean up here. |
---|
1217 | | - */ |
---|
1218 | | - delete_from_swap_cache(*pagep); |
---|
1219 | | - set_page_dirty(*pagep); |
---|
1220 | | - if (!error) { |
---|
1221 | | - spin_lock_irq(&info->lock); |
---|
1222 | | - info->swapped--; |
---|
1223 | | - spin_unlock_irq(&info->lock); |
---|
1224 | | - swap_free(swap); |
---|
| 1296 | + pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, |
---|
| 1297 | + pvec.pages, indices, |
---|
| 1298 | + type, frontswap); |
---|
| 1299 | + if (pvec.nr == 0) { |
---|
| 1300 | + ret = 0; |
---|
| 1301 | + break; |
---|
1225 | 1302 | } |
---|
1226 | | - } |
---|
1227 | | - return error; |
---|
| 1303 | + |
---|
| 1304 | + ret = shmem_unuse_swap_entries(inode, pvec, indices); |
---|
| 1305 | + if (ret < 0) |
---|
| 1306 | + break; |
---|
| 1307 | + |
---|
| 1308 | + if (frontswap_partial) { |
---|
| 1309 | + *fs_pages_to_unuse -= ret; |
---|
| 1310 | + if (*fs_pages_to_unuse == 0) { |
---|
| 1311 | + ret = FRONTSWAP_PAGES_UNUSED; |
---|
| 1312 | + break; |
---|
| 1313 | + } |
---|
| 1314 | + } |
---|
| 1315 | + |
---|
| 1316 | + start = indices[pvec.nr - 1]; |
---|
| 1317 | + } while (true); |
---|
| 1318 | + |
---|
| 1319 | + return ret; |
---|
1228 | 1320 | } |
---|
1229 | 1321 | |
---|
1230 | 1322 | /* |
---|
1231 | | - * Search through swapped inodes to find and replace swap by page. |
---|
| 1323 | + * Read all the shared memory data that resides in the swap |
---|
| 1324 | + * device 'type' back into memory, so the swap device can be |
---|
| 1325 | + * unused. |
---|
1232 | 1326 | */ |
---|
1233 | | -int shmem_unuse(swp_entry_t swap, struct page *page) |
---|
| 1327 | +int shmem_unuse(unsigned int type, bool frontswap, |
---|
| 1328 | + unsigned long *fs_pages_to_unuse) |
---|
1234 | 1329 | { |
---|
1235 | | - struct list_head *this, *next; |
---|
1236 | | - struct shmem_inode_info *info; |
---|
1237 | | - struct mem_cgroup *memcg; |
---|
| 1330 | + struct shmem_inode_info *info, *next; |
---|
1238 | 1331 | int error = 0; |
---|
1239 | 1332 | |
---|
1240 | | - /* |
---|
1241 | | - * There's a faint possibility that swap page was replaced before |
---|
1242 | | - * caller locked it: caller will come back later with the right page. |
---|
1243 | | - */ |
---|
1244 | | - if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val)) |
---|
1245 | | - goto out; |
---|
1246 | | - |
---|
1247 | | - /* |
---|
1248 | | - * Charge page using GFP_KERNEL while we can wait, before taking |
---|
1249 | | - * the shmem_swaplist_mutex which might hold up shmem_writepage(). |
---|
1250 | | - * Charged back to the user (not to caller) when swap account is used. |
---|
1251 | | - */ |
---|
1252 | | - error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL, |
---|
1253 | | - &memcg, false); |
---|
1254 | | - if (error) |
---|
1255 | | - goto out; |
---|
1256 | | - /* No radix_tree_preload: swap entry keeps a place for page in tree */ |
---|
1257 | | - error = -EAGAIN; |
---|
| 1333 | + if (list_empty(&shmem_swaplist)) |
---|
| 1334 | + return 0; |
---|
1258 | 1335 | |
---|
1259 | 1336 | mutex_lock(&shmem_swaplist_mutex); |
---|
1260 | | - list_for_each_safe(this, next, &shmem_swaplist) { |
---|
1261 | | - info = list_entry(this, struct shmem_inode_info, swaplist); |
---|
1262 | | - if (info->swapped) |
---|
1263 | | - error = shmem_unuse_inode(info, swap, &page); |
---|
1264 | | - else |
---|
| 1337 | + list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { |
---|
| 1338 | + if (!info->swapped) { |
---|
1265 | 1339 | list_del_init(&info->swaplist); |
---|
| 1340 | + continue; |
---|
| 1341 | + } |
---|
| 1342 | + /* |
---|
| 1343 | + * Drop the swaplist mutex while searching the inode for swap; |
---|
| 1344 | + * but before doing so, make sure shmem_evict_inode() will not |
---|
| 1345 | + * remove placeholder inode from swaplist, nor let it be freed |
---|
| 1346 | + * (igrab() would protect from unlink, but not from unmount). |
---|
| 1347 | + */ |
---|
| 1348 | + atomic_inc(&info->stop_eviction); |
---|
| 1349 | + mutex_unlock(&shmem_swaplist_mutex); |
---|
| 1350 | + |
---|
| 1351 | + error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, |
---|
| 1352 | + fs_pages_to_unuse); |
---|
1266 | 1353 | cond_resched(); |
---|
1267 | | - if (error != -EAGAIN) |
---|
| 1354 | + |
---|
| 1355 | + mutex_lock(&shmem_swaplist_mutex); |
---|
| 1356 | + next = list_next_entry(info, swaplist); |
---|
| 1357 | + if (!info->swapped) |
---|
| 1358 | + list_del_init(&info->swaplist); |
---|
| 1359 | + if (atomic_dec_and_test(&info->stop_eviction)) |
---|
| 1360 | + wake_up_var(&info->stop_eviction); |
---|
| 1361 | + if (error) |
---|
1268 | 1362 | break; |
---|
1269 | | - /* found nothing in this: move on to search the next */ |
---|
1270 | 1363 | } |
---|
1271 | 1364 | mutex_unlock(&shmem_swaplist_mutex); |
---|
1272 | 1365 | |
---|
1273 | | - if (error) { |
---|
1274 | | - if (error != -ENOMEM) |
---|
1275 | | - error = 0; |
---|
1276 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
1277 | | - } else |
---|
1278 | | - mem_cgroup_commit_charge(page, memcg, true, false); |
---|
1279 | | -out: |
---|
1280 | | - unlock_page(page); |
---|
1281 | | - put_page(page); |
---|
1282 | 1366 | return error; |
---|
1283 | 1367 | } |
---|
1284 | 1368 | |
---|
.. | .. |
---|
1348 | 1432 | SetPageUptodate(page); |
---|
1349 | 1433 | } |
---|
1350 | 1434 | |
---|
| 1435 | + trace_android_vh_set_shmem_page_flag(page); |
---|
1351 | 1436 | swap = get_swap_page(page); |
---|
1352 | 1437 | if (!swap.val) |
---|
1353 | 1438 | goto redirty; |
---|
.. | .. |
---|
1362 | 1447 | */ |
---|
1363 | 1448 | mutex_lock(&shmem_swaplist_mutex); |
---|
1364 | 1449 | if (list_empty(&info->swaplist)) |
---|
1365 | | - list_add_tail(&info->swaplist, &shmem_swaplist); |
---|
| 1450 | + list_add(&info->swaplist, &shmem_swaplist); |
---|
1366 | 1451 | |
---|
1367 | | - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { |
---|
| 1452 | + if (add_to_swap_cache(page, swap, |
---|
| 1453 | + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, |
---|
| 1454 | + NULL) == 0) { |
---|
1368 | 1455 | spin_lock_irq(&info->lock); |
---|
1369 | 1456 | shmem_recalc_inode(inode); |
---|
1370 | 1457 | info->swapped++; |
---|
.. | .. |
---|
1406 | 1493 | { |
---|
1407 | 1494 | struct mempolicy *mpol = NULL; |
---|
1408 | 1495 | if (sbinfo->mpol) { |
---|
1409 | | - spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ |
---|
| 1496 | + raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ |
---|
1410 | 1497 | mpol = sbinfo->mpol; |
---|
1411 | 1498 | mpol_get(mpol); |
---|
1412 | | - spin_unlock(&sbinfo->stat_lock); |
---|
| 1499 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
1413 | 1500 | } |
---|
1414 | 1501 | return mpol; |
---|
1415 | 1502 | } |
---|
.. | .. |
---|
1447 | 1534 | { |
---|
1448 | 1535 | struct vm_area_struct pvma; |
---|
1449 | 1536 | struct page *page; |
---|
1450 | | - struct vm_fault vmf; |
---|
| 1537 | + struct vm_fault vmf = { |
---|
| 1538 | + .vma = &pvma, |
---|
| 1539 | + }; |
---|
1451 | 1540 | |
---|
1452 | 1541 | shmem_pseudo_vma_init(&pvma, info, index); |
---|
1453 | | - vmf.vma = &pvma; |
---|
1454 | | - vmf.address = 0; |
---|
1455 | 1542 | page = swap_cluster_readahead(swap, gfp, &vmf); |
---|
1456 | 1543 | shmem_pseudo_vma_destroy(&pvma); |
---|
1457 | 1544 | |
---|
.. | .. |
---|
1462 | 1549 | struct shmem_inode_info *info, pgoff_t index) |
---|
1463 | 1550 | { |
---|
1464 | 1551 | struct vm_area_struct pvma; |
---|
1465 | | - struct inode *inode = &info->vfs_inode; |
---|
1466 | | - struct address_space *mapping = inode->i_mapping; |
---|
1467 | | - pgoff_t idx, hindex; |
---|
1468 | | - void __rcu **results; |
---|
| 1552 | + struct address_space *mapping = info->vfs_inode.i_mapping; |
---|
| 1553 | + pgoff_t hindex; |
---|
1469 | 1554 | struct page *page; |
---|
1470 | 1555 | |
---|
1471 | | - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) |
---|
1472 | | - return NULL; |
---|
1473 | | - |
---|
1474 | 1556 | hindex = round_down(index, HPAGE_PMD_NR); |
---|
1475 | | - rcu_read_lock(); |
---|
1476 | | - if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx, |
---|
1477 | | - hindex, 1) && idx < hindex + HPAGE_PMD_NR) { |
---|
1478 | | - rcu_read_unlock(); |
---|
| 1557 | + if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, |
---|
| 1558 | + XA_PRESENT)) |
---|
1479 | 1559 | return NULL; |
---|
1480 | | - } |
---|
1481 | | - rcu_read_unlock(); |
---|
1482 | 1560 | |
---|
1483 | 1561 | shmem_pseudo_vma_init(&pvma, info, hindex); |
---|
1484 | 1562 | page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, |
---|
.. | .. |
---|
1486 | 1564 | shmem_pseudo_vma_destroy(&pvma); |
---|
1487 | 1565 | if (page) |
---|
1488 | 1566 | prep_transhuge_page(page); |
---|
| 1567 | + else |
---|
| 1568 | + count_vm_event(THP_FILE_FALLBACK); |
---|
1489 | 1569 | return page; |
---|
1490 | 1570 | } |
---|
1491 | 1571 | |
---|
.. | .. |
---|
1493 | 1573 | struct shmem_inode_info *info, pgoff_t index) |
---|
1494 | 1574 | { |
---|
1495 | 1575 | struct vm_area_struct pvma; |
---|
1496 | | - struct page *page; |
---|
| 1576 | + struct page *page = NULL; |
---|
| 1577 | + |
---|
| 1578 | + trace_android_vh_shmem_alloc_page(&page); |
---|
| 1579 | + if (page) |
---|
| 1580 | + return page; |
---|
1497 | 1581 | |
---|
1498 | 1582 | shmem_pseudo_vma_init(&pvma, info, index); |
---|
1499 | 1583 | page = alloc_page_vma(gfp, &pvma, 0); |
---|
.. | .. |
---|
1511 | 1595 | int nr; |
---|
1512 | 1596 | int err = -ENOSPC; |
---|
1513 | 1597 | |
---|
1514 | | - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) |
---|
| 1598 | + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) |
---|
1515 | 1599 | huge = false; |
---|
1516 | 1600 | nr = huge ? HPAGE_PMD_NR : 1; |
---|
1517 | 1601 | |
---|
.. | .. |
---|
1589 | 1673 | * a nice clean interface for us to replace oldpage by newpage there. |
---|
1590 | 1674 | */ |
---|
1591 | 1675 | xa_lock_irq(&swap_mapping->i_pages); |
---|
1592 | | - error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, |
---|
1593 | | - newpage); |
---|
| 1676 | + error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); |
---|
1594 | 1677 | if (!error) { |
---|
1595 | | - __inc_node_page_state(newpage, NR_FILE_PAGES); |
---|
1596 | | - __dec_node_page_state(oldpage, NR_FILE_PAGES); |
---|
| 1678 | + mem_cgroup_migrate(oldpage, newpage); |
---|
| 1679 | + __inc_lruvec_page_state(newpage, NR_FILE_PAGES); |
---|
| 1680 | + __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); |
---|
1597 | 1681 | } |
---|
1598 | 1682 | xa_unlock_irq(&swap_mapping->i_pages); |
---|
1599 | 1683 | |
---|
.. | .. |
---|
1605 | 1689 | */ |
---|
1606 | 1690 | oldpage = newpage; |
---|
1607 | 1691 | } else { |
---|
1608 | | - mem_cgroup_migrate(oldpage, newpage); |
---|
1609 | | - lru_cache_add_anon(newpage); |
---|
| 1692 | + lru_cache_add(newpage); |
---|
1610 | 1693 | *pagep = newpage; |
---|
1611 | 1694 | } |
---|
1612 | 1695 | |
---|
.. | .. |
---|
1620 | 1703 | } |
---|
1621 | 1704 | |
---|
1622 | 1705 | /* |
---|
| 1706 | + * Swap in the page pointed to by *pagep. |
---|
| 1707 | + * Caller has to make sure that *pagep contains a valid swapped page. |
---|
| 1708 | + * Returns 0 and the page in pagep if success. On failure, returns the |
---|
| 1709 | + * error code and NULL in *pagep. |
---|
| 1710 | + */ |
---|
| 1711 | +static int shmem_swapin_page(struct inode *inode, pgoff_t index, |
---|
| 1712 | + struct page **pagep, enum sgp_type sgp, |
---|
| 1713 | + gfp_t gfp, struct vm_area_struct *vma, |
---|
| 1714 | + vm_fault_t *fault_type) |
---|
| 1715 | +{ |
---|
| 1716 | + struct address_space *mapping = inode->i_mapping; |
---|
| 1717 | + struct shmem_inode_info *info = SHMEM_I(inode); |
---|
| 1718 | + struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; |
---|
| 1719 | + struct page *page; |
---|
| 1720 | + swp_entry_t swap; |
---|
| 1721 | + int error; |
---|
| 1722 | + |
---|
| 1723 | + VM_BUG_ON(!*pagep || !xa_is_value(*pagep)); |
---|
| 1724 | + swap = radix_to_swp_entry(*pagep); |
---|
| 1725 | + *pagep = NULL; |
---|
| 1726 | + |
---|
| 1727 | + /* Look it up and read it in.. */ |
---|
| 1728 | + page = lookup_swap_cache(swap, NULL, 0); |
---|
| 1729 | + if (!page) { |
---|
| 1730 | + /* Or update major stats only when swapin succeeds?? */ |
---|
| 1731 | + if (fault_type) { |
---|
| 1732 | + *fault_type |= VM_FAULT_MAJOR; |
---|
| 1733 | + count_vm_event(PGMAJFAULT); |
---|
| 1734 | + count_memcg_event_mm(charge_mm, PGMAJFAULT); |
---|
| 1735 | + } |
---|
| 1736 | + /* Here we actually start the io */ |
---|
| 1737 | + page = shmem_swapin(swap, gfp, info, index); |
---|
| 1738 | + if (!page) { |
---|
| 1739 | + error = -ENOMEM; |
---|
| 1740 | + goto failed; |
---|
| 1741 | + } |
---|
| 1742 | + } |
---|
| 1743 | + |
---|
| 1744 | + /* We have to do this with page locked to prevent races */ |
---|
| 1745 | + lock_page(page); |
---|
| 1746 | + if (!PageSwapCache(page) || page_private(page) != swap.val || |
---|
| 1747 | + !shmem_confirm_swap(mapping, index, swap)) { |
---|
| 1748 | + error = -EEXIST; |
---|
| 1749 | + goto unlock; |
---|
| 1750 | + } |
---|
| 1751 | + if (!PageUptodate(page)) { |
---|
| 1752 | + error = -EIO; |
---|
| 1753 | + goto failed; |
---|
| 1754 | + } |
---|
| 1755 | + wait_on_page_writeback(page); |
---|
| 1756 | + |
---|
| 1757 | + /* |
---|
| 1758 | + * Some architectures may have to restore extra metadata to the |
---|
| 1759 | + * physical page after reading from swap. |
---|
| 1760 | + */ |
---|
| 1761 | + arch_swap_restore(swap, page); |
---|
| 1762 | + |
---|
| 1763 | + if (shmem_should_replace_page(page, gfp)) { |
---|
| 1764 | + error = shmem_replace_page(&page, gfp, info, index); |
---|
| 1765 | + if (error) |
---|
| 1766 | + goto failed; |
---|
| 1767 | + } |
---|
| 1768 | + |
---|
| 1769 | + error = shmem_add_to_page_cache(page, mapping, index, |
---|
| 1770 | + swp_to_radix_entry(swap), gfp, |
---|
| 1771 | + charge_mm); |
---|
| 1772 | + if (error) |
---|
| 1773 | + goto failed; |
---|
| 1774 | + |
---|
| 1775 | + spin_lock_irq(&info->lock); |
---|
| 1776 | + info->swapped--; |
---|
| 1777 | + shmem_recalc_inode(inode); |
---|
| 1778 | + spin_unlock_irq(&info->lock); |
---|
| 1779 | + |
---|
| 1780 | + if (sgp == SGP_WRITE) |
---|
| 1781 | + mark_page_accessed(page); |
---|
| 1782 | + |
---|
| 1783 | + delete_from_swap_cache(page); |
---|
| 1784 | + set_page_dirty(page); |
---|
| 1785 | + swap_free(swap); |
---|
| 1786 | + |
---|
| 1787 | + *pagep = page; |
---|
| 1788 | + return 0; |
---|
| 1789 | +failed: |
---|
| 1790 | + if (!shmem_confirm_swap(mapping, index, swap)) |
---|
| 1791 | + error = -EEXIST; |
---|
| 1792 | +unlock: |
---|
| 1793 | + if (page) { |
---|
| 1794 | + unlock_page(page); |
---|
| 1795 | + put_page(page); |
---|
| 1796 | + } |
---|
| 1797 | + |
---|
| 1798 | + return error; |
---|
| 1799 | +} |
---|
| 1800 | + |
---|
| 1801 | +/* |
---|
1623 | 1802 | * shmem_getpage_gfp - find page in cache, or get from swap, or allocate |
---|
1624 | 1803 | * |
---|
1625 | 1804 | * If we allocate a new one we do not mark it dirty. That's up to the |
---|
1626 | 1805 | * vm. If we swap it in we mark it dirty since we also free the swap |
---|
1627 | 1806 | * entry since a page cannot live in both the swap and page cache. |
---|
1628 | 1807 | * |
---|
1629 | | - * fault_mm and fault_type are only supplied by shmem_fault: |
---|
| 1808 | + * vma, vmf, and fault_type are only supplied by shmem_fault: |
---|
1630 | 1809 | * otherwise they are NULL. |
---|
1631 | 1810 | */ |
---|
1632 | 1811 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
---|
.. | .. |
---|
1638 | 1817 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
1639 | 1818 | struct shmem_sb_info *sbinfo; |
---|
1640 | 1819 | struct mm_struct *charge_mm; |
---|
1641 | | - struct mem_cgroup *memcg; |
---|
1642 | 1820 | struct page *page; |
---|
1643 | | - swp_entry_t swap; |
---|
1644 | 1821 | enum sgp_type sgp_huge = sgp; |
---|
1645 | 1822 | pgoff_t hindex = index; |
---|
1646 | 1823 | int error; |
---|
.. | .. |
---|
1652 | 1829 | if (sgp == SGP_NOHUGE || sgp == SGP_HUGE) |
---|
1653 | 1830 | sgp = SGP_CACHE; |
---|
1654 | 1831 | repeat: |
---|
1655 | | - swap.val = 0; |
---|
1656 | | - page = find_lock_entry(mapping, index); |
---|
1657 | | - if (radix_tree_exceptional_entry(page)) { |
---|
1658 | | - swap = radix_to_swp_entry(page); |
---|
1659 | | - page = NULL; |
---|
1660 | | - } |
---|
1661 | | - |
---|
1662 | 1832 | if (sgp <= SGP_CACHE && |
---|
1663 | 1833 | ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { |
---|
1664 | | - error = -EINVAL; |
---|
1665 | | - goto unlock; |
---|
| 1834 | + return -EINVAL; |
---|
1666 | 1835 | } |
---|
1667 | 1836 | |
---|
| 1837 | + sbinfo = SHMEM_SB(inode->i_sb); |
---|
| 1838 | + charge_mm = vma ? vma->vm_mm : current->mm; |
---|
| 1839 | + |
---|
| 1840 | + page = find_lock_entry(mapping, index); |
---|
| 1841 | + |
---|
| 1842 | + if (page && vma && userfaultfd_minor(vma)) { |
---|
| 1843 | + if (!xa_is_value(page)) { |
---|
| 1844 | + unlock_page(page); |
---|
| 1845 | + put_page(page); |
---|
| 1846 | + } |
---|
| 1847 | + *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); |
---|
| 1848 | + return 0; |
---|
| 1849 | + } |
---|
| 1850 | + |
---|
| 1851 | + if (xa_is_value(page)) { |
---|
| 1852 | + error = shmem_swapin_page(inode, index, &page, |
---|
| 1853 | + sgp, gfp, vma, fault_type); |
---|
| 1854 | + if (error == -EEXIST) |
---|
| 1855 | + goto repeat; |
---|
| 1856 | + |
---|
| 1857 | + *pagep = page; |
---|
| 1858 | + return error; |
---|
| 1859 | + } |
---|
| 1860 | + |
---|
| 1861 | + if (page) |
---|
| 1862 | + hindex = page->index; |
---|
1668 | 1863 | if (page && sgp == SGP_WRITE) |
---|
1669 | 1864 | mark_page_accessed(page); |
---|
1670 | 1865 | |
---|
.. | .. |
---|
1675 | 1870 | unlock_page(page); |
---|
1676 | 1871 | put_page(page); |
---|
1677 | 1872 | page = NULL; |
---|
| 1873 | + hindex = index; |
---|
1678 | 1874 | } |
---|
1679 | | - if (page || (sgp == SGP_READ && !swap.val)) { |
---|
1680 | | - *pagep = page; |
---|
1681 | | - return 0; |
---|
1682 | | - } |
---|
| 1875 | + if (page || sgp == SGP_READ) |
---|
| 1876 | + goto out; |
---|
1683 | 1877 | |
---|
1684 | 1878 | /* |
---|
1685 | 1879 | * Fast cache lookup did not find it: |
---|
1686 | 1880 | * bring it back from swap or allocate. |
---|
1687 | 1881 | */ |
---|
1688 | | - sbinfo = SHMEM_SB(inode->i_sb); |
---|
1689 | | - charge_mm = vma ? vma->vm_mm : current->mm; |
---|
1690 | 1882 | |
---|
1691 | | - if (swap.val) { |
---|
1692 | | - /* Look it up and read it in.. */ |
---|
1693 | | - page = lookup_swap_cache(swap, NULL, 0); |
---|
1694 | | - if (!page) { |
---|
1695 | | - /* Or update major stats only when swapin succeeds?? */ |
---|
1696 | | - if (fault_type) { |
---|
1697 | | - *fault_type |= VM_FAULT_MAJOR; |
---|
1698 | | - count_vm_event(PGMAJFAULT); |
---|
1699 | | - count_memcg_event_mm(charge_mm, PGMAJFAULT); |
---|
1700 | | - } |
---|
1701 | | - /* Here we actually start the io */ |
---|
1702 | | - page = shmem_swapin(swap, gfp, info, index); |
---|
1703 | | - if (!page) { |
---|
1704 | | - error = -ENOMEM; |
---|
1705 | | - goto failed; |
---|
1706 | | - } |
---|
1707 | | - } |
---|
| 1883 | + if (vma && userfaultfd_missing(vma)) { |
---|
| 1884 | + *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); |
---|
| 1885 | + return 0; |
---|
| 1886 | + } |
---|
1708 | 1887 | |
---|
1709 | | - /* We have to do this with page locked to prevent races */ |
---|
1710 | | - lock_page(page); |
---|
1711 | | - if (!PageSwapCache(page) || page_private(page) != swap.val || |
---|
1712 | | - !shmem_confirm_swap(mapping, index, swap)) { |
---|
1713 | | - error = -EEXIST; /* try again */ |
---|
1714 | | - goto unlock; |
---|
1715 | | - } |
---|
1716 | | - if (!PageUptodate(page)) { |
---|
1717 | | - error = -EIO; |
---|
1718 | | - goto failed; |
---|
1719 | | - } |
---|
1720 | | - wait_on_page_writeback(page); |
---|
| 1888 | + /* shmem_symlink() */ |
---|
| 1889 | + if (mapping->a_ops != &shmem_aops) |
---|
| 1890 | + goto alloc_nohuge; |
---|
| 1891 | + if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE) |
---|
| 1892 | + goto alloc_nohuge; |
---|
| 1893 | + if (shmem_huge == SHMEM_HUGE_FORCE) |
---|
| 1894 | + goto alloc_huge; |
---|
| 1895 | + switch (sbinfo->huge) { |
---|
| 1896 | + case SHMEM_HUGE_NEVER: |
---|
| 1897 | + goto alloc_nohuge; |
---|
| 1898 | + case SHMEM_HUGE_WITHIN_SIZE: { |
---|
| 1899 | + loff_t i_size; |
---|
| 1900 | + pgoff_t off; |
---|
1721 | 1901 | |
---|
1722 | | - if (shmem_should_replace_page(page, gfp)) { |
---|
1723 | | - error = shmem_replace_page(&page, gfp, info, index); |
---|
1724 | | - if (error) |
---|
1725 | | - goto failed; |
---|
1726 | | - } |
---|
1727 | | - |
---|
1728 | | - error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, |
---|
1729 | | - false); |
---|
1730 | | - if (!error) { |
---|
1731 | | - error = shmem_add_to_page_cache(page, mapping, index, |
---|
1732 | | - swp_to_radix_entry(swap)); |
---|
1733 | | - /* |
---|
1734 | | - * We already confirmed swap under page lock, and make |
---|
1735 | | - * no memory allocation here, so usually no possibility |
---|
1736 | | - * of error; but free_swap_and_cache() only trylocks a |
---|
1737 | | - * page, so it is just possible that the entry has been |
---|
1738 | | - * truncated or holepunched since swap was confirmed. |
---|
1739 | | - * shmem_undo_range() will have done some of the |
---|
1740 | | - * unaccounting, now delete_from_swap_cache() will do |
---|
1741 | | - * the rest. |
---|
1742 | | - * Reset swap.val? No, leave it so "failed" goes back to |
---|
1743 | | - * "repeat": reading a hole and writing should succeed. |
---|
1744 | | - */ |
---|
1745 | | - if (error) { |
---|
1746 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
1747 | | - delete_from_swap_cache(page); |
---|
1748 | | - } |
---|
1749 | | - } |
---|
1750 | | - if (error) |
---|
1751 | | - goto failed; |
---|
1752 | | - |
---|
1753 | | - mem_cgroup_commit_charge(page, memcg, true, false); |
---|
1754 | | - |
---|
1755 | | - spin_lock_irq(&info->lock); |
---|
1756 | | - info->swapped--; |
---|
1757 | | - shmem_recalc_inode(inode); |
---|
1758 | | - spin_unlock_irq(&info->lock); |
---|
1759 | | - |
---|
1760 | | - if (sgp == SGP_WRITE) |
---|
1761 | | - mark_page_accessed(page); |
---|
1762 | | - |
---|
1763 | | - delete_from_swap_cache(page); |
---|
1764 | | - set_page_dirty(page); |
---|
1765 | | - swap_free(swap); |
---|
1766 | | - |
---|
1767 | | - } else { |
---|
1768 | | - if (vma && userfaultfd_missing(vma)) { |
---|
1769 | | - *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); |
---|
1770 | | - return 0; |
---|
1771 | | - } |
---|
1772 | | - |
---|
1773 | | - /* shmem_symlink() */ |
---|
1774 | | - if (mapping->a_ops != &shmem_aops) |
---|
1775 | | - goto alloc_nohuge; |
---|
1776 | | - if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE) |
---|
1777 | | - goto alloc_nohuge; |
---|
1778 | | - if (shmem_huge == SHMEM_HUGE_FORCE) |
---|
| 1902 | + off = round_up(index, HPAGE_PMD_NR); |
---|
| 1903 | + i_size = round_up(i_size_read(inode), PAGE_SIZE); |
---|
| 1904 | + if (i_size >= HPAGE_PMD_SIZE && |
---|
| 1905 | + i_size >> PAGE_SHIFT >= off) |
---|
1779 | 1906 | goto alloc_huge; |
---|
1780 | | - switch (sbinfo->huge) { |
---|
1781 | | - loff_t i_size; |
---|
1782 | | - pgoff_t off; |
---|
1783 | | - case SHMEM_HUGE_NEVER: |
---|
1784 | | - goto alloc_nohuge; |
---|
1785 | | - case SHMEM_HUGE_WITHIN_SIZE: |
---|
1786 | | - off = round_up(index, HPAGE_PMD_NR); |
---|
1787 | | - i_size = round_up(i_size_read(inode), PAGE_SIZE); |
---|
1788 | | - if (i_size >= HPAGE_PMD_SIZE && |
---|
1789 | | - i_size >> PAGE_SHIFT >= off) |
---|
1790 | | - goto alloc_huge; |
---|
1791 | | - /* fallthrough */ |
---|
1792 | | - case SHMEM_HUGE_ADVISE: |
---|
1793 | | - if (sgp_huge == SGP_HUGE) |
---|
1794 | | - goto alloc_huge; |
---|
1795 | | - /* TODO: implement fadvise() hints */ |
---|
1796 | | - goto alloc_nohuge; |
---|
1797 | | - } |
---|
| 1907 | + |
---|
| 1908 | + fallthrough; |
---|
| 1909 | + } |
---|
| 1910 | + case SHMEM_HUGE_ADVISE: |
---|
| 1911 | + if (sgp_huge == SGP_HUGE) |
---|
| 1912 | + goto alloc_huge; |
---|
| 1913 | + /* TODO: implement fadvise() hints */ |
---|
| 1914 | + goto alloc_nohuge; |
---|
| 1915 | + } |
---|
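The SHMEM_HUGE_WITHIN_SIZE case above takes the huge path only when i_size, rounded up to a page, reaches at least the faulting index rounded up to the next huge-page boundary. A minimal userspace sketch of that arithmetic (not kernel code; it assumes 4 KiB pages and 2 MiB PMD-sized huge pages, so the HPAGE_NR_ value of 512 and all names below are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE_	4096ULL
#define PAGE_SHIFT_	12
#define HPAGE_NR_	512ULL			/* pages per PMD huge page (assumed) */
#define HPAGE_SIZE_	(HPAGE_NR_ * PAGE_SIZE_)

/* Round x up to the next multiple of a; a must be a power of two. */
static unsigned long long round_up_pow2(unsigned long long x, unsigned long long a)
{
	return (x + a - 1) & ~(a - 1);
}

/* Mirrors the SHMEM_HUGE_WITHIN_SIZE test shown in the hunk above. */
static bool within_size_allows_huge(unsigned long long index, unsigned long long i_size)
{
	unsigned long long off = round_up_pow2(index, HPAGE_NR_);
	unsigned long long size = round_up_pow2(i_size, PAGE_SIZE_);

	return size >= HPAGE_SIZE_ && (size >> PAGE_SHIFT_) >= off;
}

int main(void)
{
	/* A 3 MiB file covers 768 pages: index 0 qualifies for a huge page,
	 * index 600 does not (it rounds up to 1024, beyond page 768). */
	printf("%d %d\n", within_size_allows_huge(0, 3ULL << 20),
			  within_size_allows_huge(600, 3ULL << 20));
	return 0;
}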
1798 | 1916 | |
---|
1799 | 1917 | alloc_huge: |
---|
1800 | | - page = shmem_alloc_and_acct_page(gfp, inode, index, true); |
---|
1801 | | - if (IS_ERR(page)) { |
---|
1802 | | -alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, |
---|
1803 | | - index, false); |
---|
1804 | | - } |
---|
1805 | | - if (IS_ERR(page)) { |
---|
1806 | | - int retry = 5; |
---|
1807 | | - error = PTR_ERR(page); |
---|
1808 | | - page = NULL; |
---|
1809 | | - if (error != -ENOSPC) |
---|
1810 | | - goto failed; |
---|
1811 | | - /* |
---|
1812 | | - * Try to reclaim some spece by splitting a huge page |
---|
1813 | | - * beyond i_size on the filesystem. |
---|
1814 | | - */ |
---|
1815 | | - while (retry--) { |
---|
1816 | | - int ret; |
---|
1817 | | - ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); |
---|
1818 | | - if (ret == SHRINK_STOP) |
---|
1819 | | - break; |
---|
1820 | | - if (ret) |
---|
1821 | | - goto alloc_nohuge; |
---|
1822 | | - } |
---|
1823 | | - goto failed; |
---|
1824 | | - } |
---|
| 1918 | + page = shmem_alloc_and_acct_page(gfp, inode, index, true); |
---|
| 1919 | + if (IS_ERR(page)) { |
---|
| 1920 | +alloc_nohuge: |
---|
| 1921 | + page = shmem_alloc_and_acct_page(gfp, inode, |
---|
| 1922 | + index, false); |
---|
| 1923 | + } |
---|
| 1924 | + if (IS_ERR(page)) { |
---|
| 1925 | + int retry = 5; |
---|
1825 | 1926 | |
---|
1826 | | - if (PageTransHuge(page)) |
---|
1827 | | - hindex = round_down(index, HPAGE_PMD_NR); |
---|
1828 | | - else |
---|
1829 | | - hindex = index; |
---|
1830 | | - |
---|
1831 | | - if (sgp == SGP_WRITE) |
---|
1832 | | - __SetPageReferenced(page); |
---|
1833 | | - |
---|
1834 | | - error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, |
---|
1835 | | - PageTransHuge(page)); |
---|
1836 | | - if (error) |
---|
1837 | | - goto unacct; |
---|
1838 | | - error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK, |
---|
1839 | | - compound_order(page)); |
---|
1840 | | - if (!error) { |
---|
1841 | | - error = shmem_add_to_page_cache(page, mapping, hindex, |
---|
1842 | | - NULL); |
---|
1843 | | - radix_tree_preload_end(); |
---|
1844 | | - } |
---|
1845 | | - if (error) { |
---|
1846 | | - mem_cgroup_cancel_charge(page, memcg, |
---|
1847 | | - PageTransHuge(page)); |
---|
1848 | | - goto unacct; |
---|
1849 | | - } |
---|
1850 | | - mem_cgroup_commit_charge(page, memcg, false, |
---|
1851 | | - PageTransHuge(page)); |
---|
1852 | | - lru_cache_add_anon(page); |
---|
1853 | | - |
---|
1854 | | - spin_lock_irq(&info->lock); |
---|
1855 | | - info->alloced += 1 << compound_order(page); |
---|
1856 | | - inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); |
---|
1857 | | - shmem_recalc_inode(inode); |
---|
1858 | | - spin_unlock_irq(&info->lock); |
---|
1859 | | - alloced = true; |
---|
1860 | | - |
---|
1861 | | - if (PageTransHuge(page) && |
---|
1862 | | - DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < |
---|
1863 | | - hindex + HPAGE_PMD_NR - 1) { |
---|
1864 | | - /* |
---|
1865 | | - * Part of the huge page is beyond i_size: subject |
---|
1866 | | - * to shrink under memory pressure. |
---|
1867 | | - */ |
---|
1868 | | - spin_lock(&sbinfo->shrinklist_lock); |
---|
1869 | | - /* |
---|
1870 | | - * _careful to defend against unlocked access to |
---|
1871 | | - * ->shrink_list in shmem_unused_huge_shrink() |
---|
1872 | | - */ |
---|
1873 | | - if (list_empty_careful(&info->shrinklist)) { |
---|
1874 | | - list_add_tail(&info->shrinklist, |
---|
1875 | | - &sbinfo->shrinklist); |
---|
1876 | | - sbinfo->shrinklist_len++; |
---|
1877 | | - } |
---|
1878 | | - spin_unlock(&sbinfo->shrinklist_lock); |
---|
1879 | | - } |
---|
1880 | | - |
---|
| 1927 | + error = PTR_ERR(page); |
---|
| 1928 | + page = NULL; |
---|
| 1929 | + if (error != -ENOSPC) |
---|
| 1930 | + goto unlock; |
---|
1881 | 1931 | /* |
---|
1882 | | - * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. |
---|
| 1932 | + * Try to reclaim some space by splitting a huge page |
---|
| 1933 | + * beyond i_size on the filesystem. |
---|
1883 | 1934 | */ |
---|
1884 | | - if (sgp == SGP_FALLOC) |
---|
1885 | | - sgp = SGP_WRITE; |
---|
| 1935 | + while (retry--) { |
---|
| 1936 | + int ret; |
---|
| 1937 | + |
---|
| 1938 | + ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); |
---|
| 1939 | + if (ret == SHRINK_STOP) |
---|
| 1940 | + break; |
---|
| 1941 | + if (ret) |
---|
| 1942 | + goto alloc_nohuge; |
---|
| 1943 | + } |
---|
| 1944 | + goto unlock; |
---|
| 1945 | + } |
---|
| 1946 | + |
---|
| 1947 | + if (PageTransHuge(page)) |
---|
| 1948 | + hindex = round_down(index, HPAGE_PMD_NR); |
---|
| 1949 | + else |
---|
| 1950 | + hindex = index; |
---|
| 1951 | + |
---|
| 1952 | + if (sgp == SGP_WRITE) |
---|
| 1953 | + __SetPageReferenced(page); |
---|
| 1954 | + |
---|
| 1955 | + error = shmem_add_to_page_cache(page, mapping, hindex, |
---|
| 1956 | + NULL, gfp & GFP_RECLAIM_MASK, |
---|
| 1957 | + charge_mm); |
---|
| 1958 | + if (error) |
---|
| 1959 | + goto unacct; |
---|
| 1960 | + lru_cache_add(page); |
---|
| 1961 | + |
---|
| 1962 | + spin_lock_irq(&info->lock); |
---|
| 1963 | + info->alloced += compound_nr(page); |
---|
| 1964 | + inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); |
---|
| 1965 | + shmem_recalc_inode(inode); |
---|
| 1966 | + spin_unlock_irq(&info->lock); |
---|
| 1967 | + alloced = true; |
---|
| 1968 | + |
---|
| 1969 | + if (PageTransHuge(page) && |
---|
| 1970 | + DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < |
---|
| 1971 | + hindex + HPAGE_PMD_NR - 1) { |
---|
| 1972 | + /* |
---|
| 1973 | + * Part of the huge page is beyond i_size: subject |
---|
| 1974 | + * to shrink under memory pressure. |
---|
| 1975 | + */ |
---|
| 1976 | + spin_lock(&sbinfo->shrinklist_lock); |
---|
| 1977 | + /* |
---|
| 1978 | + * _careful to defend against unlocked access to |
---|
| 1979 | + * ->shrink_list in shmem_unused_huge_shrink() |
---|
| 1980 | + */ |
---|
| 1981 | + if (list_empty_careful(&info->shrinklist)) { |
---|
| 1982 | + list_add_tail(&info->shrinklist, |
---|
| 1983 | + &sbinfo->shrinklist); |
---|
| 1984 | + sbinfo->shrinklist_len++; |
---|
| 1985 | + } |
---|
| 1986 | + spin_unlock(&sbinfo->shrinklist_lock); |
---|
| 1987 | + } |
---|
| 1988 | + |
---|
| 1989 | + /* |
---|
| 1990 | + * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. |
---|
| 1991 | + */ |
---|
| 1992 | + if (sgp == SGP_FALLOC) |
---|
| 1993 | + sgp = SGP_WRITE; |
---|
1886 | 1994 | clear: |
---|
1887 | | - /* |
---|
1888 | | - * Let SGP_WRITE caller clear ends if write does not fill page; |
---|
1889 | | - * but SGP_FALLOC on a page fallocated earlier must initialize |
---|
1890 | | - * it now, lest undo on failure cancel our earlier guarantee. |
---|
1891 | | - */ |
---|
1892 | | - if (sgp != SGP_WRITE && !PageUptodate(page)) { |
---|
1893 | | - struct page *head = compound_head(page); |
---|
1894 | | - int i; |
---|
| 1995 | + /* |
---|
| 1996 | + * Let SGP_WRITE caller clear ends if write does not fill page; |
---|
| 1997 | + * but SGP_FALLOC on a page fallocated earlier must initialize |
---|
| 1998 | + * it now, lest undo on failure cancel our earlier guarantee. |
---|
| 1999 | + */ |
---|
| 2000 | + if (sgp != SGP_WRITE && !PageUptodate(page)) { |
---|
| 2001 | + int i; |
---|
1895 | 2002 | |
---|
1896 | | - for (i = 0; i < (1 << compound_order(head)); i++) { |
---|
1897 | | - clear_highpage(head + i); |
---|
1898 | | - flush_dcache_page(head + i); |
---|
1899 | | - } |
---|
1900 | | - SetPageUptodate(head); |
---|
| 2003 | + for (i = 0; i < compound_nr(page); i++) { |
---|
| 2004 | + clear_highpage(page + i); |
---|
| 2005 | + flush_dcache_page(page + i); |
---|
1901 | 2006 | } |
---|
| 2007 | + SetPageUptodate(page); |
---|
1902 | 2008 | } |
---|
1903 | 2009 | |
---|
1904 | 2010 | /* Perhaps the file has been truncated since we checked */ |
---|
.. | .. |
---|
1914 | 2020 | error = -EINVAL; |
---|
1915 | 2021 | goto unlock; |
---|
1916 | 2022 | } |
---|
| 2023 | +out: |
---|
1917 | 2024 | *pagep = page + index - hindex; |
---|
1918 | 2025 | return 0; |
---|
1919 | 2026 | |
---|
.. | .. |
---|
1921 | 2028 | * Error recovery. |
---|
1922 | 2029 | */ |
---|
1923 | 2030 | unacct: |
---|
1924 | | - shmem_inode_unacct_blocks(inode, 1 << compound_order(page)); |
---|
| 2031 | + shmem_inode_unacct_blocks(inode, compound_nr(page)); |
---|
1925 | 2032 | |
---|
1926 | 2033 | if (PageTransHuge(page)) { |
---|
1927 | 2034 | unlock_page(page); |
---|
1928 | 2035 | put_page(page); |
---|
1929 | 2036 | goto alloc_nohuge; |
---|
1930 | 2037 | } |
---|
1931 | | -failed: |
---|
1932 | | - if (swap.val && !shmem_confirm_swap(mapping, index, swap)) |
---|
1933 | | - error = -EEXIST; |
---|
1934 | 2038 | unlock: |
---|
1935 | 2039 | if (page) { |
---|
1936 | 2040 | unlock_page(page); |
---|
.. | .. |
---|
1942 | 2046 | spin_unlock_irq(&info->lock); |
---|
1943 | 2047 | goto repeat; |
---|
1944 | 2048 | } |
---|
1945 | | - if (error == -EEXIST) /* from above or from radix_tree_insert */ |
---|
| 2049 | + if (error == -EEXIST) |
---|
1946 | 2050 | goto repeat; |
---|
1947 | 2051 | return error; |
---|
1948 | 2052 | } |
---|
.. | .. |
---|
1994 | 2098 | shmem_falloc->waitq && |
---|
1995 | 2099 | vmf->pgoff >= shmem_falloc->start && |
---|
1996 | 2100 | vmf->pgoff < shmem_falloc->next) { |
---|
| 2101 | + struct file *fpin; |
---|
1997 | 2102 | wait_queue_head_t *shmem_falloc_waitq; |
---|
1998 | 2103 | DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); |
---|
1999 | 2104 | |
---|
2000 | 2105 | ret = VM_FAULT_NOPAGE; |
---|
2001 | | - if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && |
---|
2002 | | - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { |
---|
2003 | | - /* It's polite to up mmap_sem if we can */ |
---|
2004 | | - up_read(&vma->vm_mm->mmap_sem); |
---|
| 2106 | + fpin = maybe_unlock_mmap_for_io(vmf, NULL); |
---|
| 2107 | + if (fpin) |
---|
2005 | 2108 | ret = VM_FAULT_RETRY; |
---|
2006 | | - } |
---|
2007 | 2109 | |
---|
2008 | 2110 | shmem_falloc_waitq = shmem_falloc->waitq; |
---|
2009 | 2111 | prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, |
---|
.. | .. |
---|
2021 | 2123 | spin_lock(&inode->i_lock); |
---|
2022 | 2124 | finish_wait(shmem_falloc_waitq, &shmem_fault_wait); |
---|
2023 | 2125 | spin_unlock(&inode->i_lock); |
---|
| 2126 | + |
---|
| 2127 | + if (fpin) |
---|
| 2128 | + fput(fpin); |
---|
2024 | 2129 | return ret; |
---|
2025 | 2130 | } |
---|
2026 | 2131 | spin_unlock(&inode->i_lock); |
---|
.. | .. |
---|
2059 | 2164 | get_area = current->mm->get_unmapped_area; |
---|
2060 | 2165 | addr = get_area(file, uaddr, len, pgoff, flags); |
---|
2061 | 2166 | |
---|
2062 | | - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) |
---|
| 2167 | + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) |
---|
2063 | 2168 | return addr; |
---|
2064 | 2169 | if (IS_ERR_VALUE(addr)) |
---|
2065 | 2170 | return addr; |
---|
.. | .. |
---|
2179 | 2284 | static int shmem_mmap(struct file *file, struct vm_area_struct *vma) |
---|
2180 | 2285 | { |
---|
2181 | 2286 | struct shmem_inode_info *info = SHMEM_I(file_inode(file)); |
---|
| 2287 | + int ret; |
---|
2182 | 2288 | |
---|
2183 | | - if (info->seals & F_SEAL_FUTURE_WRITE) { |
---|
2184 | | - /* |
---|
2185 | | - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when |
---|
2186 | | - * "future write" seal active. |
---|
2187 | | - */ |
---|
2188 | | - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) |
---|
2189 | | - return -EPERM; |
---|
| 2289 | + ret = seal_check_future_write(info->seals, vma); |
---|
| 2290 | + if (ret) |
---|
| 2291 | + return ret; |
---|
2190 | 2292 | |
---|
2191 | | - /* |
---|
2192 | | - * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED |
---|
2193 | | - * read-only mapping, take care to not allow mprotect to revert |
---|
2194 | | - * protections. |
---|
2195 | | - */ |
---|
2196 | | - vma->vm_flags &= ~(VM_MAYWRITE); |
---|
2197 | | - } |
---|
| 2293 | + /* arm64 - allow memory tagging on RAM-based files */ |
---|
| 2294 | + vma->vm_flags |= VM_MTE_ALLOWED; |
---|
2198 | 2295 | |
---|
2199 | 2296 | file_accessed(file); |
---|
2200 | 2297 | vma->vm_ops = &shmem_vm_ops; |
---|
2201 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && |
---|
| 2298 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
2202 | 2299 | ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < |
---|
2203 | 2300 | (vma->vm_end & HPAGE_PMD_MASK)) { |
---|
2204 | 2301 | khugepaged_enter(vma, vma->vm_flags); |
---|
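The seal_check_future_write() helper called above (defined outside this hunk) keeps the old F_SEAL_FUTURE_WRITE behaviour: an already-sealed memfd can still be mapped shared read-only (with VM_MAYWRITE cleared so mprotect() cannot re-enable writes), but a new shared writable mapping is refused. A hedged userspace sketch of that contract using the standard memfd API (error handling trimmed):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010	/* older libc headers may lack the define */
#endif

int main(void)
{
	int fd = memfd_create("sealed", MFD_ALLOW_SEALING);

	ftruncate(fd, 4096);
	fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE);

	/* A shared read-only mapping is still permitted ... */
	void *ro = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);

	/* ... but a new shared writable mapping now fails with EPERM. */
	void *rw = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	printf("ro=%p rw_failed=%d\n", ro, rw == MAP_FAILED);
	return 0;
}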
.. | .. |
---|
2212 | 2309 | struct inode *inode; |
---|
2213 | 2310 | struct shmem_inode_info *info; |
---|
2214 | 2311 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
| 2312 | + ino_t ino; |
---|
2215 | 2313 | |
---|
2216 | | - if (shmem_reserve_inode(sb)) |
---|
| 2314 | + if (shmem_reserve_inode(sb, &ino)) |
---|
2217 | 2315 | return NULL; |
---|
2218 | 2316 | |
---|
2219 | 2317 | inode = new_inode(sb); |
---|
2220 | 2318 | if (inode) { |
---|
2221 | | - inode->i_ino = get_next_ino(); |
---|
| 2319 | + inode->i_ino = ino; |
---|
2222 | 2320 | inode_init_owner(inode, dir, mode); |
---|
2223 | 2321 | inode->i_blocks = 0; |
---|
2224 | 2322 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); |
---|
.. | .. |
---|
2226 | 2324 | info = SHMEM_I(inode); |
---|
2227 | 2325 | memset(info, 0, (char *)inode - (char *)info); |
---|
2228 | 2326 | spin_lock_init(&info->lock); |
---|
| 2327 | + atomic_set(&info->stop_eviction, 0); |
---|
2229 | 2328 | info->seals = F_SEAL_SEAL; |
---|
2230 | 2329 | info->flags = flags & VM_NORESERVE; |
---|
2231 | 2330 | INIT_LIST_HEAD(&info->shrinklist); |
---|
.. | .. |
---|
2272 | 2371 | return mapping->a_ops == &shmem_aops; |
---|
2273 | 2372 | } |
---|
2274 | 2373 | |
---|
2275 | | -static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, |
---|
2276 | | - pmd_t *dst_pmd, |
---|
2277 | | - struct vm_area_struct *dst_vma, |
---|
2278 | | - unsigned long dst_addr, |
---|
2279 | | - unsigned long src_addr, |
---|
2280 | | - bool zeropage, |
---|
2281 | | - struct page **pagep) |
---|
| 2374 | +#ifdef CONFIG_USERFAULTFD |
---|
| 2375 | +int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, |
---|
| 2376 | + pmd_t *dst_pmd, |
---|
| 2377 | + struct vm_area_struct *dst_vma, |
---|
| 2378 | + unsigned long dst_addr, |
---|
| 2379 | + unsigned long src_addr, |
---|
| 2380 | + bool zeropage, |
---|
| 2381 | + struct page **pagep) |
---|
2282 | 2382 | { |
---|
2283 | 2383 | struct inode *inode = file_inode(dst_vma->vm_file); |
---|
2284 | 2384 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
2285 | 2385 | struct address_space *mapping = inode->i_mapping; |
---|
2286 | 2386 | gfp_t gfp = mapping_gfp_mask(mapping); |
---|
2287 | 2387 | pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); |
---|
2288 | | - struct mem_cgroup *memcg; |
---|
2289 | | - spinlock_t *ptl; |
---|
2290 | 2388 | void *page_kaddr; |
---|
2291 | 2389 | struct page *page; |
---|
2292 | | - pte_t _dst_pte, *dst_pte; |
---|
2293 | 2390 | int ret; |
---|
2294 | | - pgoff_t offset, max_off; |
---|
| 2391 | + pgoff_t max_off; |
---|
2295 | 2392 | |
---|
2296 | | - ret = -ENOMEM; |
---|
2297 | 2393 | if (!shmem_inode_acct_block(inode, 1)) { |
---|
2298 | 2394 | /* |
---|
2299 | 2395 | * We may have got a page, returned -ENOENT triggering a retry, |
---|
.. | .. |
---|
2304 | 2400 | put_page(*pagep); |
---|
2305 | 2401 | *pagep = NULL; |
---|
2306 | 2402 | } |
---|
2307 | | - goto out; |
---|
| 2403 | + return -ENOMEM; |
---|
2308 | 2404 | } |
---|
2309 | 2405 | |
---|
2310 | 2406 | if (!*pagep) { |
---|
| 2407 | + ret = -ENOMEM; |
---|
2311 | 2408 | page = shmem_alloc_page(gfp, info, pgoff); |
---|
2312 | 2409 | if (!page) |
---|
2313 | 2410 | goto out_unacct_blocks; |
---|
2314 | 2411 | |
---|
2315 | | - if (!zeropage) { /* mcopy_atomic */ |
---|
| 2412 | + if (!zeropage) { /* COPY */ |
---|
2316 | 2413 | page_kaddr = kmap_atomic(page); |
---|
2317 | 2414 | ret = copy_from_user(page_kaddr, |
---|
2318 | 2415 | (const void __user *)src_addr, |
---|
2319 | 2416 | PAGE_SIZE); |
---|
2320 | 2417 | kunmap_atomic(page_kaddr); |
---|
2321 | 2418 | |
---|
2322 | | - /* fallback to copy_from_user outside mmap_sem */ |
---|
| 2419 | + /* fallback to copy_from_user outside mmap_lock */ |
---|
2323 | 2420 | if (unlikely(ret)) { |
---|
2324 | 2421 | *pagep = page; |
---|
2325 | | - shmem_inode_unacct_blocks(inode, 1); |
---|
| 2422 | + ret = -ENOENT; |
---|
2326 | 2423 | /* don't free the page */ |
---|
2327 | | - return -ENOENT; |
---|
| 2424 | + goto out_unacct_blocks; |
---|
2328 | 2425 | } |
---|
2329 | | - } else { /* mfill_zeropage_atomic */ |
---|
| 2426 | + } else { /* ZEROPAGE */ |
---|
2330 | 2427 | clear_highpage(page); |
---|
2331 | 2428 | } |
---|
2332 | 2429 | } else { |
---|
.. | .. |
---|
2334 | 2431 | *pagep = NULL; |
---|
2335 | 2432 | } |
---|
2336 | 2433 | |
---|
2337 | | - VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); |
---|
| 2434 | + VM_BUG_ON(PageLocked(page)); |
---|
| 2435 | + VM_BUG_ON(PageSwapBacked(page)); |
---|
2338 | 2436 | __SetPageLocked(page); |
---|
2339 | 2437 | __SetPageSwapBacked(page); |
---|
2340 | 2438 | __SetPageUptodate(page); |
---|
2341 | 2439 | |
---|
2342 | 2440 | ret = -EFAULT; |
---|
2343 | | - offset = linear_page_index(dst_vma, dst_addr); |
---|
2344 | 2441 | max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
---|
2345 | | - if (unlikely(offset >= max_off)) |
---|
| 2442 | + if (unlikely(pgoff >= max_off)) |
---|
2346 | 2443 | goto out_release; |
---|
2347 | 2444 | |
---|
2348 | | - ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false); |
---|
| 2445 | + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, |
---|
| 2446 | + gfp & GFP_RECLAIM_MASK, dst_mm); |
---|
2349 | 2447 | if (ret) |
---|
2350 | 2448 | goto out_release; |
---|
2351 | 2449 | |
---|
2352 | | - ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); |
---|
2353 | | - if (!ret) { |
---|
2354 | | - ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL); |
---|
2355 | | - radix_tree_preload_end(); |
---|
2356 | | - } |
---|
| 2450 | + ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, |
---|
| 2451 | + page, true, false); |
---|
2357 | 2452 | if (ret) |
---|
2358 | | - goto out_release_uncharge; |
---|
2359 | | - |
---|
2360 | | - mem_cgroup_commit_charge(page, memcg, false, false); |
---|
2361 | | - |
---|
2362 | | - _dst_pte = mk_pte(page, dst_vma->vm_page_prot); |
---|
2363 | | - if (dst_vma->vm_flags & VM_WRITE) |
---|
2364 | | - _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); |
---|
2365 | | - else { |
---|
2366 | | - /* |
---|
2367 | | - * We don't set the pte dirty if the vma has no |
---|
2368 | | - * VM_WRITE permission, so mark the page dirty or it |
---|
2369 | | - * could be freed from under us. We could do it |
---|
2370 | | - * unconditionally before unlock_page(), but doing it |
---|
2371 | | - * only if VM_WRITE is not set is faster. |
---|
2372 | | - */ |
---|
2373 | | - set_page_dirty(page); |
---|
2374 | | - } |
---|
2375 | | - |
---|
2376 | | - dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); |
---|
2377 | | - |
---|
2378 | | - ret = -EFAULT; |
---|
2379 | | - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
---|
2380 | | - if (unlikely(offset >= max_off)) |
---|
2381 | | - goto out_release_uncharge_unlock; |
---|
2382 | | - |
---|
2383 | | - ret = -EEXIST; |
---|
2384 | | - if (!pte_none(*dst_pte)) |
---|
2385 | | - goto out_release_uncharge_unlock; |
---|
2386 | | - |
---|
2387 | | - lru_cache_add_anon(page); |
---|
| 2453 | + goto out_delete_from_cache; |
---|
2388 | 2454 | |
---|
2389 | 2455 | spin_lock_irq(&info->lock); |
---|
2390 | 2456 | info->alloced++; |
---|
.. | .. |
---|
2392 | 2458 | shmem_recalc_inode(inode); |
---|
2393 | 2459 | spin_unlock_irq(&info->lock); |
---|
2394 | 2460 | |
---|
2395 | | - inc_mm_counter(dst_mm, mm_counter_file(page)); |
---|
2396 | | - page_add_file_rmap(page, false); |
---|
2397 | | - set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); |
---|
2398 | | - |
---|
2399 | | - /* No need to invalidate - it was non-present before */ |
---|
2400 | | - update_mmu_cache(dst_vma, dst_addr, dst_pte); |
---|
2401 | | - pte_unmap_unlock(dst_pte, ptl); |
---|
| 2461 | + SetPageDirty(page); |
---|
2402 | 2462 | unlock_page(page); |
---|
2403 | | - ret = 0; |
---|
2404 | | -out: |
---|
2405 | | - return ret; |
---|
2406 | | -out_release_uncharge_unlock: |
---|
2407 | | - pte_unmap_unlock(dst_pte, ptl); |
---|
2408 | | - ClearPageDirty(page); |
---|
| 2463 | + return 0; |
---|
| 2464 | +out_delete_from_cache: |
---|
2409 | 2465 | delete_from_page_cache(page); |
---|
2410 | | -out_release_uncharge: |
---|
2411 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
2412 | 2466 | out_release: |
---|
2413 | 2467 | unlock_page(page); |
---|
2414 | 2468 | put_page(page); |
---|
2415 | 2469 | out_unacct_blocks: |
---|
2416 | 2470 | shmem_inode_unacct_blocks(inode, 1); |
---|
2417 | | - goto out; |
---|
| 2471 | + return ret; |
---|
2418 | 2472 | } |
---|
2419 | | - |
---|
2420 | | -int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, |
---|
2421 | | - pmd_t *dst_pmd, |
---|
2422 | | - struct vm_area_struct *dst_vma, |
---|
2423 | | - unsigned long dst_addr, |
---|
2424 | | - unsigned long src_addr, |
---|
2425 | | - struct page **pagep) |
---|
2426 | | -{ |
---|
2427 | | - return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, |
---|
2428 | | - dst_addr, src_addr, false, pagep); |
---|
2429 | | -} |
---|
2430 | | - |
---|
2431 | | -int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, |
---|
2432 | | - pmd_t *dst_pmd, |
---|
2433 | | - struct vm_area_struct *dst_vma, |
---|
2434 | | - unsigned long dst_addr) |
---|
2435 | | -{ |
---|
2436 | | - struct page *page = NULL; |
---|
2437 | | - |
---|
2438 | | - return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, |
---|
2439 | | - dst_addr, 0, true, &page); |
---|
2440 | | -} |
---|
| 2473 | +#endif /* CONFIG_USERFAULTFD */ |
---|
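shmem_mfill_atomic_pte() above is the shmem backend for userfaultfd's UFFDIO_COPY (zeropage false) and UFFDIO_ZEROPAGE (zeropage true) when the registered range is backed by tmpfs; the -ENOENT return with *pagep kept is the cue to retry the copy_from_user() outside mmap_lock. A trimmed sketch of the userspace side of one UFFDIO_COPY, assuming the region was already registered in missing mode (function and parameter names are illustrative):

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

/*
 * Resolve one missing-page fault at fault_addr by copying a prepared source
 * page into the registered shmem mapping; on tmpfs this is ultimately
 * serviced by shmem_mfill_atomic_pte().
 */
static int resolve_with_copy(int uffd, void *src, unsigned long fault_addr,
			     unsigned long page_size)
{
	struct uffdio_copy copy = {
		.dst	= fault_addr & ~(page_size - 1),
		.src	= (unsigned long)src,
		.len	= page_size,
		.mode	= 0,
	};

	return ioctl(uffd, UFFDIO_COPY, &copy);
}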
2441 | 2474 | |
---|
2442 | 2475 | #ifdef CONFIG_TMPFS |
---|
2443 | 2476 | static const struct inode_operations shmem_symlink_inode_operations; |
---|
.. | .. |
---|
2617 | 2650 | } |
---|
2618 | 2651 | |
---|
2619 | 2652 | /* |
---|
2620 | | - * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. |
---|
| 2653 | + * llseek SEEK_DATA or SEEK_HOLE through the page cache. |
---|
2621 | 2654 | */ |
---|
2622 | 2655 | static pgoff_t shmem_seek_hole_data(struct address_space *mapping, |
---|
2623 | 2656 | pgoff_t index, pgoff_t end, int whence) |
---|
.. | .. |
---|
2647 | 2680 | index = indices[i]; |
---|
2648 | 2681 | } |
---|
2649 | 2682 | page = pvec.pages[i]; |
---|
2650 | | - if (page && !radix_tree_exceptional_entry(page)) { |
---|
| 2683 | + if (page && !xa_is_value(page)) { |
---|
2651 | 2684 | if (!PageUptodate(page)) |
---|
2652 | 2685 | page = NULL; |
---|
2653 | 2686 | } |
---|
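shmem_seek_hole_data() above is what services lseek(SEEK_DATA/SEEK_HOLE) on tmpfs by scanning the page cache for entries that hold data (uptodate pages or swap entries). A hedged userspace sketch, assuming /dev/shm is a tmpfs mount as on most distributions (error handling trimmed):

#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int fd = open("/dev/shm/sparse-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);

	ftruncate(fd, 1 << 20);			/* 1 MiB file, all hole */
	pwrite(fd, "x", 1, 512 * 1024);		/* instantiate one page of data */

	printf("first data at %lld, next hole at %lld\n",
	       (long long)lseek(fd, 0, SEEK_DATA),
	       (long long)lseek(fd, 512 * 1024, SEEK_HOLE));

	close(fd);
	unlink("/dev/shm/sparse-demo");
	return 0;
}

Expected output on a 4 KiB-page system: data starts at 524288 and the next hole begins one page later, at 528384.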
.. | .. |
---|
2943 | 2976 | * first link must skip that, to get the accounting right. |
---|
2944 | 2977 | */ |
---|
2945 | 2978 | if (inode->i_nlink) { |
---|
2946 | | - ret = shmem_reserve_inode(inode->i_sb); |
---|
| 2979 | + ret = shmem_reserve_inode(inode->i_sb, NULL); |
---|
2947 | 2980 | if (ret) |
---|
2948 | 2981 | goto out; |
---|
2949 | 2982 | } |
---|
.. | .. |
---|
3095 | 3128 | |
---|
3096 | 3129 | error = security_inode_init_security(inode, dir, &dentry->d_name, |
---|
3097 | 3130 | shmem_initxattrs, NULL); |
---|
3098 | | - if (error) { |
---|
3099 | | - if (error != -EOPNOTSUPP) { |
---|
3100 | | - iput(inode); |
---|
3101 | | - return error; |
---|
3102 | | - } |
---|
3103 | | - error = 0; |
---|
| 3131 | + if (error && error != -EOPNOTSUPP) { |
---|
| 3132 | + iput(inode); |
---|
| 3133 | + return error; |
---|
3104 | 3134 | } |
---|
3105 | 3135 | |
---|
3106 | 3136 | inode->i_size = len-1; |
---|
.. | .. |
---|
3192 | 3222 | new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, |
---|
3193 | 3223 | GFP_KERNEL); |
---|
3194 | 3224 | if (!new_xattr->name) { |
---|
3195 | | - kfree(new_xattr); |
---|
| 3225 | + kvfree(new_xattr); |
---|
3196 | 3226 | return -ENOMEM; |
---|
3197 | 3227 | } |
---|
3198 | 3228 | |
---|
.. | .. |
---|
3209 | 3239 | |
---|
3210 | 3240 | static int shmem_xattr_handler_get(const struct xattr_handler *handler, |
---|
3211 | 3241 | struct dentry *unused, struct inode *inode, |
---|
3212 | | - const char *name, void *buffer, size_t size) |
---|
| 3242 | + const char *name, void *buffer, size_t size, |
---|
| 3243 | + int flags) |
---|
3213 | 3244 | { |
---|
3214 | 3245 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
3215 | 3246 | |
---|
.. | .. |
---|
3225 | 3256 | struct shmem_inode_info *info = SHMEM_I(inode); |
---|
3226 | 3257 | |
---|
3227 | 3258 | name = xattr_full_name(handler, name); |
---|
3228 | | - return simple_xattr_set(&info->xattrs, name, value, size, flags); |
---|
| 3259 | + return simple_xattr_set(&info->xattrs, name, value, size, flags, NULL); |
---|
3229 | 3260 | } |
---|
3230 | 3261 | |
---|
3231 | 3262 | static const struct xattr_handler shmem_security_xattr_handler = { |
---|
.. | .. |
---|
3352 | 3383 | .fh_to_dentry = shmem_fh_to_dentry, |
---|
3353 | 3384 | }; |
---|
3354 | 3385 | |
---|
3355 | | -static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, |
---|
3356 | | - bool remount) |
---|
| 3386 | +enum shmem_param { |
---|
| 3387 | + Opt_gid, |
---|
| 3388 | + Opt_huge, |
---|
| 3389 | + Opt_mode, |
---|
| 3390 | + Opt_mpol, |
---|
| 3391 | + Opt_nr_blocks, |
---|
| 3392 | + Opt_nr_inodes, |
---|
| 3393 | + Opt_size, |
---|
| 3394 | + Opt_uid, |
---|
| 3395 | + Opt_inode32, |
---|
| 3396 | + Opt_inode64, |
---|
| 3397 | +}; |
---|
| 3398 | + |
---|
| 3399 | +static const struct constant_table shmem_param_enums_huge[] = { |
---|
| 3400 | + {"never", SHMEM_HUGE_NEVER }, |
---|
| 3401 | + {"always", SHMEM_HUGE_ALWAYS }, |
---|
| 3402 | + {"within_size", SHMEM_HUGE_WITHIN_SIZE }, |
---|
| 3403 | + {"advise", SHMEM_HUGE_ADVISE }, |
---|
| 3404 | + {} |
---|
| 3405 | +}; |
---|
| 3406 | + |
---|
| 3407 | +const struct fs_parameter_spec shmem_fs_parameters[] = { |
---|
| 3408 | + fsparam_u32 ("gid", Opt_gid), |
---|
| 3409 | + fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), |
---|
| 3410 | + fsparam_u32oct("mode", Opt_mode), |
---|
| 3411 | + fsparam_string("mpol", Opt_mpol), |
---|
| 3412 | + fsparam_string("nr_blocks", Opt_nr_blocks), |
---|
| 3413 | + fsparam_string("nr_inodes", Opt_nr_inodes), |
---|
| 3414 | + fsparam_string("size", Opt_size), |
---|
| 3415 | + fsparam_u32 ("uid", Opt_uid), |
---|
| 3416 | + fsparam_flag ("inode32", Opt_inode32), |
---|
| 3417 | + fsparam_flag ("inode64", Opt_inode64), |
---|
| 3418 | + {} |
---|
| 3419 | +}; |
---|
| 3420 | + |
---|
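shmem_fs_parameters above is the table fs_parse() matches each mount option against; every recognised key lands in shmem_parse_one() below with a pre-parsed result. A hedged sketch of driving the same options from userspace through the new mount API (fsopen(2)/fsconfig(2)/fsmount(2)/move_mount(2), Linux 5.2+); it assumes a libc that provides these wrappers and constants (glibc >= 2.36), otherwise raw syscall(2) plus <linux/mount.h> is needed, and error handling is trimmed:

#include <sys/mount.h>
#include <fcntl.h>
#include <unistd.h>

int mount_tmpfs(const char *where)
{
	int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);

	/* Each key/value below is parsed by shmem_parse_one() via fs_parse(). */
	fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "50%", 0);
	fsconfig(fsfd, FSCONFIG_SET_STRING, "huge", "within_size", 0);
	fsconfig(fsfd, FSCONFIG_SET_FLAG, "inode64", NULL, 0);

	/* Creation runs shmem_fill_super() with the accumulated shmem_options. */
	fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

	int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
	move_mount(mfd, "", AT_FDCWD, where, MOVE_MOUNT_F_EMPTY_PATH);

	close(mfd);
	close(fsfd);
	return 0;
}

Legacy mount(2) with a comma-separated option string still works: it is split by shmem_parse_options() (the parse_monolithic hook) and fed through the same vfs_parse_fs_string() path.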
| 3421 | +static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) |
---|
3357 | 3422 | { |
---|
3358 | | - char *this_char, *value, *rest; |
---|
3359 | | - struct mempolicy *mpol = NULL; |
---|
3360 | | - uid_t uid; |
---|
3361 | | - gid_t gid; |
---|
| 3423 | + struct shmem_options *ctx = fc->fs_private; |
---|
| 3424 | + struct fs_parse_result result; |
---|
| 3425 | + unsigned long long size; |
---|
| 3426 | + char *rest; |
---|
| 3427 | + int opt; |
---|
| 3428 | + |
---|
| 3429 | + opt = fs_parse(fc, shmem_fs_parameters, param, &result); |
---|
| 3430 | + if (opt < 0) |
---|
| 3431 | + return opt; |
---|
| 3432 | + |
---|
| 3433 | + switch (opt) { |
---|
| 3434 | + case Opt_size: |
---|
| 3435 | + size = memparse(param->string, &rest); |
---|
| 3436 | + if (*rest == '%') { |
---|
| 3437 | + size <<= PAGE_SHIFT; |
---|
| 3438 | + size *= totalram_pages(); |
---|
| 3439 | + do_div(size, 100); |
---|
| 3440 | + rest++; |
---|
| 3441 | + } |
---|
| 3442 | + if (*rest) |
---|
| 3443 | + goto bad_value; |
---|
| 3444 | + ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); |
---|
| 3445 | + ctx->seen |= SHMEM_SEEN_BLOCKS; |
---|
| 3446 | + break; |
---|
| 3447 | + case Opt_nr_blocks: |
---|
| 3448 | + ctx->blocks = memparse(param->string, &rest); |
---|
| 3449 | + if (*rest) |
---|
| 3450 | + goto bad_value; |
---|
| 3451 | + ctx->seen |= SHMEM_SEEN_BLOCKS; |
---|
| 3452 | + break; |
---|
| 3453 | + case Opt_nr_inodes: |
---|
| 3454 | + ctx->inodes = memparse(param->string, &rest); |
---|
| 3455 | + if (*rest) |
---|
| 3456 | + goto bad_value; |
---|
| 3457 | + ctx->seen |= SHMEM_SEEN_INODES; |
---|
| 3458 | + break; |
---|
| 3459 | + case Opt_mode: |
---|
| 3460 | + ctx->mode = result.uint_32 & 07777; |
---|
| 3461 | + break; |
---|
| 3462 | + case Opt_uid: |
---|
| 3463 | + ctx->uid = make_kuid(current_user_ns(), result.uint_32); |
---|
| 3464 | + if (!uid_valid(ctx->uid)) |
---|
| 3465 | + goto bad_value; |
---|
| 3466 | + break; |
---|
| 3467 | + case Opt_gid: |
---|
| 3468 | + ctx->gid = make_kgid(current_user_ns(), result.uint_32); |
---|
| 3469 | + if (!gid_valid(ctx->gid)) |
---|
| 3470 | + goto bad_value; |
---|
| 3471 | + break; |
---|
| 3472 | + case Opt_huge: |
---|
| 3473 | + ctx->huge = result.uint_32; |
---|
| 3474 | + if (ctx->huge != SHMEM_HUGE_NEVER && |
---|
| 3475 | + !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
| 3476 | + has_transparent_hugepage())) |
---|
| 3477 | + goto unsupported_parameter; |
---|
| 3478 | + ctx->seen |= SHMEM_SEEN_HUGE; |
---|
| 3479 | + break; |
---|
| 3480 | + case Opt_mpol: |
---|
| 3481 | + if (IS_ENABLED(CONFIG_NUMA)) { |
---|
| 3482 | + mpol_put(ctx->mpol); |
---|
| 3483 | + ctx->mpol = NULL; |
---|
| 3484 | + if (mpol_parse_str(param->string, &ctx->mpol)) |
---|
| 3485 | + goto bad_value; |
---|
| 3486 | + break; |
---|
| 3487 | + } |
---|
| 3488 | + goto unsupported_parameter; |
---|
| 3489 | + case Opt_inode32: |
---|
| 3490 | + ctx->full_inums = false; |
---|
| 3491 | + ctx->seen |= SHMEM_SEEN_INUMS; |
---|
| 3492 | + break; |
---|
| 3493 | + case Opt_inode64: |
---|
| 3494 | + if (sizeof(ino_t) < 8) { |
---|
| 3495 | + return invalfc(fc, |
---|
| 3496 | + "Cannot use inode64 with <64bit inums in kernel\n"); |
---|
| 3497 | + } |
---|
| 3498 | + ctx->full_inums = true; |
---|
| 3499 | + ctx->seen |= SHMEM_SEEN_INUMS; |
---|
| 3500 | + break; |
---|
| 3501 | + } |
---|
| 3502 | + return 0; |
---|
| 3503 | + |
---|
| 3504 | +unsupported_parameter: |
---|
| 3505 | + return invalfc(fc, "Unsupported parameter '%s'", param->key); |
---|
| 3506 | +bad_value: |
---|
| 3507 | + return invalfc(fc, "Bad value for '%s'", param->key); |
---|
| 3508 | +} |
---|
| 3509 | + |
---|
| 3510 | +static int shmem_parse_options(struct fs_context *fc, void *data) |
---|
| 3511 | +{ |
---|
| 3512 | + char *options = data; |
---|
| 3513 | + |
---|
| 3514 | + if (options) { |
---|
| 3515 | + int err = security_sb_eat_lsm_opts(options, &fc->security); |
---|
| 3516 | + if (err) |
---|
| 3517 | + return err; |
---|
| 3518 | + } |
---|
3362 | 3519 | |
---|
3363 | 3520 | while (options != NULL) { |
---|
3364 | | - this_char = options; |
---|
| 3521 | + char *this_char = options; |
---|
3365 | 3522 | for (;;) { |
---|
3366 | 3523 | /* |
---|
3367 | 3524 | * NUL-terminate this option: unfortunately, |
---|
.. | .. |
---|
3377 | 3534 | break; |
---|
3378 | 3535 | } |
---|
3379 | 3536 | } |
---|
3380 | | - if (!*this_char) |
---|
3381 | | - continue; |
---|
3382 | | - if ((value = strchr(this_char,'=')) != NULL) { |
---|
3383 | | - *value++ = 0; |
---|
3384 | | - } else { |
---|
3385 | | - pr_err("tmpfs: No value for mount option '%s'\n", |
---|
3386 | | - this_char); |
---|
3387 | | - goto error; |
---|
3388 | | - } |
---|
| 3537 | + if (*this_char) { |
---|
| 3538 | + char *value = strchr(this_char,'='); |
---|
| 3539 | + size_t len = 0; |
---|
| 3540 | + int err; |
---|
3389 | 3541 | |
---|
3390 | | - if (!strcmp(this_char,"size")) { |
---|
3391 | | - unsigned long long size; |
---|
3392 | | - size = memparse(value,&rest); |
---|
3393 | | - if (*rest == '%') { |
---|
3394 | | - size <<= PAGE_SHIFT; |
---|
3395 | | - size *= totalram_pages; |
---|
3396 | | - do_div(size, 100); |
---|
3397 | | - rest++; |
---|
| 3542 | + if (value) { |
---|
| 3543 | + *value++ = '\0'; |
---|
| 3544 | + len = strlen(value); |
---|
3398 | 3545 | } |
---|
3399 | | - if (*rest) |
---|
3400 | | - goto bad_val; |
---|
3401 | | - sbinfo->max_blocks = |
---|
3402 | | - DIV_ROUND_UP(size, PAGE_SIZE); |
---|
3403 | | - } else if (!strcmp(this_char,"nr_blocks")) { |
---|
3404 | | - sbinfo->max_blocks = memparse(value, &rest); |
---|
3405 | | - if (*rest) |
---|
3406 | | - goto bad_val; |
---|
3407 | | - } else if (!strcmp(this_char,"nr_inodes")) { |
---|
3408 | | - sbinfo->max_inodes = memparse(value, &rest); |
---|
3409 | | - if (*rest) |
---|
3410 | | - goto bad_val; |
---|
3411 | | - } else if (!strcmp(this_char,"mode")) { |
---|
3412 | | - if (remount) |
---|
3413 | | - continue; |
---|
3414 | | - sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777; |
---|
3415 | | - if (*rest) |
---|
3416 | | - goto bad_val; |
---|
3417 | | - } else if (!strcmp(this_char,"uid")) { |
---|
3418 | | - if (remount) |
---|
3419 | | - continue; |
---|
3420 | | - uid = simple_strtoul(value, &rest, 0); |
---|
3421 | | - if (*rest) |
---|
3422 | | - goto bad_val; |
---|
3423 | | - sbinfo->uid = make_kuid(current_user_ns(), uid); |
---|
3424 | | - if (!uid_valid(sbinfo->uid)) |
---|
3425 | | - goto bad_val; |
---|
3426 | | - } else if (!strcmp(this_char,"gid")) { |
---|
3427 | | - if (remount) |
---|
3428 | | - continue; |
---|
3429 | | - gid = simple_strtoul(value, &rest, 0); |
---|
3430 | | - if (*rest) |
---|
3431 | | - goto bad_val; |
---|
3432 | | - sbinfo->gid = make_kgid(current_user_ns(), gid); |
---|
3433 | | - if (!gid_valid(sbinfo->gid)) |
---|
3434 | | - goto bad_val; |
---|
3435 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
3436 | | - } else if (!strcmp(this_char, "huge")) { |
---|
3437 | | - int huge; |
---|
3438 | | - huge = shmem_parse_huge(value); |
---|
3439 | | - if (huge < 0) |
---|
3440 | | - goto bad_val; |
---|
3441 | | - if (!has_transparent_hugepage() && |
---|
3442 | | - huge != SHMEM_HUGE_NEVER) |
---|
3443 | | - goto bad_val; |
---|
3444 | | - sbinfo->huge = huge; |
---|
3445 | | -#endif |
---|
3446 | | -#ifdef CONFIG_NUMA |
---|
3447 | | - } else if (!strcmp(this_char,"mpol")) { |
---|
3448 | | - mpol_put(mpol); |
---|
3449 | | - mpol = NULL; |
---|
3450 | | - if (mpol_parse_str(value, &mpol)) |
---|
3451 | | - goto bad_val; |
---|
3452 | | -#endif |
---|
3453 | | - } else { |
---|
3454 | | - pr_err("tmpfs: Bad mount option %s\n", this_char); |
---|
3455 | | - goto error; |
---|
| 3546 | + err = vfs_parse_fs_string(fc, this_char, value, len); |
---|
| 3547 | + if (err < 0) |
---|
| 3548 | + return err; |
---|
3456 | 3549 | } |
---|
3457 | 3550 | } |
---|
3458 | | - sbinfo->mpol = mpol; |
---|
3459 | 3551 | return 0; |
---|
3460 | | - |
---|
3461 | | -bad_val: |
---|
3462 | | - pr_err("tmpfs: Bad value '%s' for mount option '%s'\n", |
---|
3463 | | - value, this_char); |
---|
3464 | | -error: |
---|
3465 | | - mpol_put(mpol); |
---|
3466 | | - return 1; |
---|
3467 | | - |
---|
3468 | 3552 | } |
---|
3469 | 3553 | |
---|
3470 | | -static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) |
---|
| 3554 | +/* |
---|
| 3555 | + * Reconfigure a shmem filesystem. |
---|
| 3556 | + * |
---|
| 3557 | + * Note that we disallow change from limited->unlimited blocks/inodes while any |
---|
| 3558 | + * are in use; but we must separately disallow unlimited->limited, because in |
---|
| 3559 | + * that case we have no record of how much is already in use. |
---|
| 3560 | + */ |
---|
| 3561 | +static int shmem_reconfigure(struct fs_context *fc) |
---|
3471 | 3562 | { |
---|
3472 | | - struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
3473 | | - struct shmem_sb_info config = *sbinfo; |
---|
| 3563 | + struct shmem_options *ctx = fc->fs_private; |
---|
| 3564 | + struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); |
---|
3474 | 3565 | unsigned long inodes; |
---|
3475 | | - int error = -EINVAL; |
---|
| 3566 | + struct mempolicy *mpol = NULL; |
---|
| 3567 | + const char *err; |
---|
3476 | 3568 | |
---|
3477 | | - config.mpol = NULL; |
---|
3478 | | - if (shmem_parse_options(data, &config, true)) |
---|
3479 | | - return error; |
---|
3480 | | - |
---|
3481 | | - spin_lock(&sbinfo->stat_lock); |
---|
| 3569 | + raw_spin_lock(&sbinfo->stat_lock); |
---|
3482 | 3570 | inodes = sbinfo->max_inodes - sbinfo->free_inodes; |
---|
3483 | | - if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0) |
---|
3484 | | - goto out; |
---|
3485 | | - if (config.max_inodes < inodes) |
---|
3486 | | - goto out; |
---|
3487 | | - /* |
---|
3488 | | - * Those tests disallow limited->unlimited while any are in use; |
---|
3489 | | - * but we must separately disallow unlimited->limited, because |
---|
3490 | | - * in that case we have no record of how much is already in use. |
---|
3491 | | - */ |
---|
3492 | | - if (config.max_blocks && !sbinfo->max_blocks) |
---|
3493 | | - goto out; |
---|
3494 | | - if (config.max_inodes && !sbinfo->max_inodes) |
---|
3495 | | - goto out; |
---|
| 3571 | + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { |
---|
| 3572 | + if (!sbinfo->max_blocks) { |
---|
| 3573 | + err = "Cannot retroactively limit size"; |
---|
| 3574 | + goto out; |
---|
| 3575 | + } |
---|
| 3576 | + if (percpu_counter_compare(&sbinfo->used_blocks, |
---|
| 3577 | + ctx->blocks) > 0) { |
---|
| 3578 | + err = "Too small a size for current use"; |
---|
| 3579 | + goto out; |
---|
| 3580 | + } |
---|
| 3581 | + } |
---|
| 3582 | + if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { |
---|
| 3583 | + if (!sbinfo->max_inodes) { |
---|
| 3584 | + err = "Cannot retroactively limit inodes"; |
---|
| 3585 | + goto out; |
---|
| 3586 | + } |
---|
| 3587 | + if (ctx->inodes < inodes) { |
---|
| 3588 | + err = "Too few inodes for current use"; |
---|
| 3589 | + goto out; |
---|
| 3590 | + } |
---|
| 3591 | + } |
---|
3496 | 3592 | |
---|
3497 | | - error = 0; |
---|
3498 | | - sbinfo->huge = config.huge; |
---|
3499 | | - sbinfo->max_blocks = config.max_blocks; |
---|
3500 | | - sbinfo->max_inodes = config.max_inodes; |
---|
3501 | | - sbinfo->free_inodes = config.max_inodes - inodes; |
---|
| 3593 | + if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && |
---|
| 3594 | + sbinfo->next_ino > UINT_MAX) { |
---|
| 3595 | + err = "Current inum too high to switch to 32-bit inums"; |
---|
| 3596 | + goto out; |
---|
| 3597 | + } |
---|
| 3598 | + |
---|
| 3599 | + if (ctx->seen & SHMEM_SEEN_HUGE) |
---|
| 3600 | + sbinfo->huge = ctx->huge; |
---|
| 3601 | + if (ctx->seen & SHMEM_SEEN_INUMS) |
---|
| 3602 | + sbinfo->full_inums = ctx->full_inums; |
---|
| 3603 | + if (ctx->seen & SHMEM_SEEN_BLOCKS) |
---|
| 3604 | + sbinfo->max_blocks = ctx->blocks; |
---|
| 3605 | + if (ctx->seen & SHMEM_SEEN_INODES) { |
---|
| 3606 | + sbinfo->max_inodes = ctx->inodes; |
---|
| 3607 | + sbinfo->free_inodes = ctx->inodes - inodes; |
---|
| 3608 | + } |
---|
3502 | 3609 | |
---|
3503 | 3610 | /* |
---|
3504 | 3611 | * Preserve previous mempolicy unless mpol remount option was specified. |
---|
3505 | 3612 | */ |
---|
3506 | | - if (config.mpol) { |
---|
3507 | | - mpol_put(sbinfo->mpol); |
---|
3508 | | - sbinfo->mpol = config.mpol; /* transfers initial ref */ |
---|
| 3613 | + if (ctx->mpol) { |
---|
| 3614 | + mpol = sbinfo->mpol; |
---|
| 3615 | + sbinfo->mpol = ctx->mpol; /* transfers initial ref */ |
---|
| 3616 | + ctx->mpol = NULL; |
---|
3509 | 3617 | } |
---|
| 3618 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
| 3619 | + mpol_put(mpol); |
---|
| 3620 | + return 0; |
---|
3510 | 3621 | out: |
---|
3511 | | - spin_unlock(&sbinfo->stat_lock); |
---|
3512 | | - return error; |
---|
| 3622 | + raw_spin_unlock(&sbinfo->stat_lock); |
---|
| 3623 | + return invalfc(fc, "%s", err); |
---|
3513 | 3624 | } |
---|
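A hedged illustration of the restriction described in the comment on shmem_reconfigure() above: once a tmpfs is created without a blocks or inode limit, a later reconfigure cannot impose one. Sketch via the new mount API (fspick(2) + FSCONFIG_CMD_RECONFIGURE; assumes the glibc >= 2.36 wrappers as in the earlier example, error handling trimmed):

#include <sys/mount.h>
#include <fcntl.h>

int shrink_tmpfs(const char *mountpoint)
{
	int fsfd = fspick(AT_FDCWD, mountpoint, FSPICK_CLOEXEC);

	fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "64M", 0);
	fsconfig(fsfd, FSCONFIG_SET_STRING, "nr_inodes", "1024", 0);

	/*
	 * shmem_reconfigure() runs here: it fails with "Cannot retroactively
	 * limit inodes" if the mount was created without an inode limit, or
	 * with "Too small a size for current use" if more than 64M is in use.
	 */
	return fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
}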
3514 | 3625 | |
---|
3515 | 3626 | static int shmem_show_options(struct seq_file *seq, struct dentry *root) |
---|
.. | .. |
---|
3529 | 3640 | if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) |
---|
3530 | 3641 | seq_printf(seq, ",gid=%u", |
---|
3531 | 3642 | from_kgid_munged(&init_user_ns, sbinfo->gid)); |
---|
3532 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 3643 | + |
---|
| 3644 | + /* |
---|
| 3645 | + * Showing inode{64,32} might be useful even if it's the system default, |
---|
| 3646 | + * since then people don't have to resort to checking both here and |
---|
| 3647 | + * /proc/config.gz to confirm 64-bit inums were successfully applied |
---|
| 3648 | + * (which may not even exist if IKCONFIG_PROC isn't enabled). |
---|
| 3649 | + * |
---|
| 3650 | + * We hide it when inode64 isn't the default and we are using 32-bit |
---|
| 3651 | + * inodes, since that probably just means the feature isn't even under |
---|
| 3652 | + * consideration. |
---|
| 3653 | + * |
---|
| 3654 | + * As such: |
---|
| 3655 | + * |
---|
| 3656 | + * +-----------------+-----------------+ |
---|
| 3657 | + * | TMPFS_INODE64=y | TMPFS_INODE64=n | |
---|
| 3658 | + * +------------------+-----------------+-----------------+ |
---|
| 3659 | + * | full_inums=true | show | show | |
---|
| 3660 | + * | full_inums=false | show | hide | |
---|
| 3661 | + * +------------------+-----------------+-----------------+ |
---|
| 3662 | + * |
---|
| 3663 | + */ |
---|
| 3664 | + if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) |
---|
| 3665 | + seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32)); |
---|
| 3666 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3533 | 3667 | /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ |
---|
3534 | 3668 | if (sbinfo->huge) |
---|
3535 | 3669 | seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); |
---|
.. | .. |
---|
3544 | 3678 | { |
---|
3545 | 3679 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
---|
3546 | 3680 | |
---|
| 3681 | + free_percpu(sbinfo->ino_batch); |
---|
3547 | 3682 | percpu_counter_destroy(&sbinfo->used_blocks); |
---|
3548 | 3683 | mpol_put(sbinfo->mpol); |
---|
3549 | 3684 | kfree(sbinfo); |
---|
3550 | 3685 | sb->s_fs_info = NULL; |
---|
3551 | 3686 | } |
---|
3552 | 3687 | |
---|
3553 | | -int shmem_fill_super(struct super_block *sb, void *data, int silent) |
---|
| 3688 | +static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) |
---|
3554 | 3689 | { |
---|
| 3690 | + struct shmem_options *ctx = fc->fs_private; |
---|
3555 | 3691 | struct inode *inode; |
---|
3556 | 3692 | struct shmem_sb_info *sbinfo; |
---|
3557 | 3693 | int err = -ENOMEM; |
---|
.. | .. |
---|
3562 | 3698 | if (!sbinfo) |
---|
3563 | 3699 | return -ENOMEM; |
---|
3564 | 3700 | |
---|
3565 | | - sbinfo->mode = 0777 | S_ISVTX; |
---|
3566 | | - sbinfo->uid = current_fsuid(); |
---|
3567 | | - sbinfo->gid = current_fsgid(); |
---|
3568 | 3701 | sb->s_fs_info = sbinfo; |
---|
3569 | 3702 | |
---|
3570 | 3703 | #ifdef CONFIG_TMPFS |
---|
.. | .. |
---|
3574 | 3707 | * but the internal instance is left unlimited. |
---|
3575 | 3708 | */ |
---|
3576 | 3709 | if (!(sb->s_flags & SB_KERNMOUNT)) { |
---|
3577 | | - sbinfo->max_blocks = shmem_default_max_blocks(); |
---|
3578 | | - sbinfo->max_inodes = shmem_default_max_inodes(); |
---|
3579 | | - if (shmem_parse_options(data, sbinfo, false)) { |
---|
3580 | | - err = -EINVAL; |
---|
3581 | | - goto failed; |
---|
3582 | | - } |
---|
| 3710 | + if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) |
---|
| 3711 | + ctx->blocks = shmem_default_max_blocks(); |
---|
| 3712 | + if (!(ctx->seen & SHMEM_SEEN_INODES)) |
---|
| 3713 | + ctx->inodes = shmem_default_max_inodes(); |
---|
| 3714 | + if (!(ctx->seen & SHMEM_SEEN_INUMS)) |
---|
| 3715 | + ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); |
---|
3583 | 3716 | } else { |
---|
3584 | 3717 | sb->s_flags |= SB_NOUSER; |
---|
3585 | 3718 | } |
---|
.. | .. |
---|
3588 | 3721 | #else |
---|
3589 | 3722 | sb->s_flags |= SB_NOUSER; |
---|
3590 | 3723 | #endif |
---|
| 3724 | + sbinfo->max_blocks = ctx->blocks; |
---|
| 3725 | + sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes; |
---|
| 3726 | + if (sb->s_flags & SB_KERNMOUNT) { |
---|
| 3727 | + sbinfo->ino_batch = alloc_percpu(ino_t); |
---|
| 3728 | + if (!sbinfo->ino_batch) |
---|
| 3729 | + goto failed; |
---|
| 3730 | + } |
---|
| 3731 | + sbinfo->uid = ctx->uid; |
---|
| 3732 | + sbinfo->gid = ctx->gid; |
---|
| 3733 | + sbinfo->full_inums = ctx->full_inums; |
---|
| 3734 | + sbinfo->mode = ctx->mode; |
---|
| 3735 | + sbinfo->huge = ctx->huge; |
---|
| 3736 | + sbinfo->mpol = ctx->mpol; |
---|
| 3737 | + ctx->mpol = NULL; |
---|
3591 | 3738 | |
---|
3592 | | - spin_lock_init(&sbinfo->stat_lock); |
---|
| 3739 | + raw_spin_lock_init(&sbinfo->stat_lock); |
---|
3593 | 3740 | if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) |
---|
3594 | 3741 | goto failed; |
---|
3595 | | - sbinfo->free_inodes = sbinfo->max_inodes; |
---|
3596 | 3742 | spin_lock_init(&sbinfo->shrinklist_lock); |
---|
3597 | 3743 | INIT_LIST_HEAD(&sbinfo->shrinklist); |
---|
3598 | 3744 | |
---|
.. | .. |
---|
3625 | 3771 | return err; |
---|
3626 | 3772 | } |
---|
3627 | 3773 | |
---|
| 3774 | +static int shmem_get_tree(struct fs_context *fc) |
---|
| 3775 | +{ |
---|
| 3776 | + return get_tree_nodev(fc, shmem_fill_super); |
---|
| 3777 | +} |
---|
| 3778 | + |
---|
| 3779 | +static void shmem_free_fc(struct fs_context *fc) |
---|
| 3780 | +{ |
---|
| 3781 | + struct shmem_options *ctx = fc->fs_private; |
---|
| 3782 | + |
---|
| 3783 | + if (ctx) { |
---|
| 3784 | + mpol_put(ctx->mpol); |
---|
| 3785 | + kfree(ctx); |
---|
| 3786 | + } |
---|
| 3787 | +} |
---|
| 3788 | + |
---|
| 3789 | +static const struct fs_context_operations shmem_fs_context_ops = { |
---|
| 3790 | + .free = shmem_free_fc, |
---|
| 3791 | + .get_tree = shmem_get_tree, |
---|
| 3792 | +#ifdef CONFIG_TMPFS |
---|
| 3793 | + .parse_monolithic = shmem_parse_options, |
---|
| 3794 | + .parse_param = shmem_parse_one, |
---|
| 3795 | + .reconfigure = shmem_reconfigure, |
---|
| 3796 | +#endif |
---|
| 3797 | +}; |
---|
| 3798 | + |
---|
3628 | 3799 | static struct kmem_cache *shmem_inode_cachep; |
---|
3629 | 3800 | |
---|
3630 | 3801 | static struct inode *shmem_alloc_inode(struct super_block *sb) |
---|
.. | .. |
---|
3636 | 3807 | return &info->vfs_inode; |
---|
3637 | 3808 | } |
---|
3638 | 3809 | |
---|
3639 | | -static void shmem_destroy_callback(struct rcu_head *head) |
---|
| 3810 | +static void shmem_free_in_core_inode(struct inode *inode) |
---|
3640 | 3811 | { |
---|
3641 | | - struct inode *inode = container_of(head, struct inode, i_rcu); |
---|
3642 | 3812 | if (S_ISLNK(inode->i_mode)) |
---|
3643 | 3813 | kfree(inode->i_link); |
---|
3644 | 3814 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); |
---|
.. | .. |
---|
3648 | 3818 | { |
---|
3649 | 3819 | if (S_ISREG(inode->i_mode)) |
---|
3650 | 3820 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); |
---|
3651 | | - call_rcu(&inode->i_rcu, shmem_destroy_callback); |
---|
3652 | 3821 | } |
---|
3653 | 3822 | |
---|
3654 | 3823 | static void shmem_init_inode(void *foo) |
---|
.. | .. |
---|
3739 | 3908 | |
---|
3740 | 3909 | static const struct super_operations shmem_ops = { |
---|
3741 | 3910 | .alloc_inode = shmem_alloc_inode, |
---|
| 3911 | + .free_inode = shmem_free_in_core_inode, |
---|
3742 | 3912 | .destroy_inode = shmem_destroy_inode, |
---|
3743 | 3913 | #ifdef CONFIG_TMPFS |
---|
3744 | 3914 | .statfs = shmem_statfs, |
---|
3745 | | - .remount_fs = shmem_remount_fs, |
---|
3746 | 3915 | .show_options = shmem_show_options, |
---|
3747 | 3916 | #endif |
---|
3748 | 3917 | .evict_inode = shmem_evict_inode, |
---|
3749 | 3918 | .drop_inode = generic_delete_inode, |
---|
3750 | 3919 | .put_super = shmem_put_super, |
---|
3751 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 3920 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3752 | 3921 | .nr_cached_objects = shmem_unused_huge_count, |
---|
3753 | 3922 | .free_cached_objects = shmem_unused_huge_scan, |
---|
3754 | 3923 | #endif |
---|
.. | .. |
---|
3761 | 3930 | .set_policy = shmem_set_policy, |
---|
3762 | 3931 | .get_policy = shmem_get_policy, |
---|
3763 | 3932 | #endif |
---|
| 3933 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
---|
| 3934 | + .allow_speculation = filemap_allow_speculation, |
---|
| 3935 | +#endif |
---|
3764 | 3936 | }; |
---|
3765 | 3937 | |
---|
3766 | | -static struct dentry *shmem_mount(struct file_system_type *fs_type, |
---|
3767 | | - int flags, const char *dev_name, void *data) |
---|
| 3938 | +int shmem_init_fs_context(struct fs_context *fc) |
---|
3768 | 3939 | { |
---|
3769 | | - return mount_nodev(fs_type, flags, data, shmem_fill_super); |
---|
| 3940 | + struct shmem_options *ctx; |
---|
| 3941 | + |
---|
| 3942 | + ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL); |
---|
| 3943 | + if (!ctx) |
---|
| 3944 | + return -ENOMEM; |
---|
| 3945 | + |
---|
| 3946 | + ctx->mode = 0777 | S_ISVTX; |
---|
| 3947 | + ctx->uid = current_fsuid(); |
---|
| 3948 | + ctx->gid = current_fsgid(); |
---|
| 3949 | + |
---|
| 3950 | + fc->fs_private = ctx; |
---|
| 3951 | + fc->ops = &shmem_fs_context_ops; |
---|
| 3952 | + return 0; |
---|
3770 | 3953 | } |
---|
3771 | 3954 | |
---|
3772 | 3955 | static struct file_system_type shmem_fs_type = { |
---|
3773 | 3956 | .owner = THIS_MODULE, |
---|
3774 | 3957 | .name = "tmpfs", |
---|
3775 | | - .mount = shmem_mount, |
---|
| 3958 | + .init_fs_context = shmem_init_fs_context, |
---|
| 3959 | +#ifdef CONFIG_TMPFS |
---|
| 3960 | + .parameters = shmem_fs_parameters, |
---|
| 3961 | +#endif |
---|
3776 | 3962 | .kill_sb = kill_litter_super, |
---|
3777 | | - .fs_flags = FS_USERNS_MOUNT, |
---|
| 3963 | + .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT, |
---|
3778 | 3964 | }; |
---|
3779 | 3965 | |
---|
3780 | 3966 | int __init shmem_init(void) |
---|
3781 | 3967 | { |
---|
3782 | 3968 | int error; |
---|
3783 | | - |
---|
3784 | | - /* If rootfs called this, don't re-init */ |
---|
3785 | | - if (shmem_inode_cachep) |
---|
3786 | | - return 0; |
---|
3787 | 3969 | |
---|
3788 | 3970 | shmem_init_inodecache(); |
---|
3789 | 3971 | |
---|
.. | .. |
---|
3800 | 3982 | goto out1; |
---|
3801 | 3983 | } |
---|
3802 | 3984 | |
---|
3803 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 3985 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3804 | 3986 | if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) |
---|
3805 | 3987 | SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; |
---|
3806 | 3988 | else |
---|
.. | .. |
---|
3816 | 3998 | return error; |
---|
3817 | 3999 | } |
---|
3818 | 4000 | |
---|
3819 | | -#if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS) |
---|
| 4001 | +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) |
---|
3820 | 4002 | static ssize_t shmem_enabled_show(struct kobject *kobj, |
---|
3821 | 4003 | struct kobj_attribute *attr, char *buf) |
---|
3822 | 4004 | { |
---|
3823 | | - int values[] = { |
---|
| 4005 | + static const int values[] = { |
---|
3824 | 4006 | SHMEM_HUGE_ALWAYS, |
---|
3825 | 4007 | SHMEM_HUGE_WITHIN_SIZE, |
---|
3826 | 4008 | SHMEM_HUGE_ADVISE, |
---|
.. | .. |
---|
3868 | 4050 | |
---|
3869 | 4051 | struct kobj_attribute shmem_enabled_attr = |
---|
3870 | 4052 | __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store); |
---|
3871 | | -#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */ |
---|
| 4053 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ |
---|
3872 | 4054 | |
---|
3873 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
---|
| 4055 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
---|
3874 | 4056 | bool shmem_huge_enabled(struct vm_area_struct *vma) |
---|
3875 | 4057 | { |
---|
3876 | 4058 | struct inode *inode = file_inode(vma->vm_file); |
---|
.. | .. |
---|
3878 | 4060 | loff_t i_size; |
---|
3879 | 4061 | pgoff_t off; |
---|
3880 | 4062 | |
---|
| 4063 | + if (!transhuge_vma_enabled(vma, vma->vm_flags)) |
---|
| 4064 | + return false; |
---|
3881 | 4065 | if (shmem_huge == SHMEM_HUGE_FORCE) |
---|
3882 | 4066 | return true; |
---|
3883 | 4067 | if (shmem_huge == SHMEM_HUGE_DENY) |
---|
.. | .. |
---|
3893 | 4077 | if (i_size >= HPAGE_PMD_SIZE && |
---|
3894 | 4078 | i_size >> PAGE_SHIFT >= off) |
---|
3895 | 4079 | return true; |
---|
3896 | | - /* fall through */ |
---|
| 4080 | + fallthrough; |
---|
3897 | 4081 | case SHMEM_HUGE_ADVISE: |
---|
3898 | 4082 | /* TODO: implement fadvise() hints */ |
---|
3899 | 4083 | return (vma->vm_flags & VM_HUGEPAGE); |
---|
.. | .. |
---|
3902 | 4086 | return false; |
---|
3903 | 4087 | } |
---|
3904 | 4088 | } |
---|
3905 | | -#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
---|
| 4089 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
---|
3906 | 4090 | |
---|
3907 | 4091 | #else /* !CONFIG_SHMEM */ |
---|
3908 | 4092 | |
---|
.. | .. |
---|
3917 | 4101 | |
---|
3918 | 4102 | static struct file_system_type shmem_fs_type = { |
---|
3919 | 4103 | .name = "tmpfs", |
---|
3920 | | - .mount = ramfs_mount, |
---|
| 4104 | + .init_fs_context = ramfs_init_fs_context, |
---|
| 4105 | + .parameters = ramfs_fs_parameters, |
---|
3921 | 4106 | .kill_sb = kill_litter_super, |
---|
3922 | 4107 | .fs_flags = FS_USERNS_MOUNT, |
---|
3923 | 4108 | }; |
---|
.. | .. |
---|
3932 | 4117 | return 0; |
---|
3933 | 4118 | } |
---|
3934 | 4119 | |
---|
3935 | | -int shmem_unuse(swp_entry_t swap, struct page *page) |
---|
| 4120 | +int shmem_unuse(unsigned int type, bool frontswap, |
---|
| 4121 | + unsigned long *fs_pages_to_unuse) |
---|
3936 | 4122 | { |
---|
3937 | 4123 | return 0; |
---|
3938 | 4124 | } |
---|
.. | .. |
---|
4047 | 4233 | |
---|
4048 | 4234 | /** |
---|
4049 | 4235 | * shmem_zero_setup - setup a shared anonymous mapping |
---|
4050 | | - * @vma: the vma to be mmapped is prepared by do_mmap_pgoff |
---|
| 4236 | + * @vma: the vma to be mmapped is prepared by do_mmap |
---|
4051 | 4237 | */ |
---|
4052 | 4238 | int shmem_zero_setup(struct vm_area_struct *vma) |
---|
4053 | 4239 | { |
---|
.. | .. |
---|
4055 | 4241 | loff_t size = vma->vm_end - vma->vm_start; |
---|
4056 | 4242 | |
---|
4057 | 4243 | /* |
---|
4058 | | - * Cloning a new file under mmap_sem leads to a lock ordering conflict |
---|
| 4244 | + * Cloning a new file under mmap_lock leads to a lock ordering conflict |
---|
4059 | 4245 | * between XFS directory reading and selinux: since this file is only |
---|
4060 | 4246 | * accessible to the user through its mapping, use S_PRIVATE flag to |
---|
4061 | 4247 | * bypass file security, in the same way as shmem_kernel_file_setup(). |
---|
.. | .. |
---|
4069 | 4255 | vma->vm_file = file; |
---|
4070 | 4256 | vma->vm_ops = &shmem_vm_ops; |
---|
4071 | 4257 | |
---|
4072 | | - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && |
---|
| 4258 | + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
---|
4073 | 4259 | ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < |
---|
4074 | 4260 | (vma->vm_end & HPAGE_PMD_MASK)) { |
---|
4075 | 4261 | khugepaged_enter(vma, vma->vm_flags); |
---|
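shmem_zero_setup() above is what backs a MAP_SHARED | MAP_ANONYMOUS mapping with an internal shmem file, which is why such memory stays shared across fork(). A minimal userspace illustration (error handling omitted):

#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* This mapping is prepared by do_mmap() and set up by shmem_zero_setup(). */
	int *shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (fork() == 0) {		/* child writes through the shmem backing */
		*shared = 42;
		_exit(0);
	}
	wait(NULL);
	printf("%d\n", *shared);	/* parent reads 42 */
	return 0;
}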
.. | .. |
---|
4117 | 4303 | #endif |
---|
4118 | 4304 | } |
---|
4119 | 4305 | EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); |
---|
| 4306 | + |
---|
| 4307 | +void shmem_mark_page_lazyfree(struct page *page, bool tail) |
---|
| 4308 | +{ |
---|
| 4309 | + mark_page_lazyfree_movetail(page, tail); |
---|
| 4310 | +} |
---|
| 4311 | +EXPORT_SYMBOL_GPL(shmem_mark_page_lazyfree); |
---|
| 4312 | + |
---|
| 4313 | +int reclaim_shmem_address_space(struct address_space *mapping) |
---|
| 4314 | +{ |
---|
| 4315 | +#ifdef CONFIG_SHMEM |
---|
| 4316 | + pgoff_t start = 0; |
---|
| 4317 | + struct page *page; |
---|
| 4318 | + LIST_HEAD(page_list); |
---|
| 4319 | + XA_STATE(xas, &mapping->i_pages, start); |
---|
| 4320 | + |
---|
| 4321 | + if (!shmem_mapping(mapping)) |
---|
| 4322 | + return -EINVAL; |
---|
| 4323 | + |
---|
| 4324 | + lru_add_drain(); |
---|
| 4325 | + |
---|
| 4326 | + rcu_read_lock(); |
---|
| 4327 | + xas_for_each(&xas, page, ULONG_MAX) { |
---|
| 4328 | + if (xas_retry(&xas, page)) |
---|
| 4329 | + continue; |
---|
| 4330 | + if (xa_is_value(page)) |
---|
| 4331 | + continue; |
---|
| 4332 | + if (isolate_lru_page(page)) |
---|
| 4333 | + continue; |
---|
| 4334 | + |
---|
| 4335 | + list_add(&page->lru, &page_list); |
---|
| 4336 | + |
---|
| 4337 | + if (need_resched()) { |
---|
| 4338 | + xas_pause(&xas); |
---|
| 4339 | + cond_resched_rcu(); |
---|
| 4340 | + } |
---|
| 4341 | + } |
---|
| 4342 | + rcu_read_unlock(); |
---|
| 4343 | + |
---|
| 4344 | + return reclaim_pages(&page_list); |
---|
| 4345 | +#else |
---|
| 4346 | + return 0; |
---|
| 4347 | +#endif |
---|
| 4348 | +} |
---|
| 4349 | +EXPORT_SYMBOL_GPL(reclaim_shmem_address_space); |
---|