.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * linux/mm/swapfile.c |
---|
3 | 4 | * |
---|
.. | .. |
---|
39 | 40 | #include <linux/swap_slots.h> |
---|
40 | 41 | #include <linux/sort.h> |
---|
41 | 42 | |
---|
42 | | -#include <asm/pgtable.h> |
---|
43 | 43 | #include <asm/tlbflush.h> |
---|
44 | 44 | #include <linux/swapops.h> |
---|
45 | 45 | #include <linux/swap_cgroup.h> |
---|
| 46 | +#include <trace/hooks/mm.h> |
---|
46 | 47 | |
---|
47 | 48 | static bool swap_count_continued(struct swap_info_struct *, pgoff_t, |
---|
48 | 49 | unsigned char); |
---|
.. | .. |
---|
98 | 99 | |
---|
99 | 100 | atomic_t nr_rotate_swap = ATOMIC_INIT(0); |
---|
100 | 101 | |
---|
101 | | -static struct swap_info_struct *swap_type_to_swap_info(int type) |
---|
| 102 | +struct swap_info_struct *swap_type_to_swap_info(int type) |
---|
102 | 103 | { |
---|
103 | 104 | if (type >= READ_ONCE(nr_swapfiles)) |
---|
104 | 105 | return NULL; |
---|
.. | .. |
---|
106 | 107 | smp_rmb(); /* Pairs with smp_wmb in alloc_swap_info. */ |
---|
107 | 108 | return READ_ONCE(swap_info[type]); |
---|
108 | 109 | } |
---|
| 110 | +EXPORT_SYMBOL_GPL(swap_type_to_swap_info); |
---|
109 | 111 | |
---|
110 | 112 | static inline unsigned char swap_count(unsigned char ent) |
---|
111 | 113 | { |
---|
112 | 114 | return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ |
---|
113 | 115 | } |
---|
114 | 116 | |
---|
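The swap_count() helper above masks off SWAP_HAS_CACHE before returning the map count. As a minimal standalone sketch (not kernel code; the constant values are assumptions mirroring the usual definitions in include/linux/swap.h), each swap_map byte packs a small reference count in the low bits plus flag bits:

```c
#include <stdio.h>

/* Assumed values, mirroring the usual definitions in include/linux/swap.h. */
#define SWAP_HAS_CACHE	0x40	/* entry also has a swap-cache page */
#define COUNT_CONTINUED	0x80	/* count continues in an extra page */

/* Same masking as swap_count() in the hunk above. */
static unsigned char swap_count(unsigned char ent)
{
	return ent & ~SWAP_HAS_CACHE;	/* may still include COUNT_CONTINUED */
}

int main(void)
{
	unsigned char ent = SWAP_HAS_CACHE | 2;	/* cached, two map references */

	printf("count=%u cached=%d\n", swap_count(ent), !!(ent & SWAP_HAS_CACHE));
	return 0;
}
```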
| 117 | +/* Reclaim the swap entry anyway if possible */ |
---|
| 118 | +#define TTRS_ANYWAY 0x1 |
---|
| 119 | +/* |
---|
| 120 | + * Reclaim the swap entry if there are no more mappings of the |
---|
| 121 | + * corresponding page |
---|
| 122 | + */ |
---|
| 123 | +#define TTRS_UNMAPPED 0x2 |
---|
| 124 | +/* Reclaim the swap entry if swap is getting full*/ |
---|
| 125 | +#define TTRS_FULL 0x4 |
---|
| 126 | + |
---|
115 | 127 | /* returns 1 if swap entry is freed */ |
---|
116 | | -static int |
---|
117 | | -__try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) |
---|
| 128 | +static int __try_to_reclaim_swap(struct swap_info_struct *si, |
---|
| 129 | + unsigned long offset, unsigned long flags) |
---|
118 | 130 | { |
---|
119 | 131 | swp_entry_t entry = swp_entry(si->type, offset); |
---|
120 | 132 | struct page *page; |
---|
121 | 133 | int ret = 0; |
---|
122 | 134 | |
---|
123 | | - page = find_get_page(swap_address_space(entry), swp_offset(entry)); |
---|
| 135 | + page = find_get_page(swap_address_space(entry), offset); |
---|
124 | 136 | if (!page) |
---|
125 | 137 | return 0; |
---|
126 | 138 | /* |
---|
127 | | - * This function is called from scan_swap_map() and it's called |
---|
128 | | - * by vmscan.c at reclaiming pages. So, we hold a lock on a page, here. |
---|
129 | | - * We have to use trylock for avoiding deadlock. This is a special |
---|
| 139 | + * This function is called from scan_swap_map_slots() and by vmscan.c |
---|
| 140 | + * when reclaiming pages, so we hold a lock on a page here. We have to |
---|
| 141 | + * use trylock to avoid deadlock. This is a special |
---|
130 | 142 | * case and you should use try_to_free_swap() with explicit lock_page() |
---|
131 | 143 | * in usual operations. |
---|
132 | 144 | */ |
---|
133 | 145 | if (trylock_page(page)) { |
---|
134 | | - ret = try_to_free_swap(page); |
---|
| 146 | + if ((flags & TTRS_ANYWAY) || |
---|
| 147 | + ((flags & TTRS_UNMAPPED) && !page_mapped(page)) || |
---|
| 148 | + ((flags & TTRS_FULL) && mem_cgroup_swap_full(page))) |
---|
| 149 | + ret = try_to_free_swap(page); |
---|
135 | 150 | unlock_page(page); |
---|
136 | 151 | } |
---|
137 | 152 | put_page(page); |
---|
138 | 153 | return ret; |
---|
| 154 | +} |
---|
| 155 | + |
---|
| 156 | +static inline struct swap_extent *first_se(struct swap_info_struct *sis) |
---|
| 157 | +{ |
---|
| 158 | + struct rb_node *rb = rb_first(&sis->swap_extent_root); |
---|
| 159 | + return rb_entry(rb, struct swap_extent, rb_node); |
---|
| 160 | +} |
---|
| 161 | + |
---|
| 162 | +static inline struct swap_extent *next_se(struct swap_extent *se) |
---|
| 163 | +{ |
---|
| 164 | + struct rb_node *rb = rb_next(&se->rb_node); |
---|
| 165 | + return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL; |
---|
139 | 166 | } |
---|
140 | 167 | |
---|
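The new TTRS_* flags let each caller state when __try_to_reclaim_swap() is actually allowed to free the entry; scan_swap_map_slots() passes TTRS_ANYWAY, while free_swap_and_cache() (later in this patch) passes TTRS_UNMAPPED | TTRS_FULL. A minimal standalone sketch of the same gating condition, with the page_mapped/swap_full booleans standing in for the real page checks:

```c
#include <stdbool.h>
#include <stdio.h>

#define TTRS_ANYWAY	0x1	/* reclaim unconditionally */
#define TTRS_UNMAPPED	0x2	/* reclaim only if the page has no mappings left */
#define TTRS_FULL	0x4	/* reclaim only if swap is getting full */

/* Mirrors the condition that guards try_to_free_swap() in the hunk above. */
static bool should_reclaim(unsigned long flags, bool page_mapped, bool swap_full)
{
	return (flags & TTRS_ANYWAY) ||
	       ((flags & TTRS_UNMAPPED) && !page_mapped) ||
	       ((flags & TTRS_FULL) && swap_full);
}

int main(void)
{
	unsigned long flags = TTRS_UNMAPPED | TTRS_FULL;

	printf("%d\n", should_reclaim(flags, false, false));	/* 1: no mappings left */
	printf("%d\n", should_reclaim(flags, true, false));	/* 0: still mapped, swap not full */
	return 0;
}
```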
141 | 168 | /* |
---|
.. | .. |
---|
150 | 177 | int err = 0; |
---|
151 | 178 | |
---|
152 | 179 | /* Do not discard the swap header page! */ |
---|
153 | | - se = &si->first_swap_extent; |
---|
| 180 | + se = first_se(si); |
---|
154 | 181 | start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); |
---|
155 | 182 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); |
---|
156 | 183 | if (nr_blocks) { |
---|
.. | .. |
---|
161 | 188 | cond_resched(); |
---|
162 | 189 | } |
---|
163 | 190 | |
---|
164 | | - list_for_each_entry(se, &si->first_swap_extent.list, list) { |
---|
| 191 | + for (se = next_se(se); se; se = next_se(se)) { |
---|
165 | 192 | start_block = se->start_block << (PAGE_SHIFT - 9); |
---|
166 | 193 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); |
---|
167 | 194 | |
---|
.. | .. |
---|
175 | 202 | return err; /* That will often be -EOPNOTSUPP */ |
---|
176 | 203 | } |
---|
177 | 204 | |
---|
| 205 | +static struct swap_extent * |
---|
| 206 | +offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) |
---|
| 207 | +{ |
---|
| 208 | + struct swap_extent *se; |
---|
| 209 | + struct rb_node *rb; |
---|
| 210 | + |
---|
| 211 | + rb = sis->swap_extent_root.rb_node; |
---|
| 212 | + while (rb) { |
---|
| 213 | + se = rb_entry(rb, struct swap_extent, rb_node); |
---|
| 214 | + if (offset < se->start_page) |
---|
| 215 | + rb = rb->rb_left; |
---|
| 216 | + else if (offset >= se->start_page + se->nr_pages) |
---|
| 217 | + rb = rb->rb_right; |
---|
| 218 | + else |
---|
| 219 | + return se; |
---|
| 220 | + } |
---|
| 221 | + /* It *must* be present */ |
---|
| 222 | + BUG(); |
---|
| 223 | +} |
---|
| 224 | + |
---|
| 225 | +sector_t swap_page_sector(struct page *page) |
---|
| 226 | +{ |
---|
| 227 | + struct swap_info_struct *sis = page_swap_info(page); |
---|
| 228 | + struct swap_extent *se; |
---|
| 229 | + sector_t sector; |
---|
| 230 | + pgoff_t offset; |
---|
| 231 | + |
---|
| 232 | + offset = __page_file_index(page); |
---|
| 233 | + se = offset_to_swap_extent(sis, offset); |
---|
| 234 | + sector = se->start_block + (offset - se->start_page); |
---|
| 235 | + return sector << (PAGE_SHIFT - 9); |
---|
| 236 | +} |
---|
| 237 | + |
---|
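The swap extents now live in an rb-tree keyed by start_page, and swap_page_sector() converts a swap offset into a device sector by finding the covering extent. The standalone sketch below reproduces the same lookup comparisons and the same offset-to-sector arithmetic, with a plain binary search over a sorted array standing in for the rb-tree walk and made-up extent values:

```c
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages for the example */

struct extent {
	unsigned long start_page;	/* first swap offset covered */
	unsigned long nr_pages;		/* pages in this extent */
	unsigned long long start_block;	/* first backing block, in page units */
};

/* Same comparisons as offset_to_swap_extent(), over a sorted array. */
static const struct extent *lookup(const struct extent *ext, int n,
				   unsigned long offset)
{
	int lo = 0, hi = n - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (offset < ext[mid].start_page)
			hi = mid - 1;
		else if (offset >= ext[mid].start_page + ext[mid].nr_pages)
			lo = mid + 1;
		else
			return &ext[mid];
	}
	return NULL;	/* the kernel BUG()s instead: the offset must be covered */
}

int main(void)
{
	/* Hypothetical extents: pages 0..99 at block 1000, pages 100..179 at block 5000. */
	const struct extent ext[] = {
		{ .start_page = 0,   .nr_pages = 100, .start_block = 1000 },
		{ .start_page = 100, .nr_pages = 80,  .start_block = 5000 },
	};
	unsigned long offset = 130;
	const struct extent *se = lookup(ext, 2, offset);

	/* swap_page_sector(): page-sized block scaled to 512-byte sectors. */
	unsigned long long sector =
		(se->start_block + (offset - se->start_page)) << (PAGE_SHIFT - 9);

	printf("offset %lu -> sector %llu\n", offset, sector);
	return 0;
}
```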
178 | 238 | /* |
---|
179 | 239 | * swap allocation tell device that a cluster of swap can now be discarded, |
---|
180 | 240 | * to allow the swap device to optimize its wear-levelling. |
---|
.. | .. |
---|
182 | 242 | static void discard_swap_cluster(struct swap_info_struct *si, |
---|
183 | 243 | pgoff_t start_page, pgoff_t nr_pages) |
---|
184 | 244 | { |
---|
185 | | - struct swap_extent *se = si->curr_swap_extent; |
---|
186 | | - int found_extent = 0; |
---|
| 245 | + struct swap_extent *se = offset_to_swap_extent(si, start_page); |
---|
187 | 246 | |
---|
188 | 247 | while (nr_pages) { |
---|
189 | | - if (se->start_page <= start_page && |
---|
190 | | - start_page < se->start_page + se->nr_pages) { |
---|
191 | | - pgoff_t offset = start_page - se->start_page; |
---|
192 | | - sector_t start_block = se->start_block + offset; |
---|
193 | | - sector_t nr_blocks = se->nr_pages - offset; |
---|
| 248 | + pgoff_t offset = start_page - se->start_page; |
---|
| 249 | + sector_t start_block = se->start_block + offset; |
---|
| 250 | + sector_t nr_blocks = se->nr_pages - offset; |
---|
194 | 251 | |
---|
195 | | - if (nr_blocks > nr_pages) |
---|
196 | | - nr_blocks = nr_pages; |
---|
197 | | - start_page += nr_blocks; |
---|
198 | | - nr_pages -= nr_blocks; |
---|
| 252 | + if (nr_blocks > nr_pages) |
---|
| 253 | + nr_blocks = nr_pages; |
---|
| 254 | + start_page += nr_blocks; |
---|
| 255 | + nr_pages -= nr_blocks; |
---|
199 | 256 | |
---|
200 | | - if (!found_extent++) |
---|
201 | | - si->curr_swap_extent = se; |
---|
| 257 | + start_block <<= PAGE_SHIFT - 9; |
---|
| 258 | + nr_blocks <<= PAGE_SHIFT - 9; |
---|
| 259 | + if (blkdev_issue_discard(si->bdev, start_block, |
---|
| 260 | + nr_blocks, GFP_NOIO, 0)) |
---|
| 261 | + break; |
---|
202 | 262 | |
---|
203 | | - start_block <<= PAGE_SHIFT - 9; |
---|
204 | | - nr_blocks <<= PAGE_SHIFT - 9; |
---|
205 | | - if (blkdev_issue_discard(si->bdev, start_block, |
---|
206 | | - nr_blocks, GFP_NOIO, 0)) |
---|
207 | | - break; |
---|
208 | | - } |
---|
209 | | - |
---|
210 | | - se = list_next_entry(se, list); |
---|
| 263 | + se = next_se(se); |
---|
211 | 264 | } |
---|
212 | 265 | } |
---|
213 | 266 | |
---|
.. | .. |
---|
562 | 615 | { |
---|
563 | 616 | struct percpu_cluster *cluster; |
---|
564 | 617 | struct swap_cluster_info *ci; |
---|
565 | | - bool found_free; |
---|
566 | 618 | unsigned long tmp, max; |
---|
567 | 619 | |
---|
568 | 620 | new_cluster: |
---|
.. | .. |
---|
575 | 627 | } else if (!cluster_list_empty(&si->discard_clusters)) { |
---|
576 | 628 | /* |
---|
577 | 629 | * we don't have free cluster but have some clusters in |
---|
578 | | - * discarding, do discard now and reclaim them |
---|
| 630 | + * discarding, do discard now and reclaim them, then |
---|
| 631 | + * reread cluster_next_cpu since we dropped si->lock |
---|
579 | 632 | */ |
---|
580 | 633 | swap_do_scheduled_discard(si); |
---|
581 | | - *scan_base = *offset = si->cluster_next; |
---|
| 634 | + *scan_base = this_cpu_read(*si->cluster_next_cpu); |
---|
| 635 | + *offset = *scan_base; |
---|
582 | 636 | goto new_cluster; |
---|
583 | 637 | } else |
---|
584 | 638 | return false; |
---|
585 | 639 | } |
---|
586 | | - |
---|
587 | | - found_free = false; |
---|
588 | 640 | |
---|
589 | 641 | /* |
---|
590 | 642 | * Other CPUs can use our cluster if they can't find a free cluster, |
---|
.. | .. |
---|
593 | 645 | tmp = cluster->next; |
---|
594 | 646 | max = min_t(unsigned long, si->max, |
---|
595 | 647 | (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); |
---|
596 | | - if (tmp >= max) { |
---|
597 | | - cluster_set_null(&cluster->index); |
---|
598 | | - goto new_cluster; |
---|
599 | | - } |
---|
600 | | - ci = lock_cluster(si, tmp); |
---|
601 | | - while (tmp < max) { |
---|
602 | | - if (!si->swap_map[tmp]) { |
---|
603 | | - found_free = true; |
---|
604 | | - break; |
---|
| 648 | + if (tmp < max) { |
---|
| 649 | + ci = lock_cluster(si, tmp); |
---|
| 650 | + while (tmp < max) { |
---|
| 651 | + if (!si->swap_map[tmp]) |
---|
| 652 | + break; |
---|
| 653 | + tmp++; |
---|
605 | 654 | } |
---|
606 | | - tmp++; |
---|
| 655 | + unlock_cluster(ci); |
---|
607 | 656 | } |
---|
608 | | - unlock_cluster(ci); |
---|
609 | | - if (!found_free) { |
---|
| 657 | + if (tmp >= max) { |
---|
610 | 658 | cluster_set_null(&cluster->index); |
---|
611 | 659 | goto new_cluster; |
---|
612 | 660 | } |
---|
613 | 661 | cluster->next = tmp + 1; |
---|
614 | 662 | *offset = tmp; |
---|
615 | 663 | *scan_base = tmp; |
---|
616 | | - return found_free; |
---|
| 664 | + return true; |
---|
617 | 665 | } |
---|
618 | 666 | |
---|
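The reworked per-cpu cluster scan above drops the separate found_free flag: whether tmp reached max is already enough to distinguish "free slot found" from "cluster exhausted, pick a new one". A tiny standalone sketch of that idiom with made-up map contents:

```c
#include <stdio.h>

/*
 * Scan map[tmp..max) for a zero byte. Returns the index of the free slot,
 * or max if the window is exhausted -- the same signal the patched
 * scan_swap_map_try_ssd_cluster() now relies on instead of found_free.
 */
static unsigned long scan_window(const unsigned char *map,
				 unsigned long tmp, unsigned long max)
{
	while (tmp < max && map[tmp])
		tmp++;
	return tmp;
}

int main(void)
{
	const unsigned char map[8] = { 1, 1, 1, 0, 1, 1, 1, 1 };
	unsigned long tmp = scan_window(map, 1, 8);

	if (tmp >= 8)
		printf("cluster exhausted, try a new cluster\n");
	else
		printf("free slot at offset %lu\n", tmp);	/* prints 3 */
	return 0;
}
```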
619 | 667 | static void __del_from_avail_list(struct swap_info_struct *p) |
---|
.. | .. |
---|
639 | 687 | if (offset == si->lowest_bit) |
---|
640 | 688 | si->lowest_bit += nr_entries; |
---|
641 | 689 | if (end == si->highest_bit) |
---|
642 | | - si->highest_bit -= nr_entries; |
---|
| 690 | + WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); |
---|
643 | 691 | si->inuse_pages += nr_entries; |
---|
644 | 692 | if (si->inuse_pages == si->pages) { |
---|
645 | 693 | si->lowest_bit = si->max; |
---|
.. | .. |
---|
663 | 711 | static void swap_range_free(struct swap_info_struct *si, unsigned long offset, |
---|
664 | 712 | unsigned int nr_entries) |
---|
665 | 713 | { |
---|
| 714 | + unsigned long begin = offset; |
---|
666 | 715 | unsigned long end = offset + nr_entries - 1; |
---|
667 | 716 | void (*swap_slot_free_notify)(struct block_device *, unsigned long); |
---|
| 717 | + bool skip = false; |
---|
668 | 718 | |
---|
669 | 719 | if (offset < si->lowest_bit) |
---|
670 | 720 | si->lowest_bit = offset; |
---|
671 | 721 | if (end > si->highest_bit) { |
---|
672 | 722 | bool was_full = !si->highest_bit; |
---|
673 | 723 | |
---|
674 | | - si->highest_bit = end; |
---|
| 724 | + WRITE_ONCE(si->highest_bit, end); |
---|
675 | 725 | if (was_full && (si->flags & SWP_WRITEOK)) |
---|
676 | 726 | add_to_avail_list(si); |
---|
677 | 727 | } |
---|
678 | | - atomic_long_add(nr_entries, &nr_swap_pages); |
---|
| 728 | + trace_android_vh_account_swap_pages(si, &skip); |
---|
| 729 | + if (!skip) |
---|
| 730 | + atomic_long_add(nr_entries, &nr_swap_pages); |
---|
679 | 731 | si->inuse_pages -= nr_entries; |
---|
680 | 732 | if (si->flags & SWP_BLKDEV) |
---|
681 | 733 | swap_slot_free_notify = |
---|
.. | .. |
---|
683 | 735 | else |
---|
684 | 736 | swap_slot_free_notify = NULL; |
---|
685 | 737 | while (offset <= end) { |
---|
| 738 | + arch_swap_invalidate_page(si->type, offset); |
---|
686 | 739 | frontswap_invalidate_page(si->type, offset); |
---|
687 | 740 | if (swap_slot_free_notify) |
---|
688 | 741 | swap_slot_free_notify(si->bdev, offset); |
---|
689 | 742 | offset++; |
---|
690 | 743 | } |
---|
| 744 | + clear_shadow_from_swap_cache(si->type, begin, end); |
---|
691 | 745 | } |
---|
692 | 746 | |
---|
693 | | -static int scan_swap_map_slots(struct swap_info_struct *si, |
---|
| 747 | +static void set_cluster_next(struct swap_info_struct *si, unsigned long next) |
---|
| 748 | +{ |
---|
| 749 | + unsigned long prev; |
---|
| 750 | + |
---|
| 751 | + if (!(si->flags & SWP_SOLIDSTATE)) { |
---|
| 752 | + si->cluster_next = next; |
---|
| 753 | + return; |
---|
| 754 | + } |
---|
| 755 | + |
---|
| 756 | + prev = this_cpu_read(*si->cluster_next_cpu); |
---|
| 757 | + /* |
---|
| 758 | + * Cross the swap address space size aligned trunk, choose |
---|
| 759 | + * another trunk randomly to avoid lock contention on swap |
---|
| 760 | + * address space if possible. |
---|
| 761 | + */ |
---|
| 762 | + if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) != |
---|
| 763 | + (next >> SWAP_ADDRESS_SPACE_SHIFT)) { |
---|
| 764 | + /* No free swap slots available */ |
---|
| 765 | + if (si->highest_bit <= si->lowest_bit) |
---|
| 766 | + return; |
---|
| 767 | + next = si->lowest_bit + |
---|
| 768 | + prandom_u32_max(si->highest_bit - si->lowest_bit + 1); |
---|
| 769 | + next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES); |
---|
| 770 | + next = max_t(unsigned int, next, si->lowest_bit); |
---|
| 771 | + } |
---|
| 772 | + this_cpu_write(*si->cluster_next_cpu, next); |
---|
| 773 | +} |
---|
| 774 | + |
---|
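set_cluster_next() above keeps the per-cpu allocation cursor for SSDs; when the cursor would cross into a different SWAP_ADDRESS_SPACE_PAGES-sized trunk, it jumps to a randomly chosen aligned trunk so that CPUs spread across swap-cache address spaces. A rough standalone sketch of that trunk selection (the SWAP_ADDRESS_SPACE_SHIFT value and the rand()-based source are assumptions for illustration; the kernel uses prandom_u32_max()):

```c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define SWAP_ADDRESS_SPACE_SHIFT 14	/* assumed: 1 << 14 pages per swap address space */
#define SWAP_ADDRESS_SPACE_PAGES (1UL << SWAP_ADDRESS_SPACE_SHIFT)
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))	/* a must be a power of two */

/* Pick the next per-cpu cursor value, following the logic of set_cluster_next(). */
static unsigned long pick_next(unsigned long prev, unsigned long next,
			       unsigned long lowest, unsigned long highest)
{
	if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) == (next >> SWAP_ADDRESS_SPACE_SHIFT))
		return next;		/* still inside the same trunk: keep going */
	if (highest <= lowest)
		return prev;		/* no free slots left: leave the cursor alone */
	next = lowest + (unsigned long)rand() % (highest - lowest + 1);
	next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES);
	if (next < lowest)
		next = lowest;		/* clamp back into the usable range */
	return next;
}

int main(void)
{
	srand((unsigned)time(NULL));
	printf("next cursor = %lu\n",
	       pick_next(100, SWAP_ADDRESS_SPACE_PAGES + 5, 1, 200000));
	return 0;
}
```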
| 775 | +int scan_swap_map_slots(struct swap_info_struct *si, |
---|
694 | 776 | unsigned char usage, int nr, |
---|
695 | 777 | swp_entry_t slots[]) |
---|
696 | 778 | { |
---|
.. | .. |
---|
700 | 782 | unsigned long last_in_cluster = 0; |
---|
701 | 783 | int latency_ration = LATENCY_LIMIT; |
---|
702 | 784 | int n_ret = 0; |
---|
703 | | - |
---|
704 | | - if (nr > SWAP_BATCH) |
---|
705 | | - nr = SWAP_BATCH; |
---|
| 785 | + bool scanned_many = false; |
---|
706 | 786 | |
---|
707 | 787 | /* |
---|
708 | 788 | * We try to cluster swap pages by allocating them sequentially |
---|
.. | .. |
---|
716 | 796 | */ |
---|
717 | 797 | |
---|
718 | 798 | si->flags += SWP_SCANNING; |
---|
719 | | - scan_base = offset = si->cluster_next; |
---|
| 799 | + /* |
---|
| 800 | + * Use percpu scan base for SSD to reduce lock contention on |
---|
| 801 | + * cluster and swap cache. For HDD, sequential access is more |
---|
| 802 | + * important. |
---|
| 803 | + */ |
---|
| 804 | + if (si->flags & SWP_SOLIDSTATE) |
---|
| 805 | + scan_base = this_cpu_read(*si->cluster_next_cpu); |
---|
| 806 | + else |
---|
| 807 | + scan_base = si->cluster_next; |
---|
| 808 | + offset = scan_base; |
---|
720 | 809 | |
---|
721 | 810 | /* SSD algorithm */ |
---|
722 | 811 | if (si->cluster_info) { |
---|
723 | | - if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) |
---|
724 | | - goto checks; |
---|
725 | | - else |
---|
| 812 | + if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) |
---|
726 | 813 | goto scan; |
---|
727 | | - } |
---|
728 | | - |
---|
729 | | - if (unlikely(!si->cluster_nr--)) { |
---|
| 814 | + } else if (unlikely(!si->cluster_nr--)) { |
---|
730 | 815 | if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { |
---|
731 | 816 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
---|
732 | 817 | goto checks; |
---|
.. | .. |
---|
789 | 874 | int swap_was_freed; |
---|
790 | 875 | unlock_cluster(ci); |
---|
791 | 876 | spin_unlock(&si->lock); |
---|
792 | | - swap_was_freed = __try_to_reclaim_swap(si, offset); |
---|
| 877 | + swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); |
---|
793 | 878 | spin_lock(&si->lock); |
---|
794 | 879 | /* entry was freed successfully, try to use this again */ |
---|
795 | 880 | if (swap_was_freed) |
---|
.. | .. |
---|
804 | 889 | else |
---|
805 | 890 | goto done; |
---|
806 | 891 | } |
---|
807 | | - si->swap_map[offset] = usage; |
---|
| 892 | + WRITE_ONCE(si->swap_map[offset], usage); |
---|
808 | 893 | inc_cluster_info_page(si, si->cluster_info, offset); |
---|
809 | 894 | unlock_cluster(ci); |
---|
810 | 895 | |
---|
811 | 896 | swap_range_alloc(si, offset, 1); |
---|
812 | | - si->cluster_next = offset + 1; |
---|
813 | 897 | slots[n_ret++] = swp_entry(si->type, offset); |
---|
814 | 898 | |
---|
815 | 899 | /* got enough slots or reach max slots? */ |
---|
.. | .. |
---|
832 | 916 | if (si->cluster_info) { |
---|
833 | 917 | if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) |
---|
834 | 918 | goto checks; |
---|
835 | | - else |
---|
836 | | - goto done; |
---|
837 | | - } |
---|
838 | | - /* non-ssd case */ |
---|
839 | | - ++offset; |
---|
840 | | - |
---|
841 | | - /* non-ssd case, still more slots in cluster? */ |
---|
842 | | - if (si->cluster_nr && !si->swap_map[offset]) { |
---|
| 919 | + } else if (si->cluster_nr && !si->swap_map[++offset]) { |
---|
| 920 | + /* non-ssd case, still more slots in cluster? */ |
---|
843 | 921 | --si->cluster_nr; |
---|
844 | 922 | goto checks; |
---|
845 | 923 | } |
---|
846 | 924 | |
---|
| 925 | + /* |
---|
| 926 | + * Even if there's no free clusters available (fragmented), |
---|
| 927 | + * try to scan a little more quickly with lock held unless we |
---|
| 928 | + * have scanned too many slots already. |
---|
| 929 | + */ |
---|
| 930 | + if (!scanned_many) { |
---|
| 931 | + unsigned long scan_limit; |
---|
| 932 | + |
---|
| 933 | + if (offset < scan_base) |
---|
| 934 | + scan_limit = scan_base; |
---|
| 935 | + else |
---|
| 936 | + scan_limit = si->highest_bit; |
---|
| 937 | + for (; offset <= scan_limit && --latency_ration > 0; |
---|
| 938 | + offset++) { |
---|
| 939 | + if (!si->swap_map[offset]) |
---|
| 940 | + goto checks; |
---|
| 941 | + } |
---|
| 942 | + } |
---|
| 943 | + |
---|
847 | 944 | done: |
---|
| 945 | + set_cluster_next(si, offset + 1); |
---|
848 | 946 | si->flags -= SWP_SCANNING; |
---|
849 | 947 | return n_ret; |
---|
850 | 948 | |
---|
851 | 949 | scan: |
---|
852 | 950 | spin_unlock(&si->lock); |
---|
853 | | - while (++offset <= si->highest_bit) { |
---|
854 | | - if (!si->swap_map[offset]) { |
---|
| 951 | + while (++offset <= READ_ONCE(si->highest_bit)) { |
---|
| 952 | + if (data_race(!si->swap_map[offset])) { |
---|
855 | 953 | spin_lock(&si->lock); |
---|
856 | 954 | goto checks; |
---|
857 | 955 | } |
---|
858 | | - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { |
---|
| 956 | + if (vm_swap_full() && |
---|
| 957 | + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { |
---|
859 | 958 | spin_lock(&si->lock); |
---|
860 | 959 | goto checks; |
---|
861 | 960 | } |
---|
862 | 961 | if (unlikely(--latency_ration < 0)) { |
---|
863 | 962 | cond_resched(); |
---|
864 | 963 | latency_ration = LATENCY_LIMIT; |
---|
| 964 | + scanned_many = true; |
---|
865 | 965 | } |
---|
866 | 966 | } |
---|
867 | 967 | offset = si->lowest_bit; |
---|
868 | 968 | while (offset < scan_base) { |
---|
869 | | - if (!si->swap_map[offset]) { |
---|
| 969 | + if (data_race(!si->swap_map[offset])) { |
---|
870 | 970 | spin_lock(&si->lock); |
---|
871 | 971 | goto checks; |
---|
872 | 972 | } |
---|
873 | | - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { |
---|
| 973 | + if (vm_swap_full() && |
---|
| 974 | + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { |
---|
874 | 975 | spin_lock(&si->lock); |
---|
875 | 976 | goto checks; |
---|
876 | 977 | } |
---|
877 | 978 | if (unlikely(--latency_ration < 0)) { |
---|
878 | 979 | cond_resched(); |
---|
879 | 980 | latency_ration = LATENCY_LIMIT; |
---|
| 981 | + scanned_many = true; |
---|
880 | 982 | } |
---|
881 | 983 | offset++; |
---|
882 | 984 | } |
---|
.. | .. |
---|
886 | 988 | si->flags -= SWP_SCANNING; |
---|
887 | 989 | return n_ret; |
---|
888 | 990 | } |
---|
| 991 | +EXPORT_SYMBOL_GPL(scan_swap_map_slots); |
---|
889 | 992 | |
---|
890 | | -static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot) |
---|
| 993 | +int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot) |
---|
891 | 994 | { |
---|
892 | 995 | unsigned long idx; |
---|
893 | 996 | struct swap_cluster_info *ci; |
---|
.. | .. |
---|
921 | 1024 | |
---|
922 | 1025 | return 1; |
---|
923 | 1026 | } |
---|
| 1027 | +EXPORT_SYMBOL_GPL(swap_alloc_cluster); |
---|
924 | 1028 | |
---|
925 | 1029 | static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx) |
---|
926 | 1030 | { |
---|
.. | .. |
---|
928 | 1032 | struct swap_cluster_info *ci; |
---|
929 | 1033 | |
---|
930 | 1034 | ci = lock_cluster(si, offset); |
---|
| 1035 | + memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); |
---|
931 | 1036 | cluster_set_count_flag(ci, 0, 0); |
---|
932 | 1037 | free_cluster(si, idx); |
---|
933 | 1038 | unlock_cluster(ci); |
---|
.. | .. |
---|
960 | 1065 | /* Only single cluster request supported */ |
---|
961 | 1066 | WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER); |
---|
962 | 1067 | |
---|
| 1068 | + spin_lock(&swap_avail_lock); |
---|
| 1069 | + |
---|
963 | 1070 | avail_pgs = atomic_long_read(&nr_swap_pages) / size; |
---|
964 | | - if (avail_pgs <= 0) |
---|
| 1071 | + if (avail_pgs <= 0) { |
---|
| 1072 | + spin_unlock(&swap_avail_lock); |
---|
965 | 1073 | goto noswap; |
---|
| 1074 | + } |
---|
966 | 1075 | |
---|
967 | | - if (n_goal > SWAP_BATCH) |
---|
968 | | - n_goal = SWAP_BATCH; |
---|
969 | | - |
---|
970 | | - if (n_goal > avail_pgs) |
---|
971 | | - n_goal = avail_pgs; |
---|
| 1076 | + n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs); |
---|
972 | 1077 | |
---|
973 | 1078 | atomic_long_sub(n_goal * size, &nr_swap_pages); |
---|
974 | | - |
---|
975 | | - spin_lock(&swap_avail_lock); |
---|
976 | 1079 | |
---|
977 | 1080 | start_over: |
---|
978 | 1081 | node = numa_node_id(); |
---|
.. | .. |
---|
1041 | 1144 | { |
---|
1042 | 1145 | struct swap_info_struct *si = swap_type_to_swap_info(type); |
---|
1043 | 1146 | pgoff_t offset; |
---|
| 1147 | + bool skip = false; |
---|
1044 | 1148 | |
---|
1045 | 1149 | if (!si) |
---|
1046 | 1150 | goto fail; |
---|
1047 | 1151 | |
---|
1048 | 1152 | spin_lock(&si->lock); |
---|
1049 | 1153 | if (si->flags & SWP_WRITEOK) { |
---|
1050 | | - atomic_long_dec(&nr_swap_pages); |
---|
1051 | 1154 | /* This is called for allocating swap entry, not cache */ |
---|
1052 | 1155 | offset = scan_swap_map(si, 1); |
---|
1053 | 1156 | if (offset) { |
---|
| 1157 | + trace_android_vh_account_swap_pages(si, &skip); |
---|
| 1158 | + if (!skip) |
---|
| 1159 | + atomic_long_dec(&nr_swap_pages); |
---|
1054 | 1160 | spin_unlock(&si->lock); |
---|
1055 | 1161 | return swp_entry(type, offset); |
---|
1056 | 1162 | } |
---|
1057 | | - atomic_long_inc(&nr_swap_pages); |
---|
1058 | 1163 | } |
---|
1059 | 1164 | spin_unlock(&si->lock); |
---|
1060 | 1165 | fail: |
---|
.. | .. |
---|
1064 | 1169 | static struct swap_info_struct *__swap_info_get(swp_entry_t entry) |
---|
1065 | 1170 | { |
---|
1066 | 1171 | struct swap_info_struct *p; |
---|
1067 | | - unsigned long offset, type; |
---|
| 1172 | + unsigned long offset; |
---|
1068 | 1173 | |
---|
1069 | 1174 | if (!entry.val) |
---|
1070 | 1175 | goto out; |
---|
1071 | | - type = swp_type(entry); |
---|
1072 | | - p = swap_type_to_swap_info(type); |
---|
| 1176 | + p = swp_swap_info(entry); |
---|
1073 | 1177 | if (!p) |
---|
1074 | 1178 | goto bad_nofile; |
---|
1075 | | - if (!(p->flags & SWP_USED)) |
---|
| 1179 | + if (data_race(!(p->flags & SWP_USED))) |
---|
1076 | 1180 | goto bad_device; |
---|
1077 | 1181 | offset = swp_offset(entry); |
---|
1078 | 1182 | if (offset >= p->max) |
---|
.. | .. |
---|
1098 | 1202 | p = __swap_info_get(entry); |
---|
1099 | 1203 | if (!p) |
---|
1100 | 1204 | goto out; |
---|
1101 | | - if (!p->swap_map[swp_offset(entry)]) |
---|
| 1205 | + if (data_race(!p->swap_map[swp_offset(entry)])) |
---|
1102 | 1206 | goto bad_free; |
---|
1103 | 1207 | return p; |
---|
1104 | 1208 | |
---|
1105 | 1209 | bad_free: |
---|
1106 | 1210 | pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val); |
---|
1107 | | - goto out; |
---|
1108 | 1211 | out: |
---|
1109 | 1212 | return NULL; |
---|
1110 | 1213 | } |
---|
.. | .. |
---|
1167 | 1270 | } |
---|
1168 | 1271 | |
---|
1169 | 1272 | usage = count | has_cache; |
---|
1170 | | - p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; |
---|
| 1273 | + if (usage) |
---|
| 1274 | + WRITE_ONCE(p->swap_map[offset], usage); |
---|
| 1275 | + else |
---|
| 1276 | + WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); |
---|
1171 | 1277 | |
---|
1172 | 1278 | return usage; |
---|
1173 | 1279 | } |
---|
1174 | 1280 | |
---|
| 1281 | +/* |
---|
| 1282 | + * Check whether swap entry is valid in the swap device. If so, |
---|
| 1283 | + * return pointer to swap_info_struct, and keep the swap entry valid |
---|
| 1284 | + * via preventing the swap device from being swapoff, until |
---|
| 1285 | + * put_swap_device() is called. Otherwise return NULL. |
---|
| 1286 | + * |
---|
| 1287 | + * The entirety of the RCU read critical section must come before the |
---|
| 1288 | + * return from or after the call to synchronize_rcu() in |
---|
| 1289 | + * enable_swap_info() or swapoff(). So if "si->flags & SWP_VALID" is |
---|
| 1290 | + * true, the si->map, si->cluster_info, etc. must be valid in the |
---|
| 1291 | + * critical section. |
---|
| 1292 | + * |
---|
| 1293 | + * Notice that swapoff or swapoff+swapon can still happen before the |
---|
| 1294 | + * rcu_read_lock() in get_swap_device() or after the rcu_read_unlock() |
---|
| 1295 | + * in put_swap_device() if there isn't any other way to prevent |
---|
| 1296 | + * swapoff, such as page lock, page table lock, etc. The caller must |
---|
| 1297 | + * be prepared for that. For example, the following situation is |
---|
| 1298 | + * possible. |
---|
| 1299 | + * |
---|
| 1300 | + * CPU1 CPU2 |
---|
| 1301 | + * do_swap_page() |
---|
| 1302 | + * ... swapoff+swapon |
---|
| 1303 | + * __read_swap_cache_async() |
---|
| 1304 | + * swapcache_prepare() |
---|
| 1305 | + * __swap_duplicate() |
---|
| 1306 | + * // check swap_map |
---|
| 1307 | + * // verify PTE not changed |
---|
| 1308 | + * |
---|
| 1309 | + * In __swap_duplicate(), the swap_map need to be checked before |
---|
| 1310 | + * changing partly because the specified swap entry may be for another |
---|
| 1311 | + * swap device which has been swapoff. And in do_swap_page(), after |
---|
| 1312 | + * the page is read from the swap device, the PTE is verified not |
---|
| 1313 | + * changed with the page table locked to check whether the swap device |
---|
| 1314 | + * has been swapoff or swapoff+swapon. |
---|
| 1315 | + */ |
---|
| 1316 | +struct swap_info_struct *get_swap_device(swp_entry_t entry) |
---|
| 1317 | +{ |
---|
| 1318 | + struct swap_info_struct *si; |
---|
| 1319 | + unsigned long offset; |
---|
| 1320 | + |
---|
| 1321 | + if (!entry.val) |
---|
| 1322 | + goto out; |
---|
| 1323 | + si = swp_swap_info(entry); |
---|
| 1324 | + if (!si) |
---|
| 1325 | + goto bad_nofile; |
---|
| 1326 | + |
---|
| 1327 | + rcu_read_lock(); |
---|
| 1328 | + if (data_race(!(si->flags & SWP_VALID))) |
---|
| 1329 | + goto unlock_out; |
---|
| 1330 | + offset = swp_offset(entry); |
---|
| 1331 | + if (offset >= si->max) |
---|
| 1332 | + goto unlock_out; |
---|
| 1333 | + |
---|
| 1334 | + return si; |
---|
| 1335 | +bad_nofile: |
---|
| 1336 | + pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); |
---|
| 1337 | +out: |
---|
| 1338 | + return NULL; |
---|
| 1339 | +unlock_out: |
---|
| 1340 | + rcu_read_unlock(); |
---|
| 1341 | + return NULL; |
---|
| 1342 | +} |
---|
| 1343 | + |
---|
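The comment above spells out the contract: between get_swap_device() and put_swap_device(), the swap_info_struct cannot be torn down by swapoff, although the individual entry may still be freed, so callers re-check any state they depend on. A hedged sketch of the caller pattern (kernel context rather than a standalone program; it mirrors the __swap_count() rework later in this patch):

```c
/* Illustrative caller pattern only. */
static int entry_swap_count(swp_entry_t entry)
{
	struct swap_info_struct *si;
	int count = 0;

	si = get_swap_device(entry);	/* rcu_read_lock() is held on success */
	if (si) {
		/* si->swap_map cannot disappear under us while the device is held */
		count = swap_count(si->swap_map[swp_offset(entry)]);
		put_swap_device(si);	/* drops rcu_read_lock() */
	}
	return count;
}
```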
1175 | 1344 | static unsigned char __swap_entry_free(struct swap_info_struct *p, |
---|
1176 | | - swp_entry_t entry, unsigned char usage) |
---|
| 1345 | + swp_entry_t entry) |
---|
1177 | 1346 | { |
---|
1178 | 1347 | struct swap_cluster_info *ci; |
---|
1179 | 1348 | unsigned long offset = swp_offset(entry); |
---|
| 1349 | + unsigned char usage; |
---|
1180 | 1350 | |
---|
1181 | 1351 | ci = lock_cluster_or_swap_info(p, offset); |
---|
1182 | | - usage = __swap_entry_free_locked(p, offset, usage); |
---|
| 1352 | + usage = __swap_entry_free_locked(p, offset, 1); |
---|
1183 | 1353 | unlock_cluster_or_swap_info(p, ci); |
---|
| 1354 | + if (!usage) |
---|
| 1355 | + free_swap_slot(entry); |
---|
1184 | 1356 | |
---|
1185 | 1357 | return usage; |
---|
1186 | 1358 | } |
---|
.. | .. |
---|
1211 | 1383 | struct swap_info_struct *p; |
---|
1212 | 1384 | |
---|
1213 | 1385 | p = _swap_info_get(entry); |
---|
1214 | | - if (p) { |
---|
1215 | | - if (!__swap_entry_free(p, entry, 1)) |
---|
1216 | | - free_swap_slot(entry); |
---|
1217 | | - } |
---|
| 1386 | + if (p) |
---|
| 1387 | + __swap_entry_free(p, entry); |
---|
1218 | 1388 | } |
---|
1219 | 1389 | |
---|
1220 | 1390 | /* |
---|
.. | .. |
---|
1229 | 1399 | unsigned char *map; |
---|
1230 | 1400 | unsigned int i, free_entries = 0; |
---|
1231 | 1401 | unsigned char val; |
---|
1232 | | - int size = swap_entry_size(hpage_nr_pages(page)); |
---|
| 1402 | + int size = swap_entry_size(thp_nr_pages(page)); |
---|
1233 | 1403 | |
---|
1234 | 1404 | si = _swap_info_get(entry); |
---|
1235 | 1405 | if (!si) |
---|
.. | .. |
---|
1249 | 1419 | if (free_entries == SWAPFILE_CLUSTER) { |
---|
1250 | 1420 | unlock_cluster_or_swap_info(si, ci); |
---|
1251 | 1421 | spin_lock(&si->lock); |
---|
1252 | | - ci = lock_cluster(si, offset); |
---|
1253 | | - memset(map, 0, SWAPFILE_CLUSTER); |
---|
1254 | | - unlock_cluster(ci); |
---|
1255 | 1422 | mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER); |
---|
1256 | 1423 | swap_free_cluster(si, idx); |
---|
1257 | 1424 | spin_unlock(&si->lock); |
---|
.. | .. |
---|
1321 | 1488 | if (p) |
---|
1322 | 1489 | spin_unlock(&p->lock); |
---|
1323 | 1490 | } |
---|
| 1491 | +EXPORT_SYMBOL_GPL(swapcache_free_entries); |
---|
1324 | 1492 | |
---|
1325 | 1493 | /* |
---|
1326 | 1494 | * How many references to page are currently swapped out? |
---|
.. | .. |
---|
1346 | 1514 | return count; |
---|
1347 | 1515 | } |
---|
1348 | 1516 | |
---|
1349 | | -int __swap_count(struct swap_info_struct *si, swp_entry_t entry) |
---|
| 1517 | +int __swap_count(swp_entry_t entry) |
---|
1350 | 1518 | { |
---|
| 1519 | + struct swap_info_struct *si; |
---|
1351 | 1520 | pgoff_t offset = swp_offset(entry); |
---|
| 1521 | + int count = 0; |
---|
1352 | 1522 | |
---|
1353 | | - return swap_count(si->swap_map[offset]); |
---|
| 1523 | + si = get_swap_device(entry); |
---|
| 1524 | + if (si) { |
---|
| 1525 | + count = swap_count(si->swap_map[offset]); |
---|
| 1526 | + put_swap_device(si); |
---|
| 1527 | + } |
---|
| 1528 | + return count; |
---|
1354 | 1529 | } |
---|
1355 | 1530 | |
---|
1356 | 1531 | static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) |
---|
.. | .. |
---|
1375 | 1550 | int count = 0; |
---|
1376 | 1551 | struct swap_info_struct *si; |
---|
1377 | 1552 | |
---|
1378 | | - si = __swap_info_get(entry); |
---|
1379 | | - if (si) |
---|
| 1553 | + si = get_swap_device(entry); |
---|
| 1554 | + if (si) { |
---|
1380 | 1555 | count = swap_swapcount(si, entry); |
---|
| 1556 | + put_swap_device(si); |
---|
| 1557 | + } |
---|
1381 | 1558 | return count; |
---|
1382 | 1559 | } |
---|
1383 | 1560 | |
---|
.. | .. |
---|
1624 | 1801 | int free_swap_and_cache(swp_entry_t entry) |
---|
1625 | 1802 | { |
---|
1626 | 1803 | struct swap_info_struct *p; |
---|
1627 | | - struct page *page = NULL; |
---|
1628 | 1804 | unsigned char count; |
---|
1629 | 1805 | |
---|
1630 | 1806 | if (non_swap_entry(entry)) |
---|
.. | .. |
---|
1632 | 1808 | |
---|
1633 | 1809 | p = _swap_info_get(entry); |
---|
1634 | 1810 | if (p) { |
---|
1635 | | - count = __swap_entry_free(p, entry, 1); |
---|
| 1811 | + count = __swap_entry_free(p, entry); |
---|
1636 | 1812 | if (count == SWAP_HAS_CACHE && |
---|
1637 | | - !swap_page_trans_huge_swapped(p, entry)) { |
---|
1638 | | - page = find_get_page(swap_address_space(entry), |
---|
1639 | | - swp_offset(entry)); |
---|
1640 | | - if (page && !trylock_page(page)) { |
---|
1641 | | - put_page(page); |
---|
1642 | | - page = NULL; |
---|
1643 | | - } |
---|
1644 | | - } else if (!count) |
---|
1645 | | - free_swap_slot(entry); |
---|
1646 | | - } |
---|
1647 | | - if (page) { |
---|
1648 | | - /* |
---|
1649 | | - * Not mapped elsewhere, or swap space full? Free it! |
---|
1650 | | - * Also recheck PageSwapCache now page is locked (above). |
---|
1651 | | - */ |
---|
1652 | | - if (PageSwapCache(page) && !PageWriteback(page) && |
---|
1653 | | - (!page_mapped(page) || mem_cgroup_swap_full(page)) && |
---|
1654 | | - !swap_page_trans_huge_swapped(p, entry)) { |
---|
1655 | | - page = compound_head(page); |
---|
1656 | | - delete_from_swap_cache(page); |
---|
1657 | | - SetPageDirty(page); |
---|
1658 | | - } |
---|
1659 | | - unlock_page(page); |
---|
1660 | | - put_page(page); |
---|
| 1813 | + !swap_page_trans_huge_swapped(p, entry)) |
---|
| 1814 | + __try_to_reclaim_swap(p, swp_offset(entry), |
---|
| 1815 | + TTRS_UNMAPPED | TTRS_FULL); |
---|
1661 | 1816 | } |
---|
1662 | 1817 | return p != NULL; |
---|
1663 | 1818 | } |
---|
.. | .. |
---|
1671 | 1826 | * |
---|
1672 | 1827 | * This is needed for the suspend to disk (aka swsusp). |
---|
1673 | 1828 | */ |
---|
1674 | | -int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) |
---|
| 1829 | +int swap_type_of(dev_t device, sector_t offset) |
---|
1675 | 1830 | { |
---|
1676 | | - struct block_device *bdev = NULL; |
---|
1677 | 1831 | int type; |
---|
1678 | 1832 | |
---|
1679 | | - if (device) |
---|
1680 | | - bdev = bdget(device); |
---|
| 1833 | + if (!device) |
---|
| 1834 | + return -1; |
---|
1681 | 1835 | |
---|
1682 | 1836 | spin_lock(&swap_lock); |
---|
1683 | 1837 | for (type = 0; type < nr_swapfiles; type++) { |
---|
.. | .. |
---|
1686 | 1840 | if (!(sis->flags & SWP_WRITEOK)) |
---|
1687 | 1841 | continue; |
---|
1688 | 1842 | |
---|
1689 | | - if (!bdev) { |
---|
1690 | | - if (bdev_p) |
---|
1691 | | - *bdev_p = bdgrab(sis->bdev); |
---|
1692 | | - |
---|
1693 | | - spin_unlock(&swap_lock); |
---|
1694 | | - return type; |
---|
1695 | | - } |
---|
1696 | | - if (bdev == sis->bdev) { |
---|
1697 | | - struct swap_extent *se = &sis->first_swap_extent; |
---|
| 1843 | + if (device == sis->bdev->bd_dev) { |
---|
| 1844 | + struct swap_extent *se = first_se(sis); |
---|
1698 | 1845 | |
---|
1699 | 1846 | if (se->start_block == offset) { |
---|
1700 | | - if (bdev_p) |
---|
1701 | | - *bdev_p = bdgrab(sis->bdev); |
---|
1702 | | - |
---|
1703 | 1847 | spin_unlock(&swap_lock); |
---|
1704 | | - bdput(bdev); |
---|
1705 | 1848 | return type; |
---|
1706 | 1849 | } |
---|
1707 | 1850 | } |
---|
1708 | 1851 | } |
---|
1709 | 1852 | spin_unlock(&swap_lock); |
---|
1710 | | - if (bdev) |
---|
1711 | | - bdput(bdev); |
---|
| 1853 | + return -ENODEV; |
---|
| 1854 | +} |
---|
1712 | 1855 | |
---|
| 1856 | +int find_first_swap(dev_t *device) |
---|
| 1857 | +{ |
---|
| 1858 | + int type; |
---|
| 1859 | + |
---|
| 1860 | + spin_lock(&swap_lock); |
---|
| 1861 | + for (type = 0; type < nr_swapfiles; type++) { |
---|
| 1862 | + struct swap_info_struct *sis = swap_info[type]; |
---|
| 1863 | + |
---|
| 1864 | + if (!(sis->flags & SWP_WRITEOK)) |
---|
| 1865 | + continue; |
---|
| 1866 | + *device = sis->bdev->bd_dev; |
---|
| 1867 | + spin_unlock(&swap_lock); |
---|
| 1868 | + return type; |
---|
| 1869 | + } |
---|
| 1870 | + spin_unlock(&swap_lock); |
---|
1713 | 1871 | return -ENODEV; |
---|
1714 | 1872 | } |
---|
1715 | 1873 | |
---|
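swap_type_of() now takes the device number directly and no longer hands back a pinned block_device, and hibernation callers that just want "any writable swap device" use the new find_first_swap() instead. A hedged sketch of how a swsusp-style caller might combine the two (kernel context; resume_swap_type() and its arguments are hypothetical names for illustration):

```c
/* Illustrative only: resolve a resume device to a swap type, roughly as swsusp does. */
static int resume_swap_type(dev_t resume_device, sector_t resume_offset)
{
	if (resume_device)
		return swap_type_of(resume_device, resume_offset);

	/* Nothing configured: fall back to the first writable swap device. */
	return find_first_swap(&resume_device);	/* -ENODEV if none is active */
}
```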
.. | .. |
---|
1756 | 1914 | |
---|
1757 | 1915 | static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) |
---|
1758 | 1916 | { |
---|
1759 | | - return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte); |
---|
| 1917 | + return pte_same(pte_swp_clear_flags(pte), swp_pte); |
---|
1760 | 1918 | } |
---|
1761 | 1919 | |
---|
1762 | 1920 | /* |
---|
.. | .. |
---|
1768 | 1926 | unsigned long addr, swp_entry_t entry, struct page *page) |
---|
1769 | 1927 | { |
---|
1770 | 1928 | struct page *swapcache; |
---|
1771 | | - struct mem_cgroup *memcg; |
---|
1772 | 1929 | spinlock_t *ptl; |
---|
1773 | 1930 | pte_t *pte; |
---|
1774 | 1931 | int ret = 1; |
---|
.. | .. |
---|
1778 | 1935 | if (unlikely(!page)) |
---|
1779 | 1936 | return -ENOMEM; |
---|
1780 | 1937 | |
---|
1781 | | - if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, |
---|
1782 | | - &memcg, false)) { |
---|
1783 | | - ret = -ENOMEM; |
---|
1784 | | - goto out_nolock; |
---|
1785 | | - } |
---|
1786 | | - |
---|
1787 | 1938 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
---|
1788 | 1939 | if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) { |
---|
1789 | | - mem_cgroup_cancel_charge(page, memcg, false); |
---|
1790 | 1940 | ret = 0; |
---|
1791 | 1941 | goto out; |
---|
1792 | 1942 | } |
---|
.. | .. |
---|
1798 | 1948 | pte_mkold(mk_pte(page, vma->vm_page_prot))); |
---|
1799 | 1949 | if (page == swapcache) { |
---|
1800 | 1950 | page_add_anon_rmap(page, vma, addr, false); |
---|
1801 | | - mem_cgroup_commit_charge(page, memcg, true, false); |
---|
1802 | 1951 | } else { /* ksm created a completely new copy */ |
---|
1803 | 1952 | page_add_new_anon_rmap(page, vma, addr, false); |
---|
1804 | | - mem_cgroup_commit_charge(page, memcg, false, false); |
---|
1805 | | - lru_cache_add_active_or_unevictable(page, vma); |
---|
| 1953 | + lru_cache_add_inactive_or_unevictable(page, vma); |
---|
1806 | 1954 | } |
---|
1807 | 1955 | swap_free(entry); |
---|
1808 | | - /* |
---|
1809 | | - * Move the page to the active list so it is not |
---|
1810 | | - * immediately swapped out again after swapon. |
---|
1811 | | - */ |
---|
1812 | | - activate_page(page); |
---|
1813 | 1956 | out: |
---|
1814 | 1957 | pte_unmap_unlock(pte, ptl); |
---|
1815 | | -out_nolock: |
---|
1816 | 1958 | if (page != swapcache) { |
---|
1817 | 1959 | unlock_page(page); |
---|
1818 | 1960 | put_page(page); |
---|
.. | .. |
---|
1821 | 1963 | } |
---|
1822 | 1964 | |
---|
1823 | 1965 | static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
---|
1824 | | - unsigned long addr, unsigned long end, |
---|
1825 | | - swp_entry_t entry, struct page *page) |
---|
| 1966 | + unsigned long addr, unsigned long end, |
---|
| 1967 | + unsigned int type, bool frontswap, |
---|
| 1968 | + unsigned long *fs_pages_to_unuse) |
---|
1826 | 1969 | { |
---|
1827 | | - pte_t swp_pte = swp_entry_to_pte(entry); |
---|
| 1970 | + struct page *page; |
---|
| 1971 | + swp_entry_t entry; |
---|
1828 | 1972 | pte_t *pte; |
---|
| 1973 | + struct swap_info_struct *si; |
---|
| 1974 | + unsigned long offset; |
---|
1829 | 1975 | int ret = 0; |
---|
| 1976 | + volatile unsigned char *swap_map; |
---|
1830 | 1977 | |
---|
1831 | | - /* |
---|
1832 | | - * We don't actually need pte lock while scanning for swp_pte: since |
---|
1833 | | - * we hold page lock and mmap_sem, swp_pte cannot be inserted into the |
---|
1834 | | - * page table while we're scanning; though it could get zapped, and on |
---|
1835 | | - * some architectures (e.g. x86_32 with PAE) we might catch a glimpse |
---|
1836 | | - * of unmatched parts which look like swp_pte, so unuse_pte must |
---|
1837 | | - * recheck under pte lock. Scanning without pte lock lets it be |
---|
1838 | | - * preemptable whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE. |
---|
1839 | | - */ |
---|
| 1978 | + si = swap_info[type]; |
---|
1840 | 1979 | pte = pte_offset_map(pmd, addr); |
---|
1841 | 1980 | do { |
---|
1842 | | - /* |
---|
1843 | | - * swapoff spends a _lot_ of time in this loop! |
---|
1844 | | - * Test inline before going to call unuse_pte. |
---|
1845 | | - */ |
---|
1846 | | - if (unlikely(pte_same_as_swp(*pte, swp_pte))) { |
---|
1847 | | - pte_unmap(pte); |
---|
1848 | | - ret = unuse_pte(vma, pmd, addr, entry, page); |
---|
1849 | | - if (ret) |
---|
1850 | | - goto out; |
---|
1851 | | - pte = pte_offset_map(pmd, addr); |
---|
| 1981 | + if (!is_swap_pte(*pte)) |
---|
| 1982 | + continue; |
---|
| 1983 | + |
---|
| 1984 | + entry = pte_to_swp_entry(*pte); |
---|
| 1985 | + if (swp_type(entry) != type) |
---|
| 1986 | + continue; |
---|
| 1987 | + |
---|
| 1988 | + offset = swp_offset(entry); |
---|
| 1989 | + if (frontswap && !frontswap_test(si, offset)) |
---|
| 1990 | + continue; |
---|
| 1991 | + |
---|
| 1992 | + pte_unmap(pte); |
---|
| 1993 | + swap_map = &si->swap_map[offset]; |
---|
| 1994 | + page = lookup_swap_cache(entry, vma, addr); |
---|
| 1995 | + if (!page) { |
---|
| 1996 | + struct vm_fault vmf = { |
---|
| 1997 | + .vma = vma, |
---|
| 1998 | + .address = addr, |
---|
| 1999 | + .pmd = pmd, |
---|
| 2000 | + }; |
---|
| 2001 | + |
---|
| 2002 | + page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, |
---|
| 2003 | + &vmf); |
---|
1852 | 2004 | } |
---|
| 2005 | + if (!page) { |
---|
| 2006 | + if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD) |
---|
| 2007 | + goto try_next; |
---|
| 2008 | + return -ENOMEM; |
---|
| 2009 | + } |
---|
| 2010 | + |
---|
| 2011 | + lock_page(page); |
---|
| 2012 | + wait_on_page_writeback(page); |
---|
| 2013 | + ret = unuse_pte(vma, pmd, addr, entry, page); |
---|
| 2014 | + if (ret < 0) { |
---|
| 2015 | + unlock_page(page); |
---|
| 2016 | + put_page(page); |
---|
| 2017 | + goto out; |
---|
| 2018 | + } |
---|
| 2019 | + |
---|
| 2020 | + try_to_free_swap(page); |
---|
| 2021 | + trace_android_vh_unuse_swap_page(si, page); |
---|
| 2022 | + unlock_page(page); |
---|
| 2023 | + put_page(page); |
---|
| 2024 | + |
---|
| 2025 | + if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) { |
---|
| 2026 | + ret = FRONTSWAP_PAGES_UNUSED; |
---|
| 2027 | + goto out; |
---|
| 2028 | + } |
---|
| 2029 | +try_next: |
---|
| 2030 | + pte = pte_offset_map(pmd, addr); |
---|
1853 | 2031 | } while (pte++, addr += PAGE_SIZE, addr != end); |
---|
1854 | 2032 | pte_unmap(pte - 1); |
---|
| 2033 | + |
---|
| 2034 | + ret = 0; |
---|
1855 | 2035 | out: |
---|
1856 | 2036 | return ret; |
---|
1857 | 2037 | } |
---|
1858 | 2038 | |
---|
1859 | 2039 | static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, |
---|
1860 | 2040 | unsigned long addr, unsigned long end, |
---|
1861 | | - swp_entry_t entry, struct page *page) |
---|
| 2041 | + unsigned int type, bool frontswap, |
---|
| 2042 | + unsigned long *fs_pages_to_unuse) |
---|
1862 | 2043 | { |
---|
1863 | 2044 | pmd_t *pmd; |
---|
1864 | 2045 | unsigned long next; |
---|
.. | .. |
---|
1870 | 2051 | next = pmd_addr_end(addr, end); |
---|
1871 | 2052 | if (pmd_none_or_trans_huge_or_clear_bad(pmd)) |
---|
1872 | 2053 | continue; |
---|
1873 | | - ret = unuse_pte_range(vma, pmd, addr, next, entry, page); |
---|
| 2054 | + ret = unuse_pte_range(vma, pmd, addr, next, type, |
---|
| 2055 | + frontswap, fs_pages_to_unuse); |
---|
1874 | 2056 | if (ret) |
---|
1875 | 2057 | return ret; |
---|
1876 | 2058 | } while (pmd++, addr = next, addr != end); |
---|
.. | .. |
---|
1879 | 2061 | |
---|
1880 | 2062 | static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, |
---|
1881 | 2063 | unsigned long addr, unsigned long end, |
---|
1882 | | - swp_entry_t entry, struct page *page) |
---|
| 2064 | + unsigned int type, bool frontswap, |
---|
| 2065 | + unsigned long *fs_pages_to_unuse) |
---|
1883 | 2066 | { |
---|
1884 | 2067 | pud_t *pud; |
---|
1885 | 2068 | unsigned long next; |
---|
.. | .. |
---|
1890 | 2073 | next = pud_addr_end(addr, end); |
---|
1891 | 2074 | if (pud_none_or_clear_bad(pud)) |
---|
1892 | 2075 | continue; |
---|
1893 | | - ret = unuse_pmd_range(vma, pud, addr, next, entry, page); |
---|
| 2076 | + ret = unuse_pmd_range(vma, pud, addr, next, type, |
---|
| 2077 | + frontswap, fs_pages_to_unuse); |
---|
1894 | 2078 | if (ret) |
---|
1895 | 2079 | return ret; |
---|
1896 | 2080 | } while (pud++, addr = next, addr != end); |
---|
.. | .. |
---|
1899 | 2083 | |
---|
1900 | 2084 | static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, |
---|
1901 | 2085 | unsigned long addr, unsigned long end, |
---|
1902 | | - swp_entry_t entry, struct page *page) |
---|
| 2086 | + unsigned int type, bool frontswap, |
---|
| 2087 | + unsigned long *fs_pages_to_unuse) |
---|
1903 | 2088 | { |
---|
1904 | 2089 | p4d_t *p4d; |
---|
1905 | 2090 | unsigned long next; |
---|
.. | .. |
---|
1910 | 2095 | next = p4d_addr_end(addr, end); |
---|
1911 | 2096 | if (p4d_none_or_clear_bad(p4d)) |
---|
1912 | 2097 | continue; |
---|
1913 | | - ret = unuse_pud_range(vma, p4d, addr, next, entry, page); |
---|
| 2098 | + ret = unuse_pud_range(vma, p4d, addr, next, type, |
---|
| 2099 | + frontswap, fs_pages_to_unuse); |
---|
1914 | 2100 | if (ret) |
---|
1915 | 2101 | return ret; |
---|
1916 | 2102 | } while (p4d++, addr = next, addr != end); |
---|
1917 | 2103 | return 0; |
---|
1918 | 2104 | } |
---|
1919 | 2105 | |
---|
1920 | | -static int unuse_vma(struct vm_area_struct *vma, |
---|
1921 | | - swp_entry_t entry, struct page *page) |
---|
| 2106 | +static int unuse_vma(struct vm_area_struct *vma, unsigned int type, |
---|
| 2107 | + bool frontswap, unsigned long *fs_pages_to_unuse) |
---|
1922 | 2108 | { |
---|
1923 | 2109 | pgd_t *pgd; |
---|
1924 | 2110 | unsigned long addr, end, next; |
---|
1925 | 2111 | int ret; |
---|
1926 | 2112 | |
---|
1927 | | - if (page_anon_vma(page)) { |
---|
1928 | | - addr = page_address_in_vma(page, vma); |
---|
1929 | | - if (addr == -EFAULT) |
---|
1930 | | - return 0; |
---|
1931 | | - else |
---|
1932 | | - end = addr + PAGE_SIZE; |
---|
1933 | | - } else { |
---|
1934 | | - addr = vma->vm_start; |
---|
1935 | | - end = vma->vm_end; |
---|
1936 | | - } |
---|
| 2113 | + addr = vma->vm_start; |
---|
| 2114 | + end = vma->vm_end; |
---|
1937 | 2115 | |
---|
1938 | 2116 | pgd = pgd_offset(vma->vm_mm, addr); |
---|
1939 | 2117 | do { |
---|
1940 | 2118 | next = pgd_addr_end(addr, end); |
---|
1941 | 2119 | if (pgd_none_or_clear_bad(pgd)) |
---|
1942 | 2120 | continue; |
---|
1943 | | - ret = unuse_p4d_range(vma, pgd, addr, next, entry, page); |
---|
| 2121 | + ret = unuse_p4d_range(vma, pgd, addr, next, type, |
---|
| 2122 | + frontswap, fs_pages_to_unuse); |
---|
1944 | 2123 | if (ret) |
---|
1945 | 2124 | return ret; |
---|
1946 | 2125 | } while (pgd++, addr = next, addr != end); |
---|
1947 | 2126 | return 0; |
---|
1948 | 2127 | } |
---|
1949 | 2128 | |
---|
1950 | | -static int unuse_mm(struct mm_struct *mm, |
---|
1951 | | - swp_entry_t entry, struct page *page) |
---|
| 2129 | +static int unuse_mm(struct mm_struct *mm, unsigned int type, |
---|
| 2130 | + bool frontswap, unsigned long *fs_pages_to_unuse) |
---|
1952 | 2131 | { |
---|
1953 | 2132 | struct vm_area_struct *vma; |
---|
1954 | 2133 | int ret = 0; |
---|
1955 | 2134 | |
---|
1956 | | - if (!down_read_trylock(&mm->mmap_sem)) { |
---|
1957 | | - /* |
---|
1958 | | - * Activate page so shrink_inactive_list is unlikely to unmap |
---|
1959 | | - * its ptes while lock is dropped, so swapoff can make progress. |
---|
1960 | | - */ |
---|
1961 | | - activate_page(page); |
---|
1962 | | - unlock_page(page); |
---|
1963 | | - down_read(&mm->mmap_sem); |
---|
1964 | | - lock_page(page); |
---|
1965 | | - } |
---|
| 2135 | + mmap_read_lock(mm); |
---|
1966 | 2136 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
---|
1967 | | - if (vma->anon_vma && (ret = unuse_vma(vma, entry, page))) |
---|
1968 | | - break; |
---|
| 2137 | + if (vma->anon_vma) { |
---|
| 2138 | + ret = unuse_vma(vma, type, frontswap, |
---|
| 2139 | + fs_pages_to_unuse); |
---|
| 2140 | + if (ret) |
---|
| 2141 | + break; |
---|
| 2142 | + } |
---|
1969 | 2143 | cond_resched(); |
---|
1970 | 2144 | } |
---|
1971 | | - up_read(&mm->mmap_sem); |
---|
1972 | | - return (ret < 0)? ret: 0; |
---|
| 2145 | + mmap_read_unlock(mm); |
---|
| 2146 | + return ret; |
---|
1973 | 2147 | } |
---|
1974 | 2148 | |
---|
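With unuse_mm() now sweeping a whole address space, try_to_unuse() (below) walks every mm on init_mm.mmlist exactly once. The iteration pattern is worth calling out: take the next element under mmlist_lock, pin it with mmget_not_zero(), drop the lock before the expensive work, and release the previously pinned mm afterwards. A condensed sketch of just that walk (illustrative; the real loop below also stops when si->inuse_pages reaches zero or a signal is pending, and handles the frontswap page budget):

```c
/* Condensed from the try_to_unuse() hunk below; illustrative only. */
static int walk_all_mms(unsigned int type, bool frontswap,
			unsigned long *fs_pages_to_unuse)
{
	struct mm_struct *prev_mm = &init_mm, *mm;
	struct list_head *p = &init_mm.mmlist;
	int ret = 0;

	mmget(prev_mm);
	spin_lock(&mmlist_lock);
	while ((p = p->next) != &init_mm.mmlist) {
		mm = list_entry(p, struct mm_struct, mmlist);
		if (!mmget_not_zero(mm))	/* mm is already exiting: skip it */
			continue;
		spin_unlock(&mmlist_lock);
		mmput(prev_mm);			/* drop the pin on the previous mm */
		prev_mm = mm;
		ret = unuse_mm(mm, type, frontswap, fs_pages_to_unuse);
		if (ret)
			break;			/* lock already dropped on this path */
		spin_lock(&mmlist_lock);
	}
	if (!ret)
		spin_unlock(&mmlist_lock);	/* loop ended with the lock held */
	mmput(prev_mm);
	return ret;
}
```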
1975 | 2149 | /* |
---|
1976 | 2150 | * Scan swap_map (or frontswap_map if frontswap parameter is true) |
---|
1977 | | - * from current position to next entry still in use. |
---|
1978 | | - * Recycle to start on reaching the end, returning 0 when empty. |
---|
| 2151 | + * from current position to next entry still in use. Return 0 |
---|
| 2152 | + * if there are no inuse entries after prev till end of the map. |
---|
1979 | 2153 | */ |
---|
1980 | 2154 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, |
---|
1981 | 2155 | unsigned int prev, bool frontswap) |
---|
1982 | 2156 | { |
---|
1983 | | - unsigned int max = si->max; |
---|
1984 | | - unsigned int i = prev; |
---|
| 2157 | + unsigned int i; |
---|
1985 | 2158 | unsigned char count; |
---|
1986 | 2159 | |
---|
1987 | 2160 | /* |
---|
.. | .. |
---|
1990 | 2163 | * hits are okay, and sys_swapoff() has already prevented new |
---|
1991 | 2164 | * allocations from this area (while holding swap_lock). |
---|
1992 | 2165 | */ |
---|
1993 | | - for (;;) { |
---|
1994 | | - if (++i >= max) { |
---|
1995 | | - if (!prev) { |
---|
1996 | | - i = 0; |
---|
1997 | | - break; |
---|
1998 | | - } |
---|
1999 | | - /* |
---|
2000 | | - * No entries in use at top of swap_map, |
---|
2001 | | - * loop back to start and recheck there. |
---|
2002 | | - */ |
---|
2003 | | - max = prev + 1; |
---|
2004 | | - prev = 0; |
---|
2005 | | - i = 1; |
---|
2006 | | - } |
---|
| 2166 | + for (i = prev + 1; i < si->max; i++) { |
---|
2007 | 2167 | count = READ_ONCE(si->swap_map[i]); |
---|
2008 | 2168 | if (count && swap_count(count) != SWAP_MAP_BAD) |
---|
2009 | 2169 | if (!frontswap || frontswap_test(si, i)) |
---|
.. | .. |
---|
2011 | 2171 | if ((i % LATENCY_LIMIT) == 0) |
---|
2012 | 2172 | cond_resched(); |
---|
2013 | 2173 | } |
---|
| 2174 | + |
---|
| 2175 | + if (i == si->max) |
---|
| 2176 | + i = 0; |
---|
| 2177 | + |
---|
2014 | 2178 | return i; |
---|
2015 | 2179 | } |
---|
2016 | 2180 | |
---|
2017 | 2181 | /* |
---|
2018 | | - * We completely avoid races by reading each swap page in advance, |
---|
2019 | | - * and then search for the process using it. All the necessary |
---|
2020 | | - * page table adjustments can then be made atomically. |
---|
2021 | | - * |
---|
2022 | | - * if the boolean frontswap is true, only unuse pages_to_unuse pages; |
---|
| 2182 | + * If the boolean frontswap is true, only unuse pages_to_unuse pages; |
---|
2023 | 2183 | * pages_to_unuse==0 means all pages; ignored if frontswap is false |
---|
2024 | 2184 | */ |
---|
2025 | 2185 | int try_to_unuse(unsigned int type, bool frontswap, |
---|
2026 | 2186 | unsigned long pages_to_unuse) |
---|
2027 | 2187 | { |
---|
| 2188 | + struct mm_struct *prev_mm; |
---|
| 2189 | + struct mm_struct *mm; |
---|
| 2190 | + struct list_head *p; |
---|
| 2191 | + int retval = 0; |
---|
2028 | 2192 | struct swap_info_struct *si = swap_info[type]; |
---|
2029 | | - struct mm_struct *start_mm; |
---|
2030 | | - volatile unsigned char *swap_map; /* swap_map is accessed without |
---|
2031 | | - * locking. Mark it as volatile |
---|
2032 | | - * to prevent compiler doing |
---|
2033 | | - * something odd. |
---|
2034 | | - */ |
---|
2035 | | - unsigned char swcount; |
---|
2036 | 2193 | struct page *page; |
---|
2037 | 2194 | swp_entry_t entry; |
---|
2038 | | - unsigned int i = 0; |
---|
2039 | | - int retval = 0; |
---|
| 2195 | + unsigned int i; |
---|
2040 | 2196 | |
---|
2041 | | - /* |
---|
2042 | | - * When searching mms for an entry, a good strategy is to |
---|
2043 | | - * start at the first mm we freed the previous entry from |
---|
2044 | | - * (though actually we don't notice whether we or coincidence |
---|
2045 | | - * freed the entry). Initialize this start_mm with a hold. |
---|
2046 | | - * |
---|
2047 | | - * A simpler strategy would be to start at the last mm we |
---|
2048 | | - * freed the previous entry from; but that would take less |
---|
2049 | | - * advantage of mmlist ordering, which clusters forked mms |
---|
2050 | | - * together, child after parent. If we race with dup_mmap(), we |
---|
2051 | | - * prefer to resolve parent before child, lest we miss entries |
---|
2052 | | - * duplicated after we scanned child: using last mm would invert |
---|
2053 | | - * that. |
---|
2054 | | - */ |
---|
2055 | | - start_mm = &init_mm; |
---|
2056 | | - mmget(&init_mm); |
---|
| 2197 | + if (!READ_ONCE(si->inuse_pages)) |
---|
| 2198 | + return 0; |
---|
2057 | 2199 | |
---|
2058 | | - /* |
---|
2059 | | - * Keep on scanning until all entries have gone. Usually, |
---|
2060 | | - * one pass through swap_map is enough, but not necessarily: |
---|
2061 | | - * there are races when an instance of an entry might be missed. |
---|
2062 | | - */ |
---|
2063 | | - while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { |
---|
2064 | | - if (signal_pending(current)) { |
---|
2065 | | - retval = -EINTR; |
---|
2066 | | - break; |
---|
2067 | | - } |
---|
| 2200 | + if (!frontswap) |
---|
| 2201 | + pages_to_unuse = 0; |
---|
2068 | 2202 | |
---|
2069 | | - /* |
---|
2070 | | - * Get a page for the entry, using the existing swap |
---|
2071 | | - * cache page if there is one. Otherwise, get a clean |
---|
2072 | | - * page and read the swap into it. |
---|
2073 | | - */ |
---|
2074 | | - swap_map = &si->swap_map[i]; |
---|
2075 | | - entry = swp_entry(type, i); |
---|
2076 | | - page = read_swap_cache_async(entry, |
---|
2077 | | - GFP_HIGHUSER_MOVABLE, NULL, 0, false); |
---|
2078 | | - if (!page) { |
---|
2079 | | - /* |
---|
2080 | | - * Either swap_duplicate() failed because entry |
---|
2081 | | - * has been freed independently, and will not be |
---|
2082 | | - * reused since sys_swapoff() already disabled |
---|
2083 | | - * allocation from here, or alloc_page() failed. |
---|
2084 | | - */ |
---|
2085 | | - swcount = *swap_map; |
---|
2086 | | - /* |
---|
2087 | | - * We don't hold lock here, so the swap entry could be |
---|
2088 | | - * SWAP_MAP_BAD (when the cluster is discarding). |
---|
2089 | | - * Instead of fail out, We can just skip the swap |
---|
2090 | | - * entry because swapoff will wait for discarding |
---|
2091 | | - * finish anyway. |
---|
2092 | | - */ |
---|
2093 | | - if (!swcount || swcount == SWAP_MAP_BAD) |
---|
2094 | | - continue; |
---|
2095 | | - retval = -ENOMEM; |
---|
2096 | | - break; |
---|
2097 | | - } |
---|
| 2203 | +retry: |
---|
| 2204 | + retval = shmem_unuse(type, frontswap, &pages_to_unuse); |
---|
| 2205 | + if (retval) |
---|
| 2206 | + goto out; |
---|
2098 | 2207 | |
---|
2099 | | - /* |
---|
2100 | | - * Don't hold on to start_mm if it looks like exiting. |
---|
2101 | | - */ |
---|
2102 | | - if (atomic_read(&start_mm->mm_users) == 1) { |
---|
2103 | | - mmput(start_mm); |
---|
2104 | | - start_mm = &init_mm; |
---|
2105 | | - mmget(&init_mm); |
---|
2106 | | - } |
---|
| 2208 | + prev_mm = &init_mm; |
---|
| 2209 | + mmget(prev_mm); |
---|
2107 | 2210 | |
---|
2108 | | - /* |
---|
2109 | | - * Wait for and lock page. When do_swap_page races with |
---|
2110 | | - * try_to_unuse, do_swap_page can handle the fault much |
---|
2111 | | - * faster than try_to_unuse can locate the entry. This |
---|
2112 | | - * apparently redundant "wait_on_page_locked" lets try_to_unuse |
---|
2113 | | - * defer to do_swap_page in such a case - in some tests, |
---|
2114 | | - * do_swap_page and try_to_unuse repeatedly compete. |
---|
2115 | | - */ |
---|
2116 | | - wait_on_page_locked(page); |
---|
2117 | | - wait_on_page_writeback(page); |
---|
2118 | | - lock_page(page); |
---|
2119 | | - wait_on_page_writeback(page); |
---|
| 2211 | + spin_lock(&mmlist_lock); |
---|
| 2212 | + p = &init_mm.mmlist; |
---|
| 2213 | + while (READ_ONCE(si->inuse_pages) && |
---|
| 2214 | + !signal_pending(current) && |
---|
| 2215 | + (p = p->next) != &init_mm.mmlist) { |
---|
2120 | 2216 | |
---|
2121 | | - /* |
---|
2122 | | - * Remove all references to entry. |
---|
2123 | | - */ |
---|
2124 | | - swcount = *swap_map; |
---|
2125 | | - if (swap_count(swcount) == SWAP_MAP_SHMEM) { |
---|
2126 | | - retval = shmem_unuse(entry, page); |
---|
2127 | | - /* page has already been unlocked and released */ |
---|
2128 | | - if (retval < 0) |
---|
2129 | | - break; |
---|
| 2217 | + mm = list_entry(p, struct mm_struct, mmlist); |
---|
| 2218 | + if (!mmget_not_zero(mm)) |
---|
2130 | 2219 | continue; |
---|
2131 | | - } |
---|
2132 | | - if (swap_count(swcount) && start_mm != &init_mm) |
---|
2133 | | - retval = unuse_mm(start_mm, entry, page); |
---|
| 2220 | + spin_unlock(&mmlist_lock); |
---|
| 2221 | + mmput(prev_mm); |
---|
| 2222 | + prev_mm = mm; |
---|
| 2223 | + retval = unuse_mm(mm, type, frontswap, &pages_to_unuse); |
---|
2134 | 2224 | |
---|
2135 | | - if (swap_count(*swap_map)) { |
---|
2136 | | - int set_start_mm = (*swap_map >= swcount); |
---|
2137 | | - struct list_head *p = &start_mm->mmlist; |
---|
2138 | | - struct mm_struct *new_start_mm = start_mm; |
---|
2139 | | - struct mm_struct *prev_mm = start_mm; |
---|
2140 | | - struct mm_struct *mm; |
---|
2141 | | - |
---|
2142 | | - mmget(new_start_mm); |
---|
2143 | | - mmget(prev_mm); |
---|
2144 | | - spin_lock(&mmlist_lock); |
---|
2145 | | - while (swap_count(*swap_map) && !retval && |
---|
2146 | | - (p = p->next) != &start_mm->mmlist) { |
---|
2147 | | - mm = list_entry(p, struct mm_struct, mmlist); |
---|
2148 | | - if (!mmget_not_zero(mm)) |
---|
2149 | | - continue; |
---|
2150 | | - spin_unlock(&mmlist_lock); |
---|
2151 | | - mmput(prev_mm); |
---|
2152 | | - prev_mm = mm; |
---|
2153 | | - |
---|
2154 | | - cond_resched(); |
---|
2155 | | - |
---|
2156 | | - swcount = *swap_map; |
---|
2157 | | - if (!swap_count(swcount)) /* any usage ? */ |
---|
2158 | | - ; |
---|
2159 | | - else if (mm == &init_mm) |
---|
2160 | | - set_start_mm = 1; |
---|
2161 | | - else |
---|
2162 | | - retval = unuse_mm(mm, entry, page); |
---|
2163 | | - |
---|
2164 | | - if (set_start_mm && *swap_map < swcount) { |
---|
2165 | | - mmput(new_start_mm); |
---|
2166 | | - mmget(mm); |
---|
2167 | | - new_start_mm = mm; |
---|
2168 | | - set_start_mm = 0; |
---|
2169 | | - } |
---|
2170 | | - spin_lock(&mmlist_lock); |
---|
2171 | | - } |
---|
2172 | | - spin_unlock(&mmlist_lock); |
---|
2173 | | - mmput(prev_mm); |
---|
2174 | | - mmput(start_mm); |
---|
2175 | | - start_mm = new_start_mm; |
---|
2176 | | - } |
---|
2177 | 2225 | if (retval) { |
---|
2178 | | - unlock_page(page); |
---|
2179 | | - put_page(page); |
---|
2180 | | - break; |
---|
| 2226 | + mmput(prev_mm); |
---|
| 2227 | + goto out; |
---|
2181 | 2228 | } |
---|
2182 | | - |
---|
2183 | | - /* |
---|
2184 | | - * If a reference remains (rare), we would like to leave |
---|
2185 | | - * the page in the swap cache; but try_to_unmap could |
---|
2186 | | - * then re-duplicate the entry once we drop page lock, |
---|
2187 | | - * so we might loop indefinitely; also, that page could |
---|
2188 | | - * not be swapped out to other storage meanwhile. So: |
---|
2189 | | - * delete from cache even if there's another reference, |
---|
2190 | | - * after ensuring that the data has been saved to disk - |
---|
2191 | | - * since if the reference remains (rarer), it will be |
---|
2192 | | - * read from disk into another page. Splitting into two |
---|
2193 | | - * pages would be incorrect if swap supported "shared |
---|
2194 | | - * private" pages, but they are handled by tmpfs files. |
---|
2195 | | - * |
---|
2196 | | - * Given how unuse_vma() targets one particular offset |
---|
2197 | | - * in an anon_vma, once the anon_vma has been determined, |
---|
2198 | | - * this splitting happens to be just what is needed to |
---|
2199 | | - * handle where KSM pages have been swapped out: re-reading |
---|
2200 | | - * is unnecessarily slow, but we can fix that later on. |
---|
2201 | | - */ |
---|
2202 | | - if (swap_count(*swap_map) && |
---|
2203 | | - PageDirty(page) && PageSwapCache(page)) { |
---|
2204 | | - struct writeback_control wbc = { |
---|
2205 | | - .sync_mode = WB_SYNC_NONE, |
---|
2206 | | - }; |
---|
2207 | | - |
---|
2208 | | - swap_writepage(compound_head(page), &wbc); |
---|
2209 | | - lock_page(page); |
---|
2210 | | - wait_on_page_writeback(page); |
---|
2211 | | - } |
---|
2212 | | - |
---|
2213 | | - /* |
---|
2214 | | - * It is conceivable that a racing task removed this page from |
---|
2215 | | - * swap cache just before we acquired the page lock at the top, |
---|
2216 | | - * or while we dropped it in unuse_mm(). The page might even |
---|
2217 | | - * be back in swap cache on another swap area: that we must not |
---|
2218 | | - * delete, since it may not have been written out to swap yet. |
---|
2219 | | - */ |
---|
2220 | | - if (PageSwapCache(page) && |
---|
2221 | | - likely(page_private(page) == entry.val) && |
---|
2222 | | - (!PageTransCompound(page) || |
---|
2223 | | - !swap_page_trans_huge_swapped(si, entry))) |
---|
2224 | | - delete_from_swap_cache(compound_head(page)); |
---|
2225 | | - |
---|
2226 | | - /* |
---|
2227 | | - * So we could skip searching mms once swap count went |
---|
2228 | | - * to 1, we did not mark any present ptes as dirty: must |
---|
2229 | | - * mark page dirty so shrink_page_list will preserve it. |
---|
2230 | | - */ |
---|
2231 | | - SetPageDirty(page); |
---|
2232 | | - unlock_page(page); |
---|
2233 | | - put_page(page); |
---|
2234 | 2229 | |
---|
2235 | 2230 | /* |
---|
2236 | 2231 | * Make sure that we aren't completely killing |
---|
2237 | 2232 | * interactive performance. |
---|
2238 | 2233 | */ |
---|
2239 | 2234 | cond_resched(); |
---|
2240 | | - if (frontswap && pages_to_unuse > 0) { |
---|
2241 | | - if (!--pages_to_unuse) |
---|
2242 | | - break; |
---|
2243 | | - } |
---|
| 2235 | + spin_lock(&mmlist_lock); |
---|
| 2236 | + } |
---|
| 2237 | + spin_unlock(&mmlist_lock); |
---|
| 2238 | + |
---|
| 2239 | + mmput(prev_mm); |
---|
| 2240 | + |
---|
| 2241 | + i = 0; |
---|
| 2242 | + while (READ_ONCE(si->inuse_pages) && |
---|
| 2243 | + !signal_pending(current) && |
---|
| 2244 | + (i = find_next_to_unuse(si, i, frontswap)) != 0) { |
---|
| 2245 | + |
---|
| 2246 | + entry = swp_entry(type, i); |
---|
| 2247 | + page = find_get_page(swap_address_space(entry), i); |
---|
| 2248 | + if (!page) |
---|
| 2249 | + continue; |
---|
| 2250 | + |
---|
| 2251 | + /* |
---|
| 2252 | + * It is conceivable that a racing task removed this page from |
---|
| 2253 | + * swap cache just before we acquired the page lock. The page |
---|
| 2254 | + * might even be back in swap cache on another swap area. But |
---|
| 2255 | + * that is okay: try_to_free_swap() only removes stale pages. |
---|
| 2256 | + */ |
---|
| 2257 | + lock_page(page); |
---|
| 2258 | + wait_on_page_writeback(page); |
---|
| 2259 | + try_to_free_swap(page); |
---|
| 2260 | + trace_android_vh_unuse_swap_page(si, page); |
---|
| 2261 | + unlock_page(page); |
---|
| 2262 | + put_page(page); |
---|
| 2263 | + |
---|
| 2264 | + /* |
---|
| 2265 | + * For frontswap, we just need to unuse pages_to_unuse, if |
---|
| 2266 | + * it was specified. There is no need to check frontswap again |
---|
| 2267 | + * here, since pages_to_unuse was already zeroed when not frontswap. |
---|
| 2268 | + */ |
---|
| 2269 | + if (pages_to_unuse && --pages_to_unuse == 0) |
---|
| 2270 | + goto out; |
---|
2244 | 2271 | } |
---|
2245 | 2272 | |
---|
2246 | | - mmput(start_mm); |
---|
2247 | | - return retval; |
---|
| 2273 | + /* |
---|
| 2274 | + * Let's check again to see if there are still swap entries in the map. |
---|
| 2275 | + * If yes, we need to retry the unuse logic. |
---|
| 2276 | + * Under global memory pressure, swap entries can be reinserted |
---|
| 2277 | + * into process space after the mmlist loop above passes over them. |
---|
| 2278 | + * |
---|
| 2279 | + * Limit the number of retries? No: when mmget_not_zero() above fails, |
---|
| 2280 | + * that mm is likely to be freeing swap from exit_mmap(), which proceeds |
---|
| 2281 | + * at its own independent pace; and even shmem_writepage() could have |
---|
| 2282 | + * been preempted after get_swap_page(), temporarily hiding that swap. |
---|
| 2283 | + * It's easy and robust (though cpu-intensive) just to keep retrying. |
---|
| 2284 | + */ |
---|
| 2285 | + if (READ_ONCE(si->inuse_pages)) { |
---|
| 2286 | + if (!signal_pending(current)) |
---|
| 2287 | + goto retry; |
---|
| 2288 | + retval = -EINTR; |
---|
| 2289 | + } |
---|
| 2290 | +out: |
---|
| 2291 | + return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval; |
---|
2248 | 2292 | } |
---|
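
In outline, the rewritten try_to_unuse() above is a staged drain: shmem first, then every mm on init_mm.mmlist, then whatever is left in the swap cache, repeated until the device is empty. The condensed sketch below only summarizes that control flow; the locking, mmget()/mmput() pairing and error paths shown in the hunks above are elided, so it is not compilable as-is.

    /* Outline only; see the hunks above for the real locking and error handling. */
    retry:
        shmem_unuse(type, frontswap, &pages_to_unuse);        /* 1: drain shmem/tmpfs entries */

        list_for_each_entry(mm, &init_mm.mmlist, mmlist)      /* 2: drain each mm's anon ptes */
            unuse_mm(mm, type, frontswap, &pages_to_unuse);

        while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
            page = find_get_page(swap_address_space(swp_entry(type, i)), i);
            try_to_free_swap(page);                           /* 3: drop leftover swap cache */
        }

        if (READ_ONCE(si->inuse_pages) && !signal_pending(current))
            goto retry;                                       /* 4: entries may have come back */

Step 4 is what the retry comment above justifies: nothing stops another task from faulting entries back in while the earlier passes run, so the function keeps looping until si->inuse_pages reaches zero or a signal arrives.
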
2249 | 2293 | |
---|
2250 | 2294 | /* |
---|
.. | .. |
---|
2276 | 2320 | static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) |
---|
2277 | 2321 | { |
---|
2278 | 2322 | struct swap_info_struct *sis; |
---|
2279 | | - struct swap_extent *start_se; |
---|
2280 | 2323 | struct swap_extent *se; |
---|
2281 | 2324 | pgoff_t offset; |
---|
2282 | 2325 | |
---|
.. | .. |
---|
2284 | 2327 | *bdev = sis->bdev; |
---|
2285 | 2328 | |
---|
2286 | 2329 | offset = swp_offset(entry); |
---|
2287 | | - start_se = sis->curr_swap_extent; |
---|
2288 | | - se = start_se; |
---|
2289 | | - |
---|
2290 | | - for ( ; ; ) { |
---|
2291 | | - if (se->start_page <= offset && |
---|
2292 | | - offset < (se->start_page + se->nr_pages)) { |
---|
2293 | | - return se->start_block + (offset - se->start_page); |
---|
2294 | | - } |
---|
2295 | | - se = list_next_entry(se, list); |
---|
2296 | | - sis->curr_swap_extent = se; |
---|
2297 | | - BUG_ON(se == start_se); /* It *must* be present */ |
---|
2298 | | - } |
---|
| 2330 | + se = offset_to_swap_extent(sis, offset); |
---|
| 2331 | + return se->start_block + (offset - se->start_page); |
---|
2299 | 2332 | } |
---|
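
With the extents kept in an rbtree, map_swap_entry() no longer walks a list from a cached curr_swap_extent; it asks offset_to_swap_extent() for the extent covering the offset. That helper is not part of this excerpt, so the function below is only a guess at its shape, assuming a conventional rbtree descent keyed on the start_page/nr_pages fields used above (the _sketch suffix marks it as hypothetical):

    /* Hypothetical sketch of an extent lookup in sis->swap_extent_root. */
    static struct swap_extent *
    offset_to_swap_extent_sketch(struct swap_info_struct *sis, unsigned long offset)
    {
        struct rb_node *rb = sis->swap_extent_root.rb_node;
        struct swap_extent *se = NULL;

        while (rb) {
            se = rb_entry(rb, struct swap_extent, rb_node);
            if (offset < se->start_page)
                rb = rb->rb_left;
            else if (offset >= se->start_page + se->nr_pages)
                rb = rb->rb_right;
            else
                break;            /* offset falls inside this extent */
        }
        return se;                /* callers rely on the extent being present */
    }
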
2300 | 2333 | |
---|
2301 | 2334 | /* |
---|
.. | .. |
---|
2305 | 2338 | { |
---|
2306 | 2339 | swp_entry_t entry; |
---|
2307 | 2340 | entry.val = page_private(page); |
---|
2308 | | - return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9); |
---|
| 2341 | + return map_swap_entry(entry, bdev); |
---|
2309 | 2342 | } |
---|
2310 | 2343 | |
---|
2311 | 2344 | /* |
---|
.. | .. |
---|
2313 | 2346 | */ |
---|
2314 | 2347 | static void destroy_swap_extents(struct swap_info_struct *sis) |
---|
2315 | 2348 | { |
---|
2316 | | - while (!list_empty(&sis->first_swap_extent.list)) { |
---|
2317 | | - struct swap_extent *se; |
---|
| 2349 | + while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { |
---|
| 2350 | + struct rb_node *rb = sis->swap_extent_root.rb_node; |
---|
| 2351 | + struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node); |
---|
2318 | 2352 | |
---|
2319 | | - se = list_first_entry(&sis->first_swap_extent.list, |
---|
2320 | | - struct swap_extent, list); |
---|
2321 | | - list_del(&se->list); |
---|
| 2353 | + rb_erase(rb, &sis->swap_extent_root); |
---|
2322 | 2354 | kfree(se); |
---|
2323 | 2355 | } |
---|
2324 | 2356 | |
---|
2325 | | - if (sis->flags & SWP_FILE) { |
---|
| 2357 | + if (sis->flags & SWP_ACTIVATED) { |
---|
2326 | 2358 | struct file *swap_file = sis->swap_file; |
---|
2327 | 2359 | struct address_space *mapping = swap_file->f_mapping; |
---|
2328 | 2360 | |
---|
2329 | | - sis->flags &= ~SWP_FILE; |
---|
2330 | | - mapping->a_ops->swap_deactivate(swap_file); |
---|
| 2361 | + sis->flags &= ~SWP_ACTIVATED; |
---|
| 2362 | + if (mapping->a_ops->swap_deactivate) |
---|
| 2363 | + mapping->a_ops->swap_deactivate(swap_file); |
---|
2331 | 2364 | } |
---|
2332 | 2365 | } |
---|
2333 | 2366 | |
---|
2334 | 2367 | /* |
---|
2335 | 2368 | * Add a block range (and the corresponding page range) into this swapdev's |
---|
2336 | | - * extent list. The extent list is kept sorted in page order. |
---|
| 2369 | + * extent tree. |
---|
2337 | 2370 | * |
---|
2338 | 2371 | * This function rather assumes that it is called in ascending page order. |
---|
2339 | 2372 | */ |
---|
.. | .. |
---|
2341 | 2374 | add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, |
---|
2342 | 2375 | unsigned long nr_pages, sector_t start_block) |
---|
2343 | 2376 | { |
---|
| 2377 | + struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; |
---|
2344 | 2378 | struct swap_extent *se; |
---|
2345 | 2379 | struct swap_extent *new_se; |
---|
2346 | | - struct list_head *lh; |
---|
2347 | 2380 | |
---|
2348 | | - if (start_page == 0) { |
---|
2349 | | - se = &sis->first_swap_extent; |
---|
2350 | | - sis->curr_swap_extent = se; |
---|
2351 | | - se->start_page = 0; |
---|
2352 | | - se->nr_pages = nr_pages; |
---|
2353 | | - se->start_block = start_block; |
---|
2354 | | - return 1; |
---|
2355 | | - } else { |
---|
2356 | | - lh = sis->first_swap_extent.list.prev; /* Highest extent */ |
---|
2357 | | - se = list_entry(lh, struct swap_extent, list); |
---|
| 2381 | + /* |
---|
| 2382 | + * Place the new node as the rightmost node, since this |
---|
| 2383 | + * function is called in ascending page order. |
---|
| 2384 | + */ |
---|
| 2385 | + while (*link) { |
---|
| 2386 | + parent = *link; |
---|
| 2387 | + link = &parent->rb_right; |
---|
| 2388 | + } |
---|
| 2389 | + |
---|
| 2390 | + if (parent) { |
---|
| 2391 | + se = rb_entry(parent, struct swap_extent, rb_node); |
---|
2358 | 2392 | BUG_ON(se->start_page + se->nr_pages != start_page); |
---|
2359 | 2393 | if (se->start_block + se->nr_pages == start_block) { |
---|
2360 | 2394 | /* Merge it */ |
---|
.. | .. |
---|
2363 | 2397 | } |
---|
2364 | 2398 | } |
---|
2365 | 2399 | |
---|
2366 | | - /* |
---|
2367 | | - * No merge. Insert a new extent, preserving ordering. |
---|
2368 | | - */ |
---|
| 2400 | + /* No merge, insert a new extent. */ |
---|
2369 | 2401 | new_se = kmalloc(sizeof(*se), GFP_KERNEL); |
---|
2370 | 2402 | if (new_se == NULL) |
---|
2371 | 2403 | return -ENOMEM; |
---|
.. | .. |
---|
2373 | 2405 | new_se->nr_pages = nr_pages; |
---|
2374 | 2406 | new_se->start_block = start_block; |
---|
2375 | 2407 | |
---|
2376 | | - list_add_tail(&new_se->list, &sis->first_swap_extent.list); |
---|
| 2408 | + rb_link_node(&new_se->rb_node, parent, link); |
---|
| 2409 | + rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); |
---|
2377 | 2410 | return 1; |
---|
2378 | 2411 | } |
---|
2379 | 2412 | EXPORT_SYMBOL_GPL(add_swap_extent); |
---|
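
Because setup_swap_extents() issues these calls in ascending page order, walking only the rb_right spine really does land on the highest extent, and each call either merges into it (when the blocks are contiguous) or becomes the new rightmost node. A worked example with made-up page/block numbers:

    /* Hypothetical call sequence; the numbers are illustrative only. */
    add_swap_extent(sis, 0, 64, 1000);   /* pages [0,64)  -> blocks [1000,1064)            */
    add_swap_extent(sis, 64, 32, 1064);  /* blocks contiguous: merged, extent grows to 96  */
    add_swap_extent(sis, 96, 16, 5000);  /* blocks not contiguous: new rightmost tree node */
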
.. | .. |
---|
2423 | 2456 | |
---|
2424 | 2457 | if (mapping->a_ops->swap_activate) { |
---|
2425 | 2458 | ret = mapping->a_ops->swap_activate(sis, swap_file, span); |
---|
| 2459 | + if (ret >= 0) |
---|
| 2460 | + sis->flags |= SWP_ACTIVATED; |
---|
2426 | 2461 | if (!ret) { |
---|
2427 | | - sis->flags |= SWP_FILE; |
---|
| 2462 | + sis->flags |= SWP_FS_OPS; |
---|
2428 | 2463 | ret = add_swap_extent(sis, 0, sis->max, 0); |
---|
2429 | 2464 | *span = sis->pages; |
---|
2430 | 2465 | } |
---|
.. | .. |
---|
2446 | 2481 | return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; |
---|
2447 | 2482 | } |
---|
2448 | 2483 | |
---|
2449 | | -static void _enable_swap_info(struct swap_info_struct *p, int prio, |
---|
2450 | | - unsigned char *swap_map, |
---|
2451 | | - struct swap_cluster_info *cluster_info) |
---|
| 2484 | +static void setup_swap_info(struct swap_info_struct *p, int prio, |
---|
| 2485 | + unsigned char *swap_map, |
---|
| 2486 | + struct swap_cluster_info *cluster_info) |
---|
2452 | 2487 | { |
---|
2453 | 2488 | int i; |
---|
2454 | 2489 | |
---|
.. | .. |
---|
2473 | 2508 | } |
---|
2474 | 2509 | p->swap_map = swap_map; |
---|
2475 | 2510 | p->cluster_info = cluster_info; |
---|
2476 | | - p->flags |= SWP_WRITEOK; |
---|
2477 | | - atomic_long_add(p->pages, &nr_swap_pages); |
---|
2478 | | - total_swap_pages += p->pages; |
---|
| 2511 | +} |
---|
2479 | 2512 | |
---|
| 2513 | +static void _enable_swap_info(struct swap_info_struct *p) |
---|
| 2514 | +{ |
---|
| 2515 | + bool skip = false; |
---|
| 2516 | + |
---|
| 2517 | + p->flags |= SWP_WRITEOK | SWP_VALID; |
---|
| 2518 | + trace_android_vh_account_swap_pages(p, &skip); |
---|
| 2519 | + if (!skip) { |
---|
| 2520 | + atomic_long_add(p->pages, &nr_swap_pages); |
---|
| 2521 | + total_swap_pages += p->pages; |
---|
| 2522 | + } |
---|
2480 | 2523 | assert_spin_locked(&swap_lock); |
---|
2481 | 2524 | /* |
---|
2482 | 2525 | * both lists are plists, and thus priority ordered. |
---|
.. | .. |
---|
2500 | 2543 | frontswap_init(p->type, frontswap_map); |
---|
2501 | 2544 | spin_lock(&swap_lock); |
---|
2502 | 2545 | spin_lock(&p->lock); |
---|
2503 | | - _enable_swap_info(p, prio, swap_map, cluster_info); |
---|
| 2546 | + setup_swap_info(p, prio, swap_map, cluster_info); |
---|
| 2547 | + spin_unlock(&p->lock); |
---|
| 2548 | + spin_unlock(&swap_lock); |
---|
| 2549 | + /* |
---|
| 2550 | + * Guarantee swap_map, cluster_info, etc. fields are valid |
---|
| 2551 | + * between get/put_swap_device() if SWP_VALID bit is set |
---|
| 2552 | + */ |
---|
| 2553 | + synchronize_rcu(); |
---|
| 2554 | + spin_lock(&swap_lock); |
---|
| 2555 | + spin_lock(&p->lock); |
---|
| 2556 | + _enable_swap_info(p); |
---|
2504 | 2557 | spin_unlock(&p->lock); |
---|
2505 | 2558 | spin_unlock(&swap_lock); |
---|
2506 | 2559 | } |
---|
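
The reason enable_swap_info() now works in two steps with a synchronize_rcu() in between is the SWP_VALID scheme: once the grace period has elapsed, any reader that observes SWP_VALID can also rely on swap_map and cluster_info being fully set up. A minimal sketch of the reader-side pattern this is meant to support, assuming the get_swap_device()/put_swap_device() helpers used elsewhere in this file are RCU read-side wrappers around that flag:

    /* Reader-side sketch; assumes get/put_swap_device() are RCU based. */
    struct swap_info_struct *si;
    unsigned char count;

    si = get_swap_device(entry);      /* rcu_read_lock() + SWP_VALID check (assumed) */
    if (!si)
        return -ENOENT;               /* raced with swapoff */

    /* si->swap_map, si->cluster_info etc. are stable until the put below */
    count = READ_ONCE(si->swap_map[swp_offset(entry)]);

    put_swap_device(si);              /* ends the RCU read-side section (assumed) */

swapoff mirrors this in the later hunk of this diff: it clears SWP_VALID, calls synchronize_rcu(), and only then tears down swap_map and the cluster data.
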
.. | .. |
---|
2509 | 2562 | { |
---|
2510 | 2563 | spin_lock(&swap_lock); |
---|
2511 | 2564 | spin_lock(&p->lock); |
---|
2512 | | - _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info); |
---|
| 2565 | + setup_swap_info(p, p->prio, p->swap_map, p->cluster_info); |
---|
| 2566 | + _enable_swap_info(p); |
---|
2513 | 2567 | spin_unlock(&p->lock); |
---|
2514 | 2568 | spin_unlock(&swap_lock); |
---|
2515 | 2569 | } |
---|
.. | .. |
---|
2537 | 2591 | struct filename *pathname; |
---|
2538 | 2592 | int err, found = 0; |
---|
2539 | 2593 | unsigned int old_block_size; |
---|
| 2594 | + bool skip = false; |
---|
2540 | 2595 | |
---|
2541 | 2596 | if (!capable(CAP_SYS_ADMIN)) |
---|
2542 | 2597 | return -EPERM; |
---|
.. | .. |
---|
2591 | 2646 | least_priority++; |
---|
2592 | 2647 | } |
---|
2593 | 2648 | plist_del(&p->list, &swap_active_head); |
---|
2594 | | - atomic_long_sub(p->pages, &nr_swap_pages); |
---|
2595 | | - total_swap_pages -= p->pages; |
---|
| 2649 | + trace_android_vh_account_swap_pages(p, &skip); |
---|
| 2650 | + if (!skip) { |
---|
| 2651 | + atomic_long_sub(p->pages, &nr_swap_pages); |
---|
| 2652 | + total_swap_pages -= p->pages; |
---|
| 2653 | + } |
---|
2596 | 2654 | p->flags &= ~SWP_WRITEOK; |
---|
2597 | 2655 | spin_unlock(&p->lock); |
---|
2598 | 2656 | spin_unlock(&swap_lock); |
---|
.. | .. |
---|
2611 | 2669 | } |
---|
2612 | 2670 | |
---|
2613 | 2671 | reenable_swap_slots_cache_unlock(); |
---|
| 2672 | + |
---|
| 2673 | + spin_lock(&swap_lock); |
---|
| 2674 | + spin_lock(&p->lock); |
---|
| 2675 | + p->flags &= ~SWP_VALID; /* mark swap device as invalid */ |
---|
| 2676 | + spin_unlock(&p->lock); |
---|
| 2677 | + spin_unlock(&swap_lock); |
---|
| 2678 | + /* |
---|
| 2679 | + * wait for swap operations protected by get/put_swap_device() |
---|
| 2680 | + * to complete |
---|
| 2681 | + */ |
---|
| 2682 | + synchronize_rcu(); |
---|
2614 | 2683 | |
---|
2615 | 2684 | flush_work(&p->discard_work); |
---|
2616 | 2685 | |
---|
.. | .. |
---|
2647 | 2716 | frontswap_map = frontswap_map_get(p); |
---|
2648 | 2717 | spin_unlock(&p->lock); |
---|
2649 | 2718 | spin_unlock(&swap_lock); |
---|
| 2719 | + arch_swap_invalidate_area(p->type); |
---|
2650 | 2720 | frontswap_invalidate_area(p->type); |
---|
2651 | 2721 | frontswap_map_set(p, NULL); |
---|
2652 | 2722 | mutex_unlock(&swapon_mutex); |
---|
2653 | 2723 | free_percpu(p->percpu_cluster); |
---|
2654 | 2724 | p->percpu_cluster = NULL; |
---|
| 2725 | + free_percpu(p->cluster_next_cpu); |
---|
| 2726 | + p->cluster_next_cpu = NULL; |
---|
2655 | 2727 | vfree(swap_map); |
---|
2656 | 2728 | kvfree(cluster_info); |
---|
2657 | 2729 | kvfree(frontswap_map); |
---|
.. | .. |
---|
2759 | 2831 | struct swap_info_struct *si = v; |
---|
2760 | 2832 | struct file *file; |
---|
2761 | 2833 | int len; |
---|
| 2834 | + unsigned int bytes, inuse; |
---|
2762 | 2835 | |
---|
2763 | 2836 | if (si == SEQ_START_TOKEN) { |
---|
2764 | | - seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); |
---|
| 2837 | + seq_puts(swap,"Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); |
---|
2765 | 2838 | return 0; |
---|
2766 | 2839 | } |
---|
2767 | 2840 | |
---|
| 2841 | + bytes = si->pages << (PAGE_SHIFT - 10); |
---|
| 2842 | + inuse = si->inuse_pages << (PAGE_SHIFT - 10); |
---|
| 2843 | + |
---|
2768 | 2844 | file = si->swap_file; |
---|
2769 | 2845 | len = seq_file_path(swap, file, " \t\n\\"); |
---|
2770 | | - seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", |
---|
| 2846 | + seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n", |
---|
2771 | 2847 | len < 40 ? 40 - len : 1, " ", |
---|
2772 | 2848 | S_ISBLK(file_inode(file)->i_mode) ? |
---|
2773 | 2849 | "partition" : "file\t", |
---|
2774 | | - si->pages << (PAGE_SHIFT - 10), |
---|
2775 | | - si->inuse_pages << (PAGE_SHIFT - 10), |
---|
| 2850 | + bytes, bytes < 10000000 ? "\t" : "", |
---|
| 2851 | + inuse, inuse < 10000000 ? "\t" : "", |
---|
2776 | 2852 | si->prio); |
---|
2777 | 2853 | return 0; |
---|
2778 | 2854 | } |
---|
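
The extra \t\t in the header and the conditional tab after bytes and inuse are purely cosmetic: a value below 10000000 KiB has at most seven digits and fits within one 8-column tab stop, so it gets one more tab to reach the same column as an eight-digit value. Illustrative /proc/swaps output under the new format (device names and sizes are hypothetical, and the whitespace shown is approximate):

    Filename                                Type            Size            Used            Priority
    /dev/block/zram0                        partition       4124668         1048576         -2
    /dev/sda5                               partition       16777212        0               -3
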
.. | .. |
---|
2798 | 2874 | return 0; |
---|
2799 | 2875 | } |
---|
2800 | 2876 | |
---|
2801 | | -static const struct file_operations proc_swaps_operations = { |
---|
2802 | | - .open = swaps_open, |
---|
2803 | | - .read = seq_read, |
---|
2804 | | - .llseek = seq_lseek, |
---|
2805 | | - .release = seq_release, |
---|
2806 | | - .poll = swaps_poll, |
---|
| 2877 | +static const struct proc_ops swaps_proc_ops = { |
---|
| 2878 | + .proc_flags = PROC_ENTRY_PERMANENT, |
---|
| 2879 | + .proc_open = swaps_open, |
---|
| 2880 | + .proc_read = seq_read, |
---|
| 2881 | + .proc_lseek = seq_lseek, |
---|
| 2882 | + .proc_release = seq_release, |
---|
| 2883 | + .proc_poll = swaps_poll, |
---|
2807 | 2884 | }; |
---|
2808 | 2885 | |
---|
2809 | 2886 | static int __init procswaps_init(void) |
---|
2810 | 2887 | { |
---|
2811 | | - proc_create("swaps", 0, NULL, &proc_swaps_operations); |
---|
| 2888 | + proc_create("swaps", 0, NULL, &swaps_proc_ops); |
---|
2812 | 2889 | return 0; |
---|
2813 | 2890 | } |
---|
2814 | 2891 | __initcall(procswaps_init); |
---|
.. | .. |
---|
2825 | 2902 | |
---|
2826 | 2903 | static struct swap_info_struct *alloc_swap_info(void) |
---|
2827 | 2904 | { |
---|
2828 | | - struct swap_info_struct *p; |
---|
| 2905 | + struct swap_info_struct *p = NULL; |
---|
2829 | 2906 | struct swap_info_struct *defer = NULL; |
---|
2830 | 2907 | unsigned int type; |
---|
2831 | 2908 | int i; |
---|
2832 | | - int size = sizeof(*p) + nr_node_ids * sizeof(struct plist_node); |
---|
| 2909 | + bool skip = false; |
---|
2833 | 2910 | |
---|
2834 | | - p = kvzalloc(size, GFP_KERNEL); |
---|
| 2911 | + trace_android_rvh_alloc_si(&p, &skip); |
---|
| 2912 | + trace_android_vh_alloc_si(&p, &skip); |
---|
| 2913 | + if (!skip) |
---|
| 2914 | + p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); |
---|
2835 | 2915 | if (!p) |
---|
2836 | 2916 | return ERR_PTR(-ENOMEM); |
---|
2837 | 2917 | |
---|
.. | .. |
---|
2863 | 2943 | * would be relying on p->type to remain valid. |
---|
2864 | 2944 | */ |
---|
2865 | 2945 | } |
---|
2866 | | - INIT_LIST_HEAD(&p->first_swap_extent.list); |
---|
| 2946 | + p->swap_extent_root = RB_ROOT; |
---|
2867 | 2947 | plist_node_init(&p->list, 0); |
---|
2868 | 2948 | for_each_node(i) |
---|
2869 | 2949 | plist_node_init(&p->avail_lists[i], 0); |
---|
.. | .. |
---|
2881 | 2961 | int error; |
---|
2882 | 2962 | |
---|
2883 | 2963 | if (S_ISBLK(inode->i_mode)) { |
---|
2884 | | - p->bdev = bdgrab(I_BDEV(inode)); |
---|
2885 | | - error = blkdev_get(p->bdev, |
---|
| 2964 | + p->bdev = blkdev_get_by_dev(inode->i_rdev, |
---|
2886 | 2965 | FMODE_READ | FMODE_WRITE | FMODE_EXCL, p); |
---|
2887 | | - if (error < 0) { |
---|
| 2966 | + if (IS_ERR(p->bdev)) { |
---|
| 2967 | + error = PTR_ERR(p->bdev); |
---|
2888 | 2968 | p->bdev = NULL; |
---|
2889 | 2969 | return error; |
---|
2890 | 2970 | } |
---|
.. | .. |
---|
2892 | 2972 | error = set_blocksize(p->bdev, PAGE_SIZE); |
---|
2893 | 2973 | if (error < 0) |
---|
2894 | 2974 | return error; |
---|
| 2975 | + /* |
---|
| 2976 | + * Zoned block devices contain zones that can only be written |
---|
| 2977 | + * sequentially. Hence zoned block devices are not suitable |
---|
| 2978 | + * for swapping. Disallow them here. |
---|
| 2979 | + */ |
---|
| 2980 | + if (blk_queue_is_zoned(p->bdev->bd_disk->queue)) |
---|
| 2981 | + return -EINVAL; |
---|
2895 | 2982 | p->flags |= SWP_BLKDEV; |
---|
2896 | 2983 | } else if (S_ISREG(inode->i_mode)) { |
---|
2897 | 2984 | p->bdev = inode->i_sb->s_bdev; |
---|
.. | .. |
---|
3188 | 3275 | goto bad_swap_unlock_inode; |
---|
3189 | 3276 | } |
---|
3190 | 3277 | |
---|
3191 | | - if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) |
---|
| 3278 | + if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue)) |
---|
3192 | 3279 | p->flags |= SWP_STABLE_WRITES; |
---|
3193 | 3280 | |
---|
3194 | | - if (bdi_cap_synchronous_io(inode_to_bdi(inode))) |
---|
| 3281 | + if (p->bdev && p->bdev->bd_disk->fops->rw_page) |
---|
3195 | 3282 | p->flags |= SWP_SYNCHRONOUS_IO; |
---|
3196 | 3283 | |
---|
3197 | 3284 | if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
---|
.. | .. |
---|
3199 | 3286 | unsigned long ci, nr_cluster; |
---|
3200 | 3287 | |
---|
3201 | 3288 | p->flags |= SWP_SOLIDSTATE; |
---|
| 3289 | + p->cluster_next_cpu = alloc_percpu(unsigned int); |
---|
| 3290 | + if (!p->cluster_next_cpu) { |
---|
| 3291 | + error = -ENOMEM; |
---|
| 3292 | + goto bad_swap_unlock_inode; |
---|
| 3293 | + } |
---|
3202 | 3294 | /* |
---|
3203 | 3295 | * select a random position to start with to help wear leveling |
---|
3204 | 3296 | * SSD |
---|
3205 | 3297 | */ |
---|
3206 | | - p->cluster_next = 1 + (prandom_u32() % p->highest_bit); |
---|
| 3298 | + for_each_possible_cpu(cpu) { |
---|
| 3299 | + per_cpu(*p->cluster_next_cpu, cpu) = |
---|
| 3300 | + 1 + prandom_u32_max(p->highest_bit); |
---|
| 3301 | + } |
---|
3207 | 3302 | nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); |
---|
3208 | 3303 | |
---|
3209 | 3304 | cluster_info = kvcalloc(nr_cluster, sizeof(*cluster_info), |
---|
.. | .. |
---|
3289 | 3384 | error = inode_drain_writes(inode); |
---|
3290 | 3385 | if (error) { |
---|
3291 | 3386 | inode->i_flags &= ~S_SWAPFILE; |
---|
3292 | | - goto bad_swap_unlock_inode; |
---|
| 3387 | + goto free_swap_address_space; |
---|
3293 | 3388 | } |
---|
3294 | 3389 | |
---|
3295 | 3390 | mutex_lock(&swapon_mutex); |
---|
.. | .. |
---|
3299 | 3394 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; |
---|
3300 | 3395 | enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map); |
---|
3301 | 3396 | |
---|
| 3397 | + trace_android_vh_init_swap_info_struct(p, swap_avail_heads); |
---|
3302 | 3398 | pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s%s\n", |
---|
3303 | 3399 | p->pages<<(PAGE_SHIFT-10), name->name, p->prio, |
---|
3304 | 3400 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), |
---|
.. | .. |
---|
3314 | 3410 | |
---|
3315 | 3411 | error = 0; |
---|
3316 | 3412 | goto out; |
---|
| 3413 | +free_swap_address_space: |
---|
| 3414 | + exit_swap_address_space(p->type); |
---|
3317 | 3415 | bad_swap_unlock_inode: |
---|
3318 | 3416 | inode_unlock(inode); |
---|
3319 | 3417 | bad_swap: |
---|
3320 | 3418 | free_percpu(p->percpu_cluster); |
---|
3321 | 3419 | p->percpu_cluster = NULL; |
---|
| 3420 | + free_percpu(p->cluster_next_cpu); |
---|
| 3421 | + p->cluster_next_cpu = NULL; |
---|
3322 | 3422 | if (inode && S_ISBLK(inode->i_mode) && p->bdev) { |
---|
3323 | 3423 | set_blocksize(p->bdev, p->old_block_size); |
---|
3324 | 3424 | blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); |
---|
.. | .. |
---|
3359 | 3459 | spin_lock(&swap_lock); |
---|
3360 | 3460 | for (type = 0; type < nr_swapfiles; type++) { |
---|
3361 | 3461 | struct swap_info_struct *si = swap_info[type]; |
---|
| 3462 | + bool skip = false; |
---|
3362 | 3463 | |
---|
3363 | | - if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) |
---|
| 3464 | + trace_android_vh_si_swapinfo(si, &skip); |
---|
| 3465 | + if (!skip && (si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) |
---|
3364 | 3466 | nr_to_be_unused += si->inuse_pages; |
---|
3365 | 3467 | } |
---|
3366 | 3468 | val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; |
---|
3367 | 3469 | val->totalswap = total_swap_pages + nr_to_be_unused; |
---|
3368 | 3470 | spin_unlock(&swap_lock); |
---|
3369 | 3471 | } |
---|
| 3472 | +EXPORT_SYMBOL_GPL(si_swapinfo); |
---|
3370 | 3473 | |
---|
3371 | 3474 | /* |
---|
3372 | 3475 | * Verify that a swap entry is valid and increment its swap map count. |
---|
.. | .. |
---|
3388 | 3491 | unsigned char has_cache; |
---|
3389 | 3492 | int err = -EINVAL; |
---|
3390 | 3493 | |
---|
3391 | | - if (non_swap_entry(entry)) |
---|
3392 | | - goto out; |
---|
3393 | | - |
---|
3394 | | - p = swp_swap_info(entry); |
---|
| 3494 | + p = get_swap_device(entry); |
---|
3395 | 3495 | if (!p) |
---|
3396 | | - goto bad_file; |
---|
| 3496 | + goto out; |
---|
3397 | 3497 | |
---|
3398 | 3498 | offset = swp_offset(entry); |
---|
3399 | | - if (unlikely(offset >= p->max)) |
---|
3400 | | - goto out; |
---|
3401 | | - |
---|
3402 | 3499 | ci = lock_cluster_or_swap_info(p, offset); |
---|
3403 | 3500 | |
---|
3404 | 3501 | count = p->swap_map[offset]; |
---|
.. | .. |
---|
3439 | 3536 | } else |
---|
3440 | 3537 | err = -ENOENT; /* unused swap entry */ |
---|
3441 | 3538 | |
---|
3442 | | - p->swap_map[offset] = count | has_cache; |
---|
| 3539 | + WRITE_ONCE(p->swap_map[offset], count | has_cache); |
---|
3443 | 3540 | |
---|
3444 | 3541 | unlock_out: |
---|
3445 | 3542 | unlock_cluster_or_swap_info(p, ci); |
---|
3446 | 3543 | out: |
---|
| 3544 | + if (p) |
---|
| 3545 | + put_swap_device(p); |
---|
3447 | 3546 | return err; |
---|
3448 | | - |
---|
3449 | | -bad_file: |
---|
3450 | | - pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val); |
---|
3451 | | - goto out; |
---|
3452 | 3547 | } |
---|
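
Two things change in __swap_duplicate() above: entry validation (bad type, offset beyond max, swapoff in progress) is folded into get_swap_device(), and the final store uses WRITE_ONCE(). The latter pairs with lockless readers of swap_map[]; a minimal sketch of the pairing, where the reader side is assumed rather than shown in this excerpt:

    /* Writer, still under lock_cluster_or_swap_info() as above: */
    WRITE_ONCE(p->swap_map[offset], count | has_cache);

    /* A lockless reader elsewhere would pair with it like this (assumed): */
    unsigned char c = READ_ONCE(p->swap_map[offset]);
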
3453 | 3548 | |
---|
3454 | 3549 | /* |
---|
.. | .. |
---|
3481 | 3576 | * |
---|
3482 | 3577 | * Called when allocating swap cache for existing swap entry, |
---|
3483 | 3578 | * This can return error codes. Returns 0 at success. |
---|
3484 | | - * -EBUSY means there is a swap cache. |
---|
| 3579 | + * -EEXIST means there is a swap cache. |
---|
3485 | 3580 | * Note: return code is different from swap_duplicate(). |
---|
3486 | 3581 | */ |
---|
3487 | 3582 | int swapcache_prepare(swp_entry_t entry) |
---|
.. | .. |
---|
3493 | 3588 | { |
---|
3494 | 3589 | return swap_type_to_swap_info(swp_type(entry)); |
---|
3495 | 3590 | } |
---|
| 3591 | +EXPORT_SYMBOL_GPL(swp_swap_info); |
---|
3496 | 3592 | |
---|
3497 | 3593 | struct swap_info_struct *page_swap_info(struct page *page) |
---|
3498 | 3594 | { |
---|
.. | .. |
---|
3540 | 3636 | struct page *list_page; |
---|
3541 | 3637 | pgoff_t offset; |
---|
3542 | 3638 | unsigned char count; |
---|
| 3639 | + int ret = 0; |
---|
3543 | 3640 | |
---|
3544 | 3641 | /* |
---|
3545 | 3642 | * When debugging, it's easier to use __GFP_ZERO here; but it's better |
---|
.. | .. |
---|
3547 | 3644 | */ |
---|
3548 | 3645 | page = alloc_page(gfp_mask | __GFP_HIGHMEM); |
---|
3549 | 3646 | |
---|
3550 | | - si = swap_info_get(entry); |
---|
| 3647 | + si = get_swap_device(entry); |
---|
3551 | 3648 | if (!si) { |
---|
3552 | 3649 | /* |
---|
3553 | 3650 | * An acceptable race has occurred since the failing |
---|
3554 | | - * __swap_duplicate(): the swap entry has been freed, |
---|
3555 | | - * perhaps even the whole swap_map cleared for swapoff. |
---|
| 3651 | + * __swap_duplicate(): the swap device may have been swapped off |
---|
3556 | 3652 | */ |
---|
3557 | 3653 | goto outer; |
---|
3558 | 3654 | } |
---|
| 3655 | + spin_lock(&si->lock); |
---|
3559 | 3656 | |
---|
3560 | 3657 | offset = swp_offset(entry); |
---|
3561 | 3658 | |
---|
.. | .. |
---|
3573 | 3670 | } |
---|
3574 | 3671 | |
---|
3575 | 3672 | if (!page) { |
---|
3576 | | - unlock_cluster(ci); |
---|
3577 | | - spin_unlock(&si->lock); |
---|
3578 | | - return -ENOMEM; |
---|
| 3673 | + ret = -ENOMEM; |
---|
| 3674 | + goto out; |
---|
3579 | 3675 | } |
---|
3580 | 3676 | |
---|
3581 | 3677 | /* |
---|
.. | .. |
---|
3627 | 3723 | out: |
---|
3628 | 3724 | unlock_cluster(ci); |
---|
3629 | 3725 | spin_unlock(&si->lock); |
---|
| 3726 | + put_swap_device(si); |
---|
3630 | 3727 | outer: |
---|
3631 | 3728 | if (page) |
---|
3632 | 3729 | __free_page(page); |
---|
3633 | | - return 0; |
---|
| 3730 | + return ret; |
---|
3634 | 3731 | } |
---|
3635 | 3732 | |
---|
3636 | 3733 | /* |
---|
.. | .. |
---|
3658 | 3755 | |
---|
3659 | 3756 | spin_lock(&si->cont_lock); |
---|
3660 | 3757 | offset &= ~PAGE_MASK; |
---|
3661 | | - page = list_entry(head->lru.next, struct page, lru); |
---|
| 3758 | + page = list_next_entry(head, lru); |
---|
3662 | 3759 | map = kmap_atomic(page) + offset; |
---|
3663 | 3760 | |
---|
3664 | 3761 | if (count == SWAP_MAP_MAX) /* initial increment from swap_map */ |
---|
.. | .. |
---|
3670 | 3767 | */ |
---|
3671 | 3768 | while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) { |
---|
3672 | 3769 | kunmap_atomic(map); |
---|
3673 | | - page = list_entry(page->lru.next, struct page, lru); |
---|
| 3770 | + page = list_next_entry(page, lru); |
---|
3674 | 3771 | BUG_ON(page == head); |
---|
3675 | 3772 | map = kmap_atomic(page) + offset; |
---|
3676 | 3773 | } |
---|
3677 | 3774 | if (*map == SWAP_CONT_MAX) { |
---|
3678 | 3775 | kunmap_atomic(map); |
---|
3679 | | - page = list_entry(page->lru.next, struct page, lru); |
---|
| 3776 | + page = list_next_entry(page, lru); |
---|
3680 | 3777 | if (page == head) { |
---|
3681 | 3778 | ret = false; /* add count continuation */ |
---|
3682 | 3779 | goto out; |
---|
.. | .. |
---|
3686 | 3783 | } |
---|
3687 | 3784 | *map += 1; |
---|
3688 | 3785 | kunmap_atomic(map); |
---|
3689 | | - page = list_entry(page->lru.prev, struct page, lru); |
---|
3690 | | - while (page != head) { |
---|
| 3786 | + while ((page = list_prev_entry(page, lru)) != head) { |
---|
3691 | 3787 | map = kmap_atomic(page) + offset; |
---|
3692 | 3788 | *map = COUNT_CONTINUED; |
---|
3693 | 3789 | kunmap_atomic(map); |
---|
3694 | | - page = list_entry(page->lru.prev, struct page, lru); |
---|
3695 | 3790 | } |
---|
3696 | 3791 | ret = true; /* incremented */ |
---|
3697 | 3792 | |
---|
.. | .. |
---|
3702 | 3797 | BUG_ON(count != COUNT_CONTINUED); |
---|
3703 | 3798 | while (*map == COUNT_CONTINUED) { |
---|
3704 | 3799 | kunmap_atomic(map); |
---|
3705 | | - page = list_entry(page->lru.next, struct page, lru); |
---|
| 3800 | + page = list_next_entry(page, lru); |
---|
3706 | 3801 | BUG_ON(page == head); |
---|
3707 | 3802 | map = kmap_atomic(page) + offset; |
---|
3708 | 3803 | } |
---|
.. | .. |
---|
3711 | 3806 | if (*map == 0) |
---|
3712 | 3807 | count = 0; |
---|
3713 | 3808 | kunmap_atomic(map); |
---|
3714 | | - page = list_entry(page->lru.prev, struct page, lru); |
---|
3715 | | - while (page != head) { |
---|
| 3809 | + while ((page = list_prev_entry(page, lru)) != head) { |
---|
3716 | 3810 | map = kmap_atomic(page) + offset; |
---|
3717 | 3811 | *map = SWAP_CONT_MAX | count; |
---|
3718 | 3812 | count = COUNT_CONTINUED; |
---|
3719 | 3813 | kunmap_atomic(map); |
---|
3720 | | - page = list_entry(page->lru.prev, struct page, lru); |
---|
3721 | 3814 | } |
---|
3722 | 3815 | ret = count == COUNT_CONTINUED; |
---|
3723 | 3816 | } |
---|
.. | .. |
---|
3749 | 3842 | } |
---|
3750 | 3843 | |
---|
3751 | 3844 | #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) |
---|
3752 | | -void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node, |
---|
3753 | | - gfp_t gfp_mask) |
---|
| 3845 | +void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) |
---|
3754 | 3846 | { |
---|
3755 | 3847 | struct swap_info_struct *si, *next; |
---|
3756 | | - if (!(gfp_mask & __GFP_IO) || !memcg) |
---|
| 3848 | + int nid = page_to_nid(page); |
---|
| 3849 | + |
---|
| 3850 | + if (!(gfp_mask & __GFP_IO)) |
---|
3757 | 3851 | return; |
---|
3758 | 3852 | |
---|
3759 | 3853 | if (!blk_cgroup_congested()) |
---|
.. | .. |
---|
3767 | 3861 | return; |
---|
3768 | 3862 | |
---|
3769 | 3863 | spin_lock(&swap_avail_lock); |
---|
3770 | | - plist_for_each_entry_safe(si, next, &swap_avail_heads[node], |
---|
3771 | | - avail_lists[node]) { |
---|
| 3864 | + plist_for_each_entry_safe(si, next, &swap_avail_heads[nid], |
---|
| 3865 | + avail_lists[nid]) { |
---|
3772 | 3866 | if (si->bdev) { |
---|
3773 | | - blkcg_schedule_throttle(bdev_get_queue(si->bdev), |
---|
3774 | | - true); |
---|
| 3867 | + blkcg_schedule_throttle(bdev_get_queue(si->bdev), true); |
---|
3775 | 3868 | break; |
---|
3776 | 3869 | } |
---|
3777 | 3870 | } |
---|