@@ -60,6 +60,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/mm.h>
+#include <linux/locallock.h>
 #include <linux/page_owner.h>
 #include <linux/kthread.h>
 #include <linux/memcontrol.h>
@@ -352,6 +353,18 @@
 int nr_online_nodes __read_mostly = 1;
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
+#endif
+
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define cpu_lock_irqsave(cpu, flags)		\
+	local_lock_irqsave_on(pa_lock, flags, cpu)
+# define cpu_unlock_irqrestore(cpu, flags)	\
+	local_unlock_irqrestore_on(pa_lock, flags, cpu)
+#else
+# define cpu_lock_irqsave(cpu, flags)		local_irq_save(flags)
+# define cpu_unlock_irqrestore(cpu, flags)	local_irq_restore(flags)
+#endif
 
 int page_group_by_mobility_disabled __read_mostly;
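The local lock above is the core of the patch. A rough sketch of the semantics the rest of the file relies on; this is not the real locallock.h implementation (which also tracks owner and nesting depth), and the `.lock` member name is an assumption:

	#ifdef CONFIG_PREEMPT_RT_BASE
	/* RT: pa_lock is a per-CPU spinlock (a sleeping lock on RT), so the
	 * pcp lists stay consistent while interrupts remain enabled and the
	 * critical section stays preemptible. */
	# define local_lock_irqsave(lvar, flags)		\
		do { (void)(flags); spin_lock(this_cpu_ptr(&(lvar).lock)); } while (0)
	#else
	/* !RT: the lock evaporates; behaviour is identical to local_irq_save(). */
	# define local_lock_irqsave(lvar, flags)	local_irq_save(flags)
	#endif

The cpu_lock_irqsave() wrapper picks a named CPU's instance instead of the local one, which is what later lets one CPU drain another CPU's pcp lists.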
@@ -1172,7 +1185,7 @@
 }
 
 /*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
 *
@@ -1182,15 +1195,57 @@
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, int count,
-					struct per_cpu_pages *pcp)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+			       bool zone_retry)
+{
+	bool isolated_pageblocks;
+	struct page *page, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	isolated_pageblocks = has_isolate_pageblock(zone);
+
+	/*
+	 * Use safe version since after __free_one_page(),
+	 * page->lru.next will not point to original list.
+	 */
+	list_for_each_entry_safe(page, tmp, head, lru) {
+		int mt = get_pcppage_migratetype(page);
+
+		if (page_zone(page) != zone) {
+			/*
+			 * free_unref_page_list() sorts pages by zone. If we
+			 * end up with pages from different NUMA nodes that
+			 * belong to the same ZONE index, we need to redo the
+			 * free with the correct ZONE pointer. Skip the page
+			 * for now, redo it on the next iteration.
+			 */
+			WARN_ON_ONCE(zone_retry == false);
+			if (zone_retry)
+				continue;
+		}
+
+		/* MIGRATE_ISOLATE page should not go to pcplists */
+		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+		/* Pageblock could have been isolated meanwhile */
+		if (unlikely(isolated_pageblocks))
+			mt = get_pageblock_migratetype(page);
+
+		list_del(&page->lru);
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
+		trace_mm_page_pcpu_drain(page, 0, mt);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
+			      struct list_head *dst)
+
 {
 	int migratetype = 0;
 	int batch_free = 0;
 	int prefetch_nr = 0;
-	bool isolated_pageblocks;
-	struct page *page, *tmp;
-	LIST_HEAD(head);
+	struct page *page;
 
 	/*
 	 * Ensure proper count is passed which otherwise would stuck in the
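The rework splits the old free_pcppages_bulk() in two: isolate_pcp_pages() only unlinks pages from the pcp lists and therefore only needs pa_lock, while the new free_pcppages_bulk() takes zone->lock by itself. Every caller converted below follows the same shape; a condensed sketch, where count, pcp and zone stand for the caller's context:

	LIST_HEAD(dst);
	unsigned long flags;

	local_lock_irqsave(pa_lock, flags);	/* this CPU's pcp lists */
	isolate_pcp_pages(count, pcp, &dst);	/* unlink only, no zone->lock */
	local_unlock_irqrestore(pa_lock, flags);

	/* zone->lock is taken here, outside the pcp critical section */
	free_pcppages_bulk(zone, &dst, false);

Keeping the two locks in disjoint sections shortens the pa_lock hold time and avoids nesting zone->lock inside it, which matters on RT where both are sleeping locks.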
@@ -1227,7 +1282,7 @@
 			if (bulkfree_pcp_prepare(page))
 				continue;
 
-			list_add_tail(&page->lru, &head);
+			list_add_tail(&page->lru, dst);
 
 			/*
 			 * We are going to put the page back to the global
@@ -1242,26 +1297,6 @@
 				prefetch_buddy(page);
 		} while (--count && --batch_free && !list_empty(list));
 	}
-
-	spin_lock(&zone->lock);
-	isolated_pageblocks = has_isolate_pageblock(zone);
-
-	/*
-	 * Use safe version since after __free_one_page(),
-	 * page->lru.next will not point to original list.
-	 */
-	list_for_each_entry_safe(page, tmp, &head, lru) {
-		int mt = get_pcppage_migratetype(page);
-		/* MIGRATE_ISOLATE page should not go to pcplists */
-		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-		/* Pageblock could have been isolated meanwhile */
-		if (unlikely(isolated_pageblocks))
-			mt = get_pageblock_migratetype(page);
-
-		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
-		trace_mm_page_pcpu_drain(page, 0, mt);
-	}
-	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1363,10 +1398,10 @@
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
-	local_irq_save(flags);
+	local_lock_irqsave(pa_lock, flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(pa_lock, flags);
 }
 
 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2673,13 +2708,18 @@
 {
 	unsigned long flags;
 	int to_drain, batch;
+	LIST_HEAD(dst);
 
-	local_irq_save(flags);
+	local_lock_irqsave(pa_lock, flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
-		free_pcppages_bulk(zone, to_drain, pcp);
-	local_irq_restore(flags);
+		isolate_pcp_pages(to_drain, pcp, &dst);
+
+	local_unlock_irqrestore(pa_lock, flags);
+
+	if (to_drain > 0)
+		free_pcppages_bulk(zone, &dst, false);
 }
 #endif
 
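The doubled `if (to_drain > 0)` is deliberate: the count is latched while pa_lock is held, and the post-unlock free operates only on the pages already moved to dst, never on pcp->count again, since pcp->count can change the moment the lock is dropped. In shorthand:

	to_drain = min(pcp->count, batch);	/* decided under pa_lock */
	...
	if (to_drain > 0)			/* acts on dst, not on pcp */
		free_pcppages_bulk(zone, &dst, false);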
@@ -2695,14 +2735,21 @@
 	unsigned long flags;
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
+	LIST_HEAD(dst);
+	int count;
 
-	local_irq_save(flags);
+	cpu_lock_irqsave(cpu, flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	if (pcp->count)
-		free_pcppages_bulk(zone, pcp->count, pcp);
-	local_irq_restore(flags);
+	count = pcp->count;
+	if (count)
+		isolate_pcp_pages(count, pcp, &dst);
+
+	cpu_unlock_irqrestore(cpu, flags);
+
+	if (count)
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
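drain_pages_zone() switches to cpu_lock_irqsave() because it can run on behalf of another CPU. A sketch of what the _on() variant is assumed to do on RT (simplified; the real helper lives in locallock.h):

	/* Sketch only: take the remote CPU's instance of the per-CPU lock. */
	#define local_lock_irqsave_on(lvar, flags, cpu)		\
		do {						\
			(void)(flags);				\
			spin_lock(&per_cpu((lvar), cpu).lock);	\
		} while (0)

On !RT the wrapper degrades to local_irq_save(), which only protects the local CPU; that is why the workqueue-based drain below is kept for that configuration.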
@@ -2737,6 +2784,7 @@
 	drain_pages(cpu);
 }
 
+#ifndef CONFIG_PREEMPT_RT_BASE
 static void drain_local_pages_wq(struct work_struct *work)
 {
 	/*
@@ -2750,6 +2798,7 @@
 	drain_local_pages(NULL);
 	preempt_enable();
 }
+#endif
 
 /*
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
@@ -2816,7 +2865,14 @@
 		else
 			cpumask_clear_cpu(cpu, &cpus_with_pcps);
 	}
-
+#ifdef CONFIG_PREEMPT_RT_BASE
+	for_each_cpu(cpu, &cpus_with_pcps) {
+		if (zone)
+			drain_pages_zone(cpu, zone);
+		else
+			drain_pages(cpu);
+	}
+#else
 	for_each_cpu(cpu, &cpus_with_pcps) {
 		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
 		INIT_WORK(work, drain_local_pages_wq);
@@ -2824,6 +2880,7 @@
 	}
 	for_each_cpu(cpu, &cpus_with_pcps)
 		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+#endif
 
 	mutex_unlock(&pcpu_drain_mutex);
 }
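With remote-CPU locking available, RT drains every CPU's lists synchronously from the calling context: no per-CPU work items, no flush_work(), and no dependency on the workqueue machinery when called from reclaim. Callers are unchanged either way, for example:

	/* e.g. from page isolation or memory hot-remove paths */
	drain_all_pages(zone);	/* RT: direct loop over cpus_with_pcps;
				 * !RT: drain_local_pages_wq() on each CPU */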
@@ -2895,7 +2952,8 @@
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   struct list_head *dst)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -2924,7 +2982,8 @@
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-		free_pcppages_bulk(zone, batch, pcp);
+
+		isolate_pcp_pages(batch, pcp, dst);
 	}
 }
 
@@ -2935,13 +2994,17 @@
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
+	LIST_HEAD(dst);
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
-	local_irq_save(flags);
-	free_unref_page_commit(page, pfn);
-	local_irq_restore(flags);
+	local_lock_irqsave(pa_lock, flags);
+	free_unref_page_commit(page, pfn, &dst);
+	local_unlock_irqrestore(pa_lock, flags);
+	if (!list_empty(&dst))
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
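The zone pointer is sampled before the commit because the page must not be touched once it is on dst; by the time the list is drained it may already have been merged into a higher-order buddy. The list_empty() check keeps the common case, pcp->count below pcp->high, entirely free of zone->lock traffic. A minimal usage sketch with a freshly allocated order-0 page:

	struct page *page = alloc_page(GFP_KERNEL);

	if (page)
		__free_page(page);	/* -> free_unref_page(): pcp fast path,
					 * bulk free only past pcp->high */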
@@ -2952,6 +3015,11 @@
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
+	struct list_head dsts[__MAX_NR_ZONES];
+	int i;
+
+	for (i = 0; i < __MAX_NR_ZONES; i++)
+		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -2961,25 +3029,42 @@
 		set_page_private(page, pfn);
 	}
 
-	local_irq_save(flags);
+	local_lock_irqsave(pa_lock, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
+		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		type = page_zonenum(page);
+		free_unref_page_commit(page, pfn, &dsts[type]);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_irq_restore(flags);
+			local_unlock_irqrestore(pa_lock, flags);
 			batch_count = 0;
-			local_irq_save(flags);
+			local_lock_irqsave(pa_lock, flags);
 		}
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(pa_lock, flags);
+
+	for (i = 0; i < __MAX_NR_ZONES; ) {
+		struct page *page;
+		struct zone *zone;
+
+		if (list_empty(&dsts[i])) {
+			i++;
+			continue;
+		}
+
+		page = list_first_entry(&dsts[i], struct page, lru);
+		zone = page_zone(page);
+
+		free_pcppages_bulk(zone, &dsts[i], true);
+	}
 }
 
 /*
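Pages are bucketed by zone index rather than zone pointer, so on NUMA one bucket can mix pages from different nodes that share an index (node 0's and node 1's ZONE_NORMAL, say). That is what the zone_retry pass is for: free_pcppages_bulk(..., true) frees only the pages matching the zone it was handed and leaves the rest on the list, and the drain loop advances i only once the bucket is empty, re-deriving the zone pointer on every pass. In outline:

	/* bucketed by index ... */
	type = page_zonenum(page);		/* e.g. ZONE_NORMAL */
	free_unref_page_commit(page, pfn, &dsts[type]);

	/* ... drained per actual zone, possibly in several passes */
	zone = page_zone(list_first_entry(&dsts[i], struct page, lru));
	free_pcppages_bulk(zone, &dsts[i], true);	/* skips other zones */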
@@ -3124,7 +3209,7 @@
 	struct page *page;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_lock_irqsave(pa_lock, flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	page = __rmqueue_pcplist(zone, migratetype, pcp,
 				 gfp_flags);
@@ -3132,7 +3217,7 @@
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 		zone_statistics(preferred_zone, zone);
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(pa_lock, flags);
 	return page;
 }
 
@@ -3159,7 +3244,7 @@
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	spin_lock_irqsave(&zone->lock, flags);
+	local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
 
 	do {
 		page = NULL;
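The order > 0 path needs zone->lock anyway, so here both locks are taken together. local_spin_lock_irqsave() is assumed to expand as below on RT, preserving the pa_lock before zone->lock order, and to collapse into a plain spin_lock_irqsave() on !RT:

	/* Assumed expansion (locallock.h in the RT tree): */
	#define local_spin_lock_irqsave(lvar, lock, flags)	\
		do {						\
			local_lock_irqsave(lvar, flags);	\
			spin_lock(lock);			\
		} while (0)

The unlock side in the hunks below pairs spin_unlock(&zone->lock), in the elided lines, with local_unlock_irqrestore(pa_lock, flags), releasing in inverse order.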
@@ -3186,14 +3271,14 @@
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 	zone_statistics(preferred_zone, zone);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(pa_lock, flags);
 
 out:
 	VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
 	return page;
 
 failed:
-	local_irq_restore(flags);
+	local_unlock_irqrestore(pa_lock, flags);
 	return NULL;
 }
 
@@ -7333,8 +7418,9 @@
 
 static int page_alloc_cpu_dead(unsigned int cpu)
 {
-
+	local_lock_irq_on(swapvec_lock, cpu);
 	lru_add_drain_cpu(cpu);
+	local_unlock_irq_on(swapvec_lock, cpu);
 	drain_pages(cpu);
 
 	/*
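This runs from the CPU-hotplug dead callback, i.e. on some other CPU after cpu has gone offline, so taking the dead CPU's locks via the _on() variants cannot deadlock with it. swapvec_lock is the RT patchset's local lock guarding the per-CPU LRU pagevecs in mm/swap.c. For reference, the callback is registered elsewhere in this file roughly as:

	cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD, "mm/page_alloc:dead",
				  NULL, page_alloc_cpu_dead);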
@@ -8257,7 +8343,7 @@
 	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages() */
-	local_irq_save(flags);
+	local_lock_irqsave(pa_lock, flags);
 	if (zone->pageset != &boot_pageset) {
 		for_each_online_cpu(cpu) {
 			pset = per_cpu_ptr(zone->pageset, cpu);
@@ -8266,7 +8352,7 @@
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(pa_lock, flags);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE