.. | .. |
---|
15 | 15 | #define CREATE_TRACE_POINTS |
---|
16 | 16 | #include <trace/events/page_isolation.h> |
---|
17 | 17 | |
---|
18 | | -static int set_migratetype_isolate(struct page *page, int migratetype, |
---|
19 | | - bool skip_hwpoisoned_pages) |
---|
| 18 | +static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) |
---|
20 | 19 | { |
---|
21 | | - struct zone *zone; |
---|
22 | | - unsigned long flags, pfn; |
---|
23 | | - struct memory_isolate_notify arg; |
---|
24 | | - int notifier_ret; |
---|
25 | | - int ret = -EBUSY; |
---|
26 | | - |
---|
27 | | - zone = page_zone(page); |
---|
| 20 | + struct zone *zone = page_zone(page); |
---|
| 21 | + struct page *unmovable; |
---|
| 22 | + unsigned long flags; |
---|
28 | 23 | |
---|
29 | 24 | spin_lock_irqsave(&zone->lock, flags); |
---|
30 | 25 | |
---|
31 | 26 | /* |
---|
32 | 27 | * We assume the caller intended to SET migrate type to isolate. |
---|
33 | 28 | * If it is already set, then someone else must have raced and |
---|
34 | | - * set it before us. Return -EBUSY |
---|
| 29 | + * set it before us. |
---|
35 | 30 | */ |
---|
36 | | - if (is_migrate_isolate_page(page)) |
---|
37 | | - goto out; |
---|
| 31 | + if (is_migrate_isolate_page(page)) { |
---|
| 32 | + spin_unlock_irqrestore(&zone->lock, flags); |
---|
| 33 | + return -EBUSY; |
---|
| 34 | + } |
---|
38 | 35 | |
---|
39 | | - pfn = page_to_pfn(page); |
---|
40 | | - arg.start_pfn = pfn; |
---|
41 | | - arg.nr_pages = pageblock_nr_pages; |
---|
42 | | - arg.pages_found = 0; |
---|
43 | | - |
---|
44 | | - /* |
---|
45 | | - * It may be possible to isolate a pageblock even if the |
---|
46 | | - * migratetype is not MIGRATE_MOVABLE. The memory isolation |
---|
47 | | - * notifier chain is used by balloon drivers to return the |
---|
48 | | - * number of pages in a range that are held by the balloon |
---|
49 | | - * driver to shrink memory. If all the pages are accounted for |
---|
50 | | - * by balloons, are free, or on the LRU, isolation can continue. |
---|
51 | | - * Later, for example, when memory hotplug notifier runs, these |
---|
52 | | - * pages reported as "can be isolated" should be isolated(freed) |
---|
53 | | - * by the balloon driver through the memory notifier chain. |
---|
54 | | - */ |
---|
55 | | - notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg); |
---|
56 | | - notifier_ret = notifier_to_errno(notifier_ret); |
---|
57 | | - if (notifier_ret) |
---|
58 | | - goto out; |
---|
59 | 36 | /* |
---|
60 | 37 | * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. |
---|
61 | 38 | * We just check MOVABLE pages. |
---|
62 | 39 | */ |
---|
63 | | - if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, |
---|
64 | | - skip_hwpoisoned_pages)) |
---|
65 | | - ret = 0; |
---|
66 | | - |
---|
67 | | - /* |
---|
68 | | - * immobile means "not-on-lru" pages. If immobile is larger than |
---|
69 | | - * removable-by-driver pages reported by notifier, we'll fail. |
---|
70 | | - */ |
---|
71 | | - |
---|
72 | | -out: |
---|
73 | | - if (!ret) { |
---|
| 40 | + unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags); |
---|
| 41 | + if (!unmovable) { |
---|
74 | 42 | unsigned long nr_pages; |
---|
75 | 43 | int mt = get_pageblock_migratetype(page); |
---|
76 | 44 | |
---|
.. | .. |
---|
80 | 48 | NULL); |
---|
81 | 49 | |
---|
82 | 50 | __mod_zone_freepage_state(zone, -nr_pages, mt); |
---|
| 51 | + spin_unlock_irqrestore(&zone->lock, flags); |
---|
| 52 | + return 0; |
---|
83 | 53 | } |
---|
84 | 54 | |
---|
85 | 55 | spin_unlock_irqrestore(&zone->lock, flags); |
---|
86 | | - if (!ret) |
---|
87 | | - drain_all_pages(zone); |
---|
88 | | - return ret; |
---|
| 56 | + if (isol_flags & REPORT_FAILURE) { |
---|
| 57 | + /* |
---|
| 58 | + * printk() with zone->lock held will likely trigger a |
---|
| 59 | + * lockdep splat, so defer it here. |
---|
| 60 | + */ |
---|
| 61 | + dump_page(unmovable, "unmovable page"); |
---|
| 62 | + } |
---|
| 63 | + |
---|
| 64 | + return -EBUSY; |
---|
89 | 65 | } |
---|
90 | 66 | |
---|
91 | 67 | static void unset_migratetype_isolate(struct page *page, unsigned migratetype) |
---|
.. | .. |
---|
111 | 87 | * these pages to be merged. |
---|
112 | 88 | */ |
---|
113 | 89 | if (PageBuddy(page)) { |
---|
114 | | - order = page_order(page); |
---|
| 90 | + order = buddy_order(page); |
---|
115 | 91 | if (order >= pageblock_order) { |
---|
116 | 92 | pfn = page_to_pfn(page); |
---|
117 | 93 | buddy_pfn = __find_buddy_pfn(pfn, order); |
---|
.. | .. |
---|
129 | 105 | * If we isolate freepage with more than pageblock_order, there |
---|
130 | 106 | * should be no freepage in the range, so we could avoid costly |
---|
131 | 107 | * pageblock scanning for freepage moving. |
---|
| 108 | + * |
---|
| 109 | + * We didn't actually touch any of the isolated pages, so place them |
---|
| 110 | + * to the tail of the freelist. This is an optimization for memory |
---|
| 111 | + * onlining - just onlined memory won't immediately be considered for |
---|
| 112 | + * allocation. |
---|
132 | 113 | */ |
---|
133 | 114 | if (!isolated_page) { |
---|
134 | 115 | nr_pages = move_freepages_block(zone, page, migratetype, NULL); |
---|
135 | 116 | __mod_zone_freepage_state(zone, nr_pages, migratetype); |
---|
136 | 117 | } |
---|
137 | 118 | set_pageblock_migratetype(page, migratetype); |
---|
| 119 | + if (isolated_page) |
---|
| 120 | + __putback_isolated_page(page, order, migratetype); |
---|
138 | 121 | zone->nr_isolate_pageblock--; |
---|
139 | 122 | out: |
---|
140 | 123 | spin_unlock_irqrestore(&zone->lock, flags); |
---|
141 | | - if (isolated_page) { |
---|
142 | | - post_alloc_hook(page, order, __GFP_MOVABLE); |
---|
143 | | - __free_pages(page, order); |
---|
144 | | - } |
---|
145 | 124 | } |
---|
146 | 125 | |
---|
147 | 126 | static inline struct page * |
---|
.. | .. |
---|
152 | 131 | for (i = 0; i < nr_pages; i++) { |
---|
153 | 132 | struct page *page; |
---|
154 | 133 | |
---|
155 | | - if (!pfn_valid_within(pfn + i)) |
---|
156 | | - continue; |
---|
157 | 134 | page = pfn_to_online_page(pfn + i); |
---|
158 | 135 | if (!page) |
---|
159 | 136 | continue; |
---|
.. | .. |
---|
162 | 139 | return NULL; |
---|
163 | 140 | } |
---|
164 | 141 | |
---|
165 | | -/* |
---|
166 | | - * start_isolate_page_range() -- make page-allocation-type of range of pages |
---|
167 | | - * to be MIGRATE_ISOLATE. |
---|
168 | | - * @start_pfn: The lower PFN of the range to be isolated. |
---|
169 | | - * @end_pfn: The upper PFN of the range to be isolated. |
---|
170 | | - * @migratetype: migrate type to set in error recovery. |
---|
| 142 | +/** |
---|
| 143 | + * start_isolate_page_range() - make page-allocation-type of range of pages to |
---|
| 144 | + * be MIGRATE_ISOLATE. |
---|
| 145 | + * @start_pfn: The lower PFN of the range to be isolated. |
---|
| 146 | + * @end_pfn: The upper PFN of the range to be isolated. |
---|
| 147 | + * start_pfn/end_pfn must be aligned to pageblock_order. |
---|
| 148 | + * @migratetype: Migrate type to set in error recovery. |
---|
| 149 | + * @flags: The following flags are allowed (they can be combined in |
---|
| 150 | + * a bit mask) |
---|
| 151 | + * MEMORY_OFFLINE - isolate to offline (!allocate) memory |
---|
| 152 | + * e.g., skip over PageHWPoison() pages |
---|
| 153 | + * and PageOffline() pages. |
---|
| 154 | + * REPORT_FAILURE - report details about the failure to |
---|
| 155 | + * isolate the range |
---|
171 | 156 | * |
---|
172 | 157 | * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in |
---|
173 | 158 | * the range will never be allocated. Any free pages and pages freed in the |
---|
174 | | - * future will not be allocated again. |
---|
175 | | - * |
---|
176 | | - * start_pfn/end_pfn must be aligned to pageblock_order. |
---|
177 | | - * Return 0 on success and -EBUSY if any part of range cannot be isolated. |
---|
| 159 | + * future will not be allocated again. If specified range includes migrate types |
---|
| 160 | + * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all |
---|
| 161 | + * pages in the range finally, the caller has to free all pages in the range. |
---|
| 162 | + * test_pages_isolated() can be used to test it. |
---|
178 | 163 | * |
---|
179 | 164 | * There is no high level synchronization mechanism that prevents two threads |
---|
180 | | - * from trying to isolate overlapping ranges. If this happens, one thread |
---|
| 165 | + * from trying to isolate overlapping ranges. If this happens, one thread |
---|
181 | 166 | * will notice pageblocks in the overlapping range already set to isolate. |
---|
182 | 167 | * This happens in set_migratetype_isolate, and set_migratetype_isolate |
---|
183 | | - * returns an error. We then clean up by restoring the migration type on |
---|
184 | | - * pageblocks we may have modified and return -EBUSY to caller. This |
---|
| 168 | + * returns an error. We then clean up by restoring the migration type on |
---|
| 169 | + * pageblocks we may have modified and return -EBUSY to caller. This |
---|
185 | 170 | * prevents two threads from simultaneously working on overlapping ranges. |
---|
| 171 | + * |
---|
| 172 | + * Please note that there is no strong synchronization with the page allocator |
---|
| 173 | + * either. Pages might be freed while their page blocks are marked ISOLATED. |
---|
| 174 | + * A call to drain_all_pages() after isolation can flush most of them. However |
---|
| 175 | + * in some cases pages might still end up on pcp lists and that would allow |
---|
| 176 | + * for their allocation even when they are in fact isolated already. Depending |
---|
| 177 | + * on how strong of a guarantee the caller needs, further drain_all_pages() |
---|
| 178 | + * might be needed (e.g. __offline_pages will need to call it after check for |
---|
| 179 | + * isolated range for a next retry). |
---|
| 180 | + * |
---|
| 181 | + * Return: 0 on success and -EBUSY if any part of range cannot be isolated. |
---|
186 | 182 | */ |
---|
187 | 183 | int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
---|
188 | | - unsigned migratetype, bool skip_hwpoisoned_pages) |
---|
| 184 | + unsigned migratetype, int flags, |
---|
| 185 | + unsigned long *failed_pfn) |
---|
189 | 186 | { |
---|
190 | 187 | unsigned long pfn; |
---|
191 | 188 | unsigned long undo_pfn; |
---|
.. | .. |
---|
198 | 195 | pfn < end_pfn; |
---|
199 | 196 | pfn += pageblock_nr_pages) { |
---|
200 | 197 | page = __first_valid_page(pfn, pageblock_nr_pages); |
---|
201 | | - if (page && |
---|
202 | | - set_migratetype_isolate(page, migratetype, skip_hwpoisoned_pages)) { |
---|
203 | | - undo_pfn = pfn; |
---|
204 | | - goto undo; |
---|
| 198 | + if (page) { |
---|
| 199 | + if (set_migratetype_isolate(page, migratetype, flags)) { |
---|
| 200 | + undo_pfn = pfn; |
---|
| 201 | + if (failed_pfn) |
---|
| 202 | + *failed_pfn = page_to_pfn(page); |
---|
| 203 | + goto undo; |
---|
| 204 | + } |
---|
205 | 205 | } |
---|
206 | 206 | } |
---|
207 | 207 | return 0; |
---|
.. | .. |
---|
221 | 221 | /* |
---|
222 | 222 | * Make isolated pages available again. |
---|
223 | 223 | */ |
---|
224 | | -int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
---|
| 224 | +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
---|
225 | 225 | unsigned migratetype) |
---|
226 | 226 | { |
---|
227 | 227 | unsigned long pfn; |
---|
.. | .. |
---|
238 | 238 | continue; |
---|
239 | 239 | unset_migratetype_isolate(page, migratetype); |
---|
240 | 240 | } |
---|
241 | | - return 0; |
---|
242 | 241 | } |
---|
243 | 242 | /* |
---|
244 | 243 | * Test whether all pages in the range are free (i.e. isolated) or not. |
---|
.. | .. |
---|
249 | 248 | */ |
---|
250 | 249 | static unsigned long |
---|
251 | 250 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, |
---|
252 | | - bool skip_hwpoisoned_pages) |
---|
| 251 | + int flags) |
---|
253 | 252 | { |
---|
254 | 253 | struct page *page; |
---|
255 | 254 | |
---|
.. | .. |
---|
265 | 264 | * the correct MIGRATE_ISOLATE freelist. There is no |
---|
266 | 265 | * simple way to verify that as VM_BUG_ON(), though. |
---|
267 | 266 | */ |
---|
268 | | - pfn += 1 << page_order(page); |
---|
269 | | - else if (skip_hwpoisoned_pages && PageHWPoison(page)) |
---|
| 267 | + pfn += 1 << buddy_order(page); |
---|
| 268 | + else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) |
---|
270 | 269 | /* A HWPoisoned page cannot be also PageBuddy */ |
---|
| 270 | + pfn++; |
---|
| 271 | + else if ((flags & MEMORY_OFFLINE) && PageOffline(page) && |
---|
| 272 | + !page_count(page)) |
---|
| 273 | + /* |
---|
| 274 | + * The responsible driver agreed to skip PageOffline() |
---|
| 275 | + * pages when offlining memory by dropping its |
---|
| 276 | + * reference in MEM_GOING_OFFLINE. |
---|
| 277 | + */ |
---|
271 | 278 | pfn++; |
---|
272 | 279 | else |
---|
273 | 280 | break; |
---|
274 | 281 | } |
---|
275 | | -#ifdef CONFIG_ARCH_ROCKCHIP |
---|
| 282 | +#ifdef CONFIG_NO_GKI |
---|
276 | 283 | if (pfn < end_pfn) |
---|
277 | 284 | dump_page_owner(page); |
---|
278 | 285 | #endif |
---|
.. | .. |
---|
282 | 289 | |
---|
283 | 290 | /* Caller should ensure that requested range is in a single zone */ |
---|
284 | 291 | int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, |
---|
285 | | - bool skip_hwpoisoned_pages) |
---|
| 292 | + int isol_flags, unsigned long *failed_pfn) |
---|
286 | 293 | { |
---|
287 | 294 | unsigned long pfn, flags; |
---|
288 | 295 | struct page *page; |
---|
.. | .. |
---|
304 | 311 | /* Check all pages are free or marked as ISOLATED */ |
---|
305 | 312 | zone = page_zone(page); |
---|
306 | 313 | spin_lock_irqsave(&zone->lock, flags); |
---|
307 | | - pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, |
---|
308 | | - skip_hwpoisoned_pages); |
---|
| 314 | + pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags); |
---|
309 | 315 | spin_unlock_irqrestore(&zone->lock, flags); |
---|
310 | 316 | |
---|
311 | 317 | trace_test_pages_isolated(start_pfn, end_pfn, pfn); |
---|
| 318 | + if (pfn < end_pfn) { |
---|
| 319 | + page_pinner_failure_detect(pfn_to_page(pfn)); |
---|
| 320 | + if (failed_pfn) |
---|
| 321 | + *failed_pfn = pfn; |
---|
| 322 | + return -EBUSY; |
---|
| 323 | + } |
---|
312 | 324 | |
---|
313 | | - return pfn < end_pfn ? -EBUSY : 0; |
---|
314 | | -} |
---|
315 | | - |
---|
316 | | -struct page *alloc_migrate_target(struct page *page, unsigned long private) |
---|
317 | | -{ |
---|
318 | | - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]); |
---|
| 325 | + return 0; |
---|
319 | 326 | } |
---|