hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/page_isolation.c
@@ -15,62 +15,30 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/page_isolation.h>
 
-static int set_migratetype_isolate(struct page *page, int migratetype,
-                        bool skip_hwpoisoned_pages)
+static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags)
 {
-        struct zone *zone;
-        unsigned long flags, pfn;
-        struct memory_isolate_notify arg;
-        int notifier_ret;
-        int ret = -EBUSY;
-
-        zone = page_zone(page);
+        struct zone *zone = page_zone(page);
+        struct page *unmovable;
+        unsigned long flags;
 
         spin_lock_irqsave(&zone->lock, flags);
 
         /*
          * We assume the caller intended to SET migrate type to isolate.
          * If it is already set, then someone else must have raced and
-         * set it before us.  Return -EBUSY
+         * set it before us.
          */
-        if (is_migrate_isolate_page(page))
-                goto out;
+        if (is_migrate_isolate_page(page)) {
+                spin_unlock_irqrestore(&zone->lock, flags);
+                return -EBUSY;
+        }
 
-        pfn = page_to_pfn(page);
-        arg.start_pfn = pfn;
-        arg.nr_pages = pageblock_nr_pages;
-        arg.pages_found = 0;
-
-        /*
-         * It may be possible to isolate a pageblock even if the
-         * migratetype is not MIGRATE_MOVABLE. The memory isolation
-         * notifier chain is used by balloon drivers to return the
-         * number of pages in a range that are held by the balloon
-         * driver to shrink memory. If all the pages are accounted for
-         * by balloons, are free, or on the LRU, isolation can continue.
-         * Later, for example, when memory hotplug notifier runs, these
-         * pages reported as "can be isolated" should be isolated(freed)
-         * by the balloon driver through the memory notifier chain.
-         */
-        notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
-        notifier_ret = notifier_to_errno(notifier_ret);
-        if (notifier_ret)
-                goto out;
         /*
          * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
          * We just check MOVABLE pages.
          */
-        if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype,
-                                 skip_hwpoisoned_pages))
-                ret = 0;
-
-        /*
-         * immobile means "not-on-lru" pages. If immobile is larger than
-         * removable-by-driver pages reported by notifier, we'll fail.
-         */
-
-out:
-        if (!ret) {
+        unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags);
+        if (!unmovable) {
                 unsigned long nr_pages;
                 int mt = get_pageblock_migratetype(page);
 
@@ -80,12 +48,20 @@
                                                 NULL);
 
                 __mod_zone_freepage_state(zone, -nr_pages, mt);
+                spin_unlock_irqrestore(&zone->lock, flags);
+                return 0;
         }
 
         spin_unlock_irqrestore(&zone->lock, flags);
-        if (!ret)
-                drain_all_pages(zone);
-        return ret;
+        if (isol_flags & REPORT_FAILURE) {
+                /*
+                 * printk() with zone->lock held will likely trigger a
+                 * lockdep splat, so defer it here.
+                 */
+                dump_page(unmovable, "unmovable page");
+        }
+
+        return -EBUSY;
 }
 
 static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
@@ -111,7 +87,7 @@
          * these pages to be merged.
          */
         if (PageBuddy(page)) {
-                order = page_order(page);
+                order = buddy_order(page);
                 if (order >= pageblock_order) {
                         pfn = page_to_pfn(page);
                         buddy_pfn = __find_buddy_pfn(pfn, order);
@@ -129,19 +105,22 @@
          * If we isolate freepage with more than pageblock_order, there
          * should be no freepage in the range, so we could avoid costly
          * pageblock scanning for freepage moving.
+         *
+         * We didn't actually touch any of the isolated pages, so place them
+         * to the tail of the freelist. This is an optimization for memory
+         * onlining - just onlined memory won't immediately be considered for
+         * allocation.
          */
         if (!isolated_page) {
                 nr_pages = move_freepages_block(zone, page, migratetype, NULL);
                 __mod_zone_freepage_state(zone, nr_pages, migratetype);
         }
         set_pageblock_migratetype(page, migratetype);
+        if (isolated_page)
+                __putback_isolated_page(page, order, migratetype);
         zone->nr_isolate_pageblock--;
 out:
         spin_unlock_irqrestore(&zone->lock, flags);
-        if (isolated_page) {
-                post_alloc_hook(page, order, __GFP_MOVABLE);
-                __free_pages(page, order);
-        }
 }
 
 static inline struct page *
@@ -152,8 +131,6 @@
         for (i = 0; i < nr_pages; i++) {
                 struct page *page;
 
-                if (!pfn_valid_within(pfn + i))
-                        continue;
                 page = pfn_to_online_page(pfn + i);
                 if (!page)
                         continue;
@@ -162,30 +139,50 @@
         return NULL;
 }
 
-/*
- * start_isolate_page_range() -- make page-allocation-type of range of pages
- * to be MIGRATE_ISOLATE.
- * @start_pfn: The lower PFN of the range to be isolated.
- * @end_pfn: The upper PFN of the range to be isolated.
- * @migratetype: migrate type to set in error recovery.
+/**
+ * start_isolate_page_range() - make page-allocation-type of range of pages to
+ * be MIGRATE_ISOLATE.
+ * @start_pfn:          The lower PFN of the range to be isolated.
+ * @end_pfn:            The upper PFN of the range to be isolated.
+ *                      start_pfn/end_pfn must be aligned to pageblock_order.
+ * @migratetype:        Migrate type to set in error recovery.
+ * @flags:              The following flags are allowed (they can be combined in
+ *                      a bit mask)
+ *                      MEMORY_OFFLINE - isolate to offline (!allocate) memory
+ *                                       e.g., skip over PageHWPoison() pages
+ *                                       and PageOffline() pages.
+ *                      REPORT_FAILURE - report details about the failure to
+ *                      isolate the range
  *
  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  * the range will never be allocated. Any free pages and pages freed in the
- * future will not be allocated again.
- *
- * start_pfn/end_pfn must be aligned to pageblock_order.
- * Return 0 on success and -EBUSY if any part of range cannot be isolated.
+ * future will not be allocated again. If specified range includes migrate types
+ * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
+ * pages in the range finally, the caller have to free all pages in the range.
+ * test_page_isolated() can be used for test it.
  *
  * There is no high level synchronization mechanism that prevents two threads
- * from trying to isolate overlapping ranges.  If this happens, one thread
+ * from trying to isolate overlapping ranges. If this happens, one thread
  * will notice pageblocks in the overlapping range already set to isolate.
  * This happens in set_migratetype_isolate, and set_migratetype_isolate
- * returns an error.  We then clean up by restoring the migration type on
- * pageblocks we may have modified and return -EBUSY to caller.  This
+ * returns an error. We then clean up by restoring the migration type on
+ * pageblocks we may have modified and return -EBUSY to caller. This
  * prevents two threads from simultaneously working on overlapping ranges.
+ *
+ * Please note that there is no strong synchronization with the page allocator
+ * either. Pages might be freed while their page blocks are marked ISOLATED.
+ * A call to drain_all_pages() after isolation can flush most of them. However
+ * in some cases pages might still end up on pcp lists and that would allow
+ * for their allocation even when they are in fact isolated already. Depending
+ * on how strong of a guarantee the caller needs, further drain_all_pages()
+ * might be needed (e.g. __offline_pages will need to call it after check for
+ * isolated range for a next retry).
+ *
+ * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
  */
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-                             unsigned migratetype, bool skip_hwpoisoned_pages)
+                             unsigned migratetype, int flags,
+                             unsigned long *failed_pfn)
 {
         unsigned long pfn;
         unsigned long undo_pfn;
@@ -198,10 +195,13 @@
              pfn < end_pfn;
              pfn += pageblock_nr_pages) {
                 page = __first_valid_page(pfn, pageblock_nr_pages);
-                if (page &&
-                    set_migratetype_isolate(page, migratetype, skip_hwpoisoned_pages)) {
-                        undo_pfn = pfn;
-                        goto undo;
+                if (page) {
+                        if (set_migratetype_isolate(page, migratetype, flags)) {
+                                undo_pfn = pfn;
+                                if (failed_pfn)
+                                        *failed_pfn = page_to_pfn(page);
+                                goto undo;
+                        }
                 }
         }
         return 0;
@@ -221,7 +221,7 @@
 /*
  * Make isolated pages available again.
  */
-int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
                         unsigned migratetype)
 {
         unsigned long pfn;
@@ -238,7 +238,6 @@
                         continue;
                 unset_migratetype_isolate(page, migratetype);
         }
-        return 0;
 }
 /*
  * Test all pages in the range is free(means isolated) or not.
@@ -249,7 +248,7 @@
  */
 static unsigned long
 __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
-                                  bool skip_hwpoisoned_pages)
+                                  int flags)
 {
         struct page *page;
 
@@ -265,14 +264,22 @@
                  * the correct MIGRATE_ISOLATE freelist. There is no
                  * simple way to verify that as VM_BUG_ON(), though.
                  */
-                        pfn += 1 << page_order(page);
-                else if (skip_hwpoisoned_pages && PageHWPoison(page))
+                        pfn += 1 << buddy_order(page);
+                else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
                         /* A HWPoisoned page cannot be also PageBuddy */
+                        pfn++;
+                else if ((flags & MEMORY_OFFLINE) && PageOffline(page) &&
+                         !page_count(page))
+                        /*
+                         * The responsible driver agreed to skip PageOffline()
+                         * pages when offlining memory by dropping its
+                         * reference in MEM_GOING_OFFLINE.
+                         */
                         pfn++;
                 else
                         break;
         }
-#ifdef CONFIG_ARCH_ROCKCHIP
+#ifdef CONFIG_NO_GKI
         if (pfn < end_pfn)
                 dump_page_owner(page);
 #endif
@@ -282,7 +289,7 @@
 
 /* Caller should ensure that requested range is in a single zone */
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
-                        bool skip_hwpoisoned_pages)
+                        int isol_flags, unsigned long *failed_pfn)
 {
         unsigned long pfn, flags;
         struct page *page;
@@ -304,16 +311,16 @@
         /* Check all pages are free or marked as ISOLATED */
         zone = page_zone(page);
         spin_lock_irqsave(&zone->lock, flags);
-        pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
-                                                skip_hwpoisoned_pages);
+        pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
         spin_unlock_irqrestore(&zone->lock, flags);
 
         trace_test_pages_isolated(start_pfn, end_pfn, pfn);
+        if (pfn < end_pfn) {
+                page_pinner_failure_detect(pfn_to_page(pfn));
+                if (failed_pfn)
+                        *failed_pfn = pfn;
+                return -EBUSY;
+        }
 
-        return pfn < end_pfn ? -EBUSY : 0;
-}
-
-struct page *alloc_migrate_target(struct page *page, unsigned long private)
-{
-        return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
+        return 0;
 }
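
Below is a minimal, hypothetical caller sketch (not part of this patch) showing how the reworked API fits together after the change: isolate a pageblock-aligned range, drain per-cpu lists, verify isolation, then release the range. It assumes the declarations in include/linux/page-isolation.h match the signatures above (including the MEMORY_OFFLINE and REPORT_FAILURE flags and the failed_pfn out-parameter); the function name example_offline_range() is invented for illustration only.

/* Hypothetical caller sketch, roughly mirroring a memory-offlining path. */
static int example_offline_range(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long failed_pfn;
        int ret;

        /* Mark every pageblock in the range MIGRATE_ISOLATE. */
        ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
                                       MEMORY_OFFLINE | REPORT_FAILURE,
                                       &failed_pfn);
        if (ret)
                return ret;     /* -EBUSY; failed_pfn names the culprit */

        /*
         * Freed pages may still sit on pcp lists; flush them before
         * checking, as the kernel-doc above recommends.
         */
        drain_all_pages(page_zone(pfn_to_page(start_pfn)));

        /* Every page must now be free, HWPoison, or unreferenced PageOffline. */
        ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE,
                                  &failed_pfn);

        /* Make the range allocatable again (now returns void). */
        undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
        return ret;
}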