| .. | .. |
|---|
| 15 | 15 | #define CREATE_TRACE_POINTS |
|---|
| 16 | 16 | #include <trace/events/page_isolation.h> |
|---|
| 17 | 17 | |
|---|
| 18 | | -static int set_migratetype_isolate(struct page *page, int migratetype, |
|---|
| 19 | | - bool skip_hwpoisoned_pages) |
|---|
| 18 | +static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) |
|---|
| 20 | 19 | { |
|---|
| 21 | | - struct zone *zone; |
|---|
| 22 | | - unsigned long flags, pfn; |
|---|
| 23 | | - struct memory_isolate_notify arg; |
|---|
| 24 | | - int notifier_ret; |
|---|
| 25 | | - int ret = -EBUSY; |
|---|
| 26 | | - |
|---|
| 27 | | - zone = page_zone(page); |
|---|
| 20 | + struct zone *zone = page_zone(page); |
|---|
| 21 | + struct page *unmovable; |
|---|
| 22 | + unsigned long flags; |
|---|
| 28 | 23 | |
|---|
| 29 | 24 | spin_lock_irqsave(&zone->lock, flags); |
|---|
| 30 | 25 | |
|---|
| 31 | 26 | /* |
|---|
| 32 | 27 | * We assume the caller intended to SET migrate type to isolate. |
|---|
| 33 | 28 | * If it is already set, then someone else must have raced and |
|---|
| 34 | | - * set it before us. Return -EBUSY |
|---|
| 29 | + * set it before us. |
|---|
| 35 | 30 | */ |
|---|
| 36 | | - if (is_migrate_isolate_page(page)) |
|---|
| 37 | | - goto out; |
|---|
| 31 | + if (is_migrate_isolate_page(page)) { |
|---|
| 32 | + spin_unlock_irqrestore(&zone->lock, flags); |
|---|
| 33 | + return -EBUSY; |
|---|
| 34 | + } |
|---|
| 38 | 35 | |
|---|
| 39 | | - pfn = page_to_pfn(page); |
|---|
| 40 | | - arg.start_pfn = pfn; |
|---|
| 41 | | - arg.nr_pages = pageblock_nr_pages; |
|---|
| 42 | | - arg.pages_found = 0; |
|---|
| 43 | | - |
|---|
| 44 | | - /* |
|---|
| 45 | | - * It may be possible to isolate a pageblock even if the |
|---|
| 46 | | - * migratetype is not MIGRATE_MOVABLE. The memory isolation |
|---|
| 47 | | - * notifier chain is used by balloon drivers to return the |
|---|
| 48 | | - * number of pages in a range that are held by the balloon |
|---|
| 49 | | - * driver to shrink memory. If all the pages are accounted for |
|---|
| 50 | | - * by balloons, are free, or on the LRU, isolation can continue. |
|---|
| 51 | | - * Later, for example, when memory hotplug notifier runs, these |
|---|
| 52 | | - * pages reported as "can be isolated" should be isolated(freed) |
|---|
| 53 | | - * by the balloon driver through the memory notifier chain. |
|---|
| 54 | | - */ |
|---|
| 55 | | - notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg); |
|---|
| 56 | | - notifier_ret = notifier_to_errno(notifier_ret); |
|---|
| 57 | | - if (notifier_ret) |
|---|
| 58 | | - goto out; |
|---|
| 59 | 36 | /* |
|---|
| 60 | 37 | * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. |
|---|
| 61 | 38 | * We just check MOVABLE pages. |
|---|
| 62 | 39 | */ |
|---|
| 63 | | - if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, |
|---|
| 64 | | - skip_hwpoisoned_pages)) |
|---|
| 65 | | - ret = 0; |
|---|
| 66 | | - |
|---|
| 67 | | - /* |
|---|
| 68 | | - * immobile means "not-on-lru" pages. If immobile is larger than |
|---|
| 69 | | - * removable-by-driver pages reported by notifier, we'll fail. |
|---|
| 70 | | - */ |
|---|
| 71 | | - |
|---|
| 72 | | -out: |
|---|
| 73 | | - if (!ret) { |
|---|
| 40 | + unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags); |
|---|
| 41 | + if (!unmovable) { |
|---|
| 74 | 42 | unsigned long nr_pages; |
|---|
| 75 | 43 | int mt = get_pageblock_migratetype(page); |
|---|
| 76 | 44 | |
|---|
| .. | .. |
|---|
| 80 | 48 | NULL); |
|---|
| 81 | 49 | |
|---|
| 82 | 50 | __mod_zone_freepage_state(zone, -nr_pages, mt); |
|---|
| 51 | + spin_unlock_irqrestore(&zone->lock, flags); |
|---|
| 52 | + return 0; |
|---|
| 83 | 53 | } |
|---|
| 84 | 54 | |
|---|
| 85 | 55 | spin_unlock_irqrestore(&zone->lock, flags); |
|---|
| 86 | | - if (!ret) |
|---|
| 87 | | - drain_all_pages(zone); |
|---|
| 88 | | - return ret; |
|---|
| 56 | + if (isol_flags & REPORT_FAILURE) { |
|---|
| 57 | + /* |
|---|
| 58 | + * printk() with zone->lock held will likely trigger a |
|---|
| 59 | + * lockdep splat, so defer it here. |
|---|
| 60 | + */ |
|---|
| 61 | + dump_page(unmovable, "unmovable page"); |
|---|
| 62 | + } |
|---|
| 63 | + |
|---|
| 64 | + return -EBUSY; |
|---|
| 89 | 65 | } |
|---|
| 90 | 66 | |
|---|
| 91 | 67 | static void unset_migratetype_isolate(struct page *page, unsigned migratetype) |
|---|
| .. | .. |
|---|
| 111 | 87 | * these pages to be merged. |
|---|
| 112 | 88 | */ |
|---|
| 113 | 89 | if (PageBuddy(page)) { |
|---|
| 114 | | - order = page_order(page); |
|---|
| 90 | + order = buddy_order(page); |
|---|
| 115 | 91 | if (order >= pageblock_order) { |
|---|
| 116 | 92 | pfn = page_to_pfn(page); |
|---|
| 117 | 93 | buddy_pfn = __find_buddy_pfn(pfn, order); |
|---|
| .. | .. |
|---|
| 129 | 105 | * If we isolate freepage with more than pageblock_order, there |
|---|
| 130 | 106 | * should be no freepage in the range, so we could avoid costly |
|---|
| 131 | 107 | * pageblock scanning for freepage moving. |
|---|
| 108 | + * |
|---|
| 109 | + * We didn't actually touch any of the isolated pages, so place them |
|---|
| 110 | + * at the tail of the freelist. This is an optimization for memory |
|---|
| 111 | + * onlining - just onlined memory won't immediately be considered for |
|---|
| 112 | + * allocation. |
|---|
| 132 | 113 | */ |
|---|
| 133 | 114 | if (!isolated_page) { |
|---|
| 134 | 115 | nr_pages = move_freepages_block(zone, page, migratetype, NULL); |
|---|
| 135 | 116 | __mod_zone_freepage_state(zone, nr_pages, migratetype); |
|---|
| 136 | 117 | } |
|---|
| 137 | 118 | set_pageblock_migratetype(page, migratetype); |
|---|
| 119 | + if (isolated_page) |
|---|
| 120 | + __putback_isolated_page(page, order, migratetype); |
|---|
| 138 | 121 | zone->nr_isolate_pageblock--; |
|---|
| 139 | 122 | out: |
|---|
| 140 | 123 | spin_unlock_irqrestore(&zone->lock, flags); |
|---|
| 141 | | - if (isolated_page) { |
|---|
| 142 | | - post_alloc_hook(page, order, __GFP_MOVABLE); |
|---|
| 143 | | - __free_pages(page, order); |
|---|
| 144 | | - } |
|---|
| 145 | 124 | } |
|---|
| 146 | 125 | |
|---|
| 147 | 126 | static inline struct page * |
|---|
| .. | .. |
|---|
| 152 | 131 | for (i = 0; i < nr_pages; i++) { |
|---|
| 153 | 132 | struct page *page; |
|---|
| 154 | 133 | |
|---|
| 155 | | - if (!pfn_valid_within(pfn + i)) |
|---|
| 156 | | - continue; |
|---|
| 157 | 134 | page = pfn_to_online_page(pfn + i); |
|---|
| 158 | 135 | if (!page) |
|---|
| 159 | 136 | continue; |
|---|
| .. | .. |
|---|
| 162 | 139 | return NULL; |
|---|
| 163 | 140 | } |
|---|
| 164 | 141 | |
|---|
| 165 | | -/* |
|---|
| 166 | | - * start_isolate_page_range() -- make page-allocation-type of range of pages |
|---|
| 167 | | - * to be MIGRATE_ISOLATE. |
|---|
| 168 | | - * @start_pfn: The lower PFN of the range to be isolated. |
|---|
| 169 | | - * @end_pfn: The upper PFN of the range to be isolated. |
|---|
| 170 | | - * @migratetype: migrate type to set in error recovery. |
|---|
| 142 | +/** |
|---|
| 143 | + * start_isolate_page_range() - make page-allocation-type of range of pages to |
|---|
| 144 | + * be MIGRATE_ISOLATE. |
|---|
| 145 | + * @start_pfn: The lower PFN of the range to be isolated. |
|---|
| 146 | + * @end_pfn: The upper PFN of the range to be isolated. |
|---|
| 147 | + * start_pfn/end_pfn must be aligned to pageblock_order. |
|---|
| 148 | + * @migratetype: Migrate type to set in error recovery. |
|---|
| 149 | + * @flags: The following flags are allowed (they can be combined in |
|---|
| 150 | + * a bit mask) |
|---|
| 151 | + * MEMORY_OFFLINE - isolate to offline (!allocate) memory |
|---|
| 152 | + * e.g., skip over PageHWPoison() pages |
|---|
| 153 | + * and PageOffline() pages. |
|---|
| 154 | + * REPORT_FAILURE - report details about the failure to |
|---|
| 155 | + * isolate the range |
|---|
| 171 | 156 | * |
|---|
| 172 | 157 | * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in |
|---|
| 173 | 158 | * the range will never be allocated. Any free pages and pages freed in the |
|---|
| 174 | | - * future will not be allocated again. |
|---|
| 175 | | - * |
|---|
| 176 | | - * start_pfn/end_pfn must be aligned to pageblock_order. |
|---|
| 177 | | - * Return 0 on success and -EBUSY if any part of range cannot be isolated. |
|---|
| 159 | + * future will not be allocated again. If the specified range includes migrate |
|---|
| 160 | + * types other than MOVABLE or CMA, this will fail with -EBUSY. To finally |
|---|
| 161 | + * isolate all pages in the range, the caller has to free all pages in the range. |
|---|
| 162 | + * test_pages_isolated() can be used to test it. |
|---|
| 178 | 163 | * |
|---|
| 179 | 164 | * There is no high level synchronization mechanism that prevents two threads |
|---|
| 180 | | - * from trying to isolate overlapping ranges. If this happens, one thread |
|---|
| 165 | + * from trying to isolate overlapping ranges. If this happens, one thread |
|---|
| 181 | 166 | * will notice pageblocks in the overlapping range already set to isolate. |
|---|
| 182 | 167 | * This happens in set_migratetype_isolate, and set_migratetype_isolate |
|---|
| 183 | | - * returns an error. We then clean up by restoring the migration type on |
|---|
| 184 | | - * pageblocks we may have modified and return -EBUSY to caller. This |
|---|
| 168 | + * returns an error. We then clean up by restoring the migration type on |
|---|
| 169 | + * pageblocks we may have modified and return -EBUSY to caller. This |
|---|
| 185 | 170 | * prevents two threads from simultaneously working on overlapping ranges. |
|---|
| 171 | + * |
|---|
| 172 | + * Please note that there is no strong synchronization with the page allocator |
|---|
| 173 | + * either. Pages might be freed while their page blocks are marked ISOLATED. |
|---|
| 174 | + * A call to drain_all_pages() after isolation can flush most of them. However |
|---|
| 175 | + * in some cases pages might still end up on pcp lists and that would allow |
|---|
| 176 | + * for their allocation even when they are in fact isolated already. Depending |
|---|
| 177 | + * on how strong of a guarantee the caller needs, further drain_all_pages() |
|---|
| 178 | + * might be needed (e.g. __offline_pages will need to call it after check for |
|---|
| 179 | + * isolated range for a next retry). |
|---|
| 180 | + * |
|---|
| 181 | + * Return: 0 on success and -EBUSY if any part of range cannot be isolated. |
|---|
| 186 | 182 | */ |
|---|
| 187 | 183 | int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
|---|
| 188 | | - unsigned migratetype, bool skip_hwpoisoned_pages) |
|---|
| 184 | + unsigned migratetype, int flags, |
|---|
| 185 | + unsigned long *failed_pfn) |
|---|
| 189 | 186 | { |
|---|
| 190 | 187 | unsigned long pfn; |
|---|
| 191 | 188 | unsigned long undo_pfn; |
|---|
| .. | .. |
|---|
| 198 | 195 | pfn < end_pfn; |
|---|
| 199 | 196 | pfn += pageblock_nr_pages) { |
|---|
| 200 | 197 | page = __first_valid_page(pfn, pageblock_nr_pages); |
|---|
| 201 | | - if (page && |
|---|
| 202 | | - set_migratetype_isolate(page, migratetype, skip_hwpoisoned_pages)) { |
|---|
| 203 | | - undo_pfn = pfn; |
|---|
| 204 | | - goto undo; |
|---|
| 198 | + if (page) { |
|---|
| 199 | + if (set_migratetype_isolate(page, migratetype, flags)) { |
|---|
| 200 | + undo_pfn = pfn; |
|---|
| 201 | + if (failed_pfn) |
|---|
| 202 | + *failed_pfn = page_to_pfn(page); |
|---|
| 203 | + goto undo; |
|---|
| 204 | + } |
|---|
| 205 | 205 | } |
|---|
| 206 | 206 | } |
|---|
| 207 | 207 | return 0; |
|---|
| .. | .. |
|---|
| 221 | 221 | /* |
|---|
| 222 | 222 | * Make isolated pages available again. |
|---|
| 223 | 223 | */ |
|---|
| 224 | | -int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
|---|
| 224 | +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
|---|
| 225 | 225 | unsigned migratetype) |
|---|
| 226 | 226 | { |
|---|
| 227 | 227 | unsigned long pfn; |
|---|
| .. | .. |
|---|
| 238 | 238 | continue; |
|---|
| 239 | 239 | unset_migratetype_isolate(page, migratetype); |
|---|
| 240 | 240 | } |
|---|
| 241 | | - return 0; |
|---|
| 242 | 241 | } |
|---|
| 243 | 242 | /* |
|---|
| 244 | 243 | * Test whether all pages in the range are free (i.e. isolated) or not. |
|---|
| .. | .. |
|---|
| 249 | 248 | */ |
|---|
| 250 | 249 | static unsigned long |
|---|
| 251 | 250 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, |
|---|
| 252 | | - bool skip_hwpoisoned_pages) |
|---|
| 251 | + int flags) |
|---|
| 253 | 252 | { |
|---|
| 254 | 253 | struct page *page; |
|---|
| 255 | 254 | |
|---|
| .. | .. |
|---|
| 265 | 264 | * the correct MIGRATE_ISOLATE freelist. There is no |
|---|
| 266 | 265 | * simple way to verify that as VM_BUG_ON(), though. |
|---|
| 267 | 266 | */ |
|---|
| 268 | | - pfn += 1 << page_order(page); |
|---|
| 269 | | - else if (skip_hwpoisoned_pages && PageHWPoison(page)) |
|---|
| 267 | + pfn += 1 << buddy_order(page); |
|---|
| 268 | + else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) |
|---|
| 270 | 269 | /* A HWPoisoned page cannot also be PageBuddy */ |
|---|
| 270 | + pfn++; |
|---|
| 271 | + else if ((flags & MEMORY_OFFLINE) && PageOffline(page) && |
|---|
| 272 | + !page_count(page)) |
|---|
| 273 | + /* |
|---|
| 274 | + * The responsible driver agreed to skip PageOffline() |
|---|
| 275 | + * pages when offlining memory by dropping its |
|---|
| 276 | + * reference in MEM_GOING_OFFLINE. |
|---|
| 277 | + */ |
|---|
| 271 | 278 | pfn++; |
|---|
| 272 | 279 | else |
|---|
| 273 | 280 | break; |
|---|
| 274 | 281 | } |
|---|
| 275 | | -#ifdef CONFIG_ARCH_ROCKCHIP |
|---|
| 282 | +#ifdef CONFIG_NO_GKI |
|---|
| 276 | 283 | if (pfn < end_pfn) |
|---|
| 277 | 284 | dump_page_owner(page); |
|---|
| 278 | 285 | #endif |
|---|
| .. | .. |
|---|
| 282 | 289 | |
|---|
| 283 | 290 | /* Caller should ensure that requested range is in a single zone */ |
|---|
| 284 | 291 | int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, |
|---|
| 285 | | - bool skip_hwpoisoned_pages) |
|---|
| 292 | + int isol_flags, unsigned long *failed_pfn) |
|---|
| 286 | 293 | { |
|---|
| 287 | 294 | unsigned long pfn, flags; |
|---|
| 288 | 295 | struct page *page; |
|---|
| .. | .. |
|---|
| 304 | 311 | /* Check all pages are free or marked as ISOLATED */ |
|---|
| 305 | 312 | zone = page_zone(page); |
|---|
| 306 | 313 | spin_lock_irqsave(&zone->lock, flags); |
|---|
| 307 | | - pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, |
|---|
| 308 | | - skip_hwpoisoned_pages); |
|---|
| 314 | + pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags); |
|---|
| 309 | 315 | spin_unlock_irqrestore(&zone->lock, flags); |
|---|
| 310 | 316 | |
|---|
| 311 | 317 | trace_test_pages_isolated(start_pfn, end_pfn, pfn); |
|---|
| 318 | + if (pfn < end_pfn) { |
|---|
| 319 | + page_pinner_failure_detect(pfn_to_page(pfn)); |
|---|
| 320 | + if (failed_pfn) |
|---|
| 321 | + *failed_pfn = pfn; |
|---|
| 322 | + return -EBUSY; |
|---|
| 323 | + } |
|---|
| 312 | 324 | |
|---|
| 313 | | - return pfn < end_pfn ? -EBUSY : 0; |
|---|
| 314 | | -} |
|---|
| 315 | | - |
|---|
| 316 | | -struct page *alloc_migrate_target(struct page *page, unsigned long private) |
|---|
| 317 | | -{ |
|---|
| 318 | | - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]); |
|---|
| 325 | + return 0; |
|---|
| 319 | 326 | } |
|---|