@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2008, 2009 Intel Corporation
  * Authors: Andi Kleen, Fengguang Wu
- *
- * This software may be redistributed and/or modified under the terms of
- * the GNU General Public License ("GPL") version 2 only as published by the
- * Free Software Foundation.
  *
  * High level machine check handler. Handles pages reported by the
  * hardware as being corrupted usually due to a multi-bit ECC memory or cache
@@ -67,6 +64,33 @@
 int sysctl_memory_failure_recovery __read_mostly = 1;
 
 atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
+
+static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
+{
+	if (hugepage_or_freepage) {
+		/*
+		 * Doing this check for free pages is also fine since dissolve_free_huge_page
+		 * returns 0 for non-hugetlb pages as well.
+		 */
+		if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
+			/*
+			 * We could fail to take off the target page from buddy
+			 * for example due to racy page allocation, but that's
+			 * acceptable because soft-offlined page is not broken
+			 * and if someone really wants to use it, they should
+			 * take it.
+			 */
+			return false;
+	}
+
+	SetPageHWPoison(page);
+	if (release)
+		put_page(page);
+	page_ref_inc(page);
+	num_poisoned_pages_inc();
+
+	return true;
+}
 
 #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
 
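The call sites added later in this patch pin down page_handle_poison()'s argument convention; a quick summary as a sketch (the calls are quoted from the hunks below, the commentary is mine):

	/* In-use page-cache page that invalidate_inode_page() dropped:
	 * not huge or free, and the get_any_page() reference is released. */
	page_handle_poison(page, false, true);

	/* Source page left behind by a successful migration: hugetlb pages
	 * are dissolved and keep their reference, base pages drop theirs. */
	page_handle_poison(page, huge, !huge);

	/* Page that was already free: just take it off the buddy list. */
	page_handle_poison(page, true, false);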
@@ -213,14 +237,15 @@
 {
	struct task_struct *t = tk->tsk;
	short addr_lsb = tk->size_shift;
-	int ret;
+	int ret = 0;
 
-	pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n",
-		pfn, t->comm, t->pid);
+	pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+		pfn, t->comm, t->pid);
 
-	if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
-		ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr,
-				       addr_lsb, current);
+	if (flags & MF_ACTION_REQUIRED) {
+		WARN_ON_ONCE(t != current);
+		ret = force_sig_mceerr(BUS_MCEERR_AR,
+				       (void __user *)tk->addr, addr_lsb);
	} else {
		/*
		 * Don't use force here, it's convenient if the signal
@@ -306,30 +331,24 @@
 /*
  * Schedule a process for later kill.
  * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
- * TBD would GFP_NOIO be enough?
  */
 static void add_to_kill(struct task_struct *tsk, struct page *p,
			struct vm_area_struct *vma,
-			struct list_head *to_kill,
-			struct to_kill **tkc)
+			struct list_head *to_kill)
 {
	struct to_kill *tk;
 
-	if (*tkc) {
-		tk = *tkc;
-		*tkc = NULL;
-	} else {
-		tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
-		if (!tk) {
-			pr_err("Memory failure: Out of memory while machine check handling\n");
-			return;
-		}
+	tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
+	if (!tk) {
+		pr_err("Memory failure: Out of memory while machine check handling\n");
+		return;
	}
+
	tk->addr = page_address_in_vma(p, vma);
	if (is_zone_device_page(p))
		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
	else
-		tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
+		tk->size_shift = page_shift(compound_head(p));
 
	/*
	 * Send SIGKILL if "tk->addr == -EFAULT". Also, as
@@ -348,6 +367,7 @@
		kfree(tk);
		return;
	}
+
	get_task_struct(tsk);
	tk->tsk = tsk;
	list_add_tail(&tk->nd, to_kill);
@@ -407,9 +427,15 @@
 {
	struct task_struct *t;
 
-	for_each_thread(tsk, t)
-		if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
-			return t;
+	for_each_thread(tsk, t) {
+		if (t->flags & PF_MCE_PROCESS) {
+			if (t->flags & PF_MCE_EARLY)
+				return t;
+		} else {
+			if (sysctl_memory_failure_early_kill)
+				return t;
+		}
+	}
	return NULL;
 }
 
@@ -418,35 +444,40 @@
  * to be signaled when some page under the process is hwpoisoned.
  * Return task_struct of the dedicated thread (main thread unless explicitly
  * specified) if the process is "early kill," and otherwise returns NULL.
+ *
+ * Note that the above is true for Action Optional case, but not for Action
+ * Required case where SIGBUS should be sent only to the current thread.
  */
 static struct task_struct *task_early_kill(struct task_struct *tsk,
					   int force_early)
 {
-	struct task_struct *t;
	if (!tsk->mm)
		return NULL;
-	if (force_early)
-		return tsk;
-	t = find_early_kill_thread(tsk);
-	if (t)
-		return t;
-	if (sysctl_memory_failure_early_kill)
-		return tsk;
-	return NULL;
+	if (force_early) {
+		/*
+		 * Comparing ->mm here because current task might represent
+		 * a subthread, while tsk always points to the main thread.
+		 */
+		if (tsk->mm == current->mm)
+			return current;
+		else
+			return NULL;
+	}
+	return find_early_kill_thread(tsk);
 }
 
 /*
  * Collect processes when the error hit an anonymous page.
  */
 static void collect_procs_anon(struct page *page, struct list_head *to_kill,
-				struct to_kill **tkc, int force_early)
+				int force_early)
 {
	struct vm_area_struct *vma;
	struct task_struct *tsk;
	struct anon_vma *av;
	pgoff_t pgoff;
 
-	av = page_lock_anon_vma_read(page);
+	av = page_lock_anon_vma_read(page, NULL);
	if (av == NULL)	/* Not actually mapped anymore */
		return;
 
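The PF_MCE_PROCESS and PF_MCE_EARLY flags tested in find_early_kill_thread() above are set from userspace with prctl(PR_MCE_KILL). A minimal sketch of a process opting in to early SIGBUS (BUS_MCEERR_AO) delivery regardless of the vm.memory_failure_early_kill sysctl (userspace code, not part of the patch):

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		/* Request early SIGBUS for memory errors hitting this thread. */
		if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
			perror("prctl(PR_MCE_KILL)");
		return 0;
	}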
@@ -464,7 +495,7 @@
			if (!page_mapped_in_vma(page, vma))
				continue;
			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, vma, to_kill, tkc);
+				add_to_kill(t, page, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
@@ -475,16 +506,17 @@
  * Collect processes when the error hit a file mapped page.
  */
 static void collect_procs_file(struct page *page, struct list_head *to_kill,
-				struct to_kill **tkc, int force_early)
+				int force_early)
 {
	struct vm_area_struct *vma;
	struct task_struct *tsk;
	struct address_space *mapping = page->mapping;
+	pgoff_t pgoff;
 
	i_mmap_lock_read(mapping);
	read_lock(&tasklist_lock);
+	pgoff = page_to_pgoff(page);
	for_each_process(tsk) {
-		pgoff_t pgoff = page_to_pgoff(page);
		struct task_struct *t = task_early_kill(tsk, force_early);
 
		if (!t)
@@ -499,7 +531,7 @@
			 * to be informed of all such data corruptions.
			 */
			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, vma, to_kill, tkc);
+				add_to_kill(t, page, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
@@ -508,26 +540,17 @@
 
 /*
  * Collect the processes who have the corrupted page mapped to kill.
- * This is done in two steps for locking reasons.
- * First preallocate one tokill structure outside the spin locks,
- * so that we can kill at least one process reasonably reliable.
  */
 static void collect_procs(struct page *page, struct list_head *tokill,
				int force_early)
 {
-	struct to_kill *tk;
-
	if (!page->mapping)
		return;
 
-	tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
-	if (!tk)
-		return;
	if (PageAnon(page))
-		collect_procs_anon(page, tokill, &tk, force_early);
+		collect_procs_anon(page, tokill, force_early);
	else
-		collect_procs_file(page, tokill, &tk, force_early);
-	kfree(tk);
+		collect_procs_file(page, tokill, force_early);
 }
 
 static const char *action_name[] = {
@@ -559,6 +582,7 @@
	[MF_MSG_BUDDY]		= "free buddy page",
	[MF_MSG_BUDDY_2ND]	= "free buddy page (2nd try)",
	[MF_MSG_DAX]		= "dax page",
+	[MF_MSG_UNSPLIT_THP]	= "unsplit thp",
	[MF_MSG_UNKNOWN]	= "unknown page",
 };
 
@@ -829,7 +853,6 @@
 #define sc		((1UL << PG_swapcache) | (1UL << PG_swapbacked))
 #define unevict		(1UL << PG_unevictable)
 #define mlock		(1UL << PG_mlocked)
-#define writeback	(1UL << PG_writeback)
 #define lru		(1UL << PG_lru)
 #define head		(1UL << PG_head)
 #define slab		(1UL << PG_slab)
@@ -878,7 +901,6 @@
 #undef sc
 #undef unevict
 #undef mlock
-#undef writeback
 #undef lru
 #undef head
 #undef slab
@@ -930,7 +952,7 @@
  * Return: return 0 if failed to grab the refcount, otherwise true (some
  * non-zero value.)
  */
-int get_hwpoison_page(struct page *page)
+static int get_hwpoison_page(struct page *page)
 {
	struct page *head = compound_head(page);
 
@@ -959,7 +981,6 @@
 
	return 0;
 }
-EXPORT_SYMBOL_GPL(get_hwpoison_page);
 
 /*
  * Do all that is necessary to remove user space mappings. Unmap
@@ -968,10 +989,10 @@
 static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
				   int flags, struct page **hpagep)
 {
-	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+	enum ttu_flags ttu = TTU_IGNORE_MLOCK;
	struct address_space *mapping;
	LIST_HEAD(tokill);
-	bool unmap_success;
+	bool unmap_success = true;
	int kill = 1, forcekill;
	struct page *hpage = *hpagep;
	bool mlocked = PageMlocked(hpage);
@@ -1011,7 +1032,7 @@
	 */
	mapping = page_mapping(hpage);
	if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
-	    mapping_cap_writeback_dirty(mapping)) {
+	    mapping_can_writeback(mapping)) {
		if (page_mkclean(hpage)) {
			SetPageDirty(hpage);
		} else {
@@ -1033,7 +1054,30 @@
	if (kill)
		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
 
-	unmap_success = try_to_unmap(hpage, ttu);
+	if (!PageHuge(hpage)) {
+		unmap_success = try_to_unmap(hpage, ttu);
+	} else {
+		if (!PageAnon(hpage)) {
+			/*
+			 * For hugetlb pages in shared mappings, try_to_unmap
+			 * could potentially call huge_pmd_unshare. Because of
+			 * this, take semaphore in write mode here and set
+			 * TTU_RMAP_LOCKED to indicate we have taken the lock
+			 * at this higher level.
+			 */
+			mapping = hugetlb_page_mapping_lock_write(hpage);
+			if (mapping) {
+				unmap_success = try_to_unmap(hpage,
+						ttu|TTU_RMAP_LOCKED);
+				i_mmap_unlock_write(mapping);
+			} else {
+				pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
+				unmap_success = false;
+			}
+		} else {
+			unmap_success = try_to_unmap(hpage, ttu);
+		}
+	}
	if (!unmap_success)
		pr_err("Memory failure: %#lx: failed to unmap page (mapcount=%d)\n",
		       pfn, page_mapcount(hpage));
@@ -1084,6 +1128,25 @@
	return page_action(ps, p, pfn);
 }
 
+static int try_to_split_thp_page(struct page *page, const char *msg)
+{
+	lock_page(page);
+	if (!PageAnon(page) || unlikely(split_huge_page(page))) {
+		unsigned long pfn = page_to_pfn(page);
+
+		unlock_page(page);
+		if (!PageAnon(page))
+			pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+		else
+			pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+		put_page(page);
+		return -EBUSY;
+	}
+	unlock_page(page);
+
+	return 0;
+}
+
 static int memory_failure_hugetlb(unsigned long pfn, int flags)
 {
	struct page *p = pfn_to_page(pfn);
@@ -1125,7 +1188,7 @@
		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
		num_poisoned_pages_dec();
		unlock_page(head);
-		put_hwpoison_page(head);
+		put_page(head);
		return 0;
	}
 
@@ -1166,6 +1229,19 @@
	LIST_HEAD(tokill);
	int rc = -EBUSY;
	loff_t start;
+	dax_entry_t cookie;
+
+	if (flags & MF_COUNT_INCREASED)
+		/*
+		 * Drop the extra refcount in case we come from madvise().
+		 */
+		put_page(page);
+
+	/* device metadata space is not recoverable */
+	if (!pgmap_pfn_valid(pgmap, pfn)) {
+		rc = -ENXIO;
+		goto out;
+	}
 
	/*
	 * Prevent the inode from being freed while we are interrogating
@@ -1174,7 +1250,8 @@
	 * also prevents changes to the mapping of this pfn until
	 * poison signaling is complete.
	 */
-	if (!dax_lock_mapping_entry(page))
+	cookie = dax_lock_page(page);
+	if (!cookie)
		goto out;
 
	if (hwpoison_filter(page)) {
@@ -1182,16 +1259,12 @@
		goto unlock;
	}
 
-	switch (pgmap->type) {
-	case MEMORY_DEVICE_PRIVATE:
-	case MEMORY_DEVICE_PUBLIC:
+	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
		/*
		 * TODO: Handle HMM pages which may need coordination
		 * with device-side memory.
		 */
		goto unlock;
-	default:
-		break;
	}
 
	/*
@@ -1225,7 +1298,7 @@
	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
	rc = 0;
 unlock:
-	dax_unlock_mapping_entry(page);
+	dax_unlock_page(page, cookie);
 out:
	/* drop pgmap ref acquired in caller */
	put_dev_pagemap(pgmap);
@@ -1308,23 +1381,11 @@
	}
 
	if (PageTransHuge(hpage)) {
-		lock_page(p);
-		if (!PageAnon(p) || unlikely(split_huge_page(p))) {
-			unlock_page(p);
-			if (!PageAnon(p))
-				pr_err("Memory failure: %#lx: non anonymous thp\n",
-					pfn);
-			else
-				pr_err("Memory failure: %#lx: thp split failed\n",
-					pfn);
-			if (TestClearPageHWPoison(p))
-				num_poisoned_pages_dec();
-			put_hwpoison_page(p);
+		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
+			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
			return -EBUSY;
		}
-		unlock_page(p);
		VM_BUG_ON_PAGE(!page_count(p), p);
-		hpage = compound_head(p);
	}
 
	/*
@@ -1364,10 +1425,7 @@
	 * page_remove_rmap() in try_to_unmap_one(). So to determine page status
	 * correctly, we save a copy of the page flags at this time.
	 */
-	if (PageHuge(p))
-		page_flags = hpage->flags;
-	else
-		page_flags = p->flags;
+	page_flags = p->flags;
 
	/*
	 * unpoison always clear PG_hwpoison inside page lock
@@ -1376,14 +1434,14 @@
		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
		num_poisoned_pages_dec();
		unlock_page(p);
-		put_hwpoison_page(p);
+		put_page(p);
		return 0;
	}
	if (hwpoison_filter(p)) {
		if (TestClearPageHWPoison(p))
			num_poisoned_pages_dec();
		unlock_page(p);
-		put_hwpoison_page(p);
+		put_page(p);
		return 0;
	}
 
@@ -1404,11 +1462,8 @@
	/*
	 * Now take care of user space mappings.
	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
-	 *
-	 * When the raw error page is thp tail page, hpage points to the raw
-	 * page after thp split.
	 */
-	if (!hwpoison_user_mappings(p, pfn, flags, &hpage)) {
+	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
		res = -EBUSY;
		goto out;
@@ -1492,7 +1547,7 @@
	unsigned long proc_flags;
	int gotten;
 
-	mf_cpu = this_cpu_ptr(&memory_failure_cpu);
+	mf_cpu = container_of(work, struct memory_failure_cpu, work);
	for (;;) {
		spin_lock_irqsave(&mf_cpu->lock, proc_flags);
		gotten = kfifo_get(&mf_cpu->fifo, &entry);
@@ -1500,10 +1555,23 @@
		if (!gotten)
			break;
		if (entry.flags & MF_SOFT_OFFLINE)
-			soft_offline_page(pfn_to_page(entry.pfn), entry.flags);
+			soft_offline_page(entry.pfn, entry.flags);
		else
			memory_failure(entry.pfn, entry.flags);
	}
+}
+
+/*
+ * Process memory_failure work queued on the specified CPU.
+ * Used to avoid return-to-userspace racing with the memory_failure workqueue.
+ */
+void memory_failure_queue_kick(int cpu)
+{
+	struct memory_failure_cpu *mf_cpu;
+
+	mf_cpu = &per_cpu(memory_failure_cpu, cpu);
+	cancel_work_sync(&mf_cpu->work);
+	memory_failure_work_func(&mf_cpu->work);
 }
 
 static int __init memory_failure_init(void)
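Deriving mf_cpu from the work pointer via container_of(), rather than from this_cpu_ptr(), is what allows memory_failure_queue_kick() to run another CPU's work item synchronously. A simplified userspace illustration of the container_of() pattern (stand-in types; the kernel's macro adds type checking):

	#include <stdio.h>
	#include <stddef.h>

	struct work_struct { int pending; };		/* stand-in */

	struct mf_cpu_like {
		int fifo;				/* stand-in for the kfifo */
		struct work_struct work;		/* embedded member */
	};

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	int main(void)
	{
		struct mf_cpu_like cpu0 = { .fifo = 42 };
		struct work_struct *w = &cpu0.work;

		/* Recover the enclosing structure from its embedded member. */
		struct mf_cpu_like *mf = container_of(w, struct mf_cpu_like, work);
		printf("fifo = %d\n", mf->fifo);	/* prints 42 */
		return 0;
	}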
@@ -1612,147 +1680,113 @@
	}
	unlock_page(page);
 
-	put_hwpoison_page(page);
+	put_page(page);
	if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
-		put_hwpoison_page(page);
+		put_page(page);
 
	return 0;
 }
 EXPORT_SYMBOL(unpoison_memory);
 
-static struct page *new_page(struct page *p, unsigned long private)
+/*
+ * Safely get reference count of an arbitrary page.
+ * Returns 0 for a free page, 1 for an in-use page, -EIO for a page-type we
+ * cannot handle, and -EBUSY if we raced with an allocation.
+ * The refcount is only incremented when the page is in use and of a known
+ * type we can handle.
+ */
+static int get_any_page(struct page *p, int flags)
 {
-	int nid = page_to_nid(p);
+	int ret = 0, pass = 0;
+	bool count_increased = false;
 
-	return new_page_nodemask(p, nid, &node_states[N_MEMORY]);
+	if (flags & MF_COUNT_INCREASED)
+		count_increased = true;
+
+try_again:
+	if (!count_increased && !get_hwpoison_page(p)) {
+		if (page_count(p)) {
+			/* We raced with an allocation, retry. */
+			if (pass++ < 3)
+				goto try_again;
+			ret = -EBUSY;
+		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+			/* We raced with put_page, retry. */
+			if (pass++ < 3)
+				goto try_again;
+			ret = -EIO;
+		}
+	} else {
+		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+			ret = 1;
+		} else {
+			/*
+			 * A page we cannot handle. Check whether we can turn
+			 * it into something we can handle.
+			 */
+			if (pass++ < 3) {
+				put_page(p);
+				shake_page(p, 1);
+				count_increased = false;
+				goto try_again;
+			}
+			put_page(p);
+			ret = -EIO;
+		}
+	}
+
+	return ret;
+}
+
+static bool isolate_page(struct page *page, struct list_head *pagelist)
+{
+	bool isolated = false;
+	bool lru = PageLRU(page);
+
+	if (PageHuge(page)) {
+		isolated = !isolate_hugetlb(page, pagelist);
+	} else {
+		if (lru)
+			isolated = !isolate_lru_page(page);
+		else
+			isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE);
+
+		if (isolated)
+			list_add(&page->lru, pagelist);
+	}
+
+	if (isolated && lru)
+		inc_node_page_state(page, NR_ISOLATED_ANON +
+				    page_is_file_lru(page));
+
+	/*
+	 * If we succeed in isolating the page, we grabbed another refcount on
+	 * the page, so we can safely drop the one we got from get_any_page().
+	 * If we failed to isolate the page, it means that we cannot go further
+	 * and we will return an error, so drop the reference we got from
+	 * get_any_page() as well.
+	 */
+	put_page(page);
+	return isolated;
 }
 
 /*
- * Safely get reference count of an arbitrary page.
- * Returns 0 for a free page, -EIO for a zero refcount page
- * that is not free, and 1 for any other page type.
- * For 1 the page is returned with increased page count, otherwise not.
+ * __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
+ * If the page is a non-dirty unmapped page-cache page, it is simply
+ * invalidated. If the page is mapped, the contents are migrated over.
  */
-static int __get_any_page(struct page *p, unsigned long pfn, int flags)
+static int __soft_offline_page(struct page *page)
 {
-	int ret;
-
-	if (flags & MF_COUNT_INCREASED)
-		return 1;
-
-	/*
-	 * When the target page is a free hugepage, just remove it
-	 * from free hugepage list.
-	 */
-	if (!get_hwpoison_page(p)) {
-		if (PageHuge(p)) {
-			pr_info("%s: %#lx free huge page\n", __func__, pfn);
-			ret = 0;
-		} else if (is_free_buddy_page(p)) {
-			pr_info("%s: %#lx free buddy page\n", __func__, pfn);
-			ret = 0;
-		} else {
-			pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
-				__func__, pfn, p->flags);
-			ret = -EIO;
-		}
-	} else {
-		/* Not a free page */
-		ret = 1;
-	}
-	return ret;
-}
-
-static int get_any_page(struct page *page, unsigned long pfn, int flags)
-{
-	int ret = __get_any_page(page, pfn, flags);
-
-	if (ret == 1 && !PageHuge(page) &&
-	    !PageLRU(page) && !__PageMovable(page)) {
-		/*
-		 * Try to free it.
-		 */
-		put_hwpoison_page(page);
-		shake_page(page, 1);
-
-		/*
-		 * Did it turn free?
-		 */
-		ret = __get_any_page(page, pfn, 0);
-		if (ret == 1 && !PageLRU(page)) {
-			/* Drop page reference which is from __get_any_page() */
-			put_hwpoison_page(page);
-			pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n",
-				pfn, page->flags, &page->flags);
-			return -EIO;
-		}
-	}
-	return ret;
-}
-
-static int soft_offline_huge_page(struct page *page, int flags)
-{
-	int ret;
+	int ret = 0;
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
+	char const *msg_page[] = {"page", "hugepage"};
+	bool huge = PageHuge(page);
	LIST_HEAD(pagelist);
-
-	/*
-	 * This double-check of PageHWPoison is to avoid the race with
-	 * memory_failure(). See also comment in __soft_offline_page().
-	 */
-	lock_page(hpage);
-	if (PageHWPoison(hpage)) {
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
-		pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
-		return -EBUSY;
-	}
-	unlock_page(hpage);
-
-	ret = isolate_huge_page(hpage, &pagelist);
-	/*
-	 * get_any_page() and isolate_huge_page() takes a refcount each,
-	 * so need to drop one here.
-	 */
-	put_hwpoison_page(hpage);
-	if (!ret) {
-		pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
-		return -EBUSY;
-	}
-
-	ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
-				MIGRATE_SYNC, MR_MEMORY_FAILURE);
-	if (ret) {
-		pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n",
-			pfn, ret, page->flags, &page->flags);
-		if (!list_empty(&pagelist))
-			putback_movable_pages(&pagelist);
-		if (ret > 0)
-			ret = -EIO;
-	} else {
-		/*
-		 * We set PG_hwpoison only when the migration source hugepage
-		 * was successfully dissolved, because otherwise hwpoisoned
-		 * hugepage remains on free hugepage list, then userspace will
-		 * find it as SIGBUS by allocation failure. That's not expected
-		 * in soft-offlining.
-		 */
-		ret = dissolve_free_huge_page(page);
-		if (!ret) {
-			if (set_hwpoison_free_buddy_page(page))
-				num_poisoned_pages_inc();
-			else
-				ret = -EBUSY;
-		}
-	}
-	return ret;
-}
-
-static int __soft_offline_page(struct page *page, int flags)
-{
-	int ret;
-	unsigned long pfn = page_to_pfn(page);
+	struct migration_target_control mtc = {
+		.nid = NUMA_NO_NODE,
+		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+	};
 
	/*
	 * Check PageHWPoison again inside page lock because PageHWPoison
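get_any_page() above settles transient refcount races by retrying a fixed number of passes before giving up. The same shape as a self-contained userspace sketch (try_grab_ref() and looks_allocated() are hypothetical stand-ins for get_hwpoison_page() and the page_count() check):

	#include <errno.h>
	#include <stdbool.h>

	#define MAX_PASSES 3

	static bool try_grab_ref(void *page) { (void)page; return false; }	/* stub */
	static bool looks_allocated(void *page) { (void)page; return false; }	/* stub */

	static int get_any_page_like(void *page)
	{
		int pass = 0;

	retry:
		if (!try_grab_ref(page)) {
			if (looks_allocated(page)) {
				/* Lost a race with an allocation: retry a
				 * few times, then report the page as busy. */
				if (pass++ < MAX_PASSES)
					goto retry;
				return -EBUSY;
			}
			return 0;	/* genuinely free */
		}
		return 1;		/* in use, reference held */
	}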
@@ -1761,127 +1795,77 @@
	 * so there's no race between soft_offline_page() and memory_failure().
	 */
	lock_page(page);
-	wait_on_page_writeback(page);
+	if (!PageHuge(page))
+		wait_on_page_writeback(page);
	if (PageHWPoison(page)) {
		unlock_page(page);
-		put_hwpoison_page(page);
+		put_page(page);
		pr_info("soft offline: %#lx page already poisoned\n", pfn);
-		return -EBUSY;
+		return 0;
	}
-	/*
-	 * Try to invalidate first. This should work for
-	 * non dirty unmapped page cache pages.
-	 */
-	ret = invalidate_inode_page(page);
+
+	if (!PageHuge(page))
+		/*
+		 * Try to invalidate first. This should work for
+		 * non dirty unmapped page cache pages.
+		 */
+		ret = invalidate_inode_page(page);
	unlock_page(page);
+
	/*
	 * RED-PEN would be better to keep it isolated here, but we
	 * would need to fix isolation locking first.
	 */
-	if (ret == 1) {
-		put_hwpoison_page(page);
+	if (ret) {
		pr_info("soft_offline: %#lx: invalidated\n", pfn);
-		SetPageHWPoison(page);
-		num_poisoned_pages_inc();
+		page_handle_poison(page, false, true);
		return 0;
	}
 
-	/*
-	 * Simple invalidation didn't work.
-	 * Try to migrate to a new page instead. migrate.c
-	 * handles a large number of cases for us.
-	 */
-	if (PageLRU(page))
-		ret = isolate_lru_page(page);
-	else
-		ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
-	/*
-	 * Drop page reference which is came from get_any_page()
-	 * successful isolate_lru_page() already took another one.
-	 */
-	put_hwpoison_page(page);
-	if (!ret) {
-		LIST_HEAD(pagelist);
-		/*
-		 * After isolated lru page, the PageLRU will be cleared,
-		 * so use !__PageMovable instead for LRU page's mapping
-		 * cannot have PAGE_MAPPING_MOVABLE.
-		 */
-		if (!__PageMovable(page))
-			inc_node_page_state(page, NR_ISOLATED_ANON +
-						page_is_file_cache(page));
-		list_add(&page->lru, &pagelist);
-		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
-					MIGRATE_SYNC, MR_MEMORY_FAILURE);
-		if (ret) {
+	if (isolate_page(hpage, &pagelist)) {
+		ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
+			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
+		if (!ret) {
+			bool release = !huge;
+
+			if (!page_handle_poison(page, huge, release))
+				ret = -EBUSY;
+		} else {
			if (!list_empty(&pagelist))
				putback_movable_pages(&pagelist);
 
-			pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
-				pfn, ret, page->flags, &page->flags);
+			pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
+				pfn, msg_page[huge], ret, page->flags, &page->flags);
			if (ret > 0)
-				ret = -EIO;
+				ret = -EBUSY;
		}
	} else {
-		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
-			pfn, ret, page_count(page), page->flags, &page->flags);
+		pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %lx (%pGp)\n",
+			pfn, msg_page[huge], page_count(page), page->flags, &page->flags);
+		ret = -EBUSY;
	}
	return ret;
 }
 
-static int soft_offline_in_use_page(struct page *page, int flags)
+static int soft_offline_in_use_page(struct page *page)
 {
-	int ret;
-	int mt;
	struct page *hpage = compound_head(page);
 
-	if (!PageHuge(page) && PageTransHuge(hpage)) {
-		lock_page(page);
-		if (!PageAnon(page) || unlikely(split_huge_page(page))) {
-			unlock_page(page);
-			if (!PageAnon(page))
-				pr_info("soft offline: %#lx: non anonymous thp\n", page_to_pfn(page));
-			else
-				pr_info("soft offline: %#lx: thp split failed\n", page_to_pfn(page));
-			put_hwpoison_page(page);
+	if (!PageHuge(page) && PageTransHuge(hpage))
+		if (try_to_split_thp_page(page, "soft offline") < 0)
			return -EBUSY;
-		}
-		unlock_page(page);
-	}
-
-	/*
-	 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
-	 * to free list immediately (not via pcplist) when released after
-	 * successful page migration. Otherwise we can't guarantee that the
-	 * page is really free after put_page() returns, so
-	 * set_hwpoison_free_buddy_page() highly likely fails.
-	 */
-	mt = get_pageblock_migratetype(page);
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	if (PageHuge(page))
-		ret = soft_offline_huge_page(page, flags);
-	else
-		ret = __soft_offline_page(page, flags);
-	set_pageblock_migratetype(page, mt);
-	return ret;
+	return __soft_offline_page(page);
 }
 
-static int soft_offline_free_page(struct page *page)
+static void put_ref_page(struct page *page)
 {
-	int rc = dissolve_free_huge_page(page);
-
-	if (!rc) {
-		if (set_hwpoison_free_buddy_page(page))
-			num_poisoned_pages_inc();
-		else
-			rc = -EBUSY;
-	}
-	return rc;
+	if (page)
+		put_page(page);
 }
 
 /**
  * soft_offline_page - Soft offline a page.
- * @page: page to offline
+ * @pfn: pfn to soft-offline
  * @flags: flags. Same as memory_failure().
  *
  * Returns 0 on success, otherwise negated errno.
@@ -1901,34 +1885,52 @@
  * This is not a 100% solution for all memory, but tries to be
  * ``good enough'' for the majority of memory.
  */
-int soft_offline_page(struct page *page, int flags)
+int soft_offline_page(unsigned long pfn, int flags)
 {
	int ret;
-	unsigned long pfn = page_to_pfn(page);
+	bool try_again = true;
+	struct page *page, *ref_page = NULL;
 
-	if (is_zone_device_page(page)) {
-		pr_debug_ratelimited("soft_offline: %#lx page is device page\n",
-				pfn);
-		if (flags & MF_COUNT_INCREASED)
-			put_page(page);
+	WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED));
+
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+	if (flags & MF_COUNT_INCREASED)
+		ref_page = pfn_to_page(pfn);
+
+	/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
+	page = pfn_to_online_page(pfn);
+	if (!page) {
+		put_ref_page(ref_page);
		return -EIO;
	}
 
	if (PageHWPoison(page)) {
-		pr_info("soft offline: %#lx page already poisoned\n", pfn);
-		if (flags & MF_COUNT_INCREASED)
-			put_hwpoison_page(page);
-		return -EBUSY;
+		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
+		put_ref_page(ref_page);
+		return 0;
	}
 
+retry:
	get_online_mems();
-	ret = get_any_page(page, pfn, flags);
+	ret = get_any_page(page, flags);
	put_online_mems();
 
-	if (ret > 0)
-		ret = soft_offline_in_use_page(page, flags);
-	else if (ret == 0)
-		ret = soft_offline_free_page(page);
+	if (ret > 0) {
+		ret = soft_offline_in_use_page(page);
+	} else if (ret == 0) {
+		if (!page_handle_poison(page, true, false)) {
+			if (try_again) {
+				try_again = false;
+				flags &= ~MF_COUNT_INCREASED;
+				goto retry;
+			}
+			ret = -EBUSY;
+		}
+	} else if (ret == -EIO) {
+		pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
			__func__, pfn, page->flags, &page->flags);
+	}
 
	return ret;
 }
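With this signature change soft_offline_page() takes a pfn, but the usual userspace trigger is unchanged: madvise(MADV_SOFT_OFFLINE) on a mapped address, which resolves the pfn in the kernel and enters with MF_COUNT_INCREASED set (the "in case we come from madvise()" refcount notes above). A minimal sketch, assuming a kernel built with CONFIG_MEMORY_FAILURE and a caller with CAP_SYS_ADMIN:

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long pagesz = sysconf(_SC_PAGESIZE);
		char *buf;

		if (posix_memalign((void **)&buf, pagesz, pagesz))
			return 1;
		buf[0] = 1;	/* fault the page in so a pfn backs it */

		/* Migrate the data away and poison the old physical page. */
		if (madvise(buf, pagesz, MADV_SOFT_OFFLINE))
			perror("madvise(MADV_SOFT_OFFLINE)");
		return 0;
	}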
---|