.. | .. |
21 | 21 | #include <linux/vmalloc.h> |
22 | 22 | #include <linux/swap_slots.h> |
23 | 23 | #include <linux/huge_mm.h> |
24 | | - |
25 | | -#include <asm/pgtable.h> |
| 24 | +#include <linux/shmem_fs.h> |
26 | 25 | #include "internal.h" |
27 | 26 | |
28 | 27 | /* |
.. | .. |
59 | 58 | #define GET_SWAP_RA_VAL(vma) \ |
60 | 59 | (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) |
61 | 60 | |
62 | | -#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) |
63 | | -#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0) |
| 61 | +#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++) |
| 62 | +#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr)) |
64 | 63 | |
65 | 64 | static struct { |
66 | 65 | unsigned long add_total; |
.. | .. |
74 | 73 | unsigned int i, j, nr; |
75 | 74 | unsigned long ret = 0; |
76 | 75 | struct address_space *spaces; |
| 76 | + struct swap_info_struct *si; |
77 | 77 | |
78 | | - rcu_read_lock(); |
79 | 78 | for (i = 0; i < MAX_SWAPFILES; i++) { |
80 | | - /* |
81 | | - * The corresponding entries in nr_swapper_spaces and |
82 | | - * swapper_spaces will be reused only after at least |
83 | | - * one grace period. So it is impossible for them |
84 | | - * belongs to different usage. |
85 | | - */ |
86 | | - nr = nr_swapper_spaces[i]; |
87 | | - spaces = rcu_dereference(swapper_spaces[i]); |
88 | | - if (!nr || !spaces) |
| 79 | + swp_entry_t entry = swp_entry(i, 1); |
| 80 | + |
| 81 | + /* Avoid get_swap_device() to warn for bad swap entry */ |
| 82 | + if (!swp_swap_info(entry)) |
89 | 83 | continue; |
| 84 | + /* Prevent swapoff to free swapper_spaces */ |
| 85 | + si = get_swap_device(entry); |
| 86 | + if (!si) |
| 87 | + continue; |
| 88 | + nr = nr_swapper_spaces[i]; |
| 89 | + spaces = swapper_spaces[i]; |
90 | 90 | for (j = 0; j < nr; j++) |
91 | 91 | ret += spaces[j].nrpages; |
| 92 | + put_swap_device(si); |
92 | 93 | } |
93 | | - rcu_read_unlock(); |
94 | 94 | return ret; |
95 | 95 | } |
| 96 | +EXPORT_SYMBOL_GPL(total_swapcache_pages); |
96 | 97 | |
97 | 98 | static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); |
98 | 99 | |
.. | .. |
107 | 108 | printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); |
108 | 109 | } |
109 | 110 | |
| 111 | +void *get_shadow_from_swap_cache(swp_entry_t entry) |
| 112 | +{ |
| 113 | + struct address_space *address_space = swap_address_space(entry); |
| 114 | + pgoff_t idx = swp_offset(entry); |
| 115 | + struct page *page; |
| 116 | + |
| 117 | + page = find_get_entry(address_space, idx); |
| 118 | + if (xa_is_value(page)) |
| 119 | + return page; |
| 120 | + if (page) |
| 121 | + put_page(page); |
| 122 | + return NULL; |
| 123 | +} |
| 124 | + |
110 | 125 | /* |
111 | | - * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, |
| 126 | + * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, |
112 | 127 | * but sets SwapCache flag and private instead of mapping and index. |
113 | 128 | */ |
114 | | -int __add_to_swap_cache(struct page *page, swp_entry_t entry) |
| 129 | +int add_to_swap_cache(struct page *page, swp_entry_t entry, |
| 130 | + gfp_t gfp, void **shadowp) |
115 | 131 | { |
116 | | - int error, i, nr = hpage_nr_pages(page); |
117 | | - struct address_space *address_space; |
| 132 | + struct address_space *address_space = swap_address_space(entry); |
118 | 133 | pgoff_t idx = swp_offset(entry); |
| 134 | + XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page)); |
| 135 | + unsigned long i, nr = thp_nr_pages(page); |
| 136 | + void *old; |
119 | 137 | |
120 | 138 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
121 | 139 | VM_BUG_ON_PAGE(PageSwapCache(page), page); |
.. | .. |
124 | 142 | page_ref_add(page, nr); |
125 | 143 | SetPageSwapCache(page); |
126 | 144 | |
127 | | - address_space = swap_address_space(entry); |
128 | | - xa_lock_irq(&address_space->i_pages); |
129 | | - for (i = 0; i < nr; i++) { |
130 | | - set_page_private(page + i, entry.val + i); |
131 | | - error = radix_tree_insert(&address_space->i_pages, |
132 | | - idx + i, page + i); |
133 | | - if (unlikely(error)) |
134 | | - break; |
135 | | - } |
136 | | - if (likely(!error)) { |
| 145 | + do { |
| 146 | + unsigned long nr_shadows = 0; |
| 147 | + |
| 148 | + xas_lock_irq(&xas); |
| 149 | + xas_create_range(&xas); |
| 150 | + if (xas_error(&xas)) |
| 151 | + goto unlock; |
| 152 | + for (i = 0; i < nr; i++) { |
| 153 | + VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); |
| 154 | + old = xas_load(&xas); |
| 155 | + if (xa_is_value(old)) { |
| 156 | + nr_shadows++; |
| 157 | + if (shadowp) |
| 158 | + *shadowp = old; |
| 159 | + } |
| 160 | + set_page_private(page + i, entry.val + i); |
| 161 | + xas_store(&xas, page); |
| 162 | + xas_next(&xas); |
| 163 | + } |
| 164 | + address_space->nrexceptional -= nr_shadows; |
137 | 165 | address_space->nrpages += nr; |
138 | 166 | __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); |
139 | 167 | ADD_CACHE_INFO(add_total, nr); |
140 | | - } else { |
141 | | - /* |
142 | | - * Only the context which have set SWAP_HAS_CACHE flag |
143 | | - * would call add_to_swap_cache(). |
144 | | - * So add_to_swap_cache() doesn't returns -EEXIST. |
145 | | - */ |
146 | | - VM_BUG_ON(error == -EEXIST); |
147 | | - set_page_private(page + i, 0UL); |
148 | | - while (i--) { |
149 | | - radix_tree_delete(&address_space->i_pages, idx + i); |
150 | | - set_page_private(page + i, 0UL); |
151 | | - } |
152 | | - ClearPageSwapCache(page); |
153 | | - page_ref_sub(page, nr); |
154 | | - } |
155 | | - xa_unlock_irq(&address_space->i_pages); |
| 168 | +unlock: |
| 169 | + xas_unlock_irq(&xas); |
| 170 | + } while (xas_nomem(&xas, gfp)); |
156 | 171 | |
157 | | - return error; |
158 | | -} |
| 172 | + if (!xas_error(&xas)) |
| 173 | + return 0; |
159 | 174 | |
160 | | - |
161 | | -int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) |
162 | | -{ |
163 | | - int error; |
164 | | - |
165 | | - error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page)); |
166 | | - if (!error) { |
167 | | - error = __add_to_swap_cache(page, entry); |
168 | | - radix_tree_preload_end(); |
169 | | - } |
170 | | - return error; |
| 175 | + ClearPageSwapCache(page); |
| 176 | + page_ref_sub(page, nr); |
| 177 | + return xas_error(&xas); |
171 | 178 | } |
172 | 179 | |
173 | 180 | /* |
174 | 181 | * This must be called only on pages that have |
175 | 182 | * been verified to be in the swap cache. |
176 | 183 | */ |
177 | | -void __delete_from_swap_cache(struct page *page) |
| 184 | +void __delete_from_swap_cache(struct page *page, |
| 185 | + swp_entry_t entry, void *shadow) |
178 | 186 | { |
179 | | - struct address_space *address_space; |
180 | | - int i, nr = hpage_nr_pages(page); |
181 | | - swp_entry_t entry; |
182 | | - pgoff_t idx; |
| 187 | + struct address_space *address_space = swap_address_space(entry); |
| 188 | + int i, nr = thp_nr_pages(page); |
| 189 | + pgoff_t idx = swp_offset(entry); |
| 190 | + XA_STATE(xas, &address_space->i_pages, idx); |
183 | 191 | |
184 | 192 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
185 | 193 | VM_BUG_ON_PAGE(!PageSwapCache(page), page); |
186 | 194 | VM_BUG_ON_PAGE(PageWriteback(page), page); |
187 | 195 | |
188 | | - entry.val = page_private(page); |
189 | | - address_space = swap_address_space(entry); |
190 | | - idx = swp_offset(entry); |
191 | 196 | for (i = 0; i < nr; i++) { |
192 | | - radix_tree_delete(&address_space->i_pages, idx + i); |
| 197 | + void *entry = xas_store(&xas, shadow); |
| 198 | + VM_BUG_ON_PAGE(entry != page, entry); |
193 | 199 | set_page_private(page + i, 0); |
| 200 | + xas_next(&xas); |
194 | 201 | } |
195 | 202 | ClearPageSwapCache(page); |
| 203 | + if (shadow) |
| 204 | + address_space->nrexceptional += nr; |
196 | 205 | address_space->nrpages -= nr; |
197 | 206 | __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); |
198 | 207 | ADD_CACHE_INFO(del_total, nr); |
.. | .. |
218 | 227 | return 0; |
219 | 228 | |
220 | 229 | /* |
221 | | - * Radix-tree node allocations from PF_MEMALLOC contexts could |
| 230 | + * XArray node allocations from PF_MEMALLOC contexts could |
222 | 231 | * completely exhaust the page allocator. __GFP_NOMEMALLOC |
223 | 232 | * stops emergency reserves from being allocated. |
224 | 233 | * |
.. | .. |
229 | 238 | * Add it to the swap cache. |
230 | 239 | */ |
231 | 240 | err = add_to_swap_cache(page, entry, |
232 | | - __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN); |
233 | | - /* -ENOMEM radix-tree allocation failure */ |
| 241 | + __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); |
234 | 242 | if (err) |
235 | 243 | /* |
236 | 244 | * add_to_swap_cache() doesn't return -EEXIST, so we can safely |
.. | .. |
239 | 247 | goto fail; |
240 | 248 | /* |
241 | 249 | * Normally the page will be dirtied in unmap because its pte should be |
242 | | - * dirty. A special case is MADV_FREE page. The page'e pte could have |
| 250 | + * dirty. A special case is MADV_FREE page. The page's pte could have |
243 | 251 | * dirty bit cleared but the page's SwapBacked bit is still set because |
244 | 252 | * clearing the dirty bit and SwapBacked bit has no lock protected. For |
245 | 253 | * such page, unmap will not set dirty bit for it, so page reclaim will |
.. | .. |
264 | 272 | */ |
265 | 273 | void delete_from_swap_cache(struct page *page) |
266 | 274 | { |
267 | | - swp_entry_t entry; |
268 | | - struct address_space *address_space; |
| 275 | + swp_entry_t entry = { .val = page_private(page) }; |
| 276 | + struct address_space *address_space = swap_address_space(entry); |
269 | 277 | |
270 | | - entry.val = page_private(page); |
271 | | - |
272 | | - address_space = swap_address_space(entry); |
273 | 278 | xa_lock_irq(&address_space->i_pages); |
274 | | - __delete_from_swap_cache(page); |
| 279 | + __delete_from_swap_cache(page, entry, NULL); |
275 | 280 | xa_unlock_irq(&address_space->i_pages); |
276 | 281 | |
277 | 282 | put_swap_page(page, entry); |
278 | | - page_ref_sub(page, hpage_nr_pages(page)); |
| 283 | + page_ref_sub(page, thp_nr_pages(page)); |
| 284 | +} |
| 285 | + |
| 286 | +void clear_shadow_from_swap_cache(int type, unsigned long begin, |
| 287 | + unsigned long end) |
| 288 | +{ |
| 289 | + unsigned long curr = begin; |
| 290 | + void *old; |
| 291 | + |
| 292 | + for (;;) { |
| 293 | + unsigned long nr_shadows = 0; |
| 294 | + swp_entry_t entry = swp_entry(type, curr); |
| 295 | + struct address_space *address_space = swap_address_space(entry); |
| 296 | + XA_STATE(xas, &address_space->i_pages, curr); |
| 297 | + |
| 298 | + xa_lock_irq(&address_space->i_pages); |
| 299 | + xas_for_each(&xas, old, end) { |
| 300 | + if (!xa_is_value(old)) |
| 301 | + continue; |
| 302 | + xas_store(&xas, NULL); |
| 303 | + nr_shadows++; |
| 304 | + } |
| 305 | + address_space->nrexceptional -= nr_shadows; |
| 306 | + xa_unlock_irq(&address_space->i_pages); |
| 307 | + |
| 308 | + /* search the next swapcache until we meet end */ |
| 309 | + curr >>= SWAP_ADDRESS_SPACE_SHIFT; |
| 310 | + curr++; |
| 311 | + curr <<= SWAP_ADDRESS_SPACE_SHIFT; |
| 312 | + if (curr > end) |
| 313 | + break; |
| 314 | + } |
279 | 315 | } |
280 | 316 | |
281 | 317 | /* |
.. | .. |
335 | 371 | unsigned long addr) |
336 | 372 | { |
337 | 373 | struct page *page; |
| 374 | + struct swap_info_struct *si; |
338 | 375 | |
| 376 | + si = get_swap_device(entry); |
| 377 | + if (!si) |
| 378 | + return NULL; |
339 | 379 | page = find_get_page(swap_address_space(entry), swp_offset(entry)); |
| 380 | + put_swap_device(si); |
340 | 381 | |
341 | 382 | INC_CACHE_INFO(find_total); |
342 | 383 | if (page) { |
.. | .. |
375 | 416 | return page; |
376 | 417 | } |
377 | 418 | |
| 419 | +/** |
| 420 | + * find_get_incore_page - Find and get a page from the page or swap caches. |
| 421 | + * @mapping: The address_space to search. |
| 422 | + * @index: The page cache index. |
| 423 | + * |
| 424 | + * This differs from find_get_page() in that it will also look for the |
| 425 | + * page in the swap cache. |
| 426 | + * |
| 427 | + * Return: The found page or %NULL. |
| 428 | + */ |
| 429 | +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) |
| 430 | +{ |
| 431 | + swp_entry_t swp; |
| 432 | + struct swap_info_struct *si; |
| 433 | + struct page *page = find_get_entry(mapping, index); |
| 434 | + |
| 435 | + if (!page) |
| 436 | + return page; |
| 437 | + if (!xa_is_value(page)) |
| 438 | + return find_subpage(page, index); |
| 439 | + if (!shmem_mapping(mapping)) |
| 440 | + return NULL; |
| 441 | + |
| 442 | + swp = radix_to_swp_entry(page); |
| 443 | + /* Prevent swapoff from happening to us */ |
| 444 | + si = get_swap_device(swp); |
| 445 | + if (!si) |
| 446 | + return NULL; |
| 447 | + page = find_get_page(swap_address_space(swp), swp_offset(swp)); |
| 448 | + put_swap_device(si); |
| 449 | + return page; |
| 450 | +} |
| 451 | + |
378 | 452 | struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, |
379 | 453 | struct vm_area_struct *vma, unsigned long addr, |
380 | 454 | bool *new_page_allocated) |
381 | 455 | { |
382 | | - struct page *found_page, *new_page = NULL; |
383 | | - struct address_space *swapper_space = swap_address_space(entry); |
384 | | - int err; |
| 456 | + struct swap_info_struct *si; |
| 457 | + struct page *page; |
| 458 | + void *shadow = NULL; |
| 459 | + |
385 | 460 | *new_page_allocated = false; |
386 | 461 | |
387 | | - do { |
| 462 | + for (;;) { |
| 463 | + int err; |
388 | 464 | /* |
389 | 465 | * First check the swap cache. Since this is normally |
390 | 466 | * called after lookup_swap_cache() failed, re-calling |
391 | 467 | * that would confuse statistics. |
392 | 468 | */ |
393 | | - found_page = find_get_page(swapper_space, swp_offset(entry)); |
394 | | - if (found_page) |
395 | | - break; |
| 469 | + si = get_swap_device(entry); |
| 470 | + if (!si) |
| 471 | + return NULL; |
| 472 | + page = find_get_page(swap_address_space(entry), |
| 473 | + swp_offset(entry)); |
| 474 | + put_swap_device(si); |
| 475 | + if (page) |
| 476 | + return page; |
396 | 477 | |
397 | 478 | /* |
398 | 479 | * Just skip read ahead for unused swap slot. |
.. | .. |
403 | 484 | * else swap_off will be aborted if we return NULL. |
404 | 485 | */ |
405 | 486 | if (!__swp_swapcount(entry) && swap_slot_cache_enabled) |
406 | | - break; |
| 487 | + return NULL; |
407 | 488 | |
408 | 489 | /* |
409 | | - * Get a new page to read into from swap. |
| 490 | + * Get a new page to read into from swap. Allocate it now, |
| 491 | + * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will |
| 492 | + * cause any racers to loop around until we add it to cache. |
410 | 493 | */ |
411 | | - if (!new_page) { |
412 | | - new_page = alloc_page_vma(gfp_mask, vma, addr); |
413 | | - if (!new_page) |
414 | | - break; /* Out of memory */ |
415 | | - } |
416 | | - |
417 | | - /* |
418 | | - * call radix_tree_preload() while we can wait. |
419 | | - */ |
420 | | - err = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK); |
421 | | - if (err) |
422 | | - break; |
| 494 | + page = alloc_page_vma(gfp_mask, vma, addr); |
| 495 | + if (!page) |
| 496 | + return NULL; |
423 | 497 | |
424 | 498 | /* |
425 | 499 | * Swap entry may have been freed since our caller observed it. |
426 | 500 | */ |
427 | 501 | err = swapcache_prepare(entry); |
428 | | - if (err == -EEXIST) { |
429 | | - radix_tree_preload_end(); |
430 | | - /* |
431 | | - * We might race against get_swap_page() and stumble |
432 | | - * across a SWAP_HAS_CACHE swap_map entry whose page |
433 | | - * has not been brought into the swapcache yet. |
434 | | - */ |
435 | | - cond_resched(); |
436 | | - continue; |
437 | | - } |
438 | | - if (err) { /* swp entry is obsolete ? */ |
439 | | - radix_tree_preload_end(); |
| 502 | + if (!err) |
440 | 503 | break; |
441 | | - } |
442 | 504 | |
443 | | - /* May fail (-ENOMEM) if radix-tree node allocation failed. */ |
444 | | - __SetPageLocked(new_page); |
445 | | - __SetPageSwapBacked(new_page); |
446 | | - err = __add_to_swap_cache(new_page, entry); |
447 | | - if (likely(!err)) { |
448 | | - radix_tree_preload_end(); |
449 | | - /* |
450 | | - * Initiate read into locked page and return. |
451 | | - */ |
452 | | - SetPageWorkingset(new_page); |
453 | | - lru_cache_add_anon(new_page); |
454 | | - *new_page_allocated = true; |
455 | | - return new_page; |
456 | | - } |
457 | | - radix_tree_preload_end(); |
458 | | - __ClearPageLocked(new_page); |
| 505 | + put_page(page); |
| 506 | + if (err != -EEXIST) |
| 507 | + return NULL; |
| 508 | + |
459 | 509 | /* |
460 | | - * add_to_swap_cache() doesn't return -EEXIST, so we can safely |
461 | | - * clear SWAP_HAS_CACHE flag. |
| 510 | + * We might race against __delete_from_swap_cache(), and |
| 511 | + * stumble across a swap_map entry whose SWAP_HAS_CACHE |
| 512 | + * has not yet been cleared. Or race against another |
| 513 | + * __read_swap_cache_async(), which has set SWAP_HAS_CACHE |
| 514 | + * in swap_map, but not yet added its page to swap cache. |
462 | 515 | */ |
463 | | - put_swap_page(new_page, entry); |
464 | | - } while (err != -ENOMEM); |
| 516 | + schedule_timeout_uninterruptible(1); |
| 517 | + } |
465 | 518 | |
466 | | - if (new_page) |
467 | | - put_page(new_page); |
468 | | - return found_page; |
| 519 | + /* |
| 520 | + * The swap entry is ours to swap in. Prepare the new page. |
| 521 | + */ |
| 522 | + |
| 523 | + __SetPageLocked(page); |
| 524 | + __SetPageSwapBacked(page); |
| 525 | + |
| 526 | + /* May fail (-ENOMEM) if XArray node allocation failed. */ |
| 527 | + if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) { |
| 528 | + put_swap_page(page, entry); |
| 529 | + goto fail_unlock; |
| 530 | + } |
| 531 | + |
| 532 | + if (mem_cgroup_charge(page, NULL, gfp_mask)) { |
| 533 | + delete_from_swap_cache(page); |
| 534 | + goto fail_unlock; |
| 535 | + } |
| 536 | + |
| 537 | + if (shadow) |
| 538 | + workingset_refault(page, shadow); |
| 539 | + |
| 540 | + /* Caller will initiate read into locked page */ |
| 541 | + SetPageWorkingset(page); |
| 542 | + lru_cache_add(page); |
| 543 | + *new_page_allocated = true; |
| 544 | + return page; |
| 545 | + |
| 546 | +fail_unlock: |
| 547 | + unlock_page(page); |
| 548 | + put_page(page); |
| 549 | + return NULL; |
469 | 550 | } |
470 | 551 | |
471 | 552 | /* |
.. | .. |
565 | 646 | * the readahead. |
566 | 647 | * |
567 | 648 | * Caller must hold down_read on the vma->vm_mm if vmf->vma is not NULL. |
| 649 | + * This is needed to ensure the VMA will not be freed in our back. In the case |
| 650 | + * of the speculative page fault handler, this cannot happen, even if we don't |
| 651 | + * hold the mmap_sem. Callees are assumed to take care of reading VMA's fields |
| 652 | + * using READ_ONCE() to read consistent values. |
568 | 653 | */ |
569 | 654 | struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, |
570 | 655 | struct vm_fault *vmf) |
.. | .. |
583 | 668 | mask = swapin_nr_pages(offset) - 1; |
584 | 669 | if (!mask) |
585 | 670 | goto skip; |
| 671 | + |
| 672 | + /* Test swap type to make sure the dereference is safe */ |
| 673 | + if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) { |
| 674 | + struct inode *inode = si->swap_file->f_mapping->host; |
| 675 | + if (inode_read_congested(inode)) |
| 676 | + goto skip; |
| 677 | + } |
586 | 678 | |
587 | 679 | do_poll = false; |
588 | 680 | /* Read a page_cluster sized and aligned cluster around offset. */ |
.. | .. |
628 | 720 | return -ENOMEM; |
629 | 721 | for (i = 0; i < nr; i++) { |
630 | 722 | space = spaces + i; |
631 | | - INIT_RADIX_TREE(&space->i_pages, GFP_ATOMIC|__GFP_NOWARN); |
| 723 | + xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ); |
632 | 724 | atomic_set(&space->i_mmap_writable, 0); |
633 | 725 | space->a_ops = &swap_aops; |
634 | 726 | /* swap cache doesn't use writeback related tags */ |
635 | 727 | mapping_set_no_writeback_tags(space); |
636 | 728 | } |
637 | 729 | nr_swapper_spaces[type] = nr; |
638 | | - rcu_assign_pointer(swapper_spaces[type], spaces); |
| 730 | + swapper_spaces[type] = spaces; |
639 | 731 | |
640 | 732 | return 0; |
641 | 733 | } |
642 | 734 | |
643 | 735 | void exit_swap_address_space(unsigned int type) |
644 | 736 | { |
645 | | - struct address_space *spaces; |
646 | | - |
647 | | - spaces = swapper_spaces[type]; |
| 737 | + kvfree(swapper_spaces[type]); |
648 | 738 | nr_swapper_spaces[type] = 0; |
649 | | - rcu_assign_pointer(swapper_spaces[type], NULL); |
650 | | - synchronize_rcu(); |
651 | | - kvfree(spaces); |
| 739 | + swapper_spaces[type] = NULL; |
652 | 740 | } |
653 | 741 | |
654 | 742 | static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma, |
.. | .. |
658 | 746 | unsigned long *start, |
659 | 747 | unsigned long *end) |
660 | 748 | { |
661 | | - *start = max3(lpfn, PFN_DOWN(vma->vm_start), |
| 749 | + *start = max3(lpfn, PFN_DOWN(READ_ONCE(vma->vm_start)), |
662 | 750 | PFN_DOWN(faddr & PMD_MASK)); |
663 | | - *end = min3(rpfn, PFN_DOWN(vma->vm_end), |
| 751 | + *end = min3(rpfn, PFN_DOWN(READ_ONCE(vma->vm_end)), |
664 | 752 | PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); |
665 | 753 | } |
666 | 754 | |
.. | .. |
732 | 820 | pte_unmap(orig_pte); |
733 | 821 | } |
734 | 822 | |
| 823 | +/** |
| 824 | + * swap_vma_readahead - swap in pages in hope we need them soon |
| 825 | + * @fentry: swap entry of this memory |
| 826 | + * @gfp_mask: memory allocation flags |
| 827 | + * @vmf: fault information |
| 828 | + * |
| 829 | + * Returns the struct page for entry and addr, after queueing swapin. |
| 830 | + * |
| 831 | + * Primitive swap readahead code. We simply read in a few pages whose |
| 832 | + * virtual addresses are around the fault address in the same vma. |
| 833 | + * |
| 834 | + * Caller must hold read mmap_lock if vmf->vma is not NULL. |
| 835 | + * |
| 836 | + */ |
735 | 837 | static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, |
736 | 838 | struct vm_fault *vmf) |
737 | 839 | { |
---|