.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
1 | 2 | /* |
2 | 3 | * linux/mm/swap.c |
3 | 4 | * |
.. | .. |
7 | 8 | /* |
8 | 9 | * This file contains the default values for the operation of the |
9 | 10 | * Linux VM subsystem. Fine-tuning documentation can be found in |
10 | | - * Documentation/sysctl/vm.txt. |
| 11 | + * Documentation/admin-guide/sysctl/vm.rst. |
11 | 12 | * Started 18.12.91 |
12 | 13 | * Swap aging added 23.2.95, Stephen Tweedie. |
13 | 14 | * Buffermem limits added 12.3.98, Rik van Riel. |
.. | .. |
29 | 30 | #include <linux/cpu.h> |
30 | 31 | #include <linux/notifier.h> |
31 | 32 | #include <linux/backing-dev.h> |
32 | | -#include <linux/memremap.h> |
33 | 33 | #include <linux/memcontrol.h> |
34 | 34 | #include <linux/gfp.h> |
35 | 35 | #include <linux/uio.h> |
36 | | -#include <linux/locallock.h> |
37 | 36 | #include <linux/hugetlb.h> |
38 | 37 | #include <linux/page_idle.h> |
| 38 | +#include <linux/local_lock.h> |
| 39 | +#include <linux/buffer_head.h> |
39 | 40 | |
40 | 41 | #include "internal.h" |
41 | 42 | |
.. | .. |
45 | 46 | /* How many pages do we try to swap or page in/out together? */ |
46 | 47 | int page_cluster; |
47 | 48 | |
48 | | -static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); |
49 | | -static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); |
50 | | -static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs); |
51 | | -static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs); |
| 49 | +/* Protecting only lru_rotate.pvec which requires disabling interrupts */ |
| 50 | +struct lru_rotate { |
| 51 | + local_lock_t lock; |
| 52 | + struct pagevec pvec; |
| 53 | +}; |
| 54 | +static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = { |
| 55 | + .lock = INIT_LOCAL_LOCK(lock), |
| 56 | +}; |
| 57 | + |
| 58 | +/* |
| 59 | + * The following struct pagevec are grouped together because they are protected |
| 60 | + * by disabling preemption (and interrupts remain enabled). |
| 61 | + */ |
| 62 | +struct lru_pvecs { |
| 63 | + local_lock_t lock; |
| 64 | + struct pagevec lru_add; |
| 65 | + struct pagevec lru_deactivate_file; |
| 66 | + struct pagevec lru_deactivate; |
| 67 | + struct pagevec lru_lazyfree; |
| 68 | + struct pagevec lru_lazyfree_movetail; |
52 | 69 | #ifdef CONFIG_SMP |
53 | | -static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); |
| 70 | + struct pagevec activate_page; |
54 | 71 | #endif |
55 | | -static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); |
56 | | -DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); |
| 72 | +}; |
| 73 | +static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { |
| 74 | + .lock = INIT_LOCAL_LOCK(lock), |
| 75 | +}; |
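Note: the hunk above deliberately splits the per-CPU state into two locks. lru_rotate.pvec is also touched from interrupt context (writeback completion), so it needs the IRQ-disabling form of the local lock, while the lru_pvecs members are only used from process context, where disabling preemption is enough. A condensed sketch of the two access idioms this implies, using the names from the hunk (illustrative only; pvec and flags are assumed local variables):

    /* process-context pagevecs: preemption protection is enough */
    local_lock(&lru_pvecs.lock);
    pvec = this_cpu_ptr(&lru_pvecs.lru_add);
    /* ... fill or drain the pagevec ... */
    local_unlock(&lru_pvecs.lock);

    /* lru_rotate.pvec is shared with interrupt context */
    local_lock_irqsave(&lru_rotate.lock, flags);
    pvec = this_cpu_ptr(&lru_rotate.pvec);
    /* ... fill or drain the pagevec ... */
    local_unlock_irqrestore(&lru_rotate.lock, flags);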
57 | 76 | |
58 | 77 | /* |
59 | 78 | * This path almost never happens for VM activity - pages are normally |
.. | .. |
62 | 81 | static void __page_cache_release(struct page *page) |
63 | 82 | { |
64 | 83 | if (PageLRU(page)) { |
65 | | - struct zone *zone = page_zone(page); |
| 84 | + pg_data_t *pgdat = page_pgdat(page); |
66 | 85 | struct lruvec *lruvec; |
67 | 86 | unsigned long flags; |
68 | 87 | |
69 | | - spin_lock_irqsave(zone_lru_lock(zone), flags); |
70 | | - lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat); |
| 88 | + spin_lock_irqsave(&pgdat->lru_lock, flags); |
| 89 | + lruvec = mem_cgroup_page_lruvec(page, pgdat); |
71 | 90 | VM_BUG_ON_PAGE(!PageLRU(page), page); |
72 | 91 | __ClearPageLRU(page); |
73 | 92 | del_page_from_lru_list(page, lruvec, page_off_lru(page)); |
74 | | - spin_unlock_irqrestore(zone_lru_lock(zone), flags); |
| 93 | + spin_unlock_irqrestore(&pgdat->lru_lock, flags); |
75 | 94 | } |
76 | 95 | __ClearPageWaiters(page); |
77 | | - mem_cgroup_uncharge(page); |
78 | 96 | } |
79 | 97 | |
80 | 98 | static void __put_single_page(struct page *page) |
81 | 99 | { |
82 | 100 | __page_cache_release(page); |
| 101 | + mem_cgroup_uncharge(page); |
83 | 102 | free_unref_page(page); |
84 | 103 | } |
85 | 104 | |
86 | 105 | static void __put_compound_page(struct page *page) |
87 | 106 | { |
88 | | - compound_page_dtor *dtor; |
89 | | - |
90 | 107 | /* |
91 | 108 | * __page_cache_release() is supposed to be called for thp, not for |
92 | 109 | * hugetlb. This is because hugetlb page does never have PageLRU set |
.. | .. |
95 | 112 | */ |
96 | 113 | if (!PageHuge(page)) |
97 | 114 | __page_cache_release(page); |
98 | | - dtor = get_compound_page_dtor(page); |
99 | | - (*dtor)(page); |
| 115 | + destroy_compound_page(page); |
100 | 116 | } |
101 | 117 | |
102 | 118 | void __put_page(struct page *page) |
.. | .. |
130 | 146 | while (!list_empty(pages)) { |
131 | 147 | struct page *victim; |
132 | 148 | |
133 | | - victim = list_entry(pages->prev, struct page, lru); |
| 149 | + victim = lru_to_page(pages); |
134 | 150 | list_del(&victim->lru); |
135 | 151 | put_page(victim); |
136 | 152 | } |
.. | .. |
227 | 243 | del_page_from_lru_list(page, lruvec, page_lru(page)); |
228 | 244 | ClearPageActive(page); |
229 | 245 | add_page_to_lru_list_tail(page, lruvec, page_lru(page)); |
230 | | - (*pgmoved)++; |
| 246 | + (*pgmoved) += thp_nr_pages(page); |
231 | 247 | } |
232 | 248 | } |
233 | 249 | |
.. | .. |
243 | 259 | __count_vm_events(PGROTATED, pgmoved); |
244 | 260 | } |
245 | 261 | |
| 262 | +/* return true if pagevec needs to drain */ |
| 263 | +static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page) |
| 264 | +{ |
| 265 | + bool ret = false; |
| 266 | + |
| 267 | + if (!pagevec_add(pvec, page) || PageCompound(page) || |
| 268 | + lru_cache_disabled()) |
| 269 | + ret = true; |
| 270 | + |
| 271 | + return ret; |
| 272 | +} |
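Every pagevec user in this patch calls the new helper the same way: take a reference on the page, try to stash it in the per-CPU pagevec, and drain immediately when the helper reports that the pagevec is full, the page is compound, or the LRU cache has been disabled. Condensed caller sketch (this is essentially the body of lru_cache_add() further down):

    get_page(page);
    local_lock(&lru_pvecs.lock);
    pvec = this_cpu_ptr(&lru_pvecs.lru_add);
    if (pagevec_add_and_need_flush(pvec, page))
        __pagevec_lru_add(pvec);
    local_unlock(&lru_pvecs.lock);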
| 273 | + |
246 | 274 | /* |
247 | 275 | * Writeback is about to end against a page which has been marked for immediate |
248 | 276 | * reclaim. If it still appears to be reclaimable, move it to the tail of the |
.. | .. |
256 | 284 | unsigned long flags; |
257 | 285 | |
258 | 286 | get_page(page); |
259 | | - local_lock_irqsave(rotate_lock, flags); |
260 | | - pvec = this_cpu_ptr(&lru_rotate_pvecs); |
261 | | - if (!pagevec_add(pvec, page) || PageCompound(page)) |
| 287 | + local_lock_irqsave(&lru_rotate.lock, flags); |
| 288 | + pvec = this_cpu_ptr(&lru_rotate.pvec); |
| 289 | + if (pagevec_add_and_need_flush(pvec, page)) |
262 | 290 | pagevec_move_tail(pvec); |
263 | | - local_unlock_irqrestore(rotate_lock, flags); |
| 291 | + local_unlock_irqrestore(&lru_rotate.lock, flags); |
264 | 292 | } |
265 | 293 | } |
266 | 294 | |
267 | | -static void update_page_reclaim_stat(struct lruvec *lruvec, |
268 | | - int file, int rotated) |
| 295 | +void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) |
269 | 296 | { |
270 | | - struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; |
| 297 | + do { |
| 298 | + unsigned long lrusize; |
271 | 299 | |
272 | | - reclaim_stat->recent_scanned[file]++; |
273 | | - if (rotated) |
274 | | - reclaim_stat->recent_rotated[file]++; |
| 300 | + /* Record cost event */ |
| 301 | + if (file) |
| 302 | + lruvec->file_cost += nr_pages; |
| 303 | + else |
| 304 | + lruvec->anon_cost += nr_pages; |
| 305 | + |
| 306 | + /* |
| 307 | + * Decay previous events |
| 308 | + * |
| 309 | + * Because workloads change over time (and to avoid |
| 310 | + * overflow) we keep these statistics as a floating |
| 311 | + * average, which ends up weighing recent refaults |
| 312 | + * more than old ones. |
| 313 | + */ |
| 314 | + lrusize = lruvec_page_state(lruvec, NR_INACTIVE_ANON) + |
| 315 | + lruvec_page_state(lruvec, NR_ACTIVE_ANON) + |
| 316 | + lruvec_page_state(lruvec, NR_INACTIVE_FILE) + |
| 317 | + lruvec_page_state(lruvec, NR_ACTIVE_FILE); |
| 318 | + |
| 319 | + if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) { |
| 320 | + lruvec->file_cost /= 2; |
| 321 | + lruvec->anon_cost /= 2; |
| 322 | + } |
| 323 | + } while ((lruvec = parent_lruvec(lruvec))); |
| 324 | +} |
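As a hypothetical worked example of the decay above: with lrusize = 1,000,000 pages the threshold is 250,000; if file_cost reaches 200,000 and anon_cost 60,000, the sum exceeds the threshold and both are halved to 100,000 and 30,000, so the next batch of refaults shifts the file/anon balance more strongly than an equally sized batch recorded before the halving. That is the floating-average behaviour the comment describes, applied at each level of the memcg hierarchy via parent_lruvec().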
| 325 | + |
| 326 | +void lru_note_cost_page(struct page *page) |
| 327 | +{ |
| 328 | + lru_note_cost(mem_cgroup_page_lruvec(page, page_pgdat(page)), |
| 329 | + page_is_file_lru(page), thp_nr_pages(page)); |
275 | 330 | } |
276 | 331 | |
277 | 332 | static void __activate_page(struct page *page, struct lruvec *lruvec, |
278 | 333 | void *arg) |
279 | 334 | { |
280 | 335 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { |
281 | | - int file = page_is_file_cache(page); |
282 | 336 | int lru = page_lru_base_type(page); |
| 337 | + int nr_pages = thp_nr_pages(page); |
283 | 338 | |
284 | 339 | del_page_from_lru_list(page, lruvec, lru); |
285 | 340 | SetPageActive(page); |
.. | .. |
287 | 342 | add_page_to_lru_list(page, lruvec, lru); |
288 | 343 | trace_mm_lru_activate(page); |
289 | 344 | |
290 | | - __count_vm_event(PGACTIVATE); |
291 | | - update_page_reclaim_stat(lruvec, file, 1); |
| 345 | + __count_vm_events(PGACTIVATE, nr_pages); |
| 346 | + __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, |
| 347 | + nr_pages); |
292 | 348 | } |
293 | 349 | } |
294 | 350 | |
295 | 351 | #ifdef CONFIG_SMP |
296 | 352 | static void activate_page_drain(int cpu) |
297 | 353 | { |
298 | | - struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu); |
| 354 | + struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu); |
299 | 355 | |
300 | 356 | if (pagevec_count(pvec)) |
301 | 357 | pagevec_lru_move_fn(pvec, __activate_page, NULL); |
.. | .. |
303 | 359 | |
304 | 360 | static bool need_activate_page_drain(int cpu) |
305 | 361 | { |
306 | | - return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0; |
| 362 | + return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; |
307 | 363 | } |
308 | 364 | |
309 | | -void activate_page(struct page *page) |
| 365 | +static void activate_page(struct page *page) |
310 | 366 | { |
311 | 367 | page = compound_head(page); |
312 | 368 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { |
313 | | - struct pagevec *pvec = &get_locked_var(swapvec_lock, |
314 | | - activate_page_pvecs); |
| 369 | + struct pagevec *pvec; |
315 | 370 | |
| 371 | + local_lock(&lru_pvecs.lock); |
| 372 | + pvec = this_cpu_ptr(&lru_pvecs.activate_page); |
316 | 373 | get_page(page); |
317 | | - if (!pagevec_add(pvec, page) || PageCompound(page)) |
| 374 | + if (pagevec_add_and_need_flush(pvec, page)) |
318 | 375 | pagevec_lru_move_fn(pvec, __activate_page, NULL); |
319 | | - put_locked_var(swapvec_lock, activate_page_pvecs); |
| 376 | + local_unlock(&lru_pvecs.lock); |
320 | 377 | } |
321 | 378 | } |
322 | 379 | |
.. | .. |
325 | 382 | { |
326 | 383 | } |
327 | 384 | |
328 | | -void activate_page(struct page *page) |
| 385 | +static void activate_page(struct page *page) |
329 | 386 | { |
330 | | - struct zone *zone = page_zone(page); |
| 387 | + pg_data_t *pgdat = page_pgdat(page); |
331 | 388 | |
332 | 389 | page = compound_head(page); |
333 | | - spin_lock_irq(zone_lru_lock(zone)); |
334 | | - __activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL); |
335 | | - spin_unlock_irq(zone_lru_lock(zone)); |
| 390 | + spin_lock_irq(&pgdat->lru_lock); |
| 391 | + __activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL); |
| 392 | + spin_unlock_irq(&pgdat->lru_lock); |
336 | 393 | } |
337 | 394 | #endif |
338 | 395 | |
339 | 396 | static void __lru_cache_activate_page(struct page *page) |
340 | 397 | { |
341 | | - struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); |
| 398 | + struct pagevec *pvec; |
342 | 399 | int i; |
| 400 | + |
| 401 | + local_lock(&lru_pvecs.lock); |
| 402 | + pvec = this_cpu_ptr(&lru_pvecs.lru_add); |
343 | 403 | |
344 | 404 | /* |
345 | 405 | * Search backwards on the optimistic assumption that the page being |
.. | .. |
360 | 420 | } |
361 | 421 | } |
362 | 422 | |
363 | | - put_locked_var(swapvec_lock, lru_add_pvec); |
| 423 | + local_unlock(&lru_pvecs.lock); |
364 | 424 | } |
365 | 425 | |
366 | 426 | /* |
.. | .. |
376 | 436 | void mark_page_accessed(struct page *page) |
377 | 437 | { |
378 | 438 | page = compound_head(page); |
379 | | - if (!PageActive(page) && !PageUnevictable(page) && |
380 | | - PageReferenced(page)) { |
381 | 439 | |
| 440 | + trace_android_vh_mark_page_accessed(page); |
| 441 | + if (!PageReferenced(page)) { |
| 442 | + SetPageReferenced(page); |
| 443 | + } else if (PageUnevictable(page)) { |
| 444 | + /* |
| 445 | + * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, |
| 446 | + * this list is never rotated or maintained, so marking an |
| 447 | + * evictable page accessed has no effect. |
| 448 | + */ |
| 449 | + } else if (!PageActive(page)) { |
382 | 450 | /* |
383 | 451 | * If the page is on the LRU, queue it for activation via |
384 | | - * activate_page_pvecs. Otherwise, assume the page is on a |
| 452 | + * lru_pvecs.activate_page. Otherwise, assume the page is on a |
385 | 453 | * pagevec, mark it active and it'll be moved to the active |
386 | 454 | * LRU on the next drain. |
387 | 455 | */ |
.. | .. |
390 | 458 | else |
391 | 459 | __lru_cache_activate_page(page); |
392 | 460 | ClearPageReferenced(page); |
393 | | - if (page_is_file_cache(page)) |
394 | | - workingset_activation(page); |
395 | | - } else if (!PageReferenced(page)) { |
396 | | - SetPageReferenced(page); |
| 461 | + workingset_activation(page); |
397 | 462 | } |
398 | 463 | if (page_is_idle(page)) |
399 | 464 | clear_page_idle(page); |
400 | 465 | } |
401 | 466 | EXPORT_SYMBOL(mark_page_accessed); |
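The rewritten mark_page_accessed() is effectively a small state machine: the first touch only sets PG_referenced, a touch on an already-referenced, inactive, evictable page queues it for activation (and records a workingset activation), and unevictable pages are ignored. A hedged sketch of the progression for an inactive LRU page (illustrative; assumes nothing else touches the page in between):

    mark_page_accessed(page);  /* 1st touch: PG_referenced gets set        */
    mark_page_accessed(page);  /* 2nd touch: queued for the active list,   */
                               /*            PG_referenced cleared         */
    mark_page_accessed(page);  /* 3rd touch: PG_referenced set again       */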
402 | | - |
403 | | -static void __lru_cache_add(struct page *page) |
404 | | -{ |
405 | | - struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); |
406 | | - |
407 | | - get_page(page); |
408 | | - if (!pagevec_add(pvec, page) || PageCompound(page)) |
409 | | - __pagevec_lru_add(pvec); |
410 | | - put_locked_var(swapvec_lock, lru_add_pvec); |
411 | | -} |
412 | | - |
413 | | -/** |
414 | | - * lru_cache_add_anon - add a page to the page lists |
415 | | - * @page: the page to add |
416 | | - */ |
417 | | -void lru_cache_add_anon(struct page *page) |
418 | | -{ |
419 | | - if (PageActive(page)) |
420 | | - ClearPageActive(page); |
421 | | - __lru_cache_add(page); |
422 | | -} |
423 | | - |
424 | | -void lru_cache_add_file(struct page *page) |
425 | | -{ |
426 | | - if (PageActive(page)) |
427 | | - ClearPageActive(page); |
428 | | - __lru_cache_add(page); |
429 | | -} |
430 | | -EXPORT_SYMBOL(lru_cache_add_file); |
431 | 467 | |
432 | 468 | /** |
433 | 469 | * lru_cache_add - add a page to a page list |
.. | .. |
440 | 476 | */ |
441 | 477 | void lru_cache_add(struct page *page) |
442 | 478 | { |
| 479 | + struct pagevec *pvec; |
| 480 | + |
443 | 481 | VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); |
444 | 482 | VM_BUG_ON_PAGE(PageLRU(page), page); |
445 | | - __lru_cache_add(page); |
| 483 | + |
| 484 | + get_page(page); |
| 485 | + local_lock(&lru_pvecs.lock); |
| 486 | + pvec = this_cpu_ptr(&lru_pvecs.lru_add); |
| 487 | + if (pagevec_add_and_need_flush(pvec, page)) |
| 488 | + __pagevec_lru_add(pvec); |
| 489 | + local_unlock(&lru_pvecs.lock); |
446 | 490 | } |
| 491 | +EXPORT_SYMBOL(lru_cache_add); |
447 | 492 | |
448 | 493 | /** |
449 | | - * lru_cache_add_active_or_unevictable |
| 494 | + * lru_cache_add_inactive_or_unevictable |
450 | 495 | * @page: the page to be added to LRU |
451 | 496 | * @vma: vma in which page is mapped for determining reclaimability |
452 | 497 | * |
453 | | - * Place @page on the active or unevictable LRU list, depending on its |
454 | | - * evictability. Note that if the page is not evictable, it goes |
455 | | - * directly back onto it's zone's unevictable list, it does NOT use a |
456 | | - * per cpu pagevec. |
| 498 | + * Place @page on the inactive or unevictable LRU list, depending on its |
| 499 | + * evictability. |
457 | 500 | */ |
458 | | -void lru_cache_add_active_or_unevictable(struct page *page, |
459 | | - struct vm_area_struct *vma) |
| 501 | +void __lru_cache_add_inactive_or_unevictable(struct page *page, |
| 502 | + unsigned long vma_flags) |
460 | 503 | { |
| 504 | + bool unevictable; |
| 505 | + |
461 | 506 | VM_BUG_ON_PAGE(PageLRU(page), page); |
462 | 507 | |
463 | | - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) |
464 | | - SetPageActive(page); |
465 | | - else if (!TestSetPageMlocked(page)) { |
| 508 | + unevictable = (vma_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; |
| 509 | + if (unlikely(unevictable) && !TestSetPageMlocked(page)) { |
| 510 | + int nr_pages = thp_nr_pages(page); |
466 | 511 | /* |
467 | 512 | * We use the irq-unsafe __mod_zone_page_stat because this |
468 | 513 | * counter is not modified from interrupt context, and the pte |
469 | 514 | * lock is held(spinlock), which implies preemption disabled. |
470 | 515 | */ |
471 | | - __mod_zone_page_state(page_zone(page), NR_MLOCK, |
472 | | - hpage_nr_pages(page)); |
473 | | - count_vm_event(UNEVICTABLE_PGMLOCKED); |
| 516 | + __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); |
| 517 | + count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); |
474 | 518 | } |
475 | 519 | lru_cache_add(page); |
476 | 520 | } |
.. | .. |
499 | 543 | static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, |
500 | 544 | void *arg) |
501 | 545 | { |
502 | | - int lru, file; |
| 546 | + int lru; |
503 | 547 | bool active; |
| 548 | + int nr_pages = thp_nr_pages(page); |
504 | 549 | |
505 | 550 | if (!PageLRU(page)) |
506 | 551 | return; |
.. | .. |
513 | 558 | return; |
514 | 559 | |
515 | 560 | active = PageActive(page); |
516 | | - file = page_is_file_cache(page); |
517 | 561 | lru = page_lru_base_type(page); |
518 | 562 | |
519 | 563 | del_page_from_lru_list(page, lruvec, lru + active); |
520 | 564 | ClearPageActive(page); |
521 | 565 | ClearPageReferenced(page); |
522 | | - add_page_to_lru_list(page, lruvec, lru); |
523 | 566 | |
524 | 567 | if (PageWriteback(page) || PageDirty(page)) { |
525 | 568 | /* |
.. | .. |
527 | 570 | * It can make readahead confusing. But race window |
528 | 571 | * is _really_ small and it's non-critical problem. |
529 | 572 | */ |
| 573 | + add_page_to_lru_list(page, lruvec, lru); |
530 | 574 | SetPageReclaim(page); |
531 | 575 | } else { |
532 | 576 | /* |
533 | 577 | * The page's writeback ends up during pagevec |
534 | 578 | * We moves tha page into tail of inactive. |
535 | 579 | */ |
536 | | - list_move_tail(&page->lru, &lruvec->lists[lru]); |
537 | | - __count_vm_event(PGROTATED); |
| 580 | + add_page_to_lru_list_tail(page, lruvec, lru); |
| 581 | + __count_vm_events(PGROTATED, nr_pages); |
538 | 582 | } |
539 | 583 | |
540 | | - if (active) |
541 | | - __count_vm_event(PGDEACTIVATE); |
542 | | - update_page_reclaim_stat(lruvec, file, 0); |
| 584 | + if (active) { |
| 585 | + __count_vm_events(PGDEACTIVATE, nr_pages); |
| 586 | + __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, |
| 587 | + nr_pages); |
| 588 | + } |
543 | 589 | } |
544 | 590 | |
| 591 | +static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, |
| 592 | + void *arg) |
| 593 | +{ |
| 594 | + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { |
| 595 | + int lru = page_lru_base_type(page); |
| 596 | + int nr_pages = thp_nr_pages(page); |
| 597 | + |
| 598 | + del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); |
| 599 | + ClearPageActive(page); |
| 600 | + ClearPageReferenced(page); |
| 601 | + add_page_to_lru_list(page, lruvec, lru); |
| 602 | + |
| 603 | + __count_vm_events(PGDEACTIVATE, nr_pages); |
| 604 | + __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, |
| 605 | + nr_pages); |
| 606 | + } |
| 607 | +} |
545 | 608 | |
546 | 609 | static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, |
547 | 610 | void *arg) |
.. | .. |
549 | 612 | if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && |
550 | 613 | !PageSwapCache(page) && !PageUnevictable(page)) { |
551 | 614 | bool active = PageActive(page); |
| 615 | + int nr_pages = thp_nr_pages(page); |
552 | 616 | |
553 | 617 | del_page_from_lru_list(page, lruvec, |
554 | 618 | LRU_INACTIVE_ANON + active); |
555 | 619 | ClearPageActive(page); |
556 | 620 | ClearPageReferenced(page); |
557 | 621 | /* |
558 | | - * lazyfree pages are clean anonymous pages. They have |
559 | | - * SwapBacked flag cleared to distinguish normal anonymous |
560 | | - * pages |
| 622 | + * Lazyfree pages are clean anonymous pages. They have |
| 623 | + * PG_swapbacked flag cleared, to distinguish them from normal |
| 624 | + * anonymous pages |
561 | 625 | */ |
562 | 626 | ClearPageSwapBacked(page); |
563 | 627 | add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); |
564 | 628 | |
565 | | - __count_vm_events(PGLAZYFREE, hpage_nr_pages(page)); |
566 | | - count_memcg_page_event(page, PGLAZYFREE); |
567 | | - update_page_reclaim_stat(lruvec, 1, 0); |
| 629 | + __count_vm_events(PGLAZYFREE, nr_pages); |
| 630 | + __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, |
| 631 | + nr_pages); |
| 632 | + } |
| 633 | +} |
| 634 | + |
| 635 | +static void lru_lazyfree_movetail_fn(struct page *page, struct lruvec *lruvec, |
| 636 | + void *arg) |
| 637 | +{ |
| 638 | + bool *add_to_tail = (bool *)arg; |
| 639 | + |
| 640 | + if (PageLRU(page) && !PageUnevictable(page) && PageSwapBacked(page) && |
| 641 | + !PageSwapCache(page)) { |
| 642 | + bool active = PageActive(page); |
| 643 | + |
| 644 | + del_page_from_lru_list(page, lruvec, |
| 645 | + LRU_INACTIVE_ANON + active); |
| 646 | + ClearPageActive(page); |
| 647 | + ClearPageReferenced(page); |
| 648 | + if (add_to_tail && *add_to_tail) |
| 649 | + add_page_to_lru_list_tail(page, lruvec, LRU_INACTIVE_FILE); |
| 650 | + else |
| 651 | + add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); |
568 | 652 | } |
569 | 653 | } |
570 | 654 | |
.. | .. |
575 | 659 | */ |
576 | 660 | void lru_add_drain_cpu(int cpu) |
577 | 661 | { |
578 | | - struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu); |
| 662 | + struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu); |
579 | 663 | |
580 | 664 | if (pagevec_count(pvec)) |
581 | 665 | __pagevec_lru_add(pvec); |
582 | 666 | |
583 | | - pvec = &per_cpu(lru_rotate_pvecs, cpu); |
584 | | - if (pagevec_count(pvec)) { |
| 667 | + pvec = &per_cpu(lru_rotate.pvec, cpu); |
| 668 | + /* Disabling interrupts below acts as a compiler barrier. */ |
| 669 | + if (data_race(pagevec_count(pvec))) { |
585 | 670 | unsigned long flags; |
586 | 671 | |
587 | 672 | /* No harm done if a racing interrupt already did this */ |
588 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
589 | | - local_lock_irqsave_on(rotate_lock, flags, cpu); |
| 673 | + local_lock_irqsave(&lru_rotate.lock, flags); |
590 | 674 | pagevec_move_tail(pvec); |
591 | | - local_unlock_irqrestore_on(rotate_lock, flags, cpu); |
592 | | -#else |
593 | | - local_lock_irqsave(rotate_lock, flags); |
594 | | - pagevec_move_tail(pvec); |
595 | | - local_unlock_irqrestore(rotate_lock, flags); |
596 | | -#endif |
| 675 | + local_unlock_irqrestore(&lru_rotate.lock, flags); |
597 | 676 | } |
598 | 677 | |
599 | | - pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); |
| 678 | + pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu); |
600 | 679 | if (pagevec_count(pvec)) |
601 | 680 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); |
602 | 681 | |
603 | | - pvec = &per_cpu(lru_lazyfree_pvecs, cpu); |
| 682 | + pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu); |
| 683 | + if (pagevec_count(pvec)) |
| 684 | + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); |
| 685 | + |
| 686 | + pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu); |
604 | 687 | if (pagevec_count(pvec)) |
605 | 688 | pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); |
| 689 | + |
| 690 | + pvec = &per_cpu(lru_pvecs.lru_lazyfree_movetail, cpu); |
| 691 | + if (pagevec_count(pvec)) |
| 692 | + pagevec_lru_move_fn(pvec, lru_lazyfree_movetail_fn, NULL); |
606 | 693 | |
607 | 694 | activate_page_drain(cpu); |
608 | 695 | } |
.. | .. |
625 | 712 | return; |
626 | 713 | |
627 | 714 | if (likely(get_page_unless_zero(page))) { |
628 | | - struct pagevec *pvec = &get_locked_var(swapvec_lock, |
629 | | - lru_deactivate_file_pvecs); |
| 715 | + struct pagevec *pvec; |
630 | 716 | |
631 | | - if (!pagevec_add(pvec, page) || PageCompound(page)) |
| 717 | + local_lock(&lru_pvecs.lock); |
| 718 | + pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file); |
| 719 | + |
| 720 | + if (pagevec_add_and_need_flush(pvec, page)) |
632 | 721 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); |
633 | | - put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); |
| 722 | + local_unlock(&lru_pvecs.lock); |
| 723 | + } |
| 724 | +} |
| 725 | + |
| 726 | +/* |
| 727 | + * deactivate_page - deactivate a page |
| 728 | + * @page: page to deactivate |
| 729 | + * |
| 730 | + * deactivate_page() moves @page to the inactive list if @page was on the active |
| 731 | + * list and was not an unevictable page. This is done to accelerate the reclaim |
| 732 | + * of @page. |
| 733 | + */ |
| 734 | +void deactivate_page(struct page *page) |
| 735 | +{ |
| 736 | + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { |
| 737 | + struct pagevec *pvec; |
| 738 | + |
| 739 | + local_lock(&lru_pvecs.lock); |
| 740 | + pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate); |
| 741 | + get_page(page); |
| 742 | + if (pagevec_add_and_need_flush(pvec, page)) |
| 743 | + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); |
| 744 | + local_unlock(&lru_pvecs.lock); |
634 | 745 | } |
635 | 746 | } |
636 | 747 | |
.. | .. |
645 | 756 | { |
646 | 757 | if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && |
647 | 758 | !PageSwapCache(page) && !PageUnevictable(page)) { |
648 | | - struct pagevec *pvec = &get_locked_var(swapvec_lock, |
649 | | - lru_lazyfree_pvecs); |
| 759 | + struct pagevec *pvec; |
650 | 760 | |
| 761 | + local_lock(&lru_pvecs.lock); |
| 762 | + pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree); |
651 | 763 | get_page(page); |
652 | | - if (!pagevec_add(pvec, page) || PageCompound(page)) |
| 764 | + if (pagevec_add_and_need_flush(pvec, page)) |
653 | 765 | pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); |
654 | | - put_locked_var(swapvec_lock, lru_lazyfree_pvecs); |
| 766 | + local_unlock(&lru_pvecs.lock); |
| 767 | + } |
| 768 | +} |
| 769 | + |
| 770 | +/** |
| 771 | + * mark_page_lazyfree_movetail - make a swapbacked page lazyfree |
| 772 | + * @page: page to deactivate |
| 773 | + * |
| 774 | + * mark_page_lazyfree_movetail() moves @page to the tail of inactive file list. |
| 775 | + * This is done to accelerate the reclaim of @page. |
| 776 | + */ |
| 777 | +void mark_page_lazyfree_movetail(struct page *page, bool tail) |
| 778 | +{ |
| 779 | + if (PageLRU(page) && !PageUnevictable(page) && PageSwapBacked(page) && |
| 780 | + !PageSwapCache(page)) { |
| 781 | + struct pagevec *pvec; |
| 782 | + |
| 783 | + local_lock(&lru_pvecs.lock); |
| 784 | + pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree_movetail); |
| 785 | + get_page(page); |
| 786 | + if (pagevec_add_and_need_flush(pvec, page)) |
| 787 | + pagevec_lru_move_fn(pvec, |
| 788 | + lru_lazyfree_movetail_fn, &tail); |
| 789 | + local_unlock(&lru_pvecs.lock); |
655 | 790 | } |
656 | 791 | } |
657 | 792 | |
658 | 793 | void lru_add_drain(void) |
659 | 794 | { |
660 | | - lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); |
661 | | - local_unlock_cpu(swapvec_lock); |
| 795 | + local_lock(&lru_pvecs.lock); |
| 796 | + lru_add_drain_cpu(smp_processor_id()); |
| 797 | + local_unlock(&lru_pvecs.lock); |
| 798 | +} |
| 799 | + |
| 800 | +/* |
| 801 | + * It's called from per-cpu workqueue context in SMP case so |
| 802 | + * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on |
| 803 | + * the same cpu. It shouldn't be a problem in !SMP case since |
| 804 | + * the core is only one and the locks will disable preemption. |
| 805 | + */ |
| 806 | +static void lru_add_and_bh_lrus_drain(void) |
| 807 | +{ |
| 808 | + local_lock(&lru_pvecs.lock); |
| 809 | + lru_add_drain_cpu(smp_processor_id()); |
| 810 | + local_unlock(&lru_pvecs.lock); |
| 811 | + invalidate_bh_lrus_cpu(); |
| 812 | +} |
| 813 | + |
| 814 | +void lru_add_drain_cpu_zone(struct zone *zone) |
| 815 | +{ |
| 816 | + local_lock(&lru_pvecs.lock); |
| 817 | + lru_add_drain_cpu(smp_processor_id()); |
| 818 | + drain_local_pages(zone); |
| 819 | + local_unlock(&lru_pvecs.lock); |
662 | 820 | } |
663 | 821 | |
664 | 822 | #ifdef CONFIG_SMP |
665 | | - |
666 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
667 | | -static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) |
668 | | -{ |
669 | | - local_lock_on(swapvec_lock, cpu); |
670 | | - lru_add_drain_cpu(cpu); |
671 | | - local_unlock_on(swapvec_lock, cpu); |
672 | | -} |
673 | | - |
674 | | -#else |
675 | 823 | |
676 | 824 | static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); |
677 | 825 | |
678 | 826 | static void lru_add_drain_per_cpu(struct work_struct *dummy) |
679 | 827 | { |
680 | | - lru_add_drain(); |
| 828 | + lru_add_and_bh_lrus_drain(); |
681 | 829 | } |
682 | | - |
683 | | -static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) |
684 | | -{ |
685 | | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); |
686 | | - |
687 | | - INIT_WORK(work, lru_add_drain_per_cpu); |
688 | | - queue_work_on(cpu, mm_percpu_wq, work); |
689 | | - cpumask_set_cpu(cpu, has_work); |
690 | | -} |
691 | | -#endif |
692 | 830 | |
693 | 831 | /* |
694 | 832 | * Doesn't need any cpu hotplug locking because we do rely on per-cpu |
.. | .. |
697 | 835 | * Calling this function with cpu hotplug locks held can actually lead |
698 | 836 | * to obscure indirect dependencies via WQ context. |
699 | 837 | */ |
700 | | -void lru_add_drain_all(void) |
| 838 | +inline void __lru_add_drain_all(bool force_all_cpus) |
701 | 839 | { |
702 | | - static DEFINE_MUTEX(lock); |
| 840 | + /* |
| 841 | + * lru_drain_gen - Global pages generation number |
| 842 | + * |
| 843 | + * (A) Definition: global lru_drain_gen = x implies that all generations |
| 844 | + * 0 < n <= x are already *scheduled* for draining. |
| 845 | + * |
| 846 | + * This is an optimization for the highly-contended use case where a |
| 847 | + * user space workload keeps constantly generating a flow of pages for |
| 848 | + * each CPU. |
| 849 | + */ |
| 850 | + static unsigned int lru_drain_gen; |
703 | 851 | static struct cpumask has_work; |
704 | | - int cpu; |
| 852 | + static DEFINE_MUTEX(lock); |
| 853 | + unsigned cpu, this_gen; |
705 | 854 | |
706 | 855 | /* |
707 | 856 | * Make sure nobody triggers this path before mm_percpu_wq is fully |
.. | .. |
710 | 859 | if (WARN_ON(!mm_percpu_wq)) |
711 | 860 | return; |
712 | 861 | |
| 862 | + /* |
| 863 | + * Guarantee pagevec counter stores visible by this CPU are visible to |
| 864 | + * other CPUs before loading the current drain generation. |
| 865 | + */ |
| 866 | + smp_mb(); |
| 867 | + |
| 868 | + /* |
| 869 | + * (B) Locally cache global LRU draining generation number |
| 870 | + * |
| 871 | + * The read barrier ensures that the counter is loaded before the mutex |
| 872 | + * is taken. It pairs with smp_mb() inside the mutex critical section |
| 873 | + * at (D). |
| 874 | + */ |
| 875 | + this_gen = smp_load_acquire(&lru_drain_gen); |
| 876 | + |
713 | 877 | mutex_lock(&lock); |
| 878 | + |
| 879 | + /* |
| 880 | + * (C) Exit the draining operation if a newer generation, from another |
| 881 | + * lru_add_drain_all(), was already scheduled for draining. Check (A). |
| 882 | + */ |
| 883 | + if (unlikely(this_gen != lru_drain_gen && !force_all_cpus)) |
| 884 | + goto done; |
| 885 | + |
| 886 | + /* |
| 887 | + * (D) Increment global generation number |
| 888 | + * |
| 889 | + * Pairs with smp_load_acquire() at (B), outside of the critical |
| 890 | + * section. Use a full memory barrier to guarantee that the new global |
| 891 | + * drain generation number is stored before loading pagevec counters. |
| 892 | + * |
| 893 | + * This pairing must be done here, before the for_each_online_cpu loop |
| 894 | + * below which drains the page vectors. |
| 895 | + * |
| 896 | + * Let x, y, and z represent some system CPU numbers, where x < y < z. |
| 897 | + * Assume CPU #z is is in the middle of the for_each_online_cpu loop |
| 898 | + * below and has already reached CPU #y's per-cpu data. CPU #x comes |
| 899 | + * along, adds some pages to its per-cpu vectors, then calls |
| 900 | + * lru_add_drain_all(). |
| 901 | + * |
| 902 | + * If the paired barrier is done at any later step, e.g. after the |
| 903 | + * loop, CPU #x will just exit at (C) and miss flushing out all of its |
| 904 | + * added pages. |
| 905 | + */ |
| 906 | + WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); |
| 907 | + smp_mb(); |
| 908 | + |
714 | 909 | cpumask_clear(&has_work); |
715 | | - |
716 | 910 | for_each_online_cpu(cpu) { |
| 911 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); |
717 | 912 | |
718 | | - if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || |
719 | | - pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || |
720 | | - pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || |
721 | | - pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || |
722 | | - need_activate_page_drain(cpu)) |
723 | | - remote_lru_add_drain(cpu, &has_work); |
| 913 | + if (force_all_cpus || |
| 914 | + pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) || |
| 915 | + data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) || |
| 916 | + pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) || |
| 917 | + pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || |
| 918 | + pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || |
| 919 | + pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree_movetail, cpu)) || |
| 920 | + need_activate_page_drain(cpu) || |
| 921 | + has_bh_in_lru(cpu, NULL)) { |
| 922 | + INIT_WORK(work, lru_add_drain_per_cpu); |
| 923 | + queue_work_on(cpu, mm_percpu_wq, work); |
| 924 | + __cpumask_set_cpu(cpu, &has_work); |
| 925 | + } |
724 | 926 | } |
725 | 927 | |
726 | | -#ifndef CONFIG_PREEMPT_RT_BASE |
727 | 928 | for_each_cpu(cpu, &has_work) |
728 | 929 | flush_work(&per_cpu(lru_add_drain_work, cpu)); |
729 | | -#endif |
730 | 930 | |
| 931 | +done: |
731 | 932 | mutex_unlock(&lock); |
| 933 | +} |
| 934 | + |
| 935 | +void lru_add_drain_all(void) |
| 936 | +{ |
| 937 | + __lru_add_drain_all(false); |
732 | 938 | } |
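The (A)-(D) generation scheme above lets concurrent callers share one drain pass: each caller snapshots lru_drain_gen before taking the mutex, and if the generation has moved on by the time it gets the lock, a drain covering its pages was already scheduled, so it bails out. Stripped-down control flow, mirroring the code in this hunk (illustrative only):

    this_gen = smp_load_acquire(&lru_drain_gen);     /* (B) */
    mutex_lock(&lock);
    if (this_gen != lru_drain_gen && !force_all_cpus)
        goto done;               /* (C) another caller already drained */
    WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);    /* (D) */
    smp_mb();
    /* ... queue and flush lru_add_drain_per_cpu work on the busy CPUs ... */
    done:
    mutex_unlock(&lock);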
733 | 939 | #else |
734 | 940 | void lru_add_drain_all(void) |
735 | 941 | { |
736 | 942 | lru_add_drain(); |
737 | 943 | } |
| 944 | +#endif /* CONFIG_SMP */ |
| 945 | + |
| 946 | +static atomic_t lru_disable_count = ATOMIC_INIT(0); |
| 947 | + |
| 948 | +bool lru_cache_disabled(void) |
| 949 | +{ |
| 950 | + return atomic_read(&lru_disable_count) != 0; |
| 951 | +} |
| 952 | + |
| 953 | +void lru_cache_enable(void) |
| 954 | +{ |
| 955 | + atomic_dec(&lru_disable_count); |
| 956 | +} |
| 957 | +EXPORT_SYMBOL_GPL(lru_cache_enable); |
| 958 | + |
| 959 | +/* |
| 960 | + * lru_cache_disable() needs to be called before we start compiling |
| 961 | + * a list of pages to be migrated using isolate_lru_page(). |
| 962 | + * It drains pages on LRU cache and then disable on all cpus until |
| 963 | + * lru_cache_enable is called. |
| 964 | + * |
| 965 | + * Must be paired with a call to lru_cache_enable(). |
| 966 | + */ |
| 967 | +void lru_cache_disable(void) |
| 968 | +{ |
| 969 | + /* |
| 970 | + * If someone is already disabled lru_cache, just return with |
| 971 | + * increasing the lru_disable_count. |
| 972 | + */ |
| 973 | + if (atomic_inc_not_zero(&lru_disable_count)) |
| 974 | + return; |
| 975 | +#ifdef CONFIG_SMP |
| 976 | + /* |
| 977 | + * lru_add_drain_all in the force mode will schedule draining on |
| 978 | + * all online CPUs so any calls of lru_cache_disabled wrapped by |
| 979 | + * local_lock or preemption disabled would be ordered by that. |
| 980 | + * The atomic operation doesn't need to have stronger ordering |
| 981 | + * requirements because that is enforeced by the scheduling |
| 982 | + * guarantees. |
| 983 | + */ |
| 984 | + __lru_add_drain_all(true); |
| 985 | +#else |
| 986 | + lru_add_and_bh_lrus_drain(); |
738 | 987 | #endif |
| 988 | + atomic_inc(&lru_disable_count); |
| 989 | +} |
| 990 | +EXPORT_SYMBOL_GPL(lru_cache_disable); |
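As the comment above says, the expected pairing brackets a page-isolation phase, for example before building a migration list; a hedged caller-side sketch (not part of this patch):

    lru_cache_disable();    /* drain all pagevecs, then keep them bypassed */
    /* ... isolate_lru_page() each page and build the migration list ... */
    /* ... migrate the list ... */
    lru_cache_enable();     /* per-CPU batching resumes */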
739 | 991 | |
740 | 992 | /** |
741 | 993 | * release_pages - batched put_page() |
.. | .. |
751 | 1003 | LIST_HEAD(pages_to_free); |
752 | 1004 | struct pglist_data *locked_pgdat = NULL; |
753 | 1005 | struct lruvec *lruvec; |
754 | | - unsigned long uninitialized_var(flags); |
755 | | - unsigned int uninitialized_var(lock_batch); |
| 1006 | + unsigned long flags; |
| 1007 | + unsigned int lock_batch; |
756 | 1008 | |
757 | 1009 | for (i = 0; i < nr; i++) { |
758 | 1010 | struct page *page = pages[i]; |
.. | .. |
767 | 1019 | locked_pgdat = NULL; |
768 | 1020 | } |
769 | 1021 | |
| 1022 | + page = compound_head(page); |
770 | 1023 | if (is_huge_zero_page(page)) |
771 | 1024 | continue; |
772 | 1025 | |
.. | .. |
778 | 1031 | } |
779 | 1032 | /* |
780 | 1033 | * ZONE_DEVICE pages that return 'false' from |
781 | | - * put_devmap_managed_page() do not require special |
| 1034 | + * page_is_devmap_managed() do not require special |
782 | 1035 | * processing, and instead, expect a call to |
783 | 1036 | * put_page_testzero(). |
784 | 1037 | */ |
785 | | - if (put_devmap_managed_page(page)) |
| 1038 | + if (page_is_devmap_managed(page)) { |
| 1039 | + put_devmap_managed_page(page); |
786 | 1040 | continue; |
| 1041 | + } |
787 | 1042 | } |
788 | 1043 | |
789 | | - page = compound_head(page); |
790 | 1044 | if (!put_page_testzero(page)) |
791 | 1045 | continue; |
792 | 1046 | |
.. | .. |
817 | 1071 | del_page_from_lru_list(page, lruvec, page_off_lru(page)); |
818 | 1072 | } |
819 | 1073 | |
820 | | - /* Clear Active bit in case of parallel mark_page_accessed */ |
821 | | - __ClearPageActive(page); |
822 | 1074 | __ClearPageWaiters(page); |
823 | 1075 | |
824 | 1076 | list_add(&page->lru, &pages_to_free); |
.. | .. |
857 | 1109 | void lru_add_page_tail(struct page *page, struct page *page_tail, |
858 | 1110 | struct lruvec *lruvec, struct list_head *list) |
859 | 1111 | { |
860 | | - const int file = 0; |
861 | | - |
862 | 1112 | VM_BUG_ON_PAGE(!PageHead(page), page); |
863 | 1113 | VM_BUG_ON_PAGE(PageCompound(page_tail), page); |
864 | 1114 | VM_BUG_ON_PAGE(PageLRU(page_tail), page); |
865 | | - VM_BUG_ON(NR_CPUS != 1 && |
866 | | - !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock)); |
| 1115 | + lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock); |
867 | 1116 | |
868 | 1117 | if (!list) |
869 | 1118 | SetPageLRU(page_tail); |
.. | .. |
875 | 1124 | get_page(page_tail); |
876 | 1125 | list_add_tail(&page_tail->lru, list); |
877 | 1126 | } else { |
878 | | - struct list_head *list_head; |
879 | 1127 | /* |
880 | 1128 | * Head page has not yet been counted, as an hpage, |
881 | 1129 | * so we must account for each subpage individually. |
882 | 1130 | * |
883 | | - * Use the standard add function to put page_tail on the list, |
884 | | - * but then correct its position so they all end up in order. |
| 1131 | + * Put page_tail on the list at the correct position |
| 1132 | + * so they all end up in order. |
885 | 1133 | */ |
886 | | - add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail)); |
887 | | - list_head = page_tail->lru.prev; |
888 | | - list_move_tail(&page_tail->lru, list_head); |
| 1134 | + add_page_to_lru_list_tail(page_tail, lruvec, |
| 1135 | + page_lru(page_tail)); |
889 | 1136 | } |
890 | | - |
891 | | - if (!PageUnevictable(page)) |
892 | | - update_page_reclaim_stat(lruvec, file, PageActive(page_tail)); |
893 | 1137 | } |
894 | 1138 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
895 | 1139 | |
.. | .. |
898 | 1142 | { |
899 | 1143 | enum lru_list lru; |
900 | 1144 | int was_unevictable = TestClearPageUnevictable(page); |
| 1145 | + int nr_pages = thp_nr_pages(page); |
901 | 1146 | |
902 | 1147 | VM_BUG_ON_PAGE(PageLRU(page), page); |
903 | 1148 | |
904 | | - SetPageLRU(page); |
905 | 1149 | /* |
906 | 1150 | * Page becomes evictable in two ways: |
907 | | - * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()]. |
| 1151 | + * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()]. |
908 | 1152 | * 2) Before acquiring LRU lock to put the page to correct LRU and then |
909 | 1153 | * a) do PageLRU check with lock [check_move_unevictable_pages] |
910 | 1154 | * b) do PageLRU check before lock [clear_page_mlock] |
.. | .. |
928 | 1172 | * looking at the same page) and the evictable page will be stranded |
929 | 1173 | * in an unevictable LRU. |
930 | 1174 | */ |
931 | | - smp_mb(); |
| 1175 | + SetPageLRU(page); |
| 1176 | + smp_mb__after_atomic(); |
932 | 1177 | |
933 | 1178 | if (page_evictable(page)) { |
934 | 1179 | lru = page_lru(page); |
935 | | - update_page_reclaim_stat(lruvec, page_is_file_cache(page), |
936 | | - PageActive(page)); |
937 | 1180 | if (was_unevictable) |
938 | | - count_vm_event(UNEVICTABLE_PGRESCUED); |
| 1181 | + __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); |
939 | 1182 | } else { |
940 | 1183 | lru = LRU_UNEVICTABLE; |
941 | 1184 | ClearPageActive(page); |
942 | 1185 | SetPageUnevictable(page); |
943 | 1186 | if (!was_unevictable) |
944 | | - count_vm_event(UNEVICTABLE_PGCULLED); |
| 1187 | + __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); |
945 | 1188 | } |
946 | 1189 | |
947 | 1190 | add_page_to_lru_list(page, lruvec, lru); |
.. | .. |
956 | 1199 | { |
957 | 1200 | pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL); |
958 | 1201 | } |
959 | | -EXPORT_SYMBOL(__pagevec_lru_add); |
960 | 1202 | |
961 | 1203 | /** |
962 | 1204 | * pagevec_lookup_entries - gang pagecache lookup |
.. | .. |
974 | 1216 | * The search returns a group of mapping-contiguous entries with |
975 | 1217 | * ascending indexes. There may be holes in the indices due to |
976 | 1218 | * not-present entries. |
| 1219 | + * |
| 1220 | + * Only one subpage of a Transparent Huge Page is returned in one call: |
| 1221 | + * allowing truncate_inode_pages_range() to evict the whole THP without |
| 1222 | + * cycling through a pagevec of extra references. |
977 | 1223 | * |
978 | 1224 | * pagevec_lookup_entries() returns the number of entries which were |
979 | 1225 | * found. |
.. | .. |
1003 | 1249 | |
1004 | 1250 | for (i = 0, j = 0; i < pagevec_count(pvec); i++) { |
1005 | 1251 | struct page *page = pvec->pages[i]; |
1006 | | - if (!radix_tree_exceptional_entry(page)) |
| 1252 | + if (!xa_is_value(page)) |
1007 | 1253 | pvec->pages[j++] = page; |
1008 | 1254 | } |
1009 | 1255 | pvec->nr = j; |
.. | .. |
1040 | 1286 | |
1041 | 1287 | unsigned pagevec_lookup_range_tag(struct pagevec *pvec, |
1042 | 1288 | struct address_space *mapping, pgoff_t *index, pgoff_t end, |
1043 | | - int tag) |
| 1289 | + xa_mark_t tag) |
1044 | 1290 | { |
1045 | 1291 | pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, |
1046 | 1292 | PAGEVEC_SIZE, pvec->pages); |
.. | .. |
1050 | 1296 | |
1051 | 1297 | unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, |
1052 | 1298 | struct address_space *mapping, pgoff_t *index, pgoff_t end, |
1053 | | - int tag, unsigned max_pages) |
| 1299 | + xa_mark_t tag, unsigned max_pages) |
1054 | 1300 | { |
1055 | 1301 | pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, |
1056 | 1302 | min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages); |
.. | .. |
1062 | 1308 | */ |
1063 | 1309 | void __init swap_setup(void) |
1064 | 1310 | { |
1065 | | - unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); |
| 1311 | + unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); |
1066 | 1312 | |
1067 | 1313 | /* Use a smaller cluster for small-memory machines */ |
1068 | 1314 | if (megs < 16) |
.. | .. |
1074 | 1320 | * _really_ don't want to cluster much more |
1075 | 1321 | */ |
1076 | 1322 | } |
| 1323 | + |
| 1324 | +#ifdef CONFIG_DEV_PAGEMAP_OPS |
| 1325 | +void put_devmap_managed_page(struct page *page) |
| 1326 | +{ |
| 1327 | + int count; |
| 1328 | + |
| 1329 | + if (WARN_ON_ONCE(!page_is_devmap_managed(page))) |
| 1330 | + return; |
| 1331 | + |
| 1332 | + count = page_ref_dec_return(page); |
| 1333 | + |
| 1334 | + /* |
| 1335 | + * devmap page refcounts are 1-based, rather than 0-based: if |
| 1336 | + * refcount is 1, then the page is free and the refcount is |
| 1337 | + * stable because nobody holds a reference on the page. |
| 1338 | + */ |
| 1339 | + if (count == 1) |
| 1340 | + free_devmap_managed_page(page); |
| 1341 | + else if (!count) |
| 1342 | + __put_page(page); |
| 1343 | +} |
| 1344 | +EXPORT_SYMBOL(put_devmap_managed_page); |
| 1345 | +#endif |
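Note the 1-based refcount convention for device-managed pages that the new helper relies on; a hypothetical walk of the transitions it handles (illustrative only):

    /* 3 -> 2 : page still in use, nothing to do                     */
    /* 2 -> 1 : last user dropped it, free_devmap_managed_page(page) */
    /* 1 -> 0 : only possible if the 1-based rule was violated;      */
    /*          fall back to __put_page(page)                        */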
---|