| .. | .. |
|---|
| 12 | 12 | #include <linux/fs.h> |
|---|
| 13 | 13 | #include <linux/atomic.h> |
|---|
| 14 | 14 | #include <linux/page-flags.h> |
|---|
| 15 | | -#include <linux/locallock.h> |
|---|
| 16 | 15 | #include <asm/page.h> |
|---|
| 17 | 16 | |
|---|
| 18 | 17 | struct notifier_block; |
|---|
| .. | .. |
|---|
| 149 | 148 | * We always assume that blocks are of size PAGE_SIZE. |
|---|
| 150 | 149 | */ |
|---|
| 151 | 150 | struct swap_extent { |
|---|
| 152 | | - struct list_head list; |
|---|
| 151 | + struct rb_node rb_node; |
|---|
| 153 | 152 | pgoff_t start_page; |
|---|
| 154 | 153 | pgoff_t nr_pages; |
|---|
| 155 | 154 | sector_t start_block; |
|---|
| .. | .. |
|---|
| 170 | 169 | SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ |
|---|
| 171 | 170 | SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ |
|---|
| 172 | 171 | SWP_BLKDEV = (1 << 6), /* its a block device */ |
|---|
| 173 | | - SWP_FILE = (1 << 7), /* set after swap_activate success */ |
|---|
| 174 | | - SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ |
|---|
| 175 | | - SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ |
|---|
| 176 | | - SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ |
|---|
| 177 | | - SWP_SYNCHRONOUS_IO = (1 << 11), /* synchronous IO is efficient */ |
|---|
| 172 | + SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ |
|---|
| 173 | + SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ |
|---|
| 174 | + SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ |
|---|
| 175 | + SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ |
|---|
| 176 | + SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ |
|---|
| 177 | + SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ |
|---|
| 178 | + SWP_VALID = (1 << 13), /* swap is valid to be operated on? */ |
|---|
| 178 | 179 | /* add others here before... */ |
|---|
| 179 | | - SWP_SCANNING = (1 << 12), /* refcount in scan_swap_map */ |
|---|
| 180 | + SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ |
|---|
| 180 | 181 | }; |
|---|
| 181 | 182 | |
|---|
| 182 | 183 | #define SWAP_CLUSTER_MAX 32UL |
|---|
| 183 | 184 | #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX |
|---|
| 184 | 185 | |
|---|
| 185 | | -#define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */ |
|---|
| 186 | | -#define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */ |
|---|
| 186 | +/* Bit flag in swap_map */ |
|---|
| 187 | 187 | #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ |
|---|
| 188 | | -#define SWAP_CONT_MAX 0x7f /* Max count, in each swap_map continuation */ |
|---|
| 189 | | -#define COUNT_CONTINUED 0x80 /* See swap_map continuation for full count */ |
|---|
| 190 | | -#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */ |
|---|
| 188 | +#define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */ |
|---|
| 189 | + |
|---|
| 190 | +/* Special value in first swap_map */ |
|---|
| 191 | +#define SWAP_MAP_MAX 0x3e /* Max count */ |
|---|
| 192 | +#define SWAP_MAP_BAD 0x3f /* Note page is bad */ |
|---|
| 193 | +#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs */ |
|---|
| 194 | + |
|---|
| 195 | +/* Special value in each swap_map continuation */ |
|---|
| 196 | +#define SWAP_CONT_MAX 0x7f /* Max count */ |
|---|
| 191 | 197 | |
|---|
| 192 | 198 | /* |
|---|
| 193 | 199 | * We use this to track usage of a cluster. A cluster is a block of swap disk |
|---|
| .. | .. |
|---|
| 246 | 252 | unsigned int inuse_pages; /* number of those currently in use */ |
|---|
| 247 | 253 | unsigned int cluster_next; /* likely index for next allocation */ |
|---|
| 248 | 254 | unsigned int cluster_nr; /* countdown to next cluster search */ |
|---|
| 255 | + unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ |
|---|
| 249 | 256 | struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ |
|---|
| 250 | | - struct swap_extent *curr_swap_extent; |
|---|
| 251 | | - struct swap_extent first_swap_extent; |
|---|
| 257 | + struct rb_root swap_extent_root;/* root of the swap extent rbtree */ |
|---|
| 252 | 258 | struct block_device *bdev; /* swap device or bdev of swap file */ |
|---|
| 253 | 259 | struct file *swap_file; /* seldom referenced */ |
|---|
| 254 | 260 | unsigned int old_block_size; /* seldom referenced */ |
|---|
| .. | .. |
|---|
| 275 | 281 | */ |
|---|
| 276 | 282 | struct work_struct discard_work; /* discard worker */ |
|---|
| 277 | 283 | struct swap_cluster_list discard_clusters; /* discard clusters list */ |
|---|
| 278 | | - unsigned int write_pending; |
|---|
| 279 | | - unsigned int max_writes; |
|---|
| 280 | | - struct plist_node avail_lists[0]; /* |
|---|
| 284 | + struct plist_node avail_lists[]; /* |
|---|
| 281 | 285 | * entries in swap_avail_heads, one |
|---|
| 282 | 286 | * entry per node. |
|---|
| 283 | 287 | * Must be last as the number of the |
|---|
| .. | .. |
|---|
| 309 | 313 | }; |
|---|
| 310 | 314 | |
|---|
| 311 | 315 | /* linux/mm/workingset.c */ |
|---|
| 312 | | -void *workingset_eviction(struct address_space *mapping, struct page *page); |
|---|
| 316 | +void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); |
|---|
| 317 | +void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); |
|---|
| 313 | 318 | void workingset_refault(struct page *page, void *shadow); |
|---|
| 314 | 319 | void workingset_activation(struct page *page); |
|---|
| 315 | 320 | |
|---|
| 316 | | -/* Do not use directly, use workingset_lookup_update */ |
|---|
| 317 | | -void workingset_update_node(struct radix_tree_node *node); |
|---|
| 318 | | - |
|---|
| 319 | | -/* Returns workingset_update_node() if the mapping has shadow entries. */ |
|---|
| 320 | | -#define workingset_lookup_update(mapping) \ |
|---|
| 321 | | -({ \ |
|---|
| 322 | | - radix_tree_update_node_t __helper = workingset_update_node; \ |
|---|
| 323 | | - if (dax_mapping(mapping) || shmem_mapping(mapping)) \ |
|---|
| 324 | | - __helper = NULL; \ |
|---|
| 325 | | - __helper; \ |
|---|
| 326 | | -}) |
|---|
| 321 | +/* Only track the nodes of mappings with shadow entries */ |
|---|
| 322 | +void workingset_update_node(struct xa_node *node); |
|---|
| 323 | +#define mapping_set_update(xas, mapping) do { \ |
|---|
| 324 | + if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \ |
|---|
| 325 | + xas_set_update(xas, workingset_update_node); \ |
|---|
| 326 | +} while (0) |
|---|
| 327 | 327 | |
|---|
| 328 | 328 | /* linux/mm/page_alloc.c */ |
|---|
| 329 | | -extern unsigned long totalram_pages; |
|---|
| 330 | 329 | extern unsigned long totalreserve_pages; |
|---|
| 331 | 330 | extern unsigned long nr_free_buffer_pages(void); |
|---|
| 332 | | -extern unsigned long nr_free_pagecache_pages(void); |
|---|
| 333 | 331 | |
|---|
| 334 | 332 | /* Definition of global_zone_page_state not available yet */ |
|---|
| 335 | 333 | #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) |
|---|
| 336 | 334 | |
|---|
| 337 | 335 | |
|---|
| 338 | 336 | /* linux/mm/swap.c */ |
|---|
| 339 | | -DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); |
|---|
| 337 | +extern void lru_note_cost(struct lruvec *lruvec, bool file, |
|---|
| 338 | + unsigned int nr_pages); |
|---|
| 339 | +extern void lru_note_cost_page(struct page *); |
|---|
| 340 | 340 | extern void lru_cache_add(struct page *); |
|---|
| 341 | | -extern void lru_cache_add_anon(struct page *page); |
|---|
| 342 | | -extern void lru_cache_add_file(struct page *page); |
|---|
| 343 | 341 | extern void lru_add_page_tail(struct page *page, struct page *page_tail, |
|---|
| 344 | 342 | struct lruvec *lruvec, struct list_head *head); |
|---|
| 345 | | -extern void activate_page(struct page *); |
|---|
| 346 | 343 | extern void mark_page_accessed(struct page *); |
|---|
| 344 | + |
|---|
| 345 | +extern bool lru_cache_disabled(void); |
|---|
| 346 | +extern void lru_cache_disable(void); |
|---|
| 347 | +extern void lru_cache_enable(void); |
|---|
| 347 | 348 | extern void lru_add_drain(void); |
|---|
| 348 | 349 | extern void lru_add_drain_cpu(int cpu); |
|---|
| 350 | +extern void lru_add_drain_cpu_zone(struct zone *zone); |
|---|
| 349 | 351 | extern void lru_add_drain_all(void); |
|---|
| 350 | 352 | extern void rotate_reclaimable_page(struct page *page); |
|---|
| 351 | 353 | extern void deactivate_file_page(struct page *page); |
|---|
| 354 | +extern void deactivate_page(struct page *page); |
|---|
| 352 | 355 | extern void mark_page_lazyfree(struct page *page); |
|---|
| 356 | +extern void mark_page_lazyfree_movetail(struct page *page, bool tail); |
|---|
| 353 | 357 | extern void swap_setup(void); |
|---|
| 354 | 358 | |
|---|
| 355 | | -extern void lru_cache_add_active_or_unevictable(struct page *page, |
|---|
| 356 | | - struct vm_area_struct *vma); |
|---|
| 359 | +extern void __lru_cache_add_inactive_or_unevictable(struct page *page, |
|---|
| 360 | + unsigned long vma_flags); |
|---|
| 361 | + |
|---|
| 362 | +static inline void lru_cache_add_inactive_or_unevictable(struct page *page, |
|---|
| 363 | + struct vm_area_struct *vma) |
|---|
| 364 | +{ |
|---|
| 365 | + return __lru_cache_add_inactive_or_unevictable(page, vma->vm_flags); |
|---|
| 366 | +} |
|---|
| 357 | 367 | |
|---|
| 358 | 368 | /* linux/mm/vmscan.c */ |
|---|
| 359 | 369 | extern unsigned long zone_reclaimable_pages(struct zone *zone); |
|---|
| .. | .. |
|---|
| 371 | 381 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
|---|
| 372 | 382 | extern int vm_swappiness; |
|---|
| 373 | 383 | extern int remove_mapping(struct address_space *mapping, struct page *page); |
|---|
| 374 | | -extern unsigned long vm_total_pages; |
|---|
| 375 | 384 | |
|---|
| 385 | +extern unsigned long reclaim_pages(struct list_head *page_list); |
|---|
| 376 | 386 | #ifdef CONFIG_NUMA |
|---|
| 377 | 387 | extern int node_reclaim_mode; |
|---|
| 378 | 388 | extern int sysctl_min_unmapped_ratio; |
|---|
| .. | .. |
|---|
| 381 | 391 | #define node_reclaim_mode 0 |
|---|
| 382 | 392 | #endif |
|---|
| 383 | 393 | |
|---|
| 384 | | -extern int page_evictable(struct page *page); |
|---|
| 385 | 394 | extern void check_move_unevictable_pages(struct pagevec *pvec); |
|---|
| 386 | 395 | |
|---|
| 387 | 396 | extern int kswapd_run(int nid); |
|---|
| .. | .. |
|---|
| 415 | 424 | extern unsigned long total_swapcache_pages(void); |
|---|
| 416 | 425 | extern void show_swap_cache_info(void); |
|---|
| 417 | 426 | extern int add_to_swap(struct page *page); |
|---|
| 418 | | -extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); |
|---|
| 419 | | -extern int __add_to_swap_cache(struct page *page, swp_entry_t entry); |
|---|
| 420 | | -extern void __delete_from_swap_cache(struct page *); |
|---|
| 427 | +extern void *get_shadow_from_swap_cache(swp_entry_t entry); |
|---|
| 428 | +extern int add_to_swap_cache(struct page *page, swp_entry_t entry, |
|---|
| 429 | + gfp_t gfp, void **shadowp); |
|---|
| 430 | +extern void __delete_from_swap_cache(struct page *page, |
|---|
| 431 | + swp_entry_t entry, void *shadow); |
|---|
| 421 | 432 | extern void delete_from_swap_cache(struct page *); |
|---|
| 433 | +extern void clear_shadow_from_swap_cache(int type, unsigned long begin, |
|---|
| 434 | + unsigned long end); |
|---|
| 422 | 435 | extern void free_page_and_swap_cache(struct page *); |
|---|
| 423 | 436 | extern void free_pages_and_swap_cache(struct page **, int); |
|---|
| 424 | 437 | extern struct page *lookup_swap_cache(swp_entry_t entry, |
|---|
| 425 | 438 | struct vm_area_struct *vma, |
|---|
| 426 | 439 | unsigned long addr); |
|---|
| 440 | +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); |
|---|
| 427 | 441 | extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, |
|---|
| 428 | 442 | struct vm_area_struct *vma, unsigned long addr, |
|---|
| 429 | 443 | bool do_poll); |
|---|
| .. | .. |
|---|
| 464 | 478 | extern void swap_free(swp_entry_t); |
|---|
| 465 | 479 | extern void swapcache_free_entries(swp_entry_t *entries, int n); |
|---|
| 466 | 480 | extern int free_swap_and_cache(swp_entry_t); |
|---|
| 467 | | -extern int swap_type_of(dev_t, sector_t, struct block_device **); |
|---|
| 481 | +int swap_type_of(dev_t device, sector_t offset); |
|---|
| 482 | +int find_first_swap(dev_t *device); |
|---|
| 468 | 483 | extern unsigned int count_swap_pages(int, int); |
|---|
| 469 | 484 | extern sector_t map_swap_page(struct page *, struct block_device **); |
|---|
| 470 | 485 | extern sector_t swapdev_block(int, pgoff_t); |
|---|
| 471 | 486 | extern int page_swapcount(struct page *); |
|---|
| 472 | | -extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry); |
|---|
| 487 | +extern int __swap_count(swp_entry_t entry); |
|---|
| 473 | 488 | extern int __swp_swapcount(swp_entry_t entry); |
|---|
| 474 | 489 | extern int swp_swapcount(swp_entry_t entry); |
|---|
| 475 | 490 | extern struct swap_info_struct *page_swap_info(struct page *); |
|---|
| .. | .. |
|---|
| 479 | 494 | struct backing_dev_info; |
|---|
| 480 | 495 | extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); |
|---|
| 481 | 496 | extern void exit_swap_address_space(unsigned int type); |
|---|
| 497 | +extern struct swap_info_struct *get_swap_device(swp_entry_t entry); |
|---|
| 498 | +sector_t swap_page_sector(struct page *page); |
|---|
| 499 | + |
|---|
| 500 | +static inline void put_swap_device(struct swap_info_struct *si) |
|---|
| 501 | +{ |
|---|
| 502 | + rcu_read_unlock(); |
|---|
| 503 | +} |
|---|
| 482 | 504 | |
|---|
| 483 | 505 | #else /* CONFIG_SWAP */ |
|---|
| 484 | 506 | |
|---|
| .. | .. |
|---|
| 560 | 582 | return NULL; |
|---|
| 561 | 583 | } |
|---|
| 562 | 584 | |
|---|
| 585 | +static inline |
|---|
| 586 | +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) |
|---|
| 587 | +{ |
|---|
| 588 | + return find_get_page(mapping, index); |
|---|
| 589 | +} |
|---|
| 590 | + |
|---|
| 563 | 591 | static inline int add_to_swap(struct page *page) |
|---|
| 564 | 592 | { |
|---|
| 565 | 593 | return 0; |
|---|
| 566 | 594 | } |
|---|
| 567 | 595 | |
|---|
| 596 | +static inline void *get_shadow_from_swap_cache(swp_entry_t entry) |
|---|
| 597 | +{ |
|---|
| 598 | + return NULL; |
|---|
| 599 | +} |
|---|
| 600 | + |
|---|
| 568 | 601 | static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, |
|---|
| 569 | | - gfp_t gfp_mask) |
|---|
| 602 | + gfp_t gfp_mask, void **shadowp) |
|---|
| 570 | 603 | { |
|---|
| 571 | 604 | return -1; |
|---|
| 572 | 605 | } |
|---|
| 573 | 606 | |
|---|
| 574 | | -static inline void __delete_from_swap_cache(struct page *page) |
|---|
| 607 | +static inline void __delete_from_swap_cache(struct page *page, |
|---|
| 608 | + swp_entry_t entry, void *shadow) |
|---|
| 575 | 609 | { |
|---|
| 576 | 610 | } |
|---|
| 577 | 611 | |
|---|
| 578 | 612 | static inline void delete_from_swap_cache(struct page *page) |
|---|
| 613 | +{ |
|---|
| 614 | +} |
|---|
| 615 | + |
|---|
| 616 | +static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, |
|---|
| 617 | + unsigned long end) |
|---|
| 579 | 618 | { |
|---|
| 580 | 619 | } |
|---|
| 581 | 620 | |
|---|
| .. | .. |
|---|
| 584 | 623 | return 0; |
|---|
| 585 | 624 | } |
|---|
| 586 | 625 | |
|---|
| 587 | | -static inline int __swap_count(struct swap_info_struct *si, swp_entry_t entry) |
|---|
| 626 | +static inline int __swap_count(swp_entry_t entry) |
|---|
| 588 | 627 | { |
|---|
| 589 | 628 | return 0; |
|---|
| 590 | 629 | } |
|---|
| .. | .. |
|---|
| 633 | 672 | return vm_swappiness; |
|---|
| 634 | 673 | |
|---|
| 635 | 674 | /* root ? */ |
|---|
| 636 | | - if (mem_cgroup_disabled() || !memcg->css.parent) |
|---|
| 675 | + if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg)) |
|---|
| 637 | 676 | return vm_swappiness; |
|---|
| 638 | 677 | |
|---|
| 639 | 678 | return memcg->swappiness; |
|---|
| .. | .. |
|---|
| 646 | 685 | #endif |
|---|
| 647 | 686 | |
|---|
| 648 | 687 | #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) |
|---|
| 649 | | -extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node, |
|---|
| 650 | | - gfp_t gfp_mask); |
|---|
| 688 | +extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); |
|---|
| 689 | +static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) |
|---|
| 690 | +{ |
|---|
| 691 | + if (mem_cgroup_disabled()) |
|---|
| 692 | + return; |
|---|
| 693 | + __cgroup_throttle_swaprate(page, gfp_mask); |
|---|
| 694 | +} |
|---|
| 651 | 695 | #else |
|---|
| 652 | | -static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, |
|---|
| 653 | | - int node, gfp_t gfp_mask) |
|---|
| 696 | +static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) |
|---|
| 654 | 697 | { |
|---|
| 655 | 698 | } |
|---|
| 656 | 699 | #endif |
|---|
| 657 | 700 | |
|---|
| 658 | 701 | #ifdef CONFIG_MEMCG_SWAP |
|---|
| 659 | 702 | extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); |
|---|
| 660 | | -extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); |
|---|
| 661 | | -extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); |
|---|
| 703 | +extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); |
|---|
| 704 | +static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) |
|---|
| 705 | +{ |
|---|
| 706 | + if (mem_cgroup_disabled()) |
|---|
| 707 | + return 0; |
|---|
| 708 | + return __mem_cgroup_try_charge_swap(page, entry); |
|---|
| 709 | +} |
|---|
| 710 | + |
|---|
| 711 | +extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); |
|---|
| 712 | +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) |
|---|
| 713 | +{ |
|---|
| 714 | + if (mem_cgroup_disabled()) |
|---|
| 715 | + return; |
|---|
| 716 | + __mem_cgroup_uncharge_swap(entry, nr_pages); |
|---|
| 717 | +} |
|---|
| 718 | + |
|---|
| 662 | 719 | extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); |
|---|
| 663 | 720 | extern bool mem_cgroup_swap_full(struct page *page); |
|---|
| 664 | 721 | #else |
|---|