| .. | .. |
|---|
| 39 | 39 | #include <linux/highmem.h> |
|---|
| 40 | 40 | #include <linux/string.h> |
|---|
| 41 | 41 | #include <linux/slab.h> |
|---|
| 42 | +#include <linux/pgtable.h> |
|---|
| 42 | 43 | #include <asm/tlbflush.h> |
|---|
| 43 | | -#include <asm/pgtable.h> |
|---|
| 44 | 44 | #include <linux/cpumask.h> |
|---|
| 45 | 45 | #include <linux/cpu.h> |
|---|
| 46 | 46 | #include <linux/vmalloc.h> |
|---|
| .. | .. |
|---|
| 52 | 52 | #include <linux/zsmalloc.h> |
|---|
| 53 | 53 | #include <linux/zpool.h> |
|---|
| 54 | 54 | #include <linux/mount.h> |
|---|
| 55 | +#include <linux/pseudo_fs.h> |
|---|
| 55 | 56 | #include <linux/migrate.h> |
|---|
| 56 | 57 | #include <linux/wait.h> |
|---|
| 57 | 58 | #include <linux/pagemap.h> |
|---|
| .. | .. |
|---|
| 78 | 79 | |
|---|
| 79 | 80 | /* |
|---|
| 80 | 81 | * Object location (<PFN>, <obj_idx>) is encoded as |
|---|
| 81 | | - * as single (unsigned long) handle value. |
|---|
| 82 | + * a single (unsigned long) handle value. |
|---|
| 82 | 83 | * |
|---|
| 83 | 84 | * Note that object index <obj_idx> starts from 0. |
|---|
| 84 | 85 | * |
|---|
| .. | .. |
|---|
| 292 | 293 | }; |
|---|
| 293 | 294 | |
|---|
| 294 | 295 | struct mapping_area { |
|---|
| 295 | | -#ifdef CONFIG_PGTABLE_MAPPING |
|---|
| 296 | | - struct vm_struct *vm; /* vm area for mapping object that span pages */ |
|---|
| 297 | | -#else |
|---|
| 298 | 296 | char *vm_buf; /* copy buffer for objects that span pages */ |
|---|
| 299 | | -#endif |
|---|
| 300 | 297 | char *vm_addr; /* address of kmap_atomic()'ed pages */ |
|---|
| 301 | 298 | enum zs_mapmode vm_mm; /* mapping mode */ |
|---|
| 302 | 299 | }; |
|---|
| .. | .. |
|---|
| 350 | 347 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) |
|---|
| 351 | 348 | { |
|---|
| 352 | 349 | return (unsigned long)kmem_cache_alloc(pool->handle_cachep, |
|---|
| 353 | | - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); |
|---|
| 350 | + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); |
|---|
| 354 | 351 | } |
|---|
| 355 | 352 | |
|---|
| 356 | 353 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) |
|---|
| .. | .. |
|---|
| 361 | 358 | static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags) |
|---|
| 362 | 359 | { |
|---|
| 363 | 360 | return kmem_cache_alloc(pool->zspage_cachep, |
|---|
| 364 | | - flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); |
|---|
| 361 | + flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); |
|---|
| 365 | 362 | } |
|---|
| 366 | 363 | |
|---|
| 367 | 364 | static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) |
|---|
| .. | .. |
|---|
| 423 | 420 | case ZPOOL_MM_WO: |
|---|
| 424 | 421 | zs_mm = ZS_MM_WO; |
|---|
| 425 | 422 | break; |
|---|
| 426 | | - case ZPOOL_MM_RW: /* fallthru */ |
|---|
| 423 | + case ZPOOL_MM_RW: |
|---|
| 427 | 424 | default: |
|---|
| 428 | 425 | zs_mm = ZS_MM_RW; |
|---|
| 429 | 426 | break; |
|---|
| .. | .. |
|---|
| 442 | 439 | } |
|---|
| 443 | 440 | |
|---|
| 444 | 441 | static struct zpool_driver zs_zpool_driver = { |
|---|
| 445 | | - .type = "zsmalloc", |
|---|
| 446 | | - .owner = THIS_MODULE, |
|---|
| 447 | | - .create = zs_zpool_create, |
|---|
| 448 | | - .destroy = zs_zpool_destroy, |
|---|
| 449 | | - .malloc = zs_zpool_malloc, |
|---|
| 450 | | - .free = zs_zpool_free, |
|---|
| 451 | | - .map = zs_zpool_map, |
|---|
| 452 | | - .unmap = zs_zpool_unmap, |
|---|
| 453 | | - .total_size = zs_zpool_total_size, |
|---|
| 442 | + .type = "zsmalloc", |
|---|
| 443 | + .owner = THIS_MODULE, |
|---|
| 444 | + .create = zs_zpool_create, |
|---|
| 445 | + .destroy = zs_zpool_destroy, |
|---|
| 446 | + .malloc_support_movable = true, |
|---|
| 447 | + .malloc = zs_zpool_malloc, |
|---|
| 448 | + .free = zs_zpool_free, |
|---|
| 449 | + .map = zs_zpool_map, |
|---|
| 450 | + .unmap = zs_zpool_unmap, |
|---|
| 451 | + .total_size = zs_zpool_total_size, |
|---|
| 454 | 452 | }; |
|---|
| 455 | 453 | |
|---|
| 456 | 454 | MODULE_ALIAS("zpool-zsmalloc"); |
|---|
| .. | .. |
|---|
| 475 | 473 | return zspage->inuse; |
|---|
| 476 | 474 | } |
|---|
| 477 | 475 | |
|---|
| 478 | | -static inline void set_zspage_inuse(struct zspage *zspage, int val) |
|---|
| 479 | | -{ |
|---|
| 480 | | - zspage->inuse = val; |
|---|
| 481 | | -} |
|---|
| 482 | 476 | |
|---|
| 483 | 477 | static inline void mod_zspage_inuse(struct zspage *zspage, int val) |
|---|
| 484 | 478 | { |
|---|
| .. | .. |
|---|
| 580 | 574 | } |
|---|
| 581 | 575 | |
|---|
| 582 | 576 | zs_stat_root = debugfs_create_dir("zsmalloc", NULL); |
|---|
| 583 | | - if (!zs_stat_root) |
|---|
| 584 | | - pr_warn("debugfs 'zsmalloc' stat dir creation failed\n"); |
|---|
| 585 | 577 | } |
|---|
| 586 | 578 | |
|---|
| 587 | 579 | static void __exit zs_stat_exit(void) |
|---|
| .. | .. |
|---|
| 652 | 644 | |
|---|
| 653 | 645 | static void zs_pool_stat_create(struct zs_pool *pool, const char *name) |
|---|
| 654 | 646 | { |
|---|
| 655 | | - struct dentry *entry; |
|---|
| 656 | | - |
|---|
| 657 | 647 | if (!zs_stat_root) { |
|---|
| 658 | 648 | pr_warn("no root stat dir, not creating <%s> stat dir\n", name); |
|---|
| 659 | 649 | return; |
|---|
| 660 | 650 | } |
|---|
| 661 | 651 | |
|---|
| 662 | | - entry = debugfs_create_dir(name, zs_stat_root); |
|---|
| 663 | | - if (!entry) { |
|---|
| 664 | | - pr_warn("debugfs dir <%s> creation failed\n", name); |
|---|
| 665 | | - return; |
|---|
| 666 | | - } |
|---|
| 667 | | - pool->stat_dentry = entry; |
|---|
| 652 | + pool->stat_dentry = debugfs_create_dir(name, zs_stat_root); |
|---|
| 668 | 653 | |
|---|
| 669 | | - entry = debugfs_create_file("classes", S_IFREG | 0444, |
|---|
| 670 | | - pool->stat_dentry, pool, |
|---|
| 671 | | - &zs_stats_size_fops); |
|---|
| 672 | | - if (!entry) { |
|---|
| 673 | | - pr_warn("%s: debugfs file entry <%s> creation failed\n", |
|---|
| 674 | | - name, "classes"); |
|---|
| 675 | | - debugfs_remove_recursive(pool->stat_dentry); |
|---|
| 676 | | - pool->stat_dentry = NULL; |
|---|
| 677 | | - } |
|---|
| 654 | + debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool, |
|---|
| 655 | + &zs_stats_size_fops); |
|---|
| 678 | 656 | } |
|---|
| 679 | 657 | |
|---|
| 680 | 658 | static void zs_pool_stat_destroy(struct zs_pool *pool) |
|---|
| .. | .. |
|---|
| 909 | 887 | return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); |
|---|
| 910 | 888 | } |
|---|
| 911 | 889 | |
|---|
| 912 | | -static void pin_tag(unsigned long handle) |
|---|
| 890 | +static void pin_tag(unsigned long handle) __acquires(bitlock) |
|---|
| 913 | 891 | { |
|---|
| 914 | 892 | bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); |
|---|
| 915 | 893 | } |
|---|
| 916 | 894 | |
|---|
| 917 | | -static void unpin_tag(unsigned long handle) |
|---|
| 895 | +static void unpin_tag(unsigned long handle) __releases(bitlock) |
|---|
| 918 | 896 | { |
|---|
| 919 | 897 | bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); |
|---|
| 920 | 898 | } |
|---|
| .. | .. |
|---|
| 1131 | 1109 | return zspage; |
|---|
| 1132 | 1110 | } |
|---|
| 1133 | 1111 | |
|---|
| 1134 | | -#ifdef CONFIG_PGTABLE_MAPPING |
|---|
| 1135 | | -static inline int __zs_cpu_up(struct mapping_area *area) |
|---|
| 1136 | | -{ |
|---|
| 1137 | | - /* |
|---|
| 1138 | | - * Make sure we don't leak memory if a cpu UP notification |
|---|
| 1139 | | - * and zs_init() race and both call zs_cpu_up() on the same cpu |
|---|
| 1140 | | - */ |
|---|
| 1141 | | - if (area->vm) |
|---|
| 1142 | | - return 0; |
|---|
| 1143 | | - area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL); |
|---|
| 1144 | | - if (!area->vm) |
|---|
| 1145 | | - return -ENOMEM; |
|---|
| 1146 | | - return 0; |
|---|
| 1147 | | -} |
|---|
| 1148 | | - |
|---|
| 1149 | | -static inline void __zs_cpu_down(struct mapping_area *area) |
|---|
| 1150 | | -{ |
|---|
| 1151 | | - if (area->vm) |
|---|
| 1152 | | - free_vm_area(area->vm); |
|---|
| 1153 | | - area->vm = NULL; |
|---|
| 1154 | | -} |
|---|
| 1155 | | - |
|---|
| 1156 | | -static inline void *__zs_map_object(struct mapping_area *area, |
|---|
| 1157 | | - struct page *pages[2], int off, int size) |
|---|
| 1158 | | -{ |
|---|
| 1159 | | - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); |
|---|
| 1160 | | - area->vm_addr = area->vm->addr; |
|---|
| 1161 | | - return area->vm_addr + off; |
|---|
| 1162 | | -} |
|---|
| 1163 | | - |
|---|
| 1164 | | -static inline void __zs_unmap_object(struct mapping_area *area, |
|---|
| 1165 | | - struct page *pages[2], int off, int size) |
|---|
| 1166 | | -{ |
|---|
| 1167 | | - unsigned long addr = (unsigned long)area->vm_addr; |
|---|
| 1168 | | - |
|---|
| 1169 | | - unmap_kernel_range(addr, PAGE_SIZE * 2); |
|---|
| 1170 | | -} |
|---|
| 1171 | | - |
|---|
| 1172 | | -#else /* CONFIG_PGTABLE_MAPPING */ |
|---|
| 1173 | | - |
|---|
| 1174 | 1112 | static inline int __zs_cpu_up(struct mapping_area *area) |
|---|
| 1175 | 1113 | { |
|---|
| 1176 | 1114 | /* |
|---|
| .. | .. |
|---|
| 1250 | 1188 | /* enable page faults to match kunmap_atomic() return conditions */ |
|---|
| 1251 | 1189 | pagefault_enable(); |
|---|
| 1252 | 1190 | } |
|---|
| 1253 | | - |
|---|
| 1254 | | -#endif /* CONFIG_PGTABLE_MAPPING */ |
|---|
| 1255 | 1191 | |
|---|
| 1256 | 1192 | static int zs_cpu_prepare(unsigned int cpu) |
|---|
| 1257 | 1193 | { |
|---|
| .. | .. |
|---|
| 1812 | 1748 | */ |
|---|
| 1813 | 1749 | static void lock_zspage(struct zspage *zspage) |
|---|
| 1814 | 1750 | { |
|---|
| 1815 | | - struct page *page = get_first_page(zspage); |
|---|
| 1751 | + struct page *curr_page, *page; |
|---|
| 1816 | 1752 | |
|---|
| 1817 | | - do { |
|---|
| 1818 | | - lock_page(page); |
|---|
| 1819 | | - } while ((page = get_next_page(page)) != NULL); |
|---|
| 1753 | + /* |
|---|
| 1754 | + * Pages we haven't locked yet can be migrated off the list while we're |
|---|
| 1755 | + * trying to lock them, so we need to be careful and only attempt to |
|---|
| 1756 | + * lock each page under migrate_read_lock(). Otherwise, the page we lock |
|---|
| 1757 | + * may no longer belong to the zspage. This means that we may wait for |
|---|
| 1758 | + * the wrong page to unlock, so we must take a reference to the page |
|---|
| 1759 | + * prior to waiting for it to unlock outside migrate_read_lock(). |
|---|
| 1760 | + */ |
|---|
| 1761 | + while (1) { |
|---|
| 1762 | + migrate_read_lock(zspage); |
|---|
| 1763 | + page = get_first_page(zspage); |
|---|
| 1764 | + if (trylock_page(page)) |
|---|
| 1765 | + break; |
|---|
| 1766 | + get_page(page); |
|---|
| 1767 | + migrate_read_unlock(zspage); |
|---|
| 1768 | + wait_on_page_locked(page); |
|---|
| 1769 | + put_page(page); |
|---|
| 1770 | + } |
|---|
| 1771 | + |
|---|
| 1772 | + curr_page = page; |
|---|
| 1773 | + while ((page = get_next_page(curr_page))) { |
|---|
| 1774 | + if (trylock_page(page)) { |
|---|
| 1775 | + curr_page = page; |
|---|
| 1776 | + } else { |
|---|
| 1777 | + get_page(page); |
|---|
| 1778 | + migrate_read_unlock(zspage); |
|---|
| 1779 | + wait_on_page_locked(page); |
|---|
| 1780 | + put_page(page); |
|---|
| 1781 | + migrate_read_lock(zspage); |
|---|
| 1782 | + } |
|---|
| 1783 | + } |
|---|
| 1784 | + migrate_read_unlock(zspage); |
|---|
| 1820 | 1785 | } |
|---|
| 1821 | 1786 | |
|---|
| 1822 | | -static struct dentry *zs_mount(struct file_system_type *fs_type, |
|---|
| 1823 | | - int flags, const char *dev_name, void *data) |
|---|
| 1787 | +static int zs_init_fs_context(struct fs_context *fc) |
|---|
| 1824 | 1788 | { |
|---|
| 1825 | | - static const struct dentry_operations ops = { |
|---|
| 1826 | | - .d_dname = simple_dname, |
|---|
| 1827 | | - }; |
|---|
| 1828 | | - |
|---|
| 1829 | | - return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC); |
|---|
| 1789 | + return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM; |
|---|
| 1830 | 1790 | } |
|---|
| 1831 | 1791 | |
|---|
| 1832 | 1792 | static struct file_system_type zsmalloc_fs = { |
|---|
| 1833 | 1793 | .name = "zsmalloc", |
|---|
| 1834 | | - .mount = zs_mount, |
|---|
| 1794 | + .init_fs_context = zs_init_fs_context, |
|---|
| 1835 | 1795 | .kill_sb = kill_anon_super, |
|---|
| 1836 | 1796 | }; |
|---|
| 1837 | 1797 | |
|---|
| .. | .. |
|---|
| 1856 | 1816 | rwlock_init(&zspage->lock); |
|---|
| 1857 | 1817 | } |
|---|
| 1858 | 1818 | |
|---|
| 1859 | | -static void migrate_read_lock(struct zspage *zspage) |
|---|
| 1819 | +static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock) |
|---|
| 1860 | 1820 | { |
|---|
| 1861 | 1821 | read_lock(&zspage->lock); |
|---|
| 1862 | 1822 | } |
|---|
| 1863 | 1823 | |
|---|
| 1864 | | -static void migrate_read_unlock(struct zspage *zspage) |
|---|
| 1824 | +static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock) |
|---|
| 1865 | 1825 | { |
|---|
| 1866 | 1826 | read_unlock(&zspage->lock); |
|---|
| 1867 | 1827 | } |
|---|