```diff
@@ -39,8 +39,8 @@
 #include <linux/highmem.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/pgtable.h>
 #include <asm/tlbflush.h>
-#include <asm/pgtable.h>
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/vmalloc.h>
```
---|
```diff
@@ -52,6 +52,7 @@
 #include <linux/zsmalloc.h>
 #include <linux/zpool.h>
 #include <linux/mount.h>
+#include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
 #include <linux/wait.h>
 #include <linux/pagemap.h>
```
---|
```diff
@@ -78,7 +79,7 @@
 
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
- * as single (unsigned long) handle value.
+ * a single (unsigned long) handle value.
  *
  * Note that object index <obj_idx> starts from 0.
  *
```
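For context, the comment being fixed describes the packing scheme itself: the page frame number lives in the high bits of the handle and the object index in the low bits. A minimal sketch of such an encoding (the field width is illustrative, and the real code also reserves low-order tag bits such as the handle pin bit):

```c
#include <linux/mm.h>	/* page_to_pfn(), pfn_to_page() */

#define OBJ_IDX_BITS	12	/* illustrative width, not zsmalloc's actual value */

/* Pack an object location (<PFN>, <obj_idx>) into one unsigned long. */
static unsigned long location_to_obj_sketch(struct page *page, unsigned int obj_idx)
{
	return (page_to_pfn(page) << OBJ_IDX_BITS) | obj_idx;
}

/* Recover the location from the packed value. */
static void obj_to_location_sketch(unsigned long obj, struct page **page,
				   unsigned int *obj_idx)
{
	*page = pfn_to_page(obj >> OBJ_IDX_BITS);
	*obj_idx = obj & ((1UL << OBJ_IDX_BITS) - 1);
}
```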
---|
```diff
@@ -292,11 +293,7 @@
 };
 
 struct mapping_area {
-#ifdef CONFIG_PGTABLE_MAPPING
-	struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
 	char *vm_buf; /* copy buffer for objects that span pages */
-#endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
```
---|
```diff
@@ -350,7 +347,7 @@
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
 	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
```
---|
```diff
@@ -361,7 +358,7 @@
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
 	return kmem_cache_alloc(pool->zspage_cachep,
-			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
```
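Both helpers above allocate metadata (handles and struct zspage) from a kmem_cache, so placement modifiers meant for the zspage's data pages must be masked out: the slab allocator cannot return highmem, movable, or CMA-backed memory. (__GFP_CMA is a vendor-tree flag, e.g. in the Android common kernel; mainline masks only the first two.) The pattern, as a standalone sketch with illustrative names:

```c
#include <linux/slab.h>
#include <linux/gfp.h>

/* Placement modifiers that make no sense for slab-backed metadata. */
#define METADATA_GFP_MASK	(__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_CMA)

/*
 * Allocate metadata with the caller's gfp flags, minus modifiers that
 * only apply to page allocations. Without the mask, a caller passing
 * __GFP_HIGHMEM would hand slab a flag it cannot honor.
 */
static void *metadata_alloc(struct kmem_cache *cache, gfp_t gfp)
{
	return kmem_cache_alloc(cache, gfp & ~METADATA_GFP_MASK);
}
```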
---|
```diff
@@ -423,7 +420,7 @@
 	case ZPOOL_MM_WO:
 		zs_mm = ZS_MM_WO;
 		break;
-	case ZPOOL_MM_RW: /* fallthru */
+	case ZPOOL_MM_RW:
 	default:
 		zs_mm = ZS_MM_RW;
 		break;
```
---|
```diff
@@ -442,15 +439,16 @@
 }
 
 static struct zpool_driver zs_zpool_driver = {
-	.type =		"zsmalloc",
-	.owner =	THIS_MODULE,
-	.create =	zs_zpool_create,
-	.destroy =	zs_zpool_destroy,
-	.malloc =	zs_zpool_malloc,
-	.free =		zs_zpool_free,
-	.map =		zs_zpool_map,
-	.unmap =	zs_zpool_unmap,
-	.total_size =	zs_zpool_total_size,
+	.type =			  "zsmalloc",
+	.owner =		  THIS_MODULE,
+	.create =		  zs_zpool_create,
+	.destroy =		  zs_zpool_destroy,
+	.malloc_support_movable = true,
+	.malloc =		  zs_zpool_malloc,
+	.free =			  zs_zpool_free,
+	.map =			  zs_zpool_map,
+	.unmap =		  zs_zpool_unmap,
+	.total_size =		  zs_zpool_total_size,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
```
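The new `.malloc_support_movable = true` advertises to the zpool core that zs_zpool_malloc() can satisfy `__GFP_HIGHMEM | __GFP_MOVABLE` requests. A sketch of how a zpool consumer might use that capability bit, loosely modeled on zswap (zpool_malloc_support_movable() is the zpool API helper that reads this field; the surrounding function is illustrative):

```c
#include <linux/zpool.h>
#include <linux/gfp.h>

/*
 * Choose allocation flags for storing a compressed page: request
 * highmem/movable memory only when the backend can handle it.
 */
static int store_compressed(struct zpool *pool, size_t len,
			    unsigned long *handle)
{
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;

	if (zpool_malloc_support_movable(pool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;

	return zpool_malloc(pool, len, gfp, handle);
}
```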
---|
```diff
@@ -475,10 +473,6 @@
 	return zspage->inuse;
 }
 
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
-	zspage->inuse = val;
-}
 
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
```
---|
```diff
@@ -580,8 +574,6 @@
 	}
 
 	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
-	if (!zs_stat_root)
-		pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
 }
 
 static void __exit zs_stat_exit(void)
```
---|
```diff
@@ -652,29 +644,15 @@
 
 static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
 {
-	struct dentry *entry;
-
 	if (!zs_stat_root) {
 		pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
 		return;
 	}
 
-	entry = debugfs_create_dir(name, zs_stat_root);
-	if (!entry) {
-		pr_warn("debugfs dir <%s> creation failed\n", name);
-		return;
-	}
-	pool->stat_dentry = entry;
+	pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
 
-	entry = debugfs_create_file("classes", S_IFREG | 0444,
-			pool->stat_dentry, pool,
-			&zs_stats_size_fops);
-	if (!entry) {
-		pr_warn("%s: debugfs file entry <%s> creation failed\n",
-				name, "classes");
-		debugfs_remove_recursive(pool->stat_dentry);
-		pool->stat_dentry = NULL;
-	}
+	debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
+			    &zs_stats_size_fops);
 }
 
 static void zs_pool_stat_destroy(struct zs_pool *pool)
```
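Dropping these checks follows the debugfs convention that callers should ignore debugfs_create_*() return values: the API degrades gracefully (an ERR_PTR parent turns subsequent creations into no-ops), and code must behave the same whether or not debugfs is available. A minimal sketch of the idiom with hypothetical names:

```c
#include <linux/debugfs.h>

static struct dentry *my_debug_root;	/* hypothetical driver state */
static u32 my_counter;

static void my_debugfs_init(void)
{
	/*
	 * No return-value checks: if the dir creation fails, the ERR_PTR
	 * parent makes the file creation below a no-op, and the driver
	 * keeps working without its debug interface.
	 */
	my_debug_root = debugfs_create_dir("my_driver", NULL);
	debugfs_create_u32("counter", 0444, my_debug_root, &my_counter);
}

static void my_debugfs_exit(void)
{
	debugfs_remove_recursive(my_debug_root);
}
```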
---|
```diff
@@ -909,12 +887,12 @@
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
 }
 
-static void pin_tag(unsigned long handle)
+static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
 }
 
-static void unpin_tag(unsigned long handle)
+static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
 }
```
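pin_tag()/unpin_tag() implement a one-bit spinlock stored in the handle word itself (HANDLE_PIN_BIT), so no per-object lock structure is needed; the `__acquires`/`__releases` annotations tell sparse's context checker that lock and unlock intentionally live in separate functions. A sketch of a caller (the caller is hypothetical; in zsmalloc the handle points at an unsigned long holding the packed object location):

```c
/*
 * Pin the handle so the object it refers to cannot be migrated while
 * we dereference it, then read the packed location it stores.
 */
static unsigned long read_obj_location(unsigned long handle)
{
	unsigned long obj;

	pin_tag(handle);
	obj = *(unsigned long *)handle;	/* low bits include the pin tag */
	unpin_tag(handle);

	return obj;
}
```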
---|
```diff
@@ -1131,46 +1109,6 @@
 	return zspage;
 }
 
-#ifdef CONFIG_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-	/*
-	 * Make sure we don't leak memory if a cpu UP notification
-	 * and zs_init() race and both call zs_cpu_up() on the same cpu
-	 */
-	if (area->vm)
-		return 0;
-	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
-	if (!area->vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-	if (area->vm)
-		free_vm_area(area->vm);
-	area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
-	area->vm_addr = area->vm->addr;
-	return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm_addr;
-
-	unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
```
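With the CONFIG_PGTABLE_MAPPING variant removed, the copy-based implementation that follows is the only mapping strategy left: an object straddling two pages is assembled in the per-CPU vm_buf instead of being mapped virtually contiguous through page tables. Condensed from the retained code (error handling and the single-page fast path omitted), the map side looks roughly like this:

```c
#include <linux/highmem.h>	/* kmap_atomic()/kunmap_atomic() */
#include <linux/string.h>	/* memcpy() */

/*
 * Build a contiguous view of an object that spans two pages by copying
 * both halves into the per-CPU buffer.
 */
static void *map_object_by_copy(struct mapping_area *area,
				struct page *pages[2], int off, int size)
{
	size_t sizes[2];
	void *addr;

	sizes[0] = PAGE_SIZE - off;	/* bytes taken from the first page */
	sizes[1] = size - sizes[0];	/* remainder from the second page */

	addr = kmap_atomic(pages[0]);
	memcpy(area->vm_buf, addr + off, sizes[0]);
	kunmap_atomic(addr);

	addr = kmap_atomic(pages[1]);
	memcpy(area->vm_buf + sizes[0], addr, sizes[1]);
	kunmap_atomic(addr);

	return area->vm_buf;
}
```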
---|
```diff
@@ -1250,8 +1188,6 @@
 	/* enable page faults to match kunmap_atomic() return conditions */
 	pagefault_enable();
 }
-
-#endif /* CONFIG_PGTABLE_MAPPING */
 
 static int zs_cpu_prepare(unsigned int cpu)
 {
```
---|
```diff
@@ -1812,10 +1748,39 @@
  */
 static void lock_zspage(struct zspage *zspage)
 {
-	struct page *page = get_first_page(zspage);
+	struct page *curr_page, *page;
 
-	do {
-		lock_page(page);
-	} while ((page = get_next_page(page)) != NULL);
+	/*
+	 * Pages we haven't locked yet can be migrated off the list while we're
+	 * trying to lock them, so we need to be careful and only attempt to
+	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * may no longer belong to the zspage. This means that we may wait for
+	 * the wrong page to unlock, so we must take a reference to the page
+	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 */
+	while (1) {
+		migrate_read_lock(zspage);
+		page = get_first_page(zspage);
+		if (trylock_page(page))
+			break;
+		get_page(page);
+		migrate_read_unlock(zspage);
+		wait_on_page_locked(page);
+		put_page(page);
+	}
+
+	curr_page = page;
+	while ((page = get_next_page(curr_page))) {
+		if (trylock_page(page)) {
+			curr_page = page;
+		} else {
+			get_page(page);
+			migrate_read_unlock(zspage);
+			wait_on_page_locked(page);
+			put_page(page);
+			migrate_read_lock(zspage);
+		}
+	}
+	migrate_read_unlock(zspage);
 }
 
```
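The key constraint behind the loop structure above is that migrate_read_lock() wraps a rwlock, so lock_zspage() must never sleep while holding it; lock_page() can sleep, hence the trylock_page() attempts under the lock and the wait outside it. The failure path shared by both loops could be factored as a helper like this hypothetical sketch (note the caller must re-fetch its page after the lock is retaken, exactly as lock_zspage() does via get_first_page()/get_next_page()):

```c
/*
 * Wait for a locked page while not holding the zspage lock. The page
 * reference keeps the struct page valid across the sleep even if the
 * page is migrated out of the zspage in the meantime.
 */
static void zspage_wait_for_page(struct zspage *zspage, struct page *page)
	__releases(&zspage->lock) __acquires(&zspage->lock)
{
	get_page(page);			/* pin struct page across the sleep */
	migrate_read_unlock(zspage);	/* rwlock: sleeping under it is a bug */
	wait_on_page_locked(page);	/* may be a now-unrelated page; harmless */
	put_page(page);
	migrate_read_lock(zspage);	/* caller revalidates its page now */
}
```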
---|
```diff
@@ -1822,16 +1787,11 @@
-static struct dentry *zs_mount(struct file_system_type *fs_type,
-				int flags, const char *dev_name, void *data)
+static int zs_init_fs_context(struct fs_context *fc)
 {
-	static const struct dentry_operations ops = {
-		.d_dname = simple_dname,
-	};
-
-	return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
+	return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
 }
 
 static struct file_system_type zsmalloc_fs = {
 	.name		= "zsmalloc",
-	.mount		= zs_mount,
+	.init_fs_context = zs_init_fs_context,
 	.kill_sb	= kill_anon_super,
 };
 
```
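init_pseudo() covers everything zsmalloc needs from this filesystem, which exists only as an inode factory for page migration. The same new-style registration for a hypothetical pseudo filesystem (names and magic value are illustrative):

```c
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/pseudo_fs.h>

#define MYFS_MAGIC	0x4d594653	/* illustrative magic number */

static int myfs_init_fs_context(struct fs_context *fc)
{
	/* init_pseudo() supplies the root inode, dentry ops and super ops. */
	return init_pseudo(fc, MYFS_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type myfs_type = {
	.name			= "myfs",
	.init_fs_context	= myfs_init_fs_context,
	.kill_sb		= kill_anon_super,
};
```

Such a filesystem is typically mounted internally with kern_mount(&myfs_type) rather than from userspace, which is how zsmalloc uses its own.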
---|
```diff
@@ -1856,12 +1816,12 @@
 	rwlock_init(&zspage->lock);
 }
 
-static void migrate_read_lock(struct zspage *zspage)
+static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
 {
 	read_lock(&zspage->lock);
 }
 
-static void migrate_read_unlock(struct zspage *zspage)
+static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
 {
 	read_unlock(&zspage->lock);
 }
```
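Like the pin_tag()/unpin_tag() pair earlier, these wrappers take and release a lock in separate function bodies, which would normally trip sparse's context tracking (`make C=1`) with a "context imbalance" warning; `__acquires()`/`__releases()` compile away in normal builds and only inform the checker. The general pattern, sketched on a plain spinlock with hypothetical names:

```c
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(stats_lock);	/* hypothetical lock */

/* Annotate because the lock is released in a different function. */
static void stats_begin(void) __acquires(&stats_lock)
{
	spin_lock(&stats_lock);
}

static void stats_end(void) __releases(&stats_lock)
{
	spin_unlock(&stats_lock);
}
```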
---|