@@ -39,8 +39,8 @@
 #include <linux/highmem.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/pgtable.h>
 #include <asm/tlbflush.h>
-#include <asm/pgtable.h>
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/vmalloc.h>
@@ -52,11 +52,11 @@
 #include <linux/zsmalloc.h>
 #include <linux/zpool.h>
 #include <linux/mount.h>
+#include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
 #include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
-#include <linux/locallock.h>
 
 #define ZSPAGE_MAGIC	0x58
 
@@ -74,25 +74,12 @@
  */
 #define ZS_MAX_ZSPAGE_ORDER 2
 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
+
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-struct zsmalloc_handle {
-	unsigned long addr;
-	struct mutex lock;
-};
-
-#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
-
-#else
-
-#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
-#endif
 
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
- * as single (unsigned long) handle value.
+ * a single (unsigned long) handle value.
  *
  * Note that object index <obj_idx> starts from 0.
  *
@@ -306,11 +293,7 @@
 };
 
 struct mapping_area {
-#ifdef CONFIG_PGTABLE_MAPPING
-	struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
 	char *vm_buf; /* copy buffer for objects that span pages */
-#endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
@@ -339,7 +322,7 @@
 
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
 					0, 0, NULL);
 	if (!pool->handle_cachep)
 		return 1;
@@ -363,26 +346,9 @@
 
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
-	void *p;
-
-	p = kmem_cache_alloc(pool->handle_cachep,
-			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
-#ifdef CONFIG_PREEMPT_RT_FULL
-	if (p) {
-		struct zsmalloc_handle *zh = p;
-
-		mutex_init(&zh->lock);
-	}
-#endif
-	return (unsigned long)p;
+	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
-{
-	return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
-}
-#endif
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 {
@@ -392,7 +358,7 @@
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
 	return kmem_cache_alloc(pool->zspage_cachep,
-			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
@@ -402,18 +368,12 @@
 
 static void record_obj(unsigned long handle, unsigned long obj)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	WRITE_ONCE(zh->addr, obj);
-#else
 	/*
 	 * lsb of @obj represents handle lock while other bits
 	 * represent object value the handle is pointing so
 	 * updating shouldn't do store tearing.
 	 */
 	WRITE_ONCE(*(unsigned long *)handle, obj);
-#endif
 }
 
 /* zpool driver */
@@ -460,7 +420,7 @@
 	case ZPOOL_MM_WO:
 		zs_mm = ZS_MM_WO;
 		break;
-	case ZPOOL_MM_RW: /* fallthru */
+	case ZPOOL_MM_RW:
 	default:
 		zs_mm = ZS_MM_RW;
 		break;
@@ -479,15 +439,16 @@
 }
 
 static struct zpool_driver zs_zpool_driver = {
-	.type =		"zsmalloc",
-	.owner =	THIS_MODULE,
-	.create =	zs_zpool_create,
-	.destroy =	zs_zpool_destroy,
-	.malloc =	zs_zpool_malloc,
-	.free =		zs_zpool_free,
-	.map =		zs_zpool_map,
-	.unmap =	zs_zpool_unmap,
-	.total_size =	zs_zpool_total_size,
+	.type =			  "zsmalloc",
+	.owner =		  THIS_MODULE,
+	.create =		  zs_zpool_create,
+	.destroy =		  zs_zpool_destroy,
+	.malloc_support_movable = true,
+	.malloc =		  zs_zpool_malloc,
+	.free =			  zs_zpool_free,
+	.map =			  zs_zpool_map,
+	.unmap =		  zs_zpool_unmap,
+	.total_size =		  zs_zpool_total_size,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
@@ -495,7 +456,6 @@
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
-static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
 
 static bool is_zspage_isolated(struct zspage *zspage)
 {
@@ -513,10 +473,6 @@
 	return zspage->inuse;
 }
 
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
-	zspage->inuse = val;
-}
 
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
@@ -618,8 +574,6 @@
 	}
 
 	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
-	if (!zs_stat_root)
-		pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
 }
 
 static void __exit zs_stat_exit(void)
@@ -690,29 +644,15 @@
 
 static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
 {
-	struct dentry *entry;
-
 	if (!zs_stat_root) {
 		pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
 		return;
 	}
 
-	entry = debugfs_create_dir(name, zs_stat_root);
-	if (!entry) {
-		pr_warn("debugfs dir <%s> creation failed\n", name);
-		return;
-	}
-	pool->stat_dentry = entry;
+	pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
 
-	entry = debugfs_create_file("classes", S_IFREG | 0444,
-			pool->stat_dentry, pool,
-			&zs_stats_size_fops);
-	if (!entry) {
-		pr_warn("%s: debugfs file entry <%s> creation failed\n",
-				name, "classes");
-		debugfs_remove_recursive(pool->stat_dentry);
-		pool->stat_dentry = NULL;
-	}
+	debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
+			    &zs_stats_size_fops);
 }
 
 static void zs_pool_stat_destroy(struct zs_pool *pool)
@@ -925,13 +865,7 @@
 
 static unsigned long handle_to_obj(unsigned long handle)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return zh->addr;
-#else
 	return *(unsigned long *)handle;
-#endif
 }
 
 static unsigned long obj_to_head(struct page *page, void *obj)
@@ -945,46 +879,22 @@
 
 static inline int testpin_tag(unsigned long handle)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_is_locked(&zh->lock);
-#else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
 static inline int trypin_tag(unsigned long handle)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_trylock(&zh->lock);
-#else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
-static void pin_tag(unsigned long handle)
+static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_lock(&zh->lock);
-#else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
-static void unpin_tag(unsigned long handle)
+static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_unlock(&zh->lock);
-#else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
 static void reset_page(struct page *page)
@@ -1199,46 +1109,6 @@
 	return zspage;
 }
 
-#ifdef CONFIG_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-	/*
-	 * Make sure we don't leak memory if a cpu UP notification
-	 * and zs_init() race and both call zs_cpu_up() on the same cpu
-	 */
-	if (area->vm)
-		return 0;
-	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
-	if (!area->vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-	if (area->vm)
-		free_vm_area(area->vm);
-	area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
-	area->vm_addr = area->vm->addr;
-	return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm_addr;
-
-	unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
@@ -1318,8 +1188,6 @@
 	/* enable page faults to match kunmap_atomic() return conditions */
 	pagefault_enable();
 }
-
-#endif /* CONFIG_PGTABLE_MAPPING */
 
 static int zs_cpu_prepare(unsigned int cpu)
 {
@@ -1410,7 +1278,7 @@
 	class = pool->size_class[class_idx];
 	off = (class->size * obj_idx) & ~PAGE_MASK;
 
-	area = &get_locked_var(zs_map_area_lock, zs_map_area);
+	area = &get_cpu_var(zs_map_area);
 	area->vm_mm = mm;
 	if (off + class->size <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
@@ -1464,7 +1332,7 @@
 
 		__zs_unmap_object(area, pages, off, class->size);
 	}
-	put_locked_var(zs_map_area_lock, zs_map_area);
+	put_cpu_var(zs_map_area);
 
 	migrate_read_unlock(zspage);
 	unpin_tag(handle);
@@ -1880,26 +1748,50 @@
  */
 static void lock_zspage(struct zspage *zspage)
 {
-	struct page *page = get_first_page(zspage);
+	struct page *curr_page, *page;
 
-	do {
-		lock_page(page);
-	} while ((page = get_next_page(page)) != NULL);
+	/*
+	 * Pages we haven't locked yet can be migrated off the list while we're
+	 * trying to lock them, so we need to be careful and only attempt to
+	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * may no longer belong to the zspage. This means that we may wait for
+	 * the wrong page to unlock, so we must take a reference to the page
+	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 */
+	while (1) {
+		migrate_read_lock(zspage);
+		page = get_first_page(zspage);
+		if (trylock_page(page))
+			break;
+		get_page(page);
+		migrate_read_unlock(zspage);
+		wait_on_page_locked(page);
+		put_page(page);
+	}
+
+	curr_page = page;
+	while ((page = get_next_page(curr_page))) {
+		if (trylock_page(page)) {
+			curr_page = page;
+		} else {
+			get_page(page);
+			migrate_read_unlock(zspage);
+			wait_on_page_locked(page);
+			put_page(page);
+			migrate_read_lock(zspage);
+		}
+	}
+	migrate_read_unlock(zspage);
 }
 
-static struct dentry *zs_mount(struct file_system_type *fs_type,
-				int flags, const char *dev_name, void *data)
+static int zs_init_fs_context(struct fs_context *fc)
 {
-	static const struct dentry_operations ops = {
-		.d_dname = simple_dname,
-	};
-
-	return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
+	return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
 }
 
 static struct file_system_type zsmalloc_fs = {
 	.name		= "zsmalloc",
-	.mount		= zs_mount,
+	.init_fs_context = zs_init_fs_context,
 	.kill_sb	= kill_anon_super,
 };
 
1905 | 1797 | |
---|
.. | .. |
---|
1924 | 1816 | rwlock_init(&zspage->lock); |
---|
1925 | 1817 | } |
---|
1926 | 1818 | |
---|
1927 | | -static void migrate_read_lock(struct zspage *zspage) |
---|
| 1819 | +static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock) |
---|
1928 | 1820 | { |
---|
1929 | 1821 | read_lock(&zspage->lock); |
---|
1930 | 1822 | } |
---|
1931 | 1823 | |
---|
1932 | | -static void migrate_read_unlock(struct zspage *zspage) |
---|
| 1824 | +static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock) |
---|
1933 | 1825 | { |
---|
1934 | 1826 | read_unlock(&zspage->lock); |
---|
1935 | 1827 | } |
---|