2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/mm/zsmalloc.c
@@ -39,8 +39,8 @@
 #include <linux/highmem.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/pgtable.h>
 #include <asm/tlbflush.h>
-#include <asm/pgtable.h>
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/vmalloc.h>
@@ -52,6 +52,7 @@
 #include <linux/zsmalloc.h>
 #include <linux/zpool.h>
 #include <linux/mount.h>
+#include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
 #include <linux/wait.h>
 #include <linux/pagemap.h>
@@ -78,7 +79,7 @@

 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
- * as single (unsigned long) handle value.
+ * a single (unsigned long) handle value.
 *
 * Note that object index <obj_idx> starts from 0.
 *
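
The comment this hunk fixes describes zsmalloc's handle encoding. As a rough illustration (a sketch written from memory, not part of this patch; OBJ_INDEX_BITS, OBJ_INDEX_MASK, and OBJ_TAG_BITS are the constants defined earlier in this file), the (<PFN>, <obj_idx>) pair becomes one unsigned long like so:

    /* Sketch only: pack a page frame number and an object index into a
     * single handle-sized value, keeping the low tag bit(s) free for
     * HANDLE_PIN_BIT used by pin_tag()/unpin_tag() below. */
    static unsigned long location_to_obj_sketch(unsigned long pfn,
                                                unsigned int obj_idx)
    {
            unsigned long obj;

            obj = pfn << OBJ_INDEX_BITS;            /* PFN in the high bits */
            obj |= obj_idx & OBJ_INDEX_MASK;        /* index in the low bits */
            obj <<= OBJ_TAG_BITS;                   /* room for the pin bit */
            return obj;
    }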
@@ -292,11 +293,7 @@
 };

 struct mapping_area {
-#ifdef CONFIG_PGTABLE_MAPPING
-	struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
 	char *vm_buf; /* copy buffer for objects that span pages */
-#endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
@@ -350,7 +347,7 @@
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
 	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }

 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -361,7 +358,7 @@
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
 	return kmem_cache_alloc(pool->zspage_cachep,
-			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }

 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
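
Both hunks above apply the same rule: handles and zspage structs come from kmem caches, and slab memory can never be highmem, movable, or CMA-backed, so those placement modifiers are stripped from the caller's gfp mask before calling into slab. Note that __GFP_CMA is an Android/vendor gfp flag, not an upstream one. The pattern, as a sketch:

    /* Sketch: clear page-placement modifiers that are meaningless (or
     * harmful) for slab allocations. __GFP_CMA exists only in kernels
     * that carry the vendor CMA gfp flag. */
    static inline gfp_t strip_placement_flags(gfp_t gfp)
    {
            return gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_CMA);
    }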
@@ -423,7 +420,7 @@
 	case ZPOOL_MM_WO:
 		zs_mm = ZS_MM_WO;
 		break;
-	case ZPOOL_MM_RW: /* fallthru */
+	case ZPOOL_MM_RW:
 	default:
 		zs_mm = ZS_MM_RW;
 		break;
@@ -442,15 +439,16 @@
 }

 static struct zpool_driver zs_zpool_driver = {
-	.type =		"zsmalloc",
-	.owner =	THIS_MODULE,
-	.create =	zs_zpool_create,
-	.destroy =	zs_zpool_destroy,
-	.malloc =	zs_zpool_malloc,
-	.free =		zs_zpool_free,
-	.map =		zs_zpool_map,
-	.unmap =	zs_zpool_unmap,
-	.total_size =	zs_zpool_total_size,
+	.type =			  "zsmalloc",
+	.owner =		  THIS_MODULE,
+	.create =		  zs_zpool_create,
+	.destroy =		  zs_zpool_destroy,
+	.malloc_support_movable = true,
+	.malloc =		  zs_zpool_malloc,
+	.free =			  zs_zpool_free,
+	.map =			  zs_zpool_map,
+	.unmap =		  zs_zpool_unmap,
+	.total_size =		  zs_zpool_total_size,
 };

 MODULE_ALIAS("zpool-zsmalloc");
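
The new .malloc_support_movable field advertises that zsmalloc can back allocations with movable pages. A hedged sketch of how a zpool user such as zswap can consume it, with zpool_malloc_support_movable() as the accessor the zpool API pairs with this field (quoted from memory):

    /* Sketch: only pass __GFP_HIGHMEM|__GFP_MOVABLE down to backends
     * that declared they can handle movable allocations. */
    unsigned long handle;
    size_t len = obj_len;   /* hypothetical object size */
    gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
    int ret;

    if (zpool_malloc_support_movable(zpool))
            gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
    ret = zpool_malloc(zpool, len, gfp, &handle);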
@@ -475,10 +473,6 @@
 	return zspage->inuse;
 }

-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
-	zspage->inuse = val;
-}

 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
@@ -580,8 +574,6 @@
 	}

 	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
-	if (!zs_stat_root)
-		pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
 }

 static void __exit zs_stat_exit(void)
@@ -652,29 +644,15 @@

 static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
 {
-	struct dentry *entry;
-
 	if (!zs_stat_root) {
 		pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
 		return;
 	}

-	entry = debugfs_create_dir(name, zs_stat_root);
-	if (!entry) {
-		pr_warn("debugfs dir <%s> creation failed\n", name);
-		return;
-	}
-	pool->stat_dentry = entry;
+	pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);

-	entry = debugfs_create_file("classes", S_IFREG | 0444,
-			pool->stat_dentry, pool,
-			&zs_stats_size_fops);
-	if (!entry) {
-		pr_warn("%s: debugfs file entry <%s> creation failed\n",
-				name, "classes");
-		debugfs_remove_recursive(pool->stat_dentry);
-		pool->stat_dentry = NULL;
-	}
+	debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
+			    &zs_stats_size_fops);
 }

 static void zs_pool_stat_destroy(struct zs_pool *pool)
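
This hunk (and the zs_stat_init one above) follows the upstream debugfs convention: callers should not check debugfs return values. debugfs_create_dir() returns an ERR_PTR on failure, and the create-file helpers accept such a parent and quietly do nothing, so the warn-and-unwind paths were dead weight. A minimal sketch of the resulting idiom (the "mydrv"/counter names are hypothetical):

    /* Sketch: create debugfs entries and ignore the results; an error
     * parent simply turns the children into no-ops. */
    static u32 counter;     /* hypothetical stat */
    struct dentry *dir = debugfs_create_dir("mydrv", NULL);

    debugfs_create_u32("counter", 0444, dir, &counter);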
@@ -909,12 +887,12 @@
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
 }

-static void pin_tag(unsigned long handle)
+static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
 }

-static void unpin_tag(unsigned long handle)
+static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
 }
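
__acquires()/__releases() here (and on the migrate_read_lock()/migrate_read_unlock() pair at the end of this patch) are sparse context annotations, added so `make C=1` can track that these helpers take and drop a lock. They compile away outside sparse; roughly, from <linux/compiler_types.h> (quoted from memory):

    #ifdef __CHECKER__
    # define __acquires(x)  __attribute__((context(x, 0, 1)))
    # define __releases(x)  __attribute__((context(x, 1, 0)))
    #else
    # define __acquires(x)
    # define __releases(x)
    #endif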
@@ -1131,46 +1109,6 @@
 	return zspage;
 }

-#ifdef CONFIG_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-	/*
-	 * Make sure we don't leak memory if a cpu UP notification
-	 * and zs_init() race and both call zs_cpu_up() on the same cpu
-	 */
-	if (area->vm)
-		return 0;
-	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
-	if (!area->vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-	if (area->vm)
-		free_vm_area(area->vm);
-	area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
-	area->vm_addr = area->vm->addr;
-	return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm_addr;
-
-	unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
12501188 /* enable page faults to match kunmap_atomic() return conditions */
12511189 pagefault_enable();
12521190 }
1253
-
1254
-#endif /* CONFIG_PGTABLE_MAPPING */
12551191
12561192 static int zs_cpu_prepare(unsigned int cpu)
12571193 {
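
These two hunks drop the CONFIG_PGTABLE_MAPPING variant, which mapped an object spanning two pages by wiring both pages into a preallocated vm area; upstream removed it together with alloc_vm_area(). The surviving path assembles spanning objects in a per-cpu copy buffer instead. A sketch of the read side of that copy path (mirroring __zs_map_object's read-mode behaviour from memory, simplified):

    /* Sketch: an object that straddles a page boundary is memcpy'd
     * piecewise into the per-cpu vm_buf, then served from there. */
    static void *copy_map_sketch(struct mapping_area *area,
                                 struct page *pages[2], int off, int size)
    {
            int sizes[2];
            char *addr;

            sizes[0] = PAGE_SIZE - off;     /* bytes taken from page 0 */
            sizes[1] = size - sizes[0];     /* remainder from page 1 */

            addr = kmap_atomic(pages[0]);
            memcpy(area->vm_buf, addr + off, sizes[0]);
            kunmap_atomic(addr);

            addr = kmap_atomic(pages[1]);
            memcpy(area->vm_buf + sizes[0], addr, sizes[1]);
            kunmap_atomic(addr);

            return area->vm_buf;
    }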
@@ -1812,26 +1748,50 @@
  */
 static void lock_zspage(struct zspage *zspage)
 {
-	struct page *page = get_first_page(zspage);
+	struct page *curr_page, *page;

-	do {
-		lock_page(page);
-	} while ((page = get_next_page(page)) != NULL);
+	/*
+	 * Pages we haven't locked yet can be migrated off the list while we're
+	 * trying to lock them, so we need to be careful and only attempt to
+	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * may no longer belong to the zspage. This means that we may wait for
+	 * the wrong page to unlock, so we must take a reference to the page
+	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 */
+	while (1) {
+		migrate_read_lock(zspage);
+		page = get_first_page(zspage);
+		if (trylock_page(page))
+			break;
+		get_page(page);
+		migrate_read_unlock(zspage);
+		wait_on_page_locked(page);
+		put_page(page);
+	}
+
+	curr_page = page;
+	while ((page = get_next_page(curr_page))) {
+		if (trylock_page(page)) {
+			curr_page = page;
+		} else {
+			get_page(page);
+			migrate_read_unlock(zspage);
+			wait_on_page_locked(page);
+			put_page(page);
+			migrate_read_lock(zspage);
+		}
+	}
+	migrate_read_unlock(zspage);
 }

-static struct dentry *zs_mount(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *data)
+static int zs_init_fs_context(struct fs_context *fc)
 {
-	static const struct dentry_operations ops = {
-		.d_dname = simple_dname,
-	};
-
-	return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
+	return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
 }

 static struct file_system_type zsmalloc_fs = {
 	.name		= "zsmalloc",
-	.mount		= zs_mount,
+	.init_fs_context = zs_init_fs_context,
 	.kill_sb	= kill_anon_super,
 };

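Two independent changes in this hunk. First, lock_zspage() now only attempts to lock pages under migrate_read_lock() and pins a page with get_page() before waiting on it, since an unlocked page can be migrated off the zspage mid-walk and the old loop could end up waiting on a page that no longer belongs to it. Second, the mount hook is converted to the fs_context API: init_pseudo() replaces mount_pseudo() and returns the allocated pseudo_fs_context (or NULL), hence the `? 0 : -ENOMEM`. The conversion also deletes the simple_dname dentry_operations boilerplate, which the helper renders unnecessary. zsmalloc needs no customization, but for drivers that do, the pattern looks roughly like this (EXAMPLE_MAGIC and example_dops are hypothetical):

    static int example_init_fs_context(struct fs_context *fc)
    {
            struct pseudo_fs_context *ctx = init_pseudo(fc, EXAMPLE_MAGIC);

            if (!ctx)
                    return -ENOMEM;
            ctx->dops = &example_dops;      /* hypothetical dentry_operations */
            return 0;
    }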
@@ -1856,12 +1816,12 @@
 	rwlock_init(&zspage->lock);
 }

-static void migrate_read_lock(struct zspage *zspage)
+static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
 {
 	read_lock(&zspage->lock);
 }

-static void migrate_read_unlock(struct zspage *zspage)
+static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
 {
 	read_unlock(&zspage->lock);
 }