2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/mm/zsmalloc.c
@@ -39,8 +39,8 @@
 #include <linux/highmem.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/pgtable.h>
 #include <asm/tlbflush.h>
-#include <asm/pgtable.h>
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/vmalloc.h>
@@ -52,11 +52,11 @@
 #include <linux/zsmalloc.h>
 #include <linux/zpool.h>
 #include <linux/mount.h>
+#include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
 #include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
-#include <linux/locallock.h>
 
 #define ZSPAGE_MAGIC	0x58
 
@@ -74,25 +74,12 @@
  */
 #define ZS_MAX_ZSPAGE_ORDER	2
 #define ZS_MAX_PAGES_PER_ZSPAGE	(_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
+
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-struct zsmalloc_handle {
-	unsigned long addr;
-	struct mutex lock;
-};
-
-#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
-
-#else
-
-#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
-#endif
 
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
- * as single (unsigned long) handle value.
+ * a single (unsigned long) handle value.
  *
  * Note that object index <obj_idx> starts from 0.
  *
@@ -306,11 +293,7 @@
 };
 
 struct mapping_area {
-#ifdef CONFIG_PGTABLE_MAPPING
-	struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
 	char *vm_buf; /* copy buffer for objects that span pages */
-#endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
@@ -339,7 +322,7 @@
 
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
					0, 0, NULL);
 	if (!pool->handle_cachep)
 		return 1;
@@ -363,26 +346,9 @@
 
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
-	void *p;
-
-	p = kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
-#ifdef CONFIG_PREEMPT_RT_FULL
-	if (p) {
-		struct zsmalloc_handle *zh = p;
-
-		mutex_init(&zh->lock);
-	}
-#endif
-	return (unsigned long)p;
+	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
-{
-	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
-}
-#endif
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 {
@@ -392,7 +358,7 @@
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
 	return kmem_cache_alloc(pool->zspage_cachep,
-			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
@@ -402,18 +368,12 @@
 
 static void record_obj(unsigned long handle, unsigned long obj)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	WRITE_ONCE(zh->addr, obj);
-#else
 	/*
 	 * lsb of @obj represents handle lock while other bits
 	 * represent object value the handle is pointing so
 	 * updating shouldn't do store tearing.
 	 */
 	WRITE_ONCE(*(unsigned long *)handle, obj);
-#endif
 }
 
 /* zpool driver */
@@ -460,7 +420,7 @@
 	case ZPOOL_MM_WO:
 		zs_mm = ZS_MM_WO;
 		break;
-	case ZPOOL_MM_RW: /* fallthru */
+	case ZPOOL_MM_RW:
 	default:
 		zs_mm = ZS_MM_RW;
 		break;
@@ -479,15 +439,16 @@
 }
 
 static struct zpool_driver zs_zpool_driver = {
-	.type =		"zsmalloc",
-	.owner =	THIS_MODULE,
-	.create =	zs_zpool_create,
-	.destroy =	zs_zpool_destroy,
-	.malloc =	zs_zpool_malloc,
-	.free =		zs_zpool_free,
-	.map =		zs_zpool_map,
-	.unmap =	zs_zpool_unmap,
-	.total_size =	zs_zpool_total_size,
+	.type =			  "zsmalloc",
+	.owner =		  THIS_MODULE,
+	.create =		  zs_zpool_create,
+	.destroy =		  zs_zpool_destroy,
+	.malloc_support_movable = true,
+	.malloc =		  zs_zpool_malloc,
+	.free =			  zs_zpool_free,
+	.map =			  zs_zpool_map,
+	.unmap =		  zs_zpool_unmap,
+	.total_size =		  zs_zpool_total_size,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
@@ -495,7 +456,6 @@
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
-static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
 
 static bool is_zspage_isolated(struct zspage *zspage)
 {
@@ -513,10 +473,6 @@
 	return zspage->inuse;
 }
 
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
-	zspage->inuse = val;
-}
 
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
@@ -618,8 +574,6 @@
 	}
 
 	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
-	if (!zs_stat_root)
-		pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
 }
 
 static void __exit zs_stat_exit(void)
@@ -690,29 +644,15 @@
 
 static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
 {
-	struct dentry *entry;
-
 	if (!zs_stat_root) {
 		pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
 		return;
 	}
 
-	entry = debugfs_create_dir(name, zs_stat_root);
-	if (!entry) {
-		pr_warn("debugfs dir <%s> creation failed\n", name);
-		return;
-	}
-	pool->stat_dentry = entry;
+	pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
 
-	entry = debugfs_create_file("classes", S_IFREG | 0444,
-			pool->stat_dentry, pool,
-			&zs_stats_size_fops);
-	if (!entry) {
-		pr_warn("%s: debugfs file entry <%s> creation failed\n",
-				name, "classes");
-		debugfs_remove_recursive(pool->stat_dentry);
-		pool->stat_dentry = NULL;
-	}
+	debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
+			    &zs_stats_size_fops);
 }
 
 static void zs_pool_stat_destroy(struct zs_pool *pool)
@@ -925,13 +865,7 @@
 
 static unsigned long handle_to_obj(unsigned long handle)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return zh->addr;
-#else
 	return *(unsigned long *)handle;
-#endif
 }
 
 static unsigned long obj_to_head(struct page *page, void *obj)
@@ -945,46 +879,22 @@
 
 static inline int testpin_tag(unsigned long handle)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_is_locked(&zh->lock);
-#else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
 static inline int trypin_tag(unsigned long handle)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_trylock(&zh->lock);
-#else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
-static void pin_tag(unsigned long handle)
+static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_lock(&zh->lock);
-#else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
-static void unpin_tag(unsigned long handle)
+static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
-	return mutex_unlock(&zh->lock);
-#else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
 }
 
 static void reset_page(struct page *page)
@@ -1199,46 +1109,6 @@
 	return zspage;
 }
 
-#ifdef CONFIG_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-	/*
-	 * Make sure we don't leak memory if a cpu UP notification
-	 * and zs_init() race and both call zs_cpu_up() on the same cpu
-	 */
-	if (area->vm)
-		return 0;
-	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
-	if (!area->vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-	if (area->vm)
-		free_vm_area(area->vm);
-	area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
-	area->vm_addr = area->vm->addr;
-	return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm_addr;
-
-	unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
@@ -1318,8 +1188,6 @@
 	/* enable page faults to match kunmap_atomic() return conditions */
 	pagefault_enable();
 }
-
-#endif /* CONFIG_PGTABLE_MAPPING */
 
 static int zs_cpu_prepare(unsigned int cpu)
 {
@@ -1410,7 +1278,7 @@
 	class = pool->size_class[class_idx];
 	off = (class->size * obj_idx) & ~PAGE_MASK;
 
-	area = &get_locked_var(zs_map_area_lock, zs_map_area);
+	area = &get_cpu_var(zs_map_area);
 	area->vm_mm = mm;
 	if (off + class->size <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
@@ -1464,7 +1332,7 @@
 
 		__zs_unmap_object(area, pages, off, class->size);
 	}
-	put_locked_var(zs_map_area_lock, zs_map_area);
+	put_cpu_var(zs_map_area);
 
 	migrate_read_unlock(zspage);
 	unpin_tag(handle);
@@ -1880,26 +1748,50 @@
  */
 static void lock_zspage(struct zspage *zspage)
 {
-	struct page *page = get_first_page(zspage);
+	struct page *curr_page, *page;
 
-	do {
-		lock_page(page);
-	} while ((page = get_next_page(page)) != NULL);
+	/*
+	 * Pages we haven't locked yet can be migrated off the list while we're
+	 * trying to lock them, so we need to be careful and only attempt to
+	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * may no longer belong to the zspage. This means that we may wait for
+	 * the wrong page to unlock, so we must take a reference to the page
+	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 */
+	while (1) {
+		migrate_read_lock(zspage);
+		page = get_first_page(zspage);
+		if (trylock_page(page))
+			break;
+		get_page(page);
+		migrate_read_unlock(zspage);
+		wait_on_page_locked(page);
+		put_page(page);
+	}
+
+	curr_page = page;
+	while ((page = get_next_page(curr_page))) {
+		if (trylock_page(page)) {
+			curr_page = page;
+		} else {
+			get_page(page);
+			migrate_read_unlock(zspage);
+			wait_on_page_locked(page);
+			put_page(page);
+			migrate_read_lock(zspage);
+		}
+	}
+	migrate_read_unlock(zspage);
 }
 
-static struct dentry *zs_mount(struct file_system_type *fs_type,
-			int flags, const char *dev_name, void *data)
+static int zs_init_fs_context(struct fs_context *fc)
 {
-	static const struct dentry_operations ops = {
-		.d_dname = simple_dname,
-	};
-
-	return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
+	return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
 }
 
 static struct file_system_type zsmalloc_fs = {
 	.name		= "zsmalloc",
-	.mount		= zs_mount,
+	.init_fs_context = zs_init_fs_context,
 	.kill_sb	= kill_anon_super,
 };
 
@@ -1924,12 +1816,12 @@
 	rwlock_init(&zspage->lock);
 }
 
-static void migrate_read_lock(struct zspage *zspage)
+static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
 {
 	read_lock(&zspage->lock);
 }
 
-static void migrate_read_unlock(struct zspage *zspage)
+static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
 {
 	read_unlock(&zspage->lock);
 }