commit bedbef8ad3e75a304af6361af235302bcc61d06b (2024-05-14)
--- a/kernel/drivers/block/zram/zram_drv.c
+++ b/kernel/drivers/block/zram/zram_drv.c
@@ -33,6 +33,7 @@
 #include <linux/sysfs.h>
 #include <linux/debugfs.h>
 #include <linux/cpuhotplug.h>
+#include <linux/part_stat.h>
 
 #include "zram_drv.h"
 
@@ -41,7 +42,7 @@
 static DEFINE_MUTEX(zram_index_mutex);
 
 static int zram_major;
-static const char *default_compressor = "lzo";
+static const char *default_compressor = "lzo-rle";
 
 /* Module params (documentation at end) */
 static unsigned int num_devices = 1;
@@ -51,44 +52,13 @@
  */
 static size_t huge_class_size;
 
+static const struct block_device_operations zram_devops;
+static const struct block_device_operations zram_wb_devops;
+
 static void zram_free_page(struct zram *zram, size_t index);
 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 		u32 index, int offset, struct bio *bio);
 
-
-#ifdef CONFIG_PREEMPT_RT_BASE
-static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
-{
-	size_t index;
-
-	for (index = 0; index < num_pages; index++)
-		spin_lock_init(&zram->table[index].lock);
-}
-
-static int zram_slot_trylock(struct zram *zram, u32 index)
-{
-	int ret;
-
-	ret = spin_trylock(&zram->table[index].lock);
-	if (ret)
-		__set_bit(ZRAM_LOCK, &zram->table[index].value);
-	return ret;
-}
-
-static void zram_slot_lock(struct zram *zram, u32 index)
-{
-	spin_lock(&zram->table[index].lock);
-	__set_bit(ZRAM_LOCK, &zram->table[index].value);
-}
-
-static void zram_slot_unlock(struct zram *zram, u32 index)
-{
-	__clear_bit(ZRAM_LOCK, &zram->table[index].value);
-	spin_unlock(&zram->table[index].lock);
-}
-
-#else
-static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
 
 static int zram_slot_trylock(struct zram *zram, u32 index)
 {
@@ -104,7 +74,6 @@
 {
 	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
 }
-#endif
 
 static inline bool init_done(struct zram *zram)
 {
@@ -242,14 +211,17 @@
 
 static bool page_same_filled(void *ptr, unsigned long *element)
 {
-	unsigned int pos;
 	unsigned long *page;
 	unsigned long val;
+	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
 
 	page = (unsigned long *)ptr;
 	val = page[0];
 
-	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
+	if (val != page[last_pos])
+		return false;
+
+	for (pos = 1; pos < last_pos; pos++) {
 		if (val != page[pos])
 			return false;
 	}
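
Aside: the hunk above speeds up same-filled detection by probing the last word of the page before scanning the body; a page that is not uniformly filled often differs near its end, so the check fails fast. A minimal userspace sketch of the optimized logic (the PAGE_SIZE definition and harness are illustrative, not kernel source; the real function likewise stores the fill value through *element):

#include <stdbool.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL	/* illustrative; the kernel provides this */

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page = ptr;
	unsigned long val = page[0];
	size_t pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	/* Cheap early exit: probe the last word before walking the body. */
	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;
	return true;
}
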
@@ -325,18 +297,8 @@
 	struct zram *zram = dev_to_zram(dev);
 	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 	int index;
-	char mode_buf[8];
-	ssize_t sz;
 
-	sz = strscpy(mode_buf, buf, sizeof(mode_buf));
-	if (sz <= 0)
-		return -EINVAL;
-
-	/* ignore trailing new line */
-	if (mode_buf[sz - 1] == '\n')
-		mode_buf[sz - 1] = 0x00;
-
-	if (strcmp(mode_buf, "all"))
+	if (!sysfs_streq(buf, "all"))
 		return -EINVAL;
 
 	down_read(&zram->init_lock);
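
The switch to sysfs_streq() drops the fixed 8-byte staging buffer (which silently truncated longer input) and the manual newline strip. sysfs_streq() treats a single trailing '\n' as end-of-string on either side, which is exactly what the removed code emulated. A faithful-in-spirit userspace sketch of its semantics (not the kernel's lib/string.c):

#include <stdbool.h>

static bool sysfs_streq_like(const char *s1, const char *s2)
{
	/* Walk the common prefix. */
	while (*s1 && *s1 == *s2) {
		s1++;
		s2++;
	}

	if (*s1 == *s2)
		return true;
	/* Allow exactly one trailing '\n' on either string. */
	if (!*s1 && *s2 == '\n' && !s2[1])
		return true;
	if (*s1 == '\n' && !s1[1] && !*s2)
		return true;
	return false;
}
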
@@ -449,8 +411,7 @@
 	zram->backing_dev = NULL;
 	zram->old_block_size = 0;
 	zram->bdev = NULL;
-	zram->disk->queue->backing_dev_info->capabilities |=
-		BDI_CAP_SYNCHRONOUS_IO;
+	zram->disk->fops = &zram_devops;
 	kvfree(zram->bitmap);
 	zram->bitmap = NULL;
 }
@@ -516,7 +477,7 @@
 	if (sz > 0 && file_name[sz - 1] == '\n')
 		file_name[sz - 1] = 0x00;
 
-	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+	backing_dev = filp_open_block(file_name, O_RDWR|O_LARGEFILE, 0);
 	if (IS_ERR(backing_dev)) {
 		err = PTR_ERR(backing_dev);
 		backing_dev = NULL;
 		goto out;
@@ -532,9 +493,10 @@
 		goto out;
 	}
 
-	bdev = bdgrab(I_BDEV(inode));
-	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
-	if (err < 0) {
+	bdev = blkdev_get_by_dev(inode->i_rdev,
+			FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+	if (IS_ERR(bdev)) {
+		err = PTR_ERR(bdev);
 		bdev = NULL;
 		goto out;
 	}
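
The new open-by-dev_t idiom returns the claimed block_device directly (an ERR_PTR on failure), replacing the bdgrab()/blkdev_get() reference dance that was easy to unbalance on error paths. A hedged sketch contrasting the two shapes (fragment only; "holder" stands in for the zram pointer):

/* Old shape: caller grabs a reference up front; blkdev_get() consumes
 * it, even on failure, and reports errors via an int. */
bdev = bdgrab(I_BDEV(inode));
err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
if (err < 0)
	goto out;	/* reference already consumed by blkdev_get() */

/* New shape: one call looks up the dev_t, takes the reference, and
 * claims the device; failure comes back as an ERR_PTR. */
bdev = blkdev_get_by_dev(inode->i_rdev,
			 FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
if (IS_ERR(bdev))
	return PTR_ERR(bdev);
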
@@ -569,8 +531,7 @@
 	 * freely but in fact, IO is going on so finally could cause
 	 * use-after-free when the IO is really done.
 	 */
-	zram->disk->queue->backing_dev_info->capabilities &=
-		~BDI_CAP_SYNCHRONOUS_IO;
+	zram->disk->fops = &zram_wb_devops;
 	up_write(&zram->init_lock);
 
 	pr_info("setup backing device %s\n", file_name);
@@ -659,38 +620,41 @@
 		return 1;
 }
 
+#define PAGE_WB_SIG "page_index="
+
+#define PAGE_WRITEBACK 0
 #define HUGE_WRITEBACK 1
 #define IDLE_WRITEBACK 2
+
 
 static ssize_t writeback_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct zram *zram = dev_to_zram(dev);
 	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
-	unsigned long index;
+	unsigned long index = 0;
 	struct bio bio;
 	struct bio_vec bio_vec;
 	struct page *page;
-	ssize_t ret, sz;
-	char mode_buf[8];
-	int mode = -1;
+	ssize_t ret = len;
+	int mode, err;
 	unsigned long blk_idx = 0;
 
-	sz = strscpy(mode_buf, buf, sizeof(mode_buf));
-	if (sz <= 0)
-		return -EINVAL;
-
-	/* ignore trailing newline */
-	if (mode_buf[sz - 1] == '\n')
-		mode_buf[sz - 1] = 0x00;
-
-	if (!strcmp(mode_buf, "idle"))
+	if (sysfs_streq(buf, "idle"))
 		mode = IDLE_WRITEBACK;
-	else if (!strcmp(mode_buf, "huge"))
+	else if (sysfs_streq(buf, "huge"))
 		mode = HUGE_WRITEBACK;
+	else {
+		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
+			return -EINVAL;
 
-	if (mode == -1)
-		return -EINVAL;
+		if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
+				index >= nr_pages)
+			return -EINVAL;
+
+		nr_pages = 1;
+		mode = PAGE_WRITEBACK;
+	}
 
 	down_read(&zram->init_lock);
 	if (!init_done(zram)) {
709673 goto release_init_lock;
710674 }
711675
712
- for (index = 0; index < nr_pages; index++) {
676
+ for (; nr_pages != 0; index++, nr_pages--) {
713677 struct bio_vec bvec;
714678
715679 bvec.bv_page = page;
....@@ -774,12 +738,17 @@
774738 * XXX: A single page IO would be inefficient for write
775739 * but it would be not bad as starter.
776740 */
777
- ret = submit_bio_wait(&bio);
778
- if (ret) {
741
+ err = submit_bio_wait(&bio);
742
+ if (err) {
779743 zram_slot_lock(zram, index);
780744 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
781745 zram_clear_flag(zram, index, ZRAM_IDLE);
782746 zram_slot_unlock(zram, index);
747
+ /*
748
+ * Return last IO error unless every IO were
749
+ * not suceeded.
750
+ */
751
+ ret = err;
783752 continue;
784753 }
785754
@@ -817,7 +786,6 @@
 
 	if (blk_idx)
 		free_block_bdev(zram, blk_idx);
-	ret = len;
 	__free_page(page);
 release_init_lock:
 	up_read(&zram->init_lock);
@@ -845,9 +813,9 @@
 }
 
 /*
- * Block layer want one ->make_request_fn to be active at a time
- * so if we use chained IO with parent IO in same context,
- * it's a deadlock. To avoid, it, it uses worker thread context.
+ * The block layer wants one ->submit_bio to be active at a time, so if we
+ * use chained IO with the parent IO in the same context, it's a deadlock.
+ * To avoid that, use a worker thread context.
  */
 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 		unsigned long entry, struct bio *bio)
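
The reworded comment describes the same mechanism as before the rename: read_from_bdev_sync() cannot submit and wait for a backing-device bio from inside zram's own bio-handling context, so it bounces the submission to a workqueue and waits. A condensed sketch of that pattern, simplified from the driver (read_from_bdev_async() is the real helper it defers to; field layout is approximate):

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
	struct bio_vec *bvec;
};

static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);

	/* Runs in worker context, outside the parent bio's submit path. */
	read_from_bdev_async(zw->zram, zw->bvec, zw->entry, zw->bio);
}

static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
			       unsigned long entry, struct bio *bio)
{
	struct zram_work work = {
		.zram = zram, .entry = entry, .bio = bio, .bvec = bvec,
	};

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);		/* wait for the chained IO */
	destroy_work_on_stack(&work.work);

	return 1;
}
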
@@ -1180,8 +1148,6 @@
 #endif
 static DEVICE_ATTR_RO(debug_stat);
 
-
-
 static void zram_meta_free(struct zram *zram, u64 disksize)
 {
 	size_t num_pages = disksize >> PAGE_SHIFT;
@@ -1212,7 +1178,6 @@
 
 	if (!huge_class_size)
 		huge_class_size = zs_huge_class_size(zram->mem_pool);
-	zram_meta_init_table_locks(zram, num_pages);
 	return true;
 }
 
@@ -1271,11 +1236,11 @@
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 				struct bio *bio, bool partial_io)
 {
-	int ret;
+	struct zcomp_strm *zstrm;
 	unsigned long handle;
 	unsigned int size;
 	void *src, *dst;
-	struct zcomp_strm *zstrm;
+	int ret;
 
 	zram_slot_lock(zram, index);
 	if (zram_test_flag(zram, index, ZRAM_WB)) {
@@ -1306,7 +1271,9 @@
 
 	size = zram_get_obj_size(zram, index);
 
-	zstrm = zcomp_stream_get(zram->comp);
+	if (size != PAGE_SIZE)
+		zstrm = zcomp_stream_get(zram->comp);
+
 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
 		dst = kmap_atomic(page);
@@ -1314,17 +1281,16 @@
 		kunmap_atomic(dst);
 		ret = 0;
 	} else {
-
 		dst = kmap_atomic(page);
 		ret = zcomp_decompress(zstrm, src, size, dst);
 		kunmap_atomic(dst);
+		zcomp_stream_put(zram->comp);
 	}
 	zs_unmap_object(zram->mem_pool, handle);
-	zcomp_stream_put(zram->comp);
 	zram_slot_unlock(zram, index);
 
 	/* Should NEVER happen. Return bio error if it does. */
-	if (unlikely(ret))
+	if (WARN_ON(ret))
 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
 
 	return ret;
@@ -1419,13 +1385,14 @@
 				__GFP_KSWAPD_RECLAIM |
 				__GFP_NOWARN |
 				__GFP_HIGHMEM |
-				__GFP_MOVABLE);
+				__GFP_MOVABLE |
+				__GFP_CMA);
 	if (!handle) {
 		zcomp_stream_put(zram->comp);
 		atomic64_inc(&zram->stats.writestall);
 		handle = zs_malloc(zram->mem_pool, comp_len,
 				GFP_NOIO | __GFP_HIGHMEM |
-				__GFP_MOVABLE);
+				__GFP_MOVABLE | __GFP_CMA);
 		if (handle)
 			goto compress_again;
 		return -ENOMEM;
@@ -1566,12 +1533,7 @@
 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 			int offset, unsigned int op, struct bio *bio)
 {
-	unsigned long start_time = jiffies;
-	struct request_queue *q = zram->disk->queue;
 	int ret;
-
-	generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
-			&zram->disk->part0);
 
 	if (!op_is_write(op)) {
 		atomic64_inc(&zram->stats.num_reads);
@@ -1581,8 +1543,6 @@
 		atomic64_inc(&zram->stats.num_writes);
 		ret = zram_bvec_write(zram, bvec, index, offset, bio);
 	}
-
-	generic_end_io_acct(q, op, &zram->disk->part0, start_time);
 
 	zram_slot_lock(zram, index);
 	zram_accessed(zram, index);
@@ -1604,6 +1564,7 @@
 	u32 index;
 	struct bio_vec bvec;
 	struct bvec_iter iter;
+	unsigned long start_time;
 
 	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
 	offset = (bio->bi_iter.bi_sector &
@@ -1619,6 +1580,7 @@
 		break;
 	}
 
+	start_time = bio_start_io_acct(bio);
 	bio_for_each_segment(bvec, bio, iter) {
 		struct bio_vec bv = bvec;
 		unsigned int unwritten = bvec.bv_len;
@@ -1627,8 +1589,10 @@
 			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
 							unwritten);
 			if (zram_bvec_rw(zram, &bv, index, offset,
-					bio_op(bio), bio) < 0)
-				goto out;
+					bio_op(bio), bio) < 0) {
+				bio->bi_status = BLK_STS_IOERR;
+				break;
+			}
 
 			bv.bv_offset += bv.bv_len;
 			unwritten -= bv.bv_len;
@@ -1636,20 +1600,16 @@
 			update_position(&index, &offset, &bv);
 		} while (unwritten);
 	}
-
+	bio_end_io_acct(bio, start_time);
 	bio_endio(bio);
-	return;
-
-out:
-	bio_io_error(bio);
 }
 
 /*
  * Handler function for all zram I/O requests.
  */
-static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t zram_submit_bio(struct bio *bio)
 {
-	struct zram *zram = queue->queuedata;
+	struct zram *zram = bio->bi_disk->private_data;
 
 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
 				bio->bi_iter.bi_size)) {
@@ -1689,6 +1649,7 @@
 	u32 index;
 	struct zram *zram;
 	struct bio_vec bv;
+	unsigned long start_time;
 
 	if (PageTransHuge(page))
 		return -ENOTSUPP;
@@ -1707,7 +1668,9 @@
 	bv.bv_len = PAGE_SIZE;
 	bv.bv_offset = 0;
 
+	start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
 	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
+	disk_end_io_acct(bdev->bd_disk, op, start_time);
 out:
 	/*
 	 * If I/O fails, just return error(ie, non-zero) without
@@ -1799,7 +1762,7 @@
 	zram->disksize = disksize;
 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
 
-	revalidate_disk(zram->disk);
+	revalidate_disk_size(zram->disk, true);
 	up_write(&zram->init_lock);
 
 	return len;
@@ -1846,7 +1809,7 @@
 	/* Make sure all the pending I/O are finished */
 	fsync_bdev(bdev);
 	zram_reset_device(zram);
-	revalidate_disk(zram->disk);
+	revalidate_disk_size(zram->disk, true);
 	bdput(bdev);
 
 	mutex_lock(&bdev->bd_mutex);
@@ -1873,8 +1836,16 @@
 
 static const struct block_device_operations zram_devops = {
 	.open = zram_open,
+	.submit_bio = zram_submit_bio,
 	.swap_slot_free_notify = zram_slot_free_notify,
 	.rw_page = zram_rw_page,
+	.owner = THIS_MODULE
+};
+
+static const struct block_device_operations zram_wb_devops = {
+	.open = zram_open,
+	.submit_bio = zram_submit_bio,
+	.swap_slot_free_notify = zram_slot_free_notify,
 	.owner = THIS_MODULE
 };
 
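
These two ops tables replace the BDI_CAP_SYNCHRONOUS_IO toggling removed earlier in the patch: upstream dropped that BDI capability and instead infers synchronous swap IO from whether the disk's fops provide ->rw_page. Attaching a backing device swaps in zram_wb_devops, which omits .rw_page, so swap falls back to the asynchronous bio path and cannot assume the IO finished synchronously while a writeback to the backing device is still in flight. Roughly what the swap side checks at swapon time (a paraphrase of this era's mm/swapfile.c, not a verbatim copy):

	/*
	 * Paraphrase: SWP_SYNCHRONOUS_IO is set only when the swap
	 * device's fops implement ->rw_page, which zram_wb_devops
	 * deliberately leaves out.
	 */
	if (p->bdev && p->bdev->bd_disk->fops->rw_page)
		p->flags |= SWP_SYNCHRONOUS_IO;
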
@@ -1951,15 +1922,13 @@
 #ifdef CONFIG_ZRAM_WRITEBACK
 	spin_lock_init(&zram->wb_limit_lock);
 #endif
-	queue = blk_alloc_queue(GFP_KERNEL);
+	queue = blk_alloc_queue(NUMA_NO_NODE);
 	if (!queue) {
 		pr_err("Error allocating disk queue for device %d\n",
 			device_id);
 		ret = -ENOMEM;
 		goto out_free_idr;
 	}
-
-	blk_queue_make_request(queue, zram_make_request);
 
 	/* gendisk structure */
 	zram->disk = alloc_disk(1);
@@ -1974,7 +1943,6 @@
 	zram->disk->first_minor = device_id;
 	zram->disk->fops = &zram_devops;
 	zram->disk->queue = queue;
-	zram->disk->queue->queuedata = zram;
 	zram->disk->private_data = zram;
 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
@@ -2008,10 +1976,8 @@
 	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
 		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
 
-	zram->disk->queue->backing_dev_info->capabilities |=
-		(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
-	disk_to_dev(zram->disk)->groups = zram_disk_attr_groups;
-	add_disk(zram->disk);
+	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
+	device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
 
 	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
 
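
Taken together, the block-layer churn in this patch follows a small set of v5.9/v5.10-era API replacements. A cheat sheet, written as a comment since the mapping itself is the point:

/*
 * Old interface (pre-5.9)                New interface used by this patch
 * ------------------------               --------------------------------
 * blk_queue_make_request(q, fn)          block_device_operations.submit_bio
 * queue->queuedata                       bio->bi_disk->private_data
 * generic_start_io_acct() /              bio_start_io_acct() / bio_end_io_acct()
 *   generic_end_io_acct()                  (disk_start_io_acct() for ->rw_page)
 * revalidate_disk(disk)                  revalidate_disk_size(disk, true)
 * BDI_CAP_STABLE_WRITES                  QUEUE_FLAG_STABLE_WRITES
 * add_disk() + manual sysfs groups       device_add_disk(parent, disk, groups)
 */
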
@@ -2047,6 +2013,7 @@
 	mutex_unlock(&bdev->bd_mutex);
 
 	zram_debugfs_unregister(zram);
+
 	/* Make sure all the pending I/O are finished */
 	fsync_bdev(bdev);
 	zram_reset_device(zram);