hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/block/zram/zram_drv.c
....@@ -33,6 +33,7 @@
3333 #include <linux/sysfs.h>
3434 #include <linux/debugfs.h>
3535 #include <linux/cpuhotplug.h>
36
+#include <linux/part_stat.h>
3637
3738 #include "zram_drv.h"
3839
....@@ -41,7 +42,7 @@
4142 static DEFINE_MUTEX(zram_index_mutex);
4243
4344 static int zram_major;
44
-static const char *default_compressor = "lzo";
45
+static const char *default_compressor = "lzo-rle";
4546
4647 /* Module params (documentation at end) */
4748 static unsigned int num_devices = 1;
....@@ -50,6 +51,9 @@
5051 * uncompressed in memory.
5152 */
5253 static size_t huge_class_size;
54
+
55
+static const struct block_device_operations zram_devops;
56
+static const struct block_device_operations zram_wb_devops;
5357
5458 static void zram_free_page(struct zram *zram, size_t index);
5559 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
....@@ -207,14 +211,17 @@
207211
208212 static bool page_same_filled(void *ptr, unsigned long *element)
209213 {
210
- unsigned int pos;
211214 unsigned long *page;
212215 unsigned long val;
216
+ unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
213217
214218 page = (unsigned long *)ptr;
215219 val = page[0];
216220
217
- for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
221
+ if (val != page[last_pos])
222
+ return false;
223
+
224
+ for (pos = 1; pos < last_pos; pos++) {
218225 if (val != page[pos])
219226 return false;
220227 }
....@@ -290,18 +297,8 @@
290297 struct zram *zram = dev_to_zram(dev);
291298 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
292299 int index;
293
- char mode_buf[8];
294
- ssize_t sz;
295300
296
- sz = strscpy(mode_buf, buf, sizeof(mode_buf));
297
- if (sz <= 0)
298
- return -EINVAL;
299
-
300
- /* ignore trailing new line */
301
- if (mode_buf[sz - 1] == '\n')
302
- mode_buf[sz - 1] = 0x00;
303
-
304
- if (strcmp(mode_buf, "all"))
301
+ if (!sysfs_streq(buf, "all"))
305302 return -EINVAL;
306303
307304 down_read(&zram->init_lock);
....@@ -414,8 +411,7 @@
414411 zram->backing_dev = NULL;
415412 zram->old_block_size = 0;
416413 zram->bdev = NULL;
417
- zram->disk->queue->backing_dev_info->capabilities |=
418
- BDI_CAP_SYNCHRONOUS_IO;
414
+ zram->disk->fops = &zram_devops;
419415 kvfree(zram->bitmap);
420416 zram->bitmap = NULL;
421417 }
....@@ -481,7 +477,7 @@
481477 if (sz > 0 && file_name[sz - 1] == '\n')
482478 file_name[sz - 1] = 0x00;
483479
484
- backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
480
+ backing_dev = filp_open_block(file_name, O_RDWR|O_LARGEFILE, 0);
485481 if (IS_ERR(backing_dev)) {
486482 err = PTR_ERR(backing_dev);
487483 backing_dev = NULL;
....@@ -497,9 +493,10 @@
497493 goto out;
498494 }
499495
500
- bdev = bdgrab(I_BDEV(inode));
501
- err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
502
- if (err < 0) {
496
+ bdev = blkdev_get_by_dev(inode->i_rdev,
497
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
498
+ if (IS_ERR(bdev)) {
499
+ err = PTR_ERR(bdev);
503500 bdev = NULL;
504501 goto out;
505502 }
....@@ -534,8 +531,7 @@
534531 * freely but in fact, IO is going on so finally could cause
535532 * use-after-free when the IO is really done.
536533 */
537
- zram->disk->queue->backing_dev_info->capabilities &=
538
- ~BDI_CAP_SYNCHRONOUS_IO;
534
+ zram->disk->fops = &zram_wb_devops;
539535 up_write(&zram->init_lock);
540536
541537 pr_info("setup backing device %s\n", file_name);
....@@ -624,38 +620,41 @@
624620 return 1;
625621 }
626622
623
+#define PAGE_WB_SIG "page_index="
624
+
625
+#define PAGE_WRITEBACK 0
627626 #define HUGE_WRITEBACK 1
628627 #define IDLE_WRITEBACK 2
628
+
629629
630630 static ssize_t writeback_store(struct device *dev,
631631 struct device_attribute *attr, const char *buf, size_t len)
632632 {
633633 struct zram *zram = dev_to_zram(dev);
634634 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
635
- unsigned long index;
635
+ unsigned long index = 0;
636636 struct bio bio;
637637 struct bio_vec bio_vec;
638638 struct page *page;
639
- ssize_t ret, sz;
640
- char mode_buf[8];
641
- int mode = -1;
639
+ ssize_t ret = len;
640
+ int mode, err;
642641 unsigned long blk_idx = 0;
643642
644
- sz = strscpy(mode_buf, buf, sizeof(mode_buf));
645
- if (sz <= 0)
646
- return -EINVAL;
647
-
648
- /* ignore trailing newline */
649
- if (mode_buf[sz - 1] == '\n')
650
- mode_buf[sz - 1] = 0x00;
651
-
652
- if (!strcmp(mode_buf, "idle"))
643
+ if (sysfs_streq(buf, "idle"))
653644 mode = IDLE_WRITEBACK;
654
- else if (!strcmp(mode_buf, "huge"))
645
+ else if (sysfs_streq(buf, "huge"))
655646 mode = HUGE_WRITEBACK;
647
+ else {
648
+ if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
649
+ return -EINVAL;
656650
657
- if (mode == -1)
658
- return -EINVAL;
651
+ if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
652
+ index >= nr_pages)
653
+ return -EINVAL;
654
+
655
+ nr_pages = 1;
656
+ mode = PAGE_WRITEBACK;
657
+ }
659658
660659 down_read(&zram->init_lock);
661660 if (!init_done(zram)) {
....@@ -674,7 +673,7 @@
674673 goto release_init_lock;
675674 }
676675
677
- for (index = 0; index < nr_pages; index++) {
676
+ for (; nr_pages != 0; index++, nr_pages--) {
678677 struct bio_vec bvec;
679678
680679 bvec.bv_page = page;
....@@ -739,12 +738,17 @@
739738 * XXX: A single page IO would be inefficient for write
740739 * but it would be not bad as starter.
741740 */
742
- ret = submit_bio_wait(&bio);
743
- if (ret) {
741
+ err = submit_bio_wait(&bio);
742
+ if (err) {
744743 zram_slot_lock(zram, index);
745744 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
746745 zram_clear_flag(zram, index, ZRAM_IDLE);
747746 zram_slot_unlock(zram, index);
747
+ /*
748
+ * Return the last IO error unless every IO
749
+ * succeeded.
750
+ */
751
+ ret = err;
748752 continue;
749753 }
750754
....@@ -782,7 +786,6 @@
782786
783787 if (blk_idx)
784788 free_block_bdev(zram, blk_idx);
785
- ret = len;
786789 __free_page(page);
787790 release_init_lock:
788791 up_read(&zram->init_lock);
....@@ -810,9 +813,9 @@
810813 }
811814
812815 /*
813
- * Block layer want one ->make_request_fn to be active at a time
814
- * so if we use chained IO with parent IO in same context,
815
- * it's a deadlock. To avoid, it, it uses worker thread context.
816
+ * Block layer wants one ->submit_bio to be active at a time, so if we use
817
+ * chained IO with parent IO in the same context, it's a deadlock. To avoid that,
818
+ * use a worker thread context.
816819 */
817820 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
818821 unsigned long entry, struct bio *bio)
....@@ -1233,10 +1236,11 @@
12331236 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
12341237 struct bio *bio, bool partial_io)
12351238 {
1236
- int ret;
1239
+ struct zcomp_strm *zstrm;
12371240 unsigned long handle;
12381241 unsigned int size;
12391242 void *src, *dst;
1243
+ int ret;
12401244
12411245 zram_slot_lock(zram, index);
12421246 if (zram_test_flag(zram, index, ZRAM_WB)) {
....@@ -1267,6 +1271,9 @@
12671271
12681272 size = zram_get_obj_size(zram, index);
12691273
1274
+ if (size != PAGE_SIZE)
1275
+ zstrm = zcomp_stream_get(zram->comp);
1276
+
12701277 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
12711278 if (size == PAGE_SIZE) {
12721279 dst = kmap_atomic(page);
....@@ -1274,8 +1281,6 @@
12741281 kunmap_atomic(dst);
12751282 ret = 0;
12761283 } else {
1277
- struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1278
-
12791284 dst = kmap_atomic(page);
12801285 ret = zcomp_decompress(zstrm, src, size, dst);
12811286 kunmap_atomic(dst);
....@@ -1285,7 +1290,7 @@
12851290 zram_slot_unlock(zram, index);
12861291
12871292 /* Should NEVER happen. Return bio error if it does. */
1288
- if (unlikely(ret))
1293
+ if (WARN_ON(ret))
12891294 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
12901295
12911296 return ret;
....@@ -1380,13 +1385,14 @@
13801385 __GFP_KSWAPD_RECLAIM |
13811386 __GFP_NOWARN |
13821387 __GFP_HIGHMEM |
1383
- __GFP_MOVABLE);
1388
+ __GFP_MOVABLE |
1389
+ __GFP_CMA);
13841390 if (!handle) {
13851391 zcomp_stream_put(zram->comp);
13861392 atomic64_inc(&zram->stats.writestall);
13871393 handle = zs_malloc(zram->mem_pool, comp_len,
13881394 GFP_NOIO | __GFP_HIGHMEM |
1389
- __GFP_MOVABLE);
1395
+ __GFP_MOVABLE | __GFP_CMA);
13901396 if (handle)
13911397 goto compress_again;
13921398 return -ENOMEM;
....@@ -1527,12 +1533,7 @@
15271533 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
15281534 int offset, unsigned int op, struct bio *bio)
15291535 {
1530
- unsigned long start_time = jiffies;
1531
- struct request_queue *q = zram->disk->queue;
15321536 int ret;
1533
-
1534
- generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
1535
- &zram->disk->part0);
15361537
15371538 if (!op_is_write(op)) {
15381539 atomic64_inc(&zram->stats.num_reads);
....@@ -1542,8 +1543,6 @@
15421543 atomic64_inc(&zram->stats.num_writes);
15431544 ret = zram_bvec_write(zram, bvec, index, offset, bio);
15441545 }
1545
-
1546
- generic_end_io_acct(q, op, &zram->disk->part0, start_time);
15471546
15481547 zram_slot_lock(zram, index);
15491548 zram_accessed(zram, index);
....@@ -1565,6 +1564,7 @@
15651564 u32 index;
15661565 struct bio_vec bvec;
15671566 struct bvec_iter iter;
1567
+ unsigned long start_time;
15681568
15691569 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
15701570 offset = (bio->bi_iter.bi_sector &
....@@ -1580,6 +1580,7 @@
15801580 break;
15811581 }
15821582
1583
+ start_time = bio_start_io_acct(bio);
15831584 bio_for_each_segment(bvec, bio, iter) {
15841585 struct bio_vec bv = bvec;
15851586 unsigned int unwritten = bvec.bv_len;
....@@ -1588,8 +1589,10 @@
15881589 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
15891590 unwritten);
15901591 if (zram_bvec_rw(zram, &bv, index, offset,
1591
- bio_op(bio), bio) < 0)
1592
- goto out;
1592
+ bio_op(bio), bio) < 0) {
1593
+ bio->bi_status = BLK_STS_IOERR;
1594
+ break;
1595
+ }
15931596
15941597 bv.bv_offset += bv.bv_len;
15951598 unwritten -= bv.bv_len;
....@@ -1597,20 +1600,16 @@
15971600 update_position(&index, &offset, &bv);
15981601 } while (unwritten);
15991602 }
1600
-
1603
+ bio_end_io_acct(bio, start_time);
16011604 bio_endio(bio);
1602
- return;
1603
-
1604
-out:
1605
- bio_io_error(bio);
16061605 }
16071606
16081607 /*
16091608 * Handler function for all zram I/O requests.
16101609 */
1611
-static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
1610
+static blk_qc_t zram_submit_bio(struct bio *bio)
16121611 {
1613
- struct zram *zram = queue->queuedata;
1612
+ struct zram *zram = bio->bi_disk->private_data;
16141613
16151614 if (!valid_io_request(zram, bio->bi_iter.bi_sector,
16161615 bio->bi_iter.bi_size)) {
....@@ -1650,6 +1649,7 @@
16501649 u32 index;
16511650 struct zram *zram;
16521651 struct bio_vec bv;
1652
+ unsigned long start_time;
16531653
16541654 if (PageTransHuge(page))
16551655 return -ENOTSUPP;
....@@ -1668,7 +1668,9 @@
16681668 bv.bv_len = PAGE_SIZE;
16691669 bv.bv_offset = 0;
16701670
1671
+ start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
16711672 ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1673
+ disk_end_io_acct(bdev->bd_disk, op, start_time);
16721674 out:
16731675 /*
16741676 * If I/O fails, just return error(ie, non-zero) without
....@@ -1760,7 +1762,7 @@
17601762 zram->disksize = disksize;
17611763 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
17621764
1763
- revalidate_disk(zram->disk);
1765
+ revalidate_disk_size(zram->disk, true);
17641766 up_write(&zram->init_lock);
17651767
17661768 return len;
....@@ -1807,7 +1809,7 @@
18071809 /* Make sure all the pending I/O are finished */
18081810 fsync_bdev(bdev);
18091811 zram_reset_device(zram);
1810
- revalidate_disk(zram->disk);
1812
+ revalidate_disk_size(zram->disk, true);
18111813 bdput(bdev);
18121814
18131815 mutex_lock(&bdev->bd_mutex);
....@@ -1834,8 +1836,16 @@
18341836
18351837 static const struct block_device_operations zram_devops = {
18361838 .open = zram_open,
1839
+ .submit_bio = zram_submit_bio,
18371840 .swap_slot_free_notify = zram_slot_free_notify,
18381841 .rw_page = zram_rw_page,
1842
+ .owner = THIS_MODULE
1843
+};
1844
+
1845
+static const struct block_device_operations zram_wb_devops = {
1846
+ .open = zram_open,
1847
+ .submit_bio = zram_submit_bio,
1848
+ .swap_slot_free_notify = zram_slot_free_notify,
18391849 .owner = THIS_MODULE
18401850 };
18411851
....@@ -1912,15 +1922,13 @@
19121922 #ifdef CONFIG_ZRAM_WRITEBACK
19131923 spin_lock_init(&zram->wb_limit_lock);
19141924 #endif
1915
- queue = blk_alloc_queue(GFP_KERNEL);
1925
+ queue = blk_alloc_queue(NUMA_NO_NODE);
19161926 if (!queue) {
19171927 pr_err("Error allocating disk queue for device %d\n",
19181928 device_id);
19191929 ret = -ENOMEM;
19201930 goto out_free_idr;
19211931 }
1922
-
1923
- blk_queue_make_request(queue, zram_make_request);
19241932
19251933 /* gendisk structure */
19261934 zram->disk = alloc_disk(1);
....@@ -1935,7 +1943,6 @@
19351943 zram->disk->first_minor = device_id;
19361944 zram->disk->fops = &zram_devops;
19371945 zram->disk->queue = queue;
1938
- zram->disk->queue->queuedata = zram;
19391946 zram->disk->private_data = zram;
19401947 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
19411948
....@@ -1969,10 +1976,8 @@
19691976 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
19701977 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
19711978
1972
- zram->disk->queue->backing_dev_info->capabilities |=
1973
- (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1974
- disk_to_dev(zram->disk)->groups = zram_disk_attr_groups;
1975
- add_disk(zram->disk);
1979
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
1980
+ device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
19761981
19771982 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
19781983
....@@ -2008,6 +2013,7 @@
20082013 mutex_unlock(&bdev->bd_mutex);
20092014
20102015 zram_debugfs_unregister(zram);
2016
+
20112017 /* Make sure all the pending I/O are finished */
20122018 fsync_bdev(bdev);
20132019 zram_reset_device(zram);