From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/mm/page_io.c | 181 +++++++++++++++++++++------------------------ 1 files changed, 84 insertions(+), 97 deletions(-) diff --git a/kernel/mm/page_io.c b/kernel/mm/page_io.c index 295d9f8..04b3a7e 100644 --- a/kernel/mm/page_io.c +++ b/kernel/mm/page_io.c @@ -25,25 +25,23 @@ #include <linux/psi.h> #include <linux/uio.h> #include <linux/sched/task.h> -#include <asm/pgtable.h> +#include <trace/hooks/mm.h> static struct bio *get_swap_bio(gfp_t gfp_flags, struct page *page, bio_end_io_t end_io) { - int i, nr = hpage_nr_pages(page); struct bio *bio; - bio = bio_alloc(gfp_flags, nr); + bio = bio_alloc(gfp_flags, 1); if (bio) { struct block_device *bdev; bio->bi_iter.bi_sector = map_swap_page(page, &bdev); bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; - for (i = 0; i < nr; i++) - bio_add_page(bio, page + i, PAGE_SIZE, 0); - VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr); + bio_add_page(bio, page, thp_size(page), 0); } return bio; } @@ -63,62 +61,13 @@ * Also clear PG_reclaim to avoid rotate_reclaimable_page() */ set_page_dirty(page); - pr_alert("Write-error on swap-device (%u:%u:%llu)\n", - MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), - (unsigned long long)bio->bi_iter.bi_sector); + pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), + (unsigned long long)bio->bi_iter.bi_sector); ClearPageReclaim(page); } end_page_writeback(page); bio_put(bio); -} - -static void swap_slot_free_notify(struct page *page) -{ - struct swap_info_struct *sis; - struct gendisk *disk; - swp_entry_t entry; - - /* - * There is no guarantee that the page is in swap cache - the software - * suspend code (at least) uses end_swap_bio_read() against a non- - * swapcache page. So we must check PG_swapcache before proceeding with - * this optimization. - */ - if (unlikely(!PageSwapCache(page))) - return; - - sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) - return; - - /* - * The swap subsystem performs lazy swap slot freeing, - * expecting that the page will be swapped out again. - * So we can avoid an unnecessary write if the page - * isn't redirtied. - * This is good for real swap storage because we can - * reduce unnecessary I/O and enhance wear-leveling - * if an SSD is used as the as swap device. - * But if in-memory swap device (eg zram) is used, - * this causes a duplicated copy between uncompressed - * data in VM-owned memory and compressed data in - * zram-owned memory. So let's free zram-owned memory - * and make the VM-owned decompressed page *dirty*, - * so the page should be swapped out somewhere again if - * we again wish to reclaim it. - */ - disk = sis->bdev->bd_disk; - entry.val = page_private(page); - if (disk->fops->swap_slot_free_notify && - __swap_count(sis, entry) == 1) { - unsigned long offset; - - offset = swp_offset(entry); - - SetPageDirty(page); - disk->fops->swap_slot_free_notify(sis->bdev, - offset); - } } static void end_swap_bio_read(struct bio *bio) @@ -129,20 +78,21 @@ if (bio->bi_status) { SetPageError(page); ClearPageUptodate(page); - pr_alert("Read-error on swap-device (%u:%u:%llu)\n", - MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), - (unsigned long long)bio->bi_iter.bi_sector); + pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n", + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), + (unsigned long long)bio->bi_iter.bi_sector); goto out; } SetPageUptodate(page); - swap_slot_free_notify(page); out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); bio_put(bio); - wake_up_process(waiter); - put_task_struct(waiter); + if (waiter) { + blk_wake_io_task(waiter); + put_task_struct(waiter); + } } int generic_swapfile_activate(struct swap_info_struct *sis, @@ -165,7 +115,7 @@ blocks_per_page = PAGE_SIZE >> blkbits; /* - * Map all the blocks into the extent list. This code doesn't try + * Map all the blocks into the extent tree. This code doesn't try * to be very smart. */ probe_block = 0; @@ -178,8 +128,9 @@ cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + first_block = probe_block; + ret = bmap(inode, &first_block); + if (ret || !first_block) goto bad_bmap; /* @@ -194,9 +145,11 @@ block_in_page++) { sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + ret = bmap(inode, &block); + if (ret || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; @@ -251,6 +204,16 @@ unlock_page(page); goto out; } + /* + * Arch code may have to preserve more data than just the page + * contents, e.g. memory tags. + */ + ret = arch_prepare_to_swap(page); + if (ret) { + set_page_dirty(page); + unlock_page(page); + goto out; + } if (frontswap_store(page) == 0) { set_page_writeback(page); unlock_page(page); @@ -268,8 +231,25 @@ if (unlikely(PageTransHuge(page))) count_vm_event(THP_SWPOUT); #endif - count_vm_events(PSWPOUT, hpage_nr_pages(page)); + count_vm_events(PSWPOUT, thp_nr_pages(page)); } + +#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +static void bio_associate_blkg_from_page(struct bio *bio, struct page *page) +{ + struct cgroup_subsys_state *css; + + if (!page->mem_cgroup) + return; + + rcu_read_lock(); + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + bio_associate_blkg_from_css(bio, css); + rcu_read_unlock(); +} +#else +#define bio_associate_blkg_from_page(bio, page) do { } while (0) +#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */ int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func) @@ -277,9 +257,10 @@ struct bio *bio; int ret; struct swap_info_struct *sis = page_swap_info(page); + bool skip = false; VM_BUG_ON_PAGE(!PageSwapCache(page), page); - if (sis->flags & SWP_FILE) { + if (data_race(sis->flags & SWP_FS_OPS)) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -290,7 +271,7 @@ }; struct iov_iter from; - iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE); + iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); @@ -298,6 +279,7 @@ unlock_page(page); ret = mapping->a_ops->direct_IO(&kiocb, &from); if (ret == PAGE_SIZE) { + trace_android_vh_count_pswpout(sis); count_vm_event(PSWPOUT); ret = 0; } else { @@ -320,29 +302,30 @@ return ret; } - ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), - page, wbc); + ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); if (!ret) { - count_swpout_vm_event(page); + trace_android_vh_count_swpout_vm_event(sis, page, &skip); + if (!skip) + count_swpout_vm_event(page); return 0; } - ret = 0; bio = get_swap_bio(GFP_NOIO, page, end_write_func); if (bio == NULL) { set_page_dirty(page); unlock_page(page); - ret = -ENOMEM; - goto out; + return -ENOMEM; } bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); - bio_associate_blkcg_from_page(bio, page); - count_swpout_vm_event(page); + bio_associate_blkg_from_page(bio, page); + trace_android_vh_count_swpout_vm_event(sis, page, &skip); + if (!skip) + count_swpout_vm_event(page); set_page_writeback(page); unlock_page(page); submit_bio(bio); -out: - return ret; + + return 0; } int swap_readpage(struct page *page, bool synchronous) @@ -371,25 +354,25 @@ goto out; } - if (sis->flags & SWP_FILE) { + if (data_race(sis->flags & SWP_FS_OPS)) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; ret = mapping->a_ops->readpage(swap_file, page); - if (!ret) + if (!ret) { + trace_android_vh_count_pswpin(sis); count_vm_event(PSWPIN); + } goto out; } - ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page); - if (!ret) { - if (trylock_page(page)) { - swap_slot_free_notify(page); - unlock_page(page); + if (sis->flags & SWP_SYNCHRONOUS_IO) { + ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); + if (!ret) { + trace_android_vh_count_pswpin(sis); + count_vm_event(PSWPIN); + goto out; } - - count_vm_event(PSWPIN); - goto out; } ret = 0; @@ -404,9 +387,13 @@ * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. */ - get_task_struct(current); - bio->bi_private = current; bio_set_op_attrs(bio, REQ_OP_READ, 0); + if (synchronous) { + bio->bi_opf |= REQ_HIPRI; + get_task_struct(current); + bio->bi_private = current; + } + trace_android_vh_count_pswpin(sis); count_vm_event(PSWPIN); bio_get(bio); qc = submit_bio(bio); @@ -415,8 +402,8 @@ if (!READ_ONCE(bio->bi_private)) break; - if (!blk_poll(disk->queue, qc)) - break; + if (!blk_poll(disk->queue, qc, true)) + blk_io_schedule(); } __set_current_state(TASK_RUNNING); bio_put(bio); @@ -430,7 +417,7 @@ { struct swap_info_struct *sis = page_swap_info(page); - if (sis->flags & SWP_FILE) { + if (data_race(sis->flags & SWP_FS_OPS)) { struct address_space *mapping = sis->swap_file->f_mapping; VM_BUG_ON_PAGE(!PageSwapCache(page), page); -- Gitblit v1.6.2