From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/mm/readahead.c | 383 ++++++++++++++++++++++++++++++++---------------------- 1 files changed, 224 insertions(+), 159 deletions(-) diff --git a/kernel/mm/readahead.c b/kernel/mm/readahead.c index 4e63014..7bb2c3e 100644 --- a/kernel/mm/readahead.c +++ b/kernel/mm/readahead.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * mm/readahead.c - address_space-level file readahead. * @@ -21,8 +22,14 @@ #include <linux/mm_inline.h> #include <linux/blk-cgroup.h> #include <linux/fadvise.h> +#include <linux/sched/mm.h> +#include <trace/hooks/mm.h> #include "internal.h" + +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) +#include <linux/fscrypt.h> +#endif /* * Initialise a struct file's readahead state. Assumes that the caller has @@ -81,6 +88,8 @@ * @data: private data for the callback routine. * * Hides the details of the LRU cache etc from the filesystems. + * + * Returns: %0 on success, error return by @filler otherwise */ int read_cache_pages(struct address_space *mapping, struct list_head *pages, int (*filler)(void *, struct page *), void *data) @@ -110,96 +119,129 @@ EXPORT_SYMBOL(read_cache_pages); -static int read_pages(struct address_space *mapping, struct file *filp, - struct list_head *pages, unsigned int nr_pages, gfp_t gfp) +gfp_t readahead_gfp_mask(struct address_space *x) { + gfp_t mask = mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; + + trace_android_rvh_set_readahead_gfp_mask(&mask); + return mask; +} +EXPORT_SYMBOL_GPL(readahead_gfp_mask); + +static void read_pages(struct readahead_control *rac, struct list_head *pages, + bool skip_page) +{ + const struct address_space_operations *aops = rac->mapping->a_ops; + struct page *page; struct blk_plug plug; - unsigned page_idx; - int ret; + + if (!readahead_count(rac)) + goto out; blk_start_plug(&plug); - if (mapping->a_ops->readpages) { - ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); + if (aops->readahead) { + aops->readahead(rac); + /* Clean up the remaining pages */ + while ((page = readahead_page(rac))) { + unlock_page(page); + put_page(page); + } + } else if (aops->readpages) { + aops->readpages(rac->file, rac->mapping, pages, + readahead_count(rac)); /* Clean up the remaining pages */ put_pages_list(pages); - goto out; + rac->_index += rac->_nr_pages; + rac->_nr_pages = 0; + } else { + while ((page = readahead_page(rac))) { + aops->readpage(rac->file, page); + put_page(page); + } } - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = lru_to_page(pages); - list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) - mapping->a_ops->readpage(filp, page); - put_page(page); - } - ret = 0; - -out: blk_finish_plug(&plug); - return ret; + BUG_ON(!list_empty(pages)); + BUG_ON(readahead_count(rac)); + +out: + if (skip_page) + rac->_index++; } -/* - * __do_page_cache_readahead() actually reads a chunk of disk. It allocates - * the pages first, then submits them for I/O. This avoids the very bad - * behaviour which would occur if page allocations are causing VM writeback. - * We really don't want to intermingle reads and writes like that. +/** + * page_cache_ra_unbounded - Start unchecked readahead. + * @ractl: Readahead control. + * @nr_to_read: The number of pages to read. + * @lookahead_size: Where to start the next readahead. * - * Returns the number of pages requested, or the maximum amount of I/O allowed. + * This function is for filesystems to call when they want to start + * readahead beyond a file's stated i_size. This is almost certainly + * not the function you want to call. Use page_cache_async_readahead() + * or page_cache_sync_readahead() instead. + * + * Context: File is referenced by caller. Mutexes may be held by caller. + * May sleep, but will not reenter filesystem to reclaim memory. */ -unsigned int __do_page_cache_readahead(struct address_space *mapping, - struct file *filp, pgoff_t offset, unsigned long nr_to_read, - unsigned long lookahead_size) +void page_cache_ra_unbounded(struct readahead_control *ractl, + unsigned long nr_to_read, unsigned long lookahead_size) { - struct inode *inode = mapping->host; - struct page *page; - unsigned long end_index; /* The last page we want to read */ + struct address_space *mapping = ractl->mapping; + unsigned long index = readahead_index(ractl); LIST_HEAD(page_pool); - int page_idx; - unsigned int nr_pages = 0; - loff_t isize = i_size_read(inode); gfp_t gfp_mask = readahead_gfp_mask(mapping); + unsigned long i; - if (isize == 0) - goto out; - - end_index = ((isize - 1) >> PAGE_SHIFT); + /* + * Partway through the readahead operation, we will have added + * locked pages to the page cache, but will not yet have submitted + * them for I/O. Adding another page may need to allocate memory, + * which can trigger memory reclaim. Telling the VM we're in + * the middle of a filesystem operation will cause it to not + * touch file-backed pages, preventing a deadlock. Most (all?) + * filesystems already specify __GFP_NOFS in their mapping's + * gfp_mask, but let's be explicit here. + */ + unsigned int nofs = memalloc_nofs_save(); /* * Preallocate as many pages as we will need. */ - for (page_idx = 0; page_idx < nr_to_read; page_idx++) { - pgoff_t page_offset = offset + page_idx; + for (i = 0; i < nr_to_read; i++) { + struct page *page = xa_load(&mapping->i_pages, index + i); - if (page_offset > end_index) - break; + BUG_ON(index + i != ractl->_index + ractl->_nr_pages); - rcu_read_lock(); - page = radix_tree_lookup(&mapping->i_pages, page_offset); - rcu_read_unlock(); - if (page && !radix_tree_exceptional_entry(page)) { + if (page && !xa_is_value(page)) { /* - * Page already present? Kick off the current batch of - * contiguous pages before continuing with the next - * batch. + * Page already present? Kick off the current batch + * of contiguous pages before continuing with the + * next batch. This page may be the one we would + * have intended to mark as Readahead, but we don't + * have a stable reference to this page, and it's + * not worth getting one just for that. */ - if (nr_pages) - read_pages(mapping, filp, &page_pool, nr_pages, - gfp_mask); - nr_pages = 0; + read_pages(ractl, &page_pool, true); continue; } page = __page_cache_alloc(gfp_mask); if (!page) break; - page->index = page_offset; - list_add(&page->lru, &page_pool); - if (page_idx == nr_to_read - lookahead_size) + if (mapping->a_ops->readpages) { + page->index = index + i; + list_add(&page->lru, &page_pool); + } else if (add_to_page_cache_lru(page, mapping, index + i, + gfp_mask) < 0) { + put_page(page); + read_pages(ractl, &page_pool, true); + continue; + } + if (i == nr_to_read - lookahead_size) SetPageReadahead(page); - nr_pages++; + ractl->_nr_pages++; } /* @@ -207,44 +249,88 @@ * uptodate then the caller will launch readpage again, and * will then handle the error. */ - if (nr_pages) - read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask); - BUG_ON(!list_empty(&page_pool)); -out: - return nr_pages; + read_pages(ractl, &page_pool, false); + memalloc_nofs_restore(nofs); +} +EXPORT_SYMBOL_GPL(page_cache_ra_unbounded); + +/* + * do_page_cache_ra() actually reads a chunk of disk. It allocates + * the pages first, then submits them for I/O. This avoids the very bad + * behaviour which would occur if page allocations are causing VM writeback. + * We really don't want to intermingle reads and writes like that. + */ +void do_page_cache_ra(struct readahead_control *ractl, + unsigned long nr_to_read, unsigned long lookahead_size) +{ + struct inode *inode = ractl->mapping->host; + unsigned long index = readahead_index(ractl); + loff_t isize = i_size_read(inode); + pgoff_t end_index; /* The last page we want to read */ + + if (isize == 0) + return; + + end_index = (isize - 1) >> PAGE_SHIFT; + if (index > end_index) + return; + /* Don't read past the page containing the last byte of the file */ + if (nr_to_read > end_index - index) + nr_to_read = end_index - index + 1; + + page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size); } /* * Chunk the readahead into 2 megabyte units, so that we don't pin too much * memory at once. */ -int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read) +void force_page_cache_ra(struct readahead_control *ractl, + struct file_ra_state *ra, unsigned long nr_to_read) { + struct address_space *mapping = ractl->mapping; struct backing_dev_info *bdi = inode_to_bdi(mapping->host); - struct file_ra_state *ra = &filp->f_ra; - unsigned long max_pages; + unsigned long max_pages, index; +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) + bool force_lookahead = false; +#endif - if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) - return -EINVAL; + if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages && + !mapping->a_ops->readahead)) + return; /* * If the request exceeds the readahead window, allow the read to * be up to the optimal hardware IO size */ + index = readahead_index(ractl); max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); - nr_to_read = min(nr_to_read, max_pages); +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) + /* For files with fscrypt enabled, to allow IO and the encryption + * or decryption process to ping-pong, lookahead is forcibly enabled. + */ + if (nr_to_read > max_pages && fscrypt_inode_uses_fs_layer_crypto(mapping->host)) + force_lookahead = true; +#endif + nr_to_read = min_t(unsigned long, nr_to_read, max_pages); while (nr_to_read) { unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE; if (this_chunk > nr_to_read) this_chunk = nr_to_read; - __do_page_cache_readahead(mapping, filp, offset, this_chunk, 0); + ractl->_index = index; +#if defined(CONFIG_ARCH_ROCKCHIP) && defined(CONFIG_NO_GKI) + if (force_lookahead) + do_page_cache_ra(ractl, this_chunk, this_chunk / 2); + else + do_page_cache_ra(ractl, this_chunk, 0); +#else + do_page_cache_ra(ractl, this_chunk, 0); +#endif - offset += this_chunk; + index += this_chunk; nr_to_read -= this_chunk; } - return 0; } /* @@ -272,17 +358,15 @@ * return it as the new window size. */ static unsigned long get_next_ra_size(struct file_ra_state *ra, - unsigned long max) + unsigned long max) { unsigned long cur = ra->size; - unsigned long newsize; if (cur < max / 16) - newsize = 4 * cur; - else - newsize = 2 * cur; - - return min(newsize, max); + return 4 * cur; + if (cur <= max / 2) + return 2 * cur; + return max; } /* @@ -325,21 +409,21 @@ */ /* - * Count contiguously cached pages from @offset-1 to @offset-@max, + * Count contiguously cached pages from @index-1 to @index-@max, * this count is a conservative estimation of * - length of the sequential read sequence, or * - thrashing threshold in memory tight systems */ static pgoff_t count_history_pages(struct address_space *mapping, - pgoff_t offset, unsigned long max) + pgoff_t index, unsigned long max) { pgoff_t head; rcu_read_lock(); - head = page_cache_prev_hole(mapping, offset - 1, max); + head = page_cache_prev_miss(mapping, index - 1, max); rcu_read_unlock(); - return offset - 1 - head; + return index - 1 - head; } /* @@ -347,13 +431,13 @@ */ static int try_context_readahead(struct address_space *mapping, struct file_ra_state *ra, - pgoff_t offset, + pgoff_t index, unsigned long req_size, unsigned long max) { pgoff_t size; - size = count_history_pages(mapping, offset, max); + size = count_history_pages(mapping, index, max); /* * not enough history pages: @@ -366,10 +450,10 @@ * starts from beginning of file: * it is a strong indication of long-run stream (or whole-file-read) */ - if (size >= offset) + if (size >= index) size *= 2; - ra->start = offset; + ra->start = index; ra->size = min(size + req_size, max); ra->async_size = 1; @@ -379,16 +463,15 @@ /* * A minimal readahead algorithm for trivial sequential/random reads. */ -static unsigned long -ondemand_readahead(struct address_space *mapping, - struct file_ra_state *ra, struct file *filp, - bool hit_readahead_marker, pgoff_t offset, - unsigned long req_size) +static void ondemand_readahead(struct readahead_control *ractl, + struct file_ra_state *ra, bool hit_readahead_marker, + unsigned long req_size) { - struct backing_dev_info *bdi = inode_to_bdi(mapping->host); + struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host); unsigned long max_pages = ra->ra_pages; unsigned long add_pages; - pgoff_t prev_offset; + unsigned long index = readahead_index(ractl); + pgoff_t prev_index; /* * If the request exceeds the readahead window, allow the read to @@ -397,18 +480,20 @@ if (req_size > max_pages && bdi->io_pages > max_pages) max_pages = min(req_size, bdi->io_pages); + trace_android_vh_ra_tuning_max_page(ractl, &max_pages); + /* * start of file */ - if (!offset) + if (!index) goto initial_readahead; /* - * It's the expected callback offset, assume sequential access. + * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - if ((offset == (ra->start + ra->size - ra->async_size) || - offset == (ra->start + ra->size))) { + if ((index == (ra->start + ra->size - ra->async_size) || + index == (ra->start + ra->size))) { ra->start += ra->size; ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; @@ -425,14 +510,15 @@ pgoff_t start; rcu_read_lock(); - start = page_cache_next_hole(mapping, offset + 1, max_pages); + start = page_cache_next_miss(ractl->mapping, index + 1, + max_pages); rcu_read_unlock(); - if (!start || start - offset > max_pages) - return 0; + if (!start || start - index > max_pages) + return; ra->start = start; - ra->size = start - offset; /* old async_size */ + ra->size = start - index; /* old async_size */ ra->size += req_size; ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; @@ -447,28 +533,30 @@ /* * sequential cache miss - * trivial case: (offset - prev_offset) == 1 - * unaligned reads: (offset - prev_offset) == 0 + * trivial case: (index - prev_index) == 1 + * unaligned reads: (index - prev_index) == 0 */ - prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT; - if (offset - prev_offset <= 1UL) + prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT; + if (index - prev_index <= 1UL) goto initial_readahead; /* * Query the page cache and look for the traces(cached history pages) * that a sequential stream would leave behind. */ - if (try_context_readahead(mapping, ra, offset, req_size, max_pages)) + if (try_context_readahead(ractl->mapping, ra, index, req_size, + max_pages)) goto readit; /* * standalone, small random read * Read as is, and do not pollute the readahead state. */ - return __do_page_cache_readahead(mapping, filp, offset, req_size, 0); + do_page_cache_ra(ractl, req_size, 0); + return; initial_readahead: - ra->start = offset; + ra->start = index; ra->size = get_init_ra_size(req_size, max_pages); ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; @@ -479,7 +567,7 @@ * the resulted next readahead window into the current one. * Take care of maximum IO pages as above. */ - if (offset == ra->start && ra->size == ra->async_size) { + if (index == ra->start && ra->size == ra->async_size) { add_pages = get_next_ra_size(ra, max_pages); if (ra->size + add_pages <= max_pages) { ra->async_size = add_pages; @@ -490,65 +578,42 @@ } } - return ra_submit(ra, mapping, filp); + ractl->_index = ra->start; + do_page_cache_ra(ractl, ra->size, ra->async_size); } -/** - * page_cache_sync_readahead - generic file readahead - * @mapping: address_space which holds the pagecache and I/O vectors - * @ra: file_ra_state which holds the readahead state - * @filp: passed on to ->readpage() and ->readpages() - * @offset: start offset into @mapping, in pagecache page-sized units - * @req_size: hint: total size of the read which the caller is performing in - * pagecache pages - * - * page_cache_sync_readahead() should be called when a cache miss happened: - * it will submit the read. The readahead logic may decide to piggyback more - * pages onto the read request if access patterns suggest it will improve - * performance. - */ -void page_cache_sync_readahead(struct address_space *mapping, - struct file_ra_state *ra, struct file *filp, - pgoff_t offset, unsigned long req_size) +void page_cache_sync_ra(struct readahead_control *ractl, + struct file_ra_state *ra, unsigned long req_count) { - /* no read-ahead */ - if (!ra->ra_pages) - return; + bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); - if (blk_cgroup_congested()) - return; + /* + * Even if read-ahead is disabled, issue this request as read-ahead + * as we'll need it to satisfy the requested range. The forced + * read-ahead will do the right thing and limit the read to just the + * requested range, which we'll set to 1 page for this case. + */ + if (!ra->ra_pages || blk_cgroup_congested()) { + if (!ractl->file) + return; + req_count = 1; + do_forced_ra = true; + } /* be dumb */ - if (filp && (filp->f_mode & FMODE_RANDOM)) { - force_page_cache_readahead(mapping, filp, offset, req_size); + if (do_forced_ra) { + force_page_cache_ra(ractl, ra, req_count); return; } /* do read-ahead */ - ondemand_readahead(mapping, ra, filp, false, offset, req_size); + ondemand_readahead(ractl, ra, false, req_count); } -EXPORT_SYMBOL_GPL(page_cache_sync_readahead); +EXPORT_SYMBOL_GPL(page_cache_sync_ra); -/** - * page_cache_async_readahead - file readahead for marked pages - * @mapping: address_space which holds the pagecache and I/O vectors - * @ra: file_ra_state which holds the readahead state - * @filp: passed on to ->readpage() and ->readpages() - * @page: the page at @offset which has the PG_readahead flag set - * @offset: start offset into @mapping, in pagecache page-sized units - * @req_size: hint: total size of the read which the caller is performing in - * pagecache pages - * - * page_cache_async_readahead() should be called when a page is used which - * has the PG_readahead flag; this is a marker to suggest that the application - * has used up enough of the readahead window that we should start pulling in - * more pages. - */ -void -page_cache_async_readahead(struct address_space *mapping, - struct file_ra_state *ra, struct file *filp, - struct page *page, pgoff_t offset, - unsigned long req_size) +void page_cache_async_ra(struct readahead_control *ractl, + struct file_ra_state *ra, struct page *page, + unsigned long req_count) { /* no read-ahead */ if (!ra->ra_pages) @@ -565,16 +630,16 @@ /* * Defer asynchronous read-ahead on IO congestion. */ - if (inode_read_congested(mapping->host)) + if (inode_read_congested(ractl->mapping->host)) return; if (blk_cgroup_congested()) return; /* do read-ahead */ - ondemand_readahead(mapping, ra, filp, true, offset, req_size); + ondemand_readahead(ractl, ra, true, req_count); } -EXPORT_SYMBOL_GPL(page_cache_async_readahead); +EXPORT_SYMBOL_GPL(page_cache_async_ra); ssize_t ksys_readahead(int fd, loff_t offset, size_t count) { -- Gitblit v1.6.2