From 01573e231f18eb2d99162747186f59511f56b64d Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 08 Dec 2023 10:40:48 +0000
Subject: [PATCH] Remove rt

---
 kernel/include/linux/pagemap.h | 515 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 448 insertions(+), 67 deletions(-)

diff --git a/kernel/include/linux/pagemap.h b/kernel/include/linux/pagemap.h
index e6cebce..27d22bb 100644
--- a/kernel/include/linux/pagemap.h
+++ b/kernel/include/linux/pagemap.h
@@ -15,6 +15,7 @@
 #include <linux/bitops.h>
 #include <linux/hardirq.h> /* for in_interrupt() */
 #include <linux/hugetlb_inline.h>
+#include <linux/sched/debug.h>
 
 struct pagevec;
 
@@ -29,12 +30,13 @@
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
+	AS_THP_SUPPORT = 6,	/* THPs supported */
 };
 
 /**
  * mapping_set_error - record a writeback error in the address_space
- * @mapping - the mapping in which an error should be set
- * @error - the error to set in the mapping
+ * @mapping: the mapping in which an error should be set
+ * @error: the error to set in the mapping
  *
  * When writeback fails in some way, we must record that error so that
  * userspace can be informed when fsync and the like are called.  We endeavor
@@ -51,7 +53,11 @@
 		return;
 
 	/* Record in wb_err for checkers using errseq_t based tracking */
-	filemap_set_wb_err(mapping, error);
+	__filemap_set_wb_err(mapping, error);
+
+	/* Record it in superblock */
+	if (mapping->host)
+		errseq_set(&mapping->host->i_sb->s_wb_err, error);
 
 	/* Record it in flags for now, for legacy callers */
 	if (error == -ENOSPC)
@@ -70,11 +76,9 @@
 	clear_bit(AS_UNEVICTABLE, &mapping->flags);
 }
 
-static inline int mapping_unevictable(struct address_space *mapping)
+static inline bool mapping_unevictable(struct address_space *mapping)
 {
-	if (mapping)
-		return test_bit(AS_UNEVICTABLE, &mapping->flags);
-	return !!mapping;
+	return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
 }
 
 static inline void mapping_set_exiting(struct address_space *mapping)
@@ -116,6 +120,40 @@
 static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 {
 	m->gfp_mask = mask;
+}
+
+static inline bool mapping_thp_support(struct address_space *mapping)
+{
+	return test_bit(AS_THP_SUPPORT, &mapping->flags);
+}
+
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	return atomic_read(&mapping->nr_thps);
+#else
+	return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	if (!mapping_thp_support(mapping))
+		atomic_inc(&mapping->nr_thps);
+#else
+	WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	if (!mapping_thp_support(mapping))
+		atomic_dec(&mapping->nr_thps);
+#else
+	WARN_ON_ONCE(1);
+#endif
 }
 
 void release_pages(struct page **pages, int nr);
@@ -164,7 +202,7 @@
  * will find the page or it will not. Likewise, the old find_get_page could run
  * either before the insertion or afterwards, depending on timing.
  */
-static inline int page_cache_get_speculative(struct page *page)
+static inline int __page_cache_add_speculative(struct page *page, int count)
 {
 #ifdef CONFIG_TINY_RCU
 # ifdef CONFIG_PREEMPT_COUNT
@@ -180,10 +218,10 @@
 	 * SMP requires.
 	 */
 	VM_BUG_ON_PAGE(page_count(page) == 0, page);
-	page_ref_inc(page);
+	page_ref_add(page, count);
 
 #else
-	if (unlikely(!get_page_unless_zero(page))) {
+	if (unlikely(!page_ref_add_unless(page, count, 0))) {
 		/*
 		 * Either the page has been freed, or will be freed.
 		 * In either case, retry here and the caller should
@@ -197,27 +235,51 @@
 	return 1;
 }
 
-/*
- * Same as above, but add instead of inc (could just be merged)
- */
+static inline int page_cache_get_speculative(struct page *page)
+{
+	return __page_cache_add_speculative(page, 1);
+}
+
 static inline int page_cache_add_speculative(struct page *page, int count)
 {
-	VM_BUG_ON(in_interrupt());
+	return __page_cache_add_speculative(page, count);
+}
 
-#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
-# ifdef CONFIG_PREEMPT_COUNT
-	VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
-	VM_BUG_ON_PAGE(page_count(page) == 0, page);
-	page_ref_add(page, count);
+/**
+ * attach_page_private - Attach private data to a page.
+ * @page: Page to attach data to.
+ * @data: Data to attach to page.
+ *
+ * Attaching private data to a page increments the page's reference count.
+ * The data must be detached before the page will be freed.
+ */
+static inline void attach_page_private(struct page *page, void *data)
+{
+	get_page(page);
+	set_page_private(page, (unsigned long)data);
+	SetPagePrivate(page);
+}
 
-#else
-	if (unlikely(!page_ref_add_unless(page, count, 0)))
-		return 0;
-#endif
-	VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page);
+/**
+ * detach_page_private - Detach private data from a page.
+ * @page: Page to detach data from.
+ *
+ * Removes the data that was previously attached to the page and decrements
+ * the refcount on the page.
+ *
+ * Return: Data that was attached to the page.
+ */
+static inline void *detach_page_private(struct page *page)
+{
+	void *data = (void *)page_private(page);
 
-	return 1;
+	if (!PagePrivate(page))
+		return NULL;
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	put_page(page);
+
+	return data;
 }
 
 #ifdef CONFIG_NUMA
@@ -234,16 +296,13 @@
 	return __page_cache_alloc(mapping_gfp_mask(x));
 }
 
-static inline gfp_t readahead_gfp_mask(struct address_space *x)
-{
-	return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
-}
+gfp_t readahead_gfp_mask(struct address_space *x);
 
 typedef int filler_t(void *, struct page *);
 
-pgoff_t page_cache_next_hole(struct address_space *mapping,
+pgoff_t page_cache_next_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
-pgoff_t page_cache_prev_hole(struct address_space *mapping,
+pgoff_t page_cache_prev_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
 
 #define FGP_ACCESSED		0x00000001
@@ -253,6 +312,7 @@
 #define FGP_NOFS		0x00000010
 #define FGP_NOWAIT		0x00000020
 #define FGP_FOR_MMAP		0x00000040
+#define FGP_HEAD		0x00000080
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		int fgp_flags, gfp_t cache_gfp_mask);
@@ -282,20 +342,39 @@
 /**
  * find_lock_page - locate, pin and lock a pagecache page
  * @mapping: the address_space to search
- * @offset: the page index
+ * @index: the page index
  *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * Looks up the page cache entry at @mapping & @index.  If there is a
  * page cache page, it is returned locked and with an increased
  * refcount.
  *
- * Otherwise, %NULL is returned.
- *
- * find_lock_page() may sleep.
+ * Context: May sleep.
+ * Return: A struct page or %NULL if there is no page in the cache for this
+ * index.
  */
 static inline struct page *find_lock_page(struct address_space *mapping,
-					pgoff_t offset)
+					pgoff_t index)
 {
-	return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
+	return pagecache_get_page(mapping, index, FGP_LOCK, 0);
+}
+
+/**
+ * find_lock_head - Locate, pin and lock a pagecache page.
+ * @mapping: The address_space to search.
+ * @index: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @index.  If there is a
+ * page cache page, its head page is returned locked and with an increased
+ * refcount.
+ *
+ * Context: May sleep.
+ * Return: A struct page which is !PageTail, or %NULL if there is no page
+ * in the cache for this index.
+ */
+static inline struct page *find_lock_head(struct address_space *mapping,
+					pgoff_t index)
+{
+	return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
 }
 
 /**
@@ -318,9 +397,9 @@
  * atomic allocation!
  */
 static inline struct page *find_or_create_page(struct address_space *mapping,
-					pgoff_t offset, gfp_t gfp_mask)
+					pgoff_t index, gfp_t gfp_mask)
 {
-	return pagecache_get_page(mapping, offset,
+	return pagecache_get_page(mapping, index,
 					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
 					gfp_mask);
 }
@@ -346,8 +425,28 @@
 			mapping_gfp_mask(mapping));
 }
 
-struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
+/* Does this page contain this index? */
+static inline bool thp_contains(struct page *head, pgoff_t index)
+{
+	/* HugeTLBfs indexes the page cache in units of hpage_size */
+	if (PageHuge(head))
+		return head->index == index;
+	return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
+}
+
+/*
+ * Given the page we found in the page cache, return the page corresponding
+ * to this index in the file
+ */
+static inline struct page *find_subpage(struct page *head, pgoff_t index)
+{
+	/* HugeTLBfs wants the head page regardless */
+	if (PageHuge(head))
+		return head;
+
+	return head + (index & (thp_nr_pages(head) - 1));
+}
+
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
 			  unsigned int nr_entries, struct page **entries,
 			  pgoff_t *indices);
@@ -364,18 +463,15 @@
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 			       unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
-			pgoff_t end, int tag, unsigned int nr_pages,
+			pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
 			struct page **pages);
 static inline unsigned find_get_pages_tag(struct address_space *mapping,
-			pgoff_t *index, int tag, unsigned int nr_pages,
+			pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
 			struct page **pages)
 {
 	return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
 					nr_pages, pages);
 }
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 			pgoff_t index, unsigned flags);
@@ -457,17 +553,46 @@
 	pgoff_t pgoff;
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return linear_hugepage_index(vma, address);
-	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
-	pgoff += vma->vm_pgoff;
+	pgoff = (address - READ_ONCE(vma->vm_start)) >> PAGE_SHIFT;
+	pgoff += READ_ONCE(vma->vm_pgoff);
 	return pgoff;
 }
 
-extern void __sched __lock_page(struct page *page);
-extern int __sched __lock_page_killable(struct page *page);
-extern int __sched __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+struct wait_page_key {
+	struct page *page;
+	int bit_nr;
+	int page_match;
+};
+
+struct wait_page_queue {
+	struct page *page;
+	int bit_nr;
+	wait_queue_entry_t wait;
+};
+
+static inline bool wake_page_match(struct wait_page_queue *wait_page,
+				  struct wait_page_key *key)
+{
+	if (wait_page->page != key->page)
+		return false;
+	key->page_match = 1;
+
+	if (wait_page->bit_nr != key->bit_nr)
+		return false;
+
+	return true;
+}
+
+extern void __lock_page(struct page *page);
+extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
+extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
 extern void unlock_page(struct page *page);
 
+/*
+ * Return true if the page was successfully locked
+ */
 static inline int trylock_page(struct page *page)
 {
 	page = compound_head(page);
@@ -498,15 +623,30 @@
 }
 
 /*
+ * lock_page_async - Lock the page, unless this would block. If the page
+ * is already locked, then queue a callback when the page becomes unlocked.
+ * This callback can then retry the operation.
+ *
+ * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
+ * was already locked and the callback defined in 'wait' was queued.
+ */
+static inline __sched int lock_page_async(struct page *page,
+					  struct wait_page_queue *wait)
+{
+	if (!trylock_page(page))
+		return __lock_page_async(page, wait);
+	return 0;
+}
+
+/*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
  *
- * Return value and mmap_sem implications depend on flags; see
+ * Return value and mmap_lock implications depend on flags; see
  * __lock_page_or_retry().
  */
-static inline __sched int lock_page_or_retry(struct page *page,
-					     struct mm_struct *mm,
-					     unsigned int flags)
+static inline __sched int lock_page_or_retry(struct page *page, struct mm_struct *mm,
+				     unsigned int flags)
 {
 	might_sleep();
 	return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
@@ -516,8 +656,8 @@
  * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
  * and should not be used directly.
  */
-extern void __sched wait_on_page_bit(struct page *page, int bit_nr);
-extern int __sched wait_on_page_bit_killable(struct page *page, int bit_nr);
+extern void wait_on_page_bit(struct page *page, int bit_nr);
+extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
 
 /*
  * Wait for a page to be unlocked.
@@ -539,15 +679,9 @@
 	return wait_on_page_bit_killable(compound_head(page), PG_locked);
 }
 
-/*
- * Wait for a page to complete writeback
- */
-static inline __sched void wait_on_page_writeback(struct page *page)
-{
-	if (PageWriteback(page))
-		wait_on_page_bit(page, PG_writeback);
-}
+extern void put_and_wait_on_page_locked(struct page *page);
+void wait_on_page_writeback(struct page *page);
 
 extern void end_page_writeback(struct page *page);
 void wait_for_stable_page(struct page *page);
@@ -641,10 +775,257 @@
 	return error;
 }
 
+/**
+ * struct readahead_control - Describes a readahead request.
+ *
+ * A readahead request is for consecutive pages.  Filesystems which
+ * implement the ->readahead method should call readahead_page() or
+ * readahead_page_batch() in a loop and attempt to start I/O against
+ * each page in the request.
+ *
+ * Most of the fields in this struct are private and should be accessed
+ * by the functions below.
+ *
+ * @file: The file, used primarily by network filesystems for authentication.
+ *	May be NULL if invoked internally by the filesystem.
+ * @mapping: Readahead this filesystem object.
+ */
+struct readahead_control {
+	struct file *file;
+	struct address_space *mapping;
+/* private: use the readahead_* accessors instead */
+	pgoff_t _index;
+	unsigned int _nr_pages;
+	unsigned int _batch_count;
+};
+
+#define DEFINE_READAHEAD(rac, f, m, i)					\
+	struct readahead_control rac = {				\
+		.file = f,						\
+		.mapping = m,						\
+		._index = i,						\
+	}
+
+#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
+
+void page_cache_ra_unbounded(struct readahead_control *,
+		unsigned long nr_to_read, unsigned long lookahead_count);
+void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
+		unsigned long req_count);
+void page_cache_async_ra(struct readahead_control *, struct file_ra_state *,
+		struct page *, unsigned long req_count);
+
+/**
+ * page_cache_sync_readahead - generic file readahead
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @file: Used by the filesystem for authentication.
+ * @index: Index of first page to be read.
+ * @req_count: Total number of pages being read by the caller.
+ *
+ * page_cache_sync_readahead() should be called when a cache miss happened:
+ * it will submit the read.  The readahead logic may decide to piggyback more
+ * pages onto the read request if access patterns suggest it will improve
+ * performance.
+ */
+static inline
+void page_cache_sync_readahead(struct address_space *mapping,
+		struct file_ra_state *ra, struct file *file, pgoff_t index,
+		unsigned long req_count)
+{
+	DEFINE_READAHEAD(ractl, file, mapping, index);
+	page_cache_sync_ra(&ractl, ra, req_count);
+}
+
+/**
+ * page_cache_async_readahead - file readahead for marked pages
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @file: Used by the filesystem for authentication.
+ * @page: The page at @index which triggered the readahead call.
+ * @index: Index of first page to be read.
+ * @req_count: Total number of pages being read by the caller.
+ *
+ * page_cache_async_readahead() should be called when a page is used which
+ * is marked as PageReadahead; this is a marker to suggest that the application
+ * has used up enough of the readahead window that we should start pulling in
+ * more pages.
+ */
+static inline
+void page_cache_async_readahead(struct address_space *mapping,
+		struct file_ra_state *ra, struct file *file,
+		struct page *page, pgoff_t index, unsigned long req_count)
+{
+	DEFINE_READAHEAD(ractl, file, mapping, index);
+	page_cache_async_ra(&ractl, ra, page, req_count);
+}
+
+/**
+ * readahead_page - Get the next page to read.
+ * @rac: The current readahead request.
+ *
+ * Context: The page is locked and has an elevated refcount.  The caller
+ * should decrease the refcount once the page has been submitted for I/O
+ * and unlock the page once all I/O to that page has completed.
+ * Return: A pointer to the next page, or %NULL if we are done.
+ */
+static inline struct page *readahead_page(struct readahead_control *rac)
+{
+	struct page *page;
+
+	BUG_ON(rac->_batch_count > rac->_nr_pages);
+	rac->_nr_pages -= rac->_batch_count;
+	rac->_index += rac->_batch_count;
+
+	if (!rac->_nr_pages) {
+		rac->_batch_count = 0;
+		return NULL;
+	}
+
+	page = xa_load(&rac->mapping->i_pages, rac->_index);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	rac->_batch_count = thp_nr_pages(page);
+
+	return page;
+}
+
+static inline unsigned int __readahead_batch(struct readahead_control *rac,
+		struct page **array, unsigned int array_sz)
+{
+	unsigned int i = 0;
+	XA_STATE(xas, &rac->mapping->i_pages, 0);
+	struct page *page;
+
+	BUG_ON(rac->_batch_count > rac->_nr_pages);
+	rac->_nr_pages -= rac->_batch_count;
+	rac->_index += rac->_batch_count;
+	rac->_batch_count = 0;
+
+	xas_set(&xas, rac->_index);
+	rcu_read_lock();
+	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
+		if (xas_retry(&xas, page))
+			continue;
+		VM_BUG_ON_PAGE(!PageLocked(page), page);
+		VM_BUG_ON_PAGE(PageTail(page), page);
+		array[i++] = page;
+		rac->_batch_count += thp_nr_pages(page);
+
+		/*
+		 * The page cache isn't using multi-index entries yet,
+		 * so the xas cursor needs to be manually moved to the
+		 * next index.  This can be removed once the page cache
+		 * is converted.
+		 */
+		if (PageHead(page))
+			xas_set(&xas, rac->_index + rac->_batch_count);
+
+		if (i == array_sz)
+			break;
+	}
+	rcu_read_unlock();
+
+	return i;
+}
+
+/**
+ * readahead_page_batch - Get a batch of pages to read.
+ * @rac: The current readahead request.
+ * @array: An array of pointers to struct page.
+ *
+ * Context: The pages are locked and have an elevated refcount.  The caller
+ * should decrease the refcount once the page has been submitted for I/O
+ * and unlock the page once all I/O to that page has completed.
+ * Return: The number of pages placed in the array.  0 indicates the request
+ * is complete.
+ */
+#define readahead_page_batch(rac, array)				\
+	__readahead_batch(rac, array, ARRAY_SIZE(array))
+
+/**
+ * readahead_pos - The byte offset into the file of this readahead request.
+ * @rac: The readahead request.
+ */
+static inline loff_t readahead_pos(struct readahead_control *rac)
+{
+	return (loff_t)rac->_index * PAGE_SIZE;
+}
+
+/**
+ * readahead_length - The number of bytes in this readahead request.
+ * @rac: The readahead request.
+ */
+static inline loff_t readahead_length(struct readahead_control *rac)
+{
+	return (loff_t)rac->_nr_pages * PAGE_SIZE;
+}
+
+/**
+ * readahead_index - The index of the first page in this readahead request.
+ * @rac: The readahead request.
+ */
+static inline pgoff_t readahead_index(struct readahead_control *rac)
+{
+	return rac->_index;
+}
+
+/**
+ * readahead_count - The number of pages in this readahead request.
+ * @rac: The readahead request.
+ */
+static inline unsigned int readahead_count(struct readahead_control *rac)
+{
+	return rac->_nr_pages;
+}
+
 static inline unsigned long dir_pages(struct inode *inode)
 {
 	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
 			       PAGE_SHIFT;
 }
 
+/**
+ * page_mkwrite_check_truncate - check if page was truncated
+ * @page: the page to check
+ * @inode: the inode to check the page against
+ *
+ * Returns the number of bytes in the page up to EOF,
+ * or -EFAULT if the page was truncated.
+ */
+static inline int page_mkwrite_check_truncate(struct page *page,
+					      struct inode *inode)
+{
+	loff_t size = i_size_read(inode);
+	pgoff_t index = size >> PAGE_SHIFT;
+	int offset = offset_in_page(size);
+
+	if (page->mapping != inode->i_mapping)
+		return -EFAULT;
+
+	/* page is wholly inside EOF */
+	if (page->index < index)
+		return PAGE_SIZE;
+	/* page is wholly past EOF */
+	if (page->index > index || !offset)
+		return -EFAULT;
+	/* page is partially inside EOF */
+	return offset;
+}
+
+/**
+ * i_blocks_per_page - How many blocks fit in this page.
+ * @inode: The inode which contains the blocks.
+ * @page: The page (head page if the page is a THP).
+ *
+ * If the block size is larger than the size of this page, return zero.
+ *
+ * Context: The caller should hold a refcount on the page to prevent it
+ * from being split.
+ * Return: The number of filesystem blocks covered by this page.
+ */
+static inline
+unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
+{
+	return thp_size(page) >> inode->i_blkbits;
+}
 #endif /* _LINUX_PAGEMAP_H */
-- 
Gitblit v1.6.2
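
Usage sketch (illustrative only, not part of the patch): the struct
readahead_control kerneldoc added above says a filesystem implementing the
->readahead method should call readahead_page() in a loop and start I/O
against each page. A minimal sketch of such a loop follows. The "myfs" names
are hypothetical; myfs_read_one_page() is assumed to submit the I/O and, on
completion, unlock the page and drop the page cache's reference, exactly as
the readahead_page() documentation requires of the caller.

/*
 * Illustrative sketch only -- "myfs" and myfs_read_one_page() are
 * hypothetical and do not exist in this patch or in the kernel tree.
 */
static int myfs_read_one_page(struct page *page);	/* hypothetical helper */

static void myfs_readahead(struct readahead_control *rac)
{
	struct page *page;

	/* Each iteration yields one locked page with an elevated refcount. */
	while ((page = readahead_page(rac))) {
		if (myfs_read_one_page(page) < 0) {
			/*
			 * Submission failed, so no I/O completion will run:
			 * release the lock and the reference here instead.
			 */
			unlock_page(page);
			put_page(page);
		}
	}
}

On kernels that provide this API, such a function would be wired up through
the ->readahead hook of the filesystem's struct address_space_operations.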