From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] change system file
---
kernel/mm/page-writeback.c | 266 +++++++++++++++++++++++++++++-----------------------
1 files changed, 149 insertions(+), 117 deletions(-)
diff --git a/kernel/mm/page-writeback.c b/kernel/mm/page-writeback.c
index 078f146..9d3196a 100644
--- a/kernel/mm/page-writeback.c
+++ b/kernel/mm/page-writeback.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* mm/page-writeback.c
*
@@ -40,6 +41,9 @@
#include <trace/events/writeback.h>
#include "internal.h"
+
+#undef CREATE_TRACE_POINT
+#include <trace/hooks/mm.h>
/*
* Sleep at most 200ms at a time in balance_dirty_pages().
@@ -256,7 +260,7 @@
* requiring writeback.
*
* This number of dirtyable pages is the base value of which the
- * user-configurable dirty ratio is the effictive number of pages that
+ * user-configurable dirty ratio is the effective number of pages that
* are allowed to be actually dirtied. Per individual zone, or
* globally by using the sum of dirtyable pages over all zones.
*
@@ -270,7 +274,7 @@
* node_dirtyable_memory - number of dirtyable pages in a node
* @pgdat: the node
*
- * Returns the node's number of pages potentially available for dirty
+ * Return: the node's number of pages potentially available for dirty
* page cache. This is the base value for the per-node dirty limits.
*/
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
@@ -355,7 +359,7 @@
/**
* global_dirtyable_memory - number of globally dirtyable pages
*
- * Returns the global number of pages potentially available for dirty
+ * Return: the global number of pages potentially available for dirty
* page cache. This is the base value for the global dirty limits.
*/
static unsigned long global_dirtyable_memory(void)
@@ -386,8 +390,7 @@
* Calculate @dtc->thresh and ->bg_thresh considering
* vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}. The caller
* must ensure that @dtc->avail is set before calling this function. The
- * dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
- * real-time tasks.
+ * dirty limits will be lifted by 1/4 for real-time tasks.
*/
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
@@ -435,7 +438,7 @@
if (bg_thresh >= thresh)
bg_thresh = thresh / 2;
tsk = current;
- if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
+ if (rt_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
}
@@ -470,7 +473,7 @@
* node_dirty_limit - maximum number of dirty pages allowed in a node
* @pgdat: the node
*
- * Returns the maximum number of dirty pages allowed in a node, based
+ * Return: the maximum number of dirty pages allowed in a node, based
* on the node's dirtyable memory.
*/
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
@@ -485,7 +488,7 @@
else
dirty = vm_dirty_ratio * node_memory / 100;
- if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+ if (rt_task(tsk))
dirty += dirty / 4;
return dirty;
@@ -495,7 +498,7 @@
* node_dirty_ok - tells whether a node is within its dirty limits
* @pgdat: the node to check
*
- * Returns %true when the dirty pages in @pgdat are within the node's
+ * Return: %true when the dirty pages in @pgdat are within the node's
* dirty limit, %false if the limit is exceeded.
*/
bool node_dirty_ok(struct pglist_data *pgdat)
@@ -504,15 +507,13 @@
unsigned long nr_pages = 0;
nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
- nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
nr_pages += node_page_state(pgdat, NR_WRITEBACK);
return nr_pages <= limit;
}
int dirty_background_ratio_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -523,8 +524,7 @@
}
int dirty_background_bytes_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -534,9 +534,8 @@
return ret;
}
-int dirty_ratio_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
int old_ratio = vm_dirty_ratio;
int ret;
@@ -550,8 +549,7 @@
}
int dirty_bytes_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned long old_bytes = vm_dirty_bytes;
int ret;
@@ -743,9 +741,6 @@
* __wb_calc_thresh - @wb's share of dirty throttling threshold
* @dtc: dirty_throttle_context of interest
*
- * Returns @wb's dirty limit in pages. The term "dirty" in the context of
- * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- *
* Note that balance_dirty_pages() will only seriously take it as a hard limit
* when sleeping max_pause per page is not enough to keep the dirty pages under
* control. For example, when the device is completely stalled due to some error
@@ -759,13 +754,16 @@
*
* The wb's share of dirty limit will be adapting to its throughput and
* bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
+ *
+ * Return: @wb's dirty limit in pages. The term "dirty" in the context of
+ * dirty balancing includes all PG_dirty and PG_writeback pages.
*/
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
struct wb_domain *dom = dtc_dom(dtc);
unsigned long thresh = dtc->thresh;
u64 wb_thresh;
- long numerator, denominator;
+ unsigned long numerator, denominator;
unsigned long wb_min_ratio, wb_max_ratio;
/*
@@ -776,7 +774,7 @@
wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
wb_thresh *= numerator;
- do_div(wb_thresh, denominator);
+ wb_thresh = div64_ul(wb_thresh, denominator);
wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
@@ -1101,7 +1099,7 @@
bw = written - min(written, wb->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
- do_div(bw, elapsed);
+ bw = div64_ul(bw, elapsed);
avg = bw;
goto out;
}
@@ -1566,7 +1564,7 @@
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
struct dirty_throttle_control *sdtc;
- unsigned long nr_reclaimable; /* = file_dirty + unstable_nfs */
+ unsigned long nr_reclaimable; /* = file_dirty */
long period;
long pause;
long max_pause;
@@ -1586,14 +1584,7 @@
unsigned long m_thresh = 0;
unsigned long m_bg_thresh = 0;
- /*
- * Unstable writes are a feature of certain networked
- * filesystems (i.e. NFS) in which data may have been
- * written to the server's write cache, but has not yet
- * been flushed to permanent storage.
- */
- nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
+ nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
@@ -1637,6 +1628,9 @@
}
}
+ trace_android_vh_mm_dirty_limits(gdtc, strictlimit, dirty, bg_thresh,
+ nr_reclaimable, pages_dirtied);
+
/*
* Throttle it only when the background writeback cannot
* catch-up. This avoids (excessively) small writeouts
@@ -1652,8 +1646,12 @@
if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
(!mdtc ||
m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
- unsigned long intv = dirty_poll_interval(dirty, thresh);
- unsigned long m_intv = ULONG_MAX;
+ unsigned long intv;
+ unsigned long m_intv;
+
+free_running:
+ intv = dirty_poll_interval(dirty, thresh);
+ m_intv = ULONG_MAX;
current->dirty_paused_when = now;
current->nr_dirtied = 0;
@@ -1666,12 +1664,25 @@
if (unlikely(!writeback_in_progress(wb)))
wb_start_background_writeback(wb);
+ mem_cgroup_flush_foreign(wb);
+
/*
* Calculate global domain's pos_ratio and select the
* global dtc by default.
*/
- if (!strictlimit)
+ if (!strictlimit) {
wb_dirty_limits(gdtc);
+
+ if ((current->flags & PF_LOCAL_THROTTLE) &&
+ gdtc->wb_dirty <
+ dirty_freerun_ceiling(gdtc->wb_thresh,
+ gdtc->wb_bg_thresh))
+ /*
+ * LOCAL_THROTTLE tasks must not be throttled
+ * when below the per-wb freerun ceiling.
+ */
+ goto free_running;
+ }
dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
((gdtc->dirty > gdtc->thresh) || strictlimit);
@@ -1686,9 +1697,20 @@
* both global and memcg domains. Choose the one
* w/ lower pos_ratio.
*/
- if (!strictlimit)
+ if (!strictlimit) {
wb_dirty_limits(mdtc);
+ if ((current->flags & PF_LOCAL_THROTTLE) &&
+ mdtc->wb_dirty <
+ dirty_freerun_ceiling(mdtc->wb_thresh,
+ mdtc->wb_bg_thresh))
+ /*
+ * LOCAL_THROTTLE tasks must not be
+ * throttled when below the per-wb
+ * freerun ceiling.
+ */
+ goto free_running;
+ }
dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
((mdtc->dirty > mdtc->thresh) || strictlimit);
@@ -1866,7 +1888,7 @@
int ratelimit;
int *p;
- if (!bdi_cap_account_dirty(bdi))
+ if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
return;
if (inode_cgwb_enabled(inode))
@@ -1918,7 +1940,9 @@
* @wb: bdi_writeback of interest
*
* Determines whether background writeback should keep writing @wb or it's
- * clean enough. Returns %true if writeback should continue.
+ * clean enough.
+ *
+ * Return: %true if writeback should continue.
*/
bool wb_over_bg_thresh(struct bdi_writeback *wb)
{
@@ -1927,21 +1951,27 @@
struct dirty_throttle_control * const gdtc = &gdtc_stor;
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
+ unsigned long reclaimable;
+ unsigned long thresh;
/*
* Similar to balance_dirty_pages() but ignores pages being written
* as we're trying to decide whether to put more under writeback.
*/
gdtc->avail = global_dirtyable_memory();
- gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
+ gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
domain_dirty_limits(gdtc);
if (gdtc->dirty > gdtc->bg_thresh)
return true;
- if (wb_stat(wb, WB_RECLAIMABLE) >
- wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
+ thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
+ if (thresh < 2 * wb_stat_error())
+ reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
+ else
+ reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+
+ if (reclaimable > thresh)
return true;
if (mdtc) {
@@ -1955,8 +1985,13 @@
if (mdtc->dirty > mdtc->bg_thresh)
return true;
- if (wb_stat(wb, WB_RECLAIMABLE) >
- wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
+ thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
+ if (thresh < 2 * wb_stat_error())
+ reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
+ else
+ reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+
+ if (reclaimable > thresh)
return true;
}
@@ -1967,7 +2002,7 @@
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
+ void *buffer, size_t *length, loff_t *ppos)
{
unsigned int old_interval = dirty_writeback_interval;
int ret;
@@ -2059,13 +2094,11 @@
* Called early on to tune the page writeback dirty limits.
*
* We used to scale dirty pages according to how total memory
- * related to pages that could be allocated for buffers (by
- * comparing nr_free_buffer_pages() to vm_total_pages.
+ * related to pages that could be allocated for buffers.
*
* However, that was when we used "dirty_ratio" to scale with
* all memory, and we don't do that any more. "dirty_ratio"
- * is now applied to total non-HIGHPAGE memory (by subtracting
- * totalhigh_pages from vm_total_pages), and as such we can't
+ * is now applied to total non-HIGHPAGE memory, and as such we can't
* get into the old insane situation any more where we had
* large amounts of dirty pages compared to a small amount of
* non-HIGHMEM memory.
@@ -2097,34 +2130,25 @@
* dirty pages in the file (thus it is important for this function to be quick
* so that it can tag pages faster than a dirtying process can create them).
*/
-/*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
- * latency.
- */
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
-#define WRITEBACK_TAG_BATCH 4096
- unsigned long tagged = 0;
- struct radix_tree_iter iter;
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, start);
+ unsigned int tagged = 0;
+ void *page;
- xa_lock_irq(&mapping->i_pages);
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
- PAGECACHE_TAG_DIRTY) {
- if (iter.index > end)
- break;
- radix_tree_iter_tag_set(&mapping->i_pages, &iter,
- PAGECACHE_TAG_TOWRITE);
- tagged++;
- if ((tagged % WRITEBACK_TAG_BATCH) != 0)
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
+ xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ if (++tagged % XA_CHECK_SCHED)
continue;
- slot = radix_tree_iter_resume(slot, &iter);
- xa_unlock_irq(&mapping->i_pages);
+
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
cond_resched();
- xa_lock_irq(&mapping->i_pages);
+ xas_lock_irq(&xas);
}
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
@@ -2156,6 +2180,8 @@
* lock/page writeback access order inversion - we should only ever lock
* multiple pages in ascending page->index order, and looping back to the start
* of the file violates that rule and causes deadlocks.
+ *
+ * Return: %0 on success, negative error code otherwise
*/
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
@@ -2166,17 +2192,15 @@
int error;
struct pagevec pvec;
int nr_pages;
- pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
int range_whole = 0;
- int tag;
+ xa_mark_t tag;
pagevec_init(&pvec);
if (wbc->range_cyclic) {
- writeback_index = mapping->writeback_index; /* prev offset */
- index = writeback_index;
+ index = mapping->writeback_index; /* prev offset */
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
@@ -2184,12 +2208,12 @@
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
}
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
tag_pages_for_writeback(mapping, index, end);
+ tag = PAGECACHE_TAG_TOWRITE;
+ } else {
+ tag = PAGECACHE_TAG_DIRTY;
+ }
done_index = index;
while (!done && (index <= end)) {
int i;
@@ -2314,6 +2338,8 @@
*
* This is a library function, which implements the writepages()
* address_space_operation.
+ *
+ * Return: %0 on success, negative error code otherwise
*/
int generic_writepages(struct address_space *mapping,
struct writeback_control *wbc)
@@ -2360,6 +2386,8 @@
*
* Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
* function returns.
+ *
+ * Return: %0 on success, negative error code otherwise
*/
int write_one_page(struct page *page)
{
@@ -2413,7 +2441,7 @@
trace_writeback_dirty_page(page, mapping);
- if (mapping_cap_account_dirty(mapping)) {
+ if (mapping_can_writeback(mapping)) {
struct bdi_writeback *wb;
inode_attach_wb(inode, page);
@@ -2427,9 +2455,10 @@
task_io_account_write(PAGE_SIZE);
current->nr_dirtied++;
this_cpu_inc(bdp_ratelimits);
+
+ mem_cgroup_track_foreign_dirty(page, wb);
}
}
-EXPORT_SYMBOL(account_page_dirtied);
/*
* Helper function for deaccounting dirty page without writeback.
@@ -2439,7 +2468,7 @@
void account_page_cleaned(struct page *page, struct address_space *mapping,
struct bdi_writeback *wb)
{
- if (mapping_cap_account_dirty(mapping)) {
+ if (mapping_can_writeback(mapping)) {
dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
@@ -2449,7 +2478,7 @@
/*
* For address_spaces which do not use buffers. Just tag the page as dirty in
- * its radix tree.
+ * the xarray.
*
* This is also used when a single buffer is being dirtied: we want to set the
* page dirty in that case, but not all the buffers. This is a "bottom-up"
@@ -2475,7 +2504,7 @@
BUG_ON(page_mapping(page) != mapping);
WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->i_pages, page_index(page),
+ __xa_set_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
unlock_page_memcg(page);
@@ -2502,7 +2531,7 @@
{
struct address_space *mapping = page->mapping;
- if (mapping && mapping_cap_account_dirty(mapping)) {
+ if (mapping && mapping_can_writeback(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
struct wb_lock_cookie cookie = {};
@@ -2614,7 +2643,7 @@
{
struct address_space *mapping = page_mapping(page);
- if (mapping_cap_account_dirty(mapping)) {
+ if (mapping_can_writeback(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
struct wb_lock_cookie cookie = {};
@@ -2638,13 +2667,13 @@
* Returns true if the page was previously dirty.
*
* This is for preparing to put the page under writeout. We leave the page
- * tagged as dirty in the radix tree so that a concurrent write-for-sync
+ * tagged as dirty in the xarray so that a concurrent write-for-sync
* can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage
* implementation will run either set_page_writeback() or set_page_dirty(),
- * at which stage we bring the page's dirty flag and radix-tree dirty tag
+ * at which stage we bring the page's dirty flag and xarray dirty tag
* back into sync.
*
- * This incoherency between the page's dirty flag and radix-tree tag is
+ * This incoherency between the page's dirty flag and xarray tag is
* unfortunate, but it only exists while the page is locked.
*/
int clear_page_dirty_for_io(struct page *page)
@@ -2652,9 +2681,9 @@
struct address_space *mapping = page_mapping(page);
int ret = 0;
- BUG_ON(!PageLocked(page));
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (mapping && mapping_cap_account_dirty(mapping)) {
+ if (mapping && mapping_can_writeback(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
struct wb_lock_cookie cookie = {};
@@ -2725,9 +2754,9 @@
xa_lock_irqsave(&mapping->i_pages, flags);
ret = TestClearPageWriteback(page);
if (ret) {
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_WRITEBACK);
- if (bdi_cap_account_writeback(bdi)) {
+ if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
struct bdi_writeback *wb = inode_to_wb(inode);
dec_wb_stat(wb, WB_WRITEBACK);
@@ -2743,12 +2772,6 @@
} else {
ret = TestClearPageWriteback(page);
}
- /*
- * NOTE: Page might be free now! Writeback doesn't hold a page
- * reference on its own, it relies on truncation to wait for
- * the clearing of PG_writeback. The below can only access
- * page state that is static across allocation cycles.
- */
if (ret) {
dec_lruvec_state(lruvec, NR_WRITEBACK);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
@@ -2761,15 +2784,17 @@
int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
- int ret;
+ int ret, access_ret;
lock_page_memcg(page);
if (mapping && mapping_use_writeback_tags(mapping)) {
+ XA_STATE(xas, &mapping->i_pages, page_index(page));
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
unsigned long flags;
- xa_lock_irqsave(&mapping->i_pages, flags);
+ xas_lock_irqsave(&xas, flags);
+ xas_load(&xas);
ret = TestSetPageWriteback(page);
if (!ret) {
bool on_wblist;
@@ -2777,9 +2802,8 @@
on_wblist = mapping_tagged(mapping,
PAGECACHE_TAG_WRITEBACK);
- radix_tree_tag_set(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_WRITEBACK);
- if (bdi_cap_account_writeback(bdi))
+ xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
+ if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
/*
@@ -2791,12 +2815,10 @@
sb_mark_inode_writeback(mapping->host);
}
if (!PageDirty(page))
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_DIRTY);
+ xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
if (!keep_write)
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_TOWRITE);
- xa_unlock_irqrestore(&mapping->i_pages, flags);
+ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ xas_unlock_irqrestore(&xas, flags);
} else {
ret = TestSetPageWriteback(page);
}
@@ -2805,20 +2827,29 @@
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
}
unlock_page_memcg(page);
+ access_ret = arch_make_page_accessible(page);
+ /*
+ * If writeback has been triggered on a page that cannot be made
+ * accessible, it is too late to recover here.
+ */
+ VM_BUG_ON_PAGE(access_ret != 0, page);
+
return ret;
}
EXPORT_SYMBOL(__test_set_page_writeback);
/*
- * Return true if any of the pages in the mapping are marked with the
- * passed tag.
+ * Wait for a page to complete writeback
*/
-int mapping_tagged(struct address_space *mapping, int tag)
+void wait_on_page_writeback(struct page *page)
{
- return radix_tree_tagged(&mapping->i_pages, tag);
+ while (PageWriteback(page)) {
+ trace_wait_on_page_writeback(page, page_mapping(page));
+ wait_on_page_bit(page, PG_writeback);
+ }
}
-EXPORT_SYMBOL(mapping_tagged);
+EXPORT_SYMBOL_GPL(wait_on_page_writeback);
/**
* wait_for_stable_page() - wait for writeback to finish, if necessary.
@@ -2830,7 +2861,8 @@
*/
void wait_for_stable_page(struct page *page)
{
- if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
+ page = thp_head(page);
+ if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);
--
Gitblit v1.6.2