From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 22 Oct 2024 10:36:11 +0000
Subject: [PATCH] 修改4g拨号为QMI,需要在系统里后台执行quectel-CM
---
kernel/fs/fs-writeback.c | 277 ++++++++++++++++++++++++++++++++++++------------------
1 files changed, 183 insertions(+), 94 deletions(-)
diff --git a/kernel/fs/fs-writeback.c b/kernel/fs/fs-writeback.c
index 869a34a..86a1155 100644
--- a/kernel/fs/fs-writeback.c
+++ b/kernel/fs/fs-writeback.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* fs/fs-writeback.c
*
@@ -35,10 +36,6 @@
*/
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
-struct wb_completion {
- atomic_t cnt;
-};
-
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
@@ -57,19 +54,6 @@
struct list_head list; /* pending work list */
struct wb_completion *done; /* set if the caller waits */
};
-
-/*
- * If one wants to wait for one or more wb_writeback_works, each work's
- * ->done should be set to a wb_completion defined using the following
- * macro. Once all work items are issued with wb_queue_work(), the caller
- * can wait for the completion of all using wb_wait_for_completion(). Work
- * items which are waited upon aren't freed automatically on completion.
- */
-#define DEFINE_WB_COMPLETION_ONSTACK(cmpl) \
- struct wb_completion cmpl = { \
- .cnt = ATOMIC_INIT(1), \
- }
-
/*
* If an inode is constantly having its pages dirtied, but then the
@@ -181,8 +165,13 @@
if (work->auto_free)
kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ if (done) {
+ wait_queue_head_t *waitq = done->waitq;
+
+ /* @done can't be accessed after the following dec */
+ if (atomic_dec_and_test(&done->cnt))
+ wake_up_all(waitq);
+ }
}
static void wb_queue_work(struct bdi_writeback *wb,
@@ -206,28 +195,44 @@
/**
* wb_wait_for_completion - wait for completion of bdi_writeback_works
- * @bdi: bdi work items were issued to
* @done: target wb_completion
*
* Wait for one or more work items issued to @bdi with their ->done field
- * set to @done, which should have been defined with
- * DEFINE_WB_COMPLETION_ONSTACK(). This function returns after all such
- * work items are completed. Work items which are waited upon aren't freed
+ * set to @done, which should have been initialized with
+ * DEFINE_WB_COMPLETION(). This function returns after all such work items
+ * are completed. Work items which are waited upon aren't freed
* automatically on completion.
*/
-static void wb_wait_for_completion(struct backing_dev_info *bdi,
- struct wb_completion *done)
+void wb_wait_for_completion(struct wb_completion *done)
{
atomic_dec(&done->cnt); /* put down the initial count */
- wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
+ wait_event(*done->waitq, !atomic_read(&done->cnt));
}
#ifdef CONFIG_CGROUP_WRITEBACK
-/* parameters for foreign inode detection, see wb_detach_inode() */
+/*
+ * Parameters for foreign inode detection, see wbc_detach_inode() to see
+ * how they're used.
+ *
+ * These paramters are inherently heuristical as the detection target
+ * itself is fuzzy. All we want to do is detaching an inode from the
+ * current owner if it's being written to by some other cgroups too much.
+ *
+ * The current cgroup writeback is built on the assumption that multiple
+ * cgroups writing to the same inode concurrently is very rare and a mode
+ * of operation which isn't well supported. As such, the goal is not
+ * taking too long when a different cgroup takes over an inode while
+ * avoiding too aggressive flip-flops from occasional foreign writes.
+ *
+ * We record, very roughly, 2s worth of IO time history and if more than
+ * half of that is foreign, trigger the switch. The recording is quantized
+ * to 16 slots. To avoid tiny writes from swinging the decision too much,
+ * writes smaller than 1/8 of avg size are ignored.
+ */
#define WB_FRN_TIME_SHIFT 13 /* 1s = 2^13, upto 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT 3 /* avg = avg * 7/8 + new * 1/8 */
-#define WB_FRN_TIME_CUT_DIV 2 /* ignore rounds < avg / 2 */
+#define WB_FRN_TIME_CUT_DIV 8 /* ignore rounds < avg / 8 */
#define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT)) /* 2s */
#define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */
@@ -237,6 +242,7 @@
/* if foreign slots >= 8, switch */
#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
/* one round can affect upto 5 slots */
+#define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */
static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;
@@ -352,9 +358,9 @@
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
- struct radix_tree_iter iter;
+ XA_STATE(xas, &mapping->i_pages, 0);
+ struct page *page;
bool switched = false;
- void **slot;
/*
* If @inode switches cgwb membership while sync_inodes_sb() is
@@ -389,30 +395,25 @@
if (unlikely(inode->i_state & I_FREEING))
goto skip_switch;
+ trace_inode_switch_wbs(inode, old_wb, new_wb);
+
/*
* Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
* to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
* pages actually under writeback.
*/
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
- PAGECACHE_TAG_DIRTY) {
- struct page *page = radix_tree_deref_slot_protected(slot,
- &mapping->i_pages.xa_lock);
- if (likely(page) && PageDirty(page)) {
+ xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
+ if (PageDirty(page)) {
dec_wb_stat(old_wb, WB_RECLAIMABLE);
inc_wb_stat(new_wb, WB_RECLAIMABLE);
}
}
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
- PAGECACHE_TAG_WRITEBACK) {
- struct page *page = radix_tree_deref_slot_protected(slot,
- &mapping->i_pages.xa_lock);
- if (likely(page)) {
- WARN_ON_ONCE(!PageWriteback(page));
- dec_wb_stat(old_wb, WB_WRITEBACK);
- inc_wb_stat(new_wb, WB_WRITEBACK);
- }
+ xas_set(&xas, 0);
+ xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
+ WARN_ON_ONCE(!PageWriteback(page));
+ dec_wb_stat(old_wb, WB_WRITEBACK);
+ inc_wb_stat(new_wb, WB_WRITEBACK);
}
wb_get(new_wb);
@@ -496,18 +497,15 @@
if (inode->i_state & I_WB_SWITCH)
return;
- /*
- * Avoid starting new switches while sync_inodes_sb() is in
- * progress. Otherwise, if the down_write protected issue path
- * blocks heavily, we might end up starting a large number of
- * switches which will block on the rwsem.
- */
- if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ /* avoid queueing a new switch if too many are already in flight */
+ if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
return;
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- goto out_unlock;
+ return;
+
+ atomic_inc(&isw_nr_in_flight);
/* find and pin the new wb */
rcu_read_lock();
@@ -544,17 +542,13 @@
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
-
- atomic_inc(&isw_nr_in_flight);
-
- goto out_unlock;
+ return;
out_free:
+ atomic_dec(&isw_nr_in_flight);
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
-out_unlock:
- up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -598,6 +592,7 @@
if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
inode_switch_wbs(inode, wbc->wb_id);
}
+EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
/**
* wbc_detach_inode - disassociate wbc from inode and perform foreign detection
@@ -695,6 +690,9 @@
if (wbc->wb_id != max_id)
history |= (1U << slots) - 1;
+ if (history)
+ trace_inode_foreign_history(inode, wbc, history);
+
/*
* Switch if the current wb isn't the consistent winner.
* If there are multiple closely competing dirtiers, the
@@ -702,7 +700,7 @@
* is okay. The main goal is avoiding keeping an inode on
* the wrong wb for an extended period of time.
*/
- if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
+ if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
inode_switch_wbs(inode, max_id);
}
@@ -717,9 +715,10 @@
wb_put(wbc->wb);
wbc->wb = NULL;
}
+EXPORT_SYMBOL_GPL(wbc_detach_inode);
/**
- * wbc_account_io - account IO issued during writeback
+ * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
* @wbc: writeback_control of the writeback in progress
* @page: page being written out
* @bytes: number of bytes being written out
@@ -728,8 +727,8 @@
* controlled by @wbc. Keep the book for foreign inode detection. See
* wbc_detach_inode().
*/
-void wbc_account_io(struct writeback_control *wbc, struct page *page,
- size_t bytes)
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+ size_t bytes)
{
struct cgroup_subsys_state *css;
int id;
@@ -740,7 +739,7 @@
* behind a slow cgroup. Ultimately, we want pageout() to kick off
* regular writeback instead of writing things out itself.
*/
- if (!wbc->wb)
+ if (!wbc->wb || wbc->no_cgroup_owner)
return;
css = mem_cgroup_css_from_page(page);
@@ -766,7 +765,7 @@
else
wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
}
-EXPORT_SYMBOL_GPL(wbc_account_io);
+EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
/**
* inode_congested - test whether an inode is congested
@@ -856,7 +855,7 @@
restart:
rcu_read_lock();
list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
- DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
+ DEFINE_WB_COMPLETION(fallback_work_done, bdi);
struct wb_writeback_work fallback_work;
struct wb_writeback_work *work;
long nr_pages;
@@ -885,6 +884,16 @@
continue;
}
+ /*
+ * If wb_tryget fails, the wb has been shutdown, skip it.
+ *
+ * Pin @wb so that it stays on @bdi->wb_list. This allows
+ * continuing iteration from @wb after dropping and
+ * regrabbing rcu read lock.
+ */
+ if (!wb_tryget(wb))
+ continue;
+
/* alloc failed, execute synchronously using on-stack fallback */
work = &fallback_work;
*work = *base_work;
@@ -893,23 +902,99 @@
work->done = &fallback_work_done;
wb_queue_work(wb, work);
-
- /*
- * Pin @wb so that it stays on @bdi->wb_list. This allows
- * continuing iteration from @wb after dropping and
- * regrabbing rcu read lock.
- */
- wb_get(wb);
last_wb = wb;
rcu_read_unlock();
- wb_wait_for_completion(bdi, &fallback_work_done);
+ wb_wait_for_completion(&fallback_work_done);
goto restart;
}
rcu_read_unlock();
if (last_wb)
wb_put(last_wb);
+}
+
+/**
+ * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
+ * @bdi_id: target bdi id
+ * @memcg_id: target memcg css id
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
+ * @reason: reason why some writeback work initiated
+ * @done: target wb_completion
+ *
+ * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
+ * with the specified parameters.
+ */
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+ enum wb_reason reason, struct wb_completion *done)
+{
+ struct backing_dev_info *bdi;
+ struct cgroup_subsys_state *memcg_css;
+ struct bdi_writeback *wb;
+ struct wb_writeback_work *work;
+ int ret;
+
+ /* lookup bdi and memcg */
+ bdi = bdi_get_by_id(bdi_id);
+ if (!bdi)
+ return -ENOENT;
+
+ rcu_read_lock();
+ memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
+ if (memcg_css && !css_tryget(memcg_css))
+ memcg_css = NULL;
+ rcu_read_unlock();
+ if (!memcg_css) {
+ ret = -ENOENT;
+ goto out_bdi_put;
+ }
+
+ /*
+ * And find the associated wb. If the wb isn't there already
+ * there's nothing to flush, don't create one.
+ */
+ wb = wb_get_lookup(bdi, memcg_css);
+ if (!wb) {
+ ret = -ENOENT;
+ goto out_css_put;
+ }
+
+ /*
+ * If @nr is zero, the caller is attempting to write out most of
+ * the currently dirty pages. Let's take the current dirty page
+ * count and inflate it by 25% which should be large enough to
+ * flush out most dirty pages while avoiding getting livelocked by
+ * concurrent dirtiers.
+ */
+ if (!nr) {
+ unsigned long filepages, headroom, dirty, writeback;
+
+ mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
+ &writeback);
+ nr = dirty * 10 / 8;
+ }
+
+ /* issue the writeback work */
+ work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
+ if (work) {
+ work->nr_pages = nr;
+ work->sync_mode = WB_SYNC_NONE;
+ work->range_cyclic = 1;
+ work->reason = reason;
+ work->done = done;
+ work->auto_free = 1;
+ wb_queue_work(wb, work);
+ ret = 0;
+ } else {
+ ret = -ENOMEM;
+ }
+
+ wb_put(wb);
+out_css_put:
+ css_put(memcg_css);
+out_bdi_put:
+ bdi_put(bdi);
+ return ret;
}
/**
@@ -995,7 +1080,6 @@
static unsigned long get_nr_dirty_pages(void)
{
return global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS) +
get_nr_dirty_inodes();
}
@@ -1053,6 +1137,7 @@
spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
+EXPORT_SYMBOL(inode_io_list_del);
/*
* mark an inode as under writeback on the sb
@@ -1568,11 +1653,12 @@
};
unsigned long start_time = jiffies;
long write_chunk;
- long wrote = 0; /* count both pages and inodes */
+ long total_wrote = 0; /* count both pages and inodes */
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
struct bdi_writeback *tmp_wb;
+ long wrote;
if (inode->i_sb != sb) {
if (work->sb) {
@@ -1648,7 +1734,9 @@
wbc_detach_inode(&wbc);
work->nr_pages -= write_chunk - wbc.nr_to_write;
- wrote += write_chunk - wbc.nr_to_write;
+ wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
+ wrote = wrote < 0 ? 0 : wrote;
+ total_wrote += wrote;
if (need_resched()) {
/*
@@ -1670,7 +1758,7 @@
tmp_wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL))
- wrote++;
+ total_wrote++;
requeue_inode(inode, tmp_wb, &wbc);
inode_sync_complete(inode);
spin_unlock(&inode->i_lock);
@@ -1684,14 +1772,14 @@
* bail out to wb_writeback() often enough to check
* background threshold and other termination conditions.
*/
- if (wrote) {
+ if (total_wrote) {
if (time_is_before_jiffies(start_time + HZ / 10UL))
break;
if (work->nr_pages <= 0)
break;
}
}
- return wrote;
+ return total_wrote;
}
static long __writeback_inodes_wb(struct bdi_writeback *wb,
@@ -2110,7 +2198,7 @@
__initcall(start_dirtytime_writeback);
int dirtytime_interval_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -2222,9 +2310,9 @@
wb = locked_inode_to_wb_and_lock_list(inode);
- WARN(bdi_cap_writeback_dirty(wb->bdi) &&
+ WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) &&
!test_bit(WB_registered, &wb->state),
- "bdi-%s not registered\n", wb->bdi->name);
+ "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
inode->dirtied_when = jiffies;
if (dirtytime)
@@ -2247,7 +2335,8 @@
* to make sure background write-back happens
* later.
*/
- if (bdi_cap_writeback_dirty(wb->bdi) && wakeup_bdi)
+ if (wakeup_bdi &&
+ (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
wb_wakeup_delayed(wb);
return;
}
@@ -2255,7 +2344,7 @@
out_unlock_inode:
spin_unlock(&inode->i_lock);
}
-EXPORT_SYMBOL(__mark_inode_dirty);
+EXPORT_SYMBOL_NS(__mark_inode_dirty, ANDROID_GKI_VFS_EXPORT_ONLY);
/*
* The @s_sync_lock is used to serialise concurrent sync operations
@@ -2354,7 +2443,8 @@
static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
enum wb_reason reason, bool skip_if_busy)
{
- DEFINE_WB_COMPLETION_ONSTACK(done);
+ struct backing_dev_info *bdi = sb->s_bdi;
+ DEFINE_WB_COMPLETION(done, bdi);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
@@ -2363,14 +2453,13 @@
.nr_pages = nr,
.reason = reason,
};
- struct backing_dev_info *bdi = sb->s_bdi;
if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
- wb_wait_for_completion(bdi, &done);
+ wb_wait_for_completion(&done);
}
/**
@@ -2421,7 +2510,7 @@
__writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
up_read(&sb->s_umount);
}
-EXPORT_SYMBOL(try_to_writeback_inodes_sb);
+EXPORT_SYMBOL_NS(try_to_writeback_inodes_sb, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* sync_inodes_sb - sync sb inode pages
@@ -2432,7 +2521,8 @@
*/
void sync_inodes_sb(struct super_block *sb)
{
- DEFINE_WB_COMPLETION_ONSTACK(done);
+ struct backing_dev_info *bdi = sb->s_bdi;
+ DEFINE_WB_COMPLETION(done, bdi);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
@@ -2442,7 +2532,6 @@
.reason = WB_REASON_SYNC,
.for_sync = 1,
};
- struct backing_dev_info *bdi = sb->s_bdi;
/*
* Can't skip on !bdi_has_dirty() because we should wait for !dirty
@@ -2456,7 +2545,7 @@
/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
- wb_wait_for_completion(bdi, &done);
+ wb_wait_for_completion(&done);
bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
@@ -2482,13 +2571,13 @@
.range_end = LLONG_MAX,
};
- if (!mapping_cap_writeback_dirty(inode->i_mapping))
+ if (!mapping_can_writeback(inode->i_mapping))
wbc.nr_to_write = 0;
might_sleep();
return writeback_single_inode(inode, &wbc);
}
-EXPORT_SYMBOL(write_inode_now);
+EXPORT_SYMBOL_NS(write_inode_now, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* sync_inode - write an inode and its pages to disk.
@@ -2525,4 +2614,4 @@
return sync_inode(inode, &wbc);
}
-EXPORT_SYMBOL(sync_inode_metadata);
+EXPORT_SYMBOL_NS(sync_inode_metadata, ANDROID_GKI_VFS_EXPORT_ONLY);
--
Gitblit v1.6.2