+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * fs/fs-writeback.c
  *
...
  */
 #define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))
 
-struct wb_completion {
-	atomic_t		cnt;
-};
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
...
 	struct list_head list;		/* pending work list */
 	struct wb_completion *done;	/* set if the caller waits */
 };
-
-/*
- * If one wants to wait for one or more wb_writeback_works, each work's
- * ->done should be set to a wb_completion defined using the following
- * macro. Once all work items are issued with wb_queue_work(), the caller
- * can wait for the completion of all using wb_wait_for_completion(). Work
- * items which are waited upon aren't freed automatically on completion.
- */
-#define DEFINE_WB_COMPLETION_ONSTACK(cmpl)	\
-	struct wb_completion cmpl = {		\
-		.cnt = ATOMIC_INIT(1),		\
-	}
-
 
 /*
  * If an inode is constantly having its pages dirtied, but then the
...
 
 	if (work->auto_free)
 		kfree(work);
-	if (done && atomic_dec_and_test(&done->cnt))
-		wake_up_all(&wb->bdi->wb_waitq);
+	if (done) {
+		wait_queue_head_t *waitq = done->waitq;
+
+		/* @done can't be accessed after the following dec */
+		if (atomic_dec_and_test(&done->cnt))
+			wake_up_all(waitq);
+	}
 }
 
 static void wb_queue_work(struct bdi_writeback *wb,
...
 
 /**
  * wb_wait_for_completion - wait for completion of bdi_writeback_works
- * @bdi: bdi work items were issued to
  * @done: target wb_completion
  *
  * Wait for one or more work items issued to @bdi with their ->done field
- * set to @done, which should have been defined with
- * DEFINE_WB_COMPLETION_ONSTACK(). This function returns after all such
- * work items are completed. Work items which are waited upon aren't freed
+ * set to @done, which should have been initialized with
+ * DEFINE_WB_COMPLETION(). This function returns after all such work items
+ * are completed. Work items which are waited upon aren't freed
  * automatically on completion.
  */
-static void wb_wait_for_completion(struct backing_dev_info *bdi,
-				   struct wb_completion *done)
+void wb_wait_for_completion(struct wb_completion *done)
 {
 	atomic_dec(&done->cnt);		/* put down the initial count */
-	wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
+	wait_event(*done->waitq, !atomic_read(&done->cnt));
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
-/* parameters for foreign inode detection, see wb_detach_inode() */
+/*
+ * Parameters for foreign inode detection, see wbc_detach_inode() to see
+ * how they're used.
+ *
+ * These parameters are inherently heuristic as the detection target
+ * itself is fuzzy. All we want to do is detach an inode from the
+ * current owner if it's being written to by some other cgroups too much.
+ *
+ * The current cgroup writeback is built on the assumption that multiple
+ * cgroups writing to the same inode concurrently is very rare and a mode
+ * of operation which isn't well supported. As such, the goal is not
+ * taking too long when a different cgroup takes over an inode while
+ * avoiding too aggressive flip-flops from occasional foreign writes.
+ *
+ * We record, very roughly, 2s worth of IO time history and if more than
+ * half of that is foreign, trigger the switch. The recording is quantized
+ * to 16 slots. To avoid tiny writes from swinging the decision too much,
+ * writes smaller than 1/8 of avg size are ignored.
+ */
 #define WB_FRN_TIME_SHIFT	13	/* 1s = 2^13, upto 8 secs w/ 16bit */
 #define WB_FRN_TIME_AVG_SHIFT	3	/* avg = avg * 7/8 + new * 1/8 */
-#define WB_FRN_TIME_CUT_DIV	2	/* ignore rounds < avg / 2 */
+#define WB_FRN_TIME_CUT_DIV	8	/* ignore rounds < avg / 8 */
 #define WB_FRN_TIME_PERIOD	(2 * (1 << WB_FRN_TIME_SHIFT))	/* 2s */
 
 #define WB_FRN_HIST_SLOTS	16	/* inode->i_wb_frn_history is 16bit */
...
 					/* if foreign slots >= 8, switch */
 #define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
 					/* one round can affect upto 5 slots */
+#define WB_FRN_MAX_IN_FLIGHT	1024	/* don't queue too many concurrently */
 
 static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
 static struct workqueue_struct *isw_wq;
...
 	struct address_space *mapping = inode->i_mapping;
 	struct bdi_writeback *old_wb = inode->i_wb;
 	struct bdi_writeback *new_wb = isw->new_wb;
-	struct radix_tree_iter iter;
+	XA_STATE(xas, &mapping->i_pages, 0);
+	struct page *page;
 	bool switched = false;
-	void **slot;
 
 	/*
 	 * If @inode switches cgwb membership while sync_inodes_sb() is
...
 	if (unlikely(inode->i_state & I_FREEING))
 		goto skip_switch;
 
+	trace_inode_switch_wbs(inode, old_wb, new_wb);
+
 	/*
 	 * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
 	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
 	 * pages actually under writeback.
 	 */
-	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
-				   PAGECACHE_TAG_DIRTY) {
-		struct page *page = radix_tree_deref_slot_protected(slot,
-						&mapping->i_pages.xa_lock);
-		if (likely(page) && PageDirty(page)) {
+	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
+		if (PageDirty(page)) {
 			dec_wb_stat(old_wb, WB_RECLAIMABLE);
 			inc_wb_stat(new_wb, WB_RECLAIMABLE);
 		}
 	}
 
-	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
-				   PAGECACHE_TAG_WRITEBACK) {
-		struct page *page = radix_tree_deref_slot_protected(slot,
-						&mapping->i_pages.xa_lock);
-		if (likely(page)) {
-			WARN_ON_ONCE(!PageWriteback(page));
-			dec_wb_stat(old_wb, WB_WRITEBACK);
-			inc_wb_stat(new_wb, WB_WRITEBACK);
-		}
+	xas_set(&xas, 0);
+	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
+		WARN_ON_ONCE(!PageWriteback(page));
+		dec_wb_stat(old_wb, WB_WRITEBACK);
+		inc_wb_stat(new_wb, WB_WRITEBACK);
 	}
 
 	wb_get(new_wb);
...
 	if (inode->i_state & I_WB_SWITCH)
 		return;
 
-	/*
-	 * Avoid starting new switches while sync_inodes_sb() is in
-	 * progress. Otherwise, if the down_write protected issue path
-	 * blocks heavily, we might end up starting a large number of
-	 * switches which will block on the rwsem.
-	 */
-	if (!down_read_trylock(&bdi->wb_switch_rwsem))
+	/* avoid queueing a new switch if too many are already in flight */
+	if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
 		return;
 
 	isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
 	if (!isw)
-		goto out_unlock;
+		return;
+
+	atomic_inc(&isw_nr_in_flight);
 
 	/* find and pin the new wb */
 	rcu_read_lock();
...
 	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
 	 */
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
-
-	atomic_inc(&isw_nr_in_flight);
-
-	goto out_unlock;
+	return;
 
 out_free:
+	atomic_dec(&isw_nr_in_flight);
 	if (isw->new_wb)
 		wb_put(isw->new_wb);
 	kfree(isw);
-out_unlock:
-	up_read(&bdi->wb_switch_rwsem);
 }
 
 /**
...
 	if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
 		inode_switch_wbs(inode, wbc->wb_id);
 }
+EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
 
 /**
  * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
...
 		if (wbc->wb_id != max_id)
 			history |= (1U << slots) - 1;
 
+		if (history)
+			trace_inode_foreign_history(inode, wbc, history);
+
 		/*
 		 * Switch if the current wb isn't the consistent winner.
 		 * If there are multiple closely competing dirtiers, the
...
 		 * is okay. The main goal is avoiding keeping an inode on
 		 * the wrong wb for an extended period of time.
 		 */
-		if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
+		if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
 			inode_switch_wbs(inode, max_id);
 	}
 
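The hweight16() test above is the tail end of the foreign-owner heuristic described by the WB_FRN_* comments earlier in the patch. A small, self-contained userspace model of that decision is sketched below; the threshold and cap values are assumptions taken from the visible comments ("if foreign slots >= 8, switch", "one round can affect upto 5 slots"), since the actual WB_FRN_HIST_THR_SLOTS define and the history-aging shift sit in parts of the file this view elides.

#include <stdbool.h>
#include <stdint.h>

#define FRN_HIST_SLOTS		16			/* 16-bit history */
#define FRN_HIST_THR_SLOTS	(FRN_HIST_SLOTS / 2)	/* assumed: 8 */
#define FRN_HIST_MAX_SLOTS	(FRN_HIST_THR_SLOTS / 2 + 1)	/* 5 */

static unsigned int popcount16(uint16_t w)	/* stand-in for hweight16() */
{
	unsigned int n = 0;

	for (; w; w >>= 1)
		n += w & 1;
	return n;
}

/*
 * Fold one writeback round into the history: age out the old slots, mark
 * the new ones when the round's dominant writer was a foreign cgroup, and
 * apply the same "more than half the slots are foreign" test that
 * wbc_detach_inode() uses above.
 */
static bool foreign_round(uint16_t *history, unsigned int slots, bool foreign)
{
	if (slots > FRN_HIST_MAX_SLOTS)		/* one round hits at most 5 slots */
		slots = FRN_HIST_MAX_SLOTS;
	*history <<= slots;
	if (foreign)
		*history |= (1U << slots) - 1;
	return popcount16(*history) > FRN_HIST_THR_SLOTS;
}

For example, a history of 0x03ff has ten foreign slots out of sixteen, so the test fires and, in the kernel code above, inode_switch_wbs() hands the inode to the dominant cgroup. Rounds whose IO time falls below avg/WB_FRN_TIME_CUT_DIV (now avg/8) are ignored and never reach this point.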
...
 	wb_put(wbc->wb);
 	wbc->wb = NULL;
 }
+EXPORT_SYMBOL_GPL(wbc_detach_inode);
 
 /**
- * wbc_account_io - account IO issued during writeback
+ * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
  * @wbc: writeback_control of the writeback in progress
  * @page: page being written out
  * @bytes: number of bytes being written out
...
  * controlled by @wbc. Keep the book for foreign inode detection. See
  * wbc_detach_inode().
  */
-void wbc_account_io(struct writeback_control *wbc, struct page *page,
-		    size_t bytes)
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+			      size_t bytes)
 {
 	struct cgroup_subsys_state *css;
 	int id;
...
 	 * behind a slow cgroup. Ultimately, we want pageout() to kick off
 	 * regular writeback instead of writing things out itself.
 	 */
-	if (!wbc->wb)
+	if (!wbc->wb || wbc->no_cgroup_owner)
 		return;
 
 	css = mem_cgroup_css_from_page(page);
...
 	else
 		wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
 }
-EXPORT_SYMBOL_GPL(wbc_account_io);
+EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
 
 /**
  * inode_congested - test whether an inode is congested
...
 restart:
 	rcu_read_lock();
 	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
-		DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
+		DEFINE_WB_COMPLETION(fallback_work_done, bdi);
 		struct wb_writeback_work fallback_work;
 		struct wb_writeback_work *work;
 		long nr_pages;
...
 			continue;
 		}
 
+		/*
+		 * If wb_tryget fails, the wb has been shutdown, skip it.
+		 *
+		 * Pin @wb so that it stays on @bdi->wb_list. This allows
+		 * continuing iteration from @wb after dropping and
+		 * regrabbing rcu read lock.
+		 */
+		if (!wb_tryget(wb))
+			continue;
+
 		/* alloc failed, execute synchronously using on-stack fallback */
 		work = &fallback_work;
 		*work = *base_work;
...
 		work->done = &fallback_work_done;
 
 		wb_queue_work(wb, work);
-
-		/*
-		 * Pin @wb so that it stays on @bdi->wb_list. This allows
-		 * continuing iteration from @wb after dropping and
-		 * regrabbing rcu read lock.
-		 */
-		wb_get(wb);
 		last_wb = wb;
 
 		rcu_read_unlock();
-		wb_wait_for_completion(bdi, &fallback_work_done);
+		wb_wait_for_completion(&fallback_work_done);
 		goto restart;
 	}
 	rcu_read_unlock();
 
 	if (last_wb)
 		wb_put(last_wb);
+}
+
+/**
+ * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
+ * @bdi_id: target bdi id
+ * @memcg_id: target memcg css id
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
+ * @reason: reason why some writeback work initiated
+ * @done: target wb_completion
+ *
+ * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
+ * with the specified parameters.
+ */
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+			   enum wb_reason reason, struct wb_completion *done)
+{
+	struct backing_dev_info *bdi;
+	struct cgroup_subsys_state *memcg_css;
+	struct bdi_writeback *wb;
+	struct wb_writeback_work *work;
+	int ret;
+
+	/* lookup bdi and memcg */
+	bdi = bdi_get_by_id(bdi_id);
+	if (!bdi)
+		return -ENOENT;
+
+	rcu_read_lock();
+	memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
+	if (memcg_css && !css_tryget(memcg_css))
+		memcg_css = NULL;
+	rcu_read_unlock();
+	if (!memcg_css) {
+		ret = -ENOENT;
+		goto out_bdi_put;
+	}
+
+	/*
+	 * And find the associated wb. If the wb isn't there already
+	 * there's nothing to flush, don't create one.
+	 */
+	wb = wb_get_lookup(bdi, memcg_css);
+	if (!wb) {
+		ret = -ENOENT;
+		goto out_css_put;
+	}
+
+	/*
+	 * If @nr is zero, the caller is attempting to write out most of
+	 * the currently dirty pages. Let's take the current dirty page
+	 * count and inflate it by 25% which should be large enough to
+	 * flush out most dirty pages while avoiding getting livelocked by
+	 * concurrent dirtiers.
+	 */
+	if (!nr) {
+		unsigned long filepages, headroom, dirty, writeback;
+
+		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
+				    &writeback);
+		nr = dirty * 10 / 8;
+	}
+
+	/* issue the writeback work */
+	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
+	if (work) {
+		work->nr_pages = nr;
+		work->sync_mode = WB_SYNC_NONE;
+		work->range_cyclic = 1;
+		work->reason = reason;
+		work->done = done;
+		work->auto_free = 1;
+		wb_queue_work(wb, work);
+		ret = 0;
+	} else {
+		ret = -ENOMEM;
+	}
+
+	wb_put(wb);
+out_css_put:
+	css_put(memcg_css);
+out_bdi_put:
+	bdi_put(bdi);
+	return ret;
 }
 
 /**
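cgroup_writeback_by_id() is built around the reworked wb_completion: the caller owns the wait queue the completion points at, queues the flush, and then waits for every attached work item. A rough sketch of such a caller follows; it is illustrative only — flush_one_memcg_bdi and flush_waitq are made-up names, and __WB_COMPLETION_INIT() is assumed to come from the matching backing-dev header change in the same series rather than from this hunk.

#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/writeback.h>

static DECLARE_WAIT_QUEUE_HEAD(flush_waitq);

static int flush_one_memcg_bdi(u64 bdi_id, int memcg_id, enum wb_reason reason)
{
	/* on-stack completion bound to a wait queue the caller owns */
	struct wb_completion done = __WB_COMPLETION_INIT(&flush_waitq);
	int ret;

	/* nr == 0 asks for ~125% of the currently dirty pages (see above) */
	ret = cgroup_writeback_by_id(bdi_id, memcg_id, 0, reason, &done);
	if (!ret)
		wb_wait_for_completion(&done);
	return ret;
}

wb_wait_for_completion() drops the completion's initial count and sleeps until every work item that carried ->done has signalled it; the wake_up_all() hunk near the top of this patch is the signalling side.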
...
 static unsigned long get_nr_dirty_pages(void)
 {
 	return global_node_page_state(NR_FILE_DIRTY) +
-		global_node_page_state(NR_UNSTABLE_NFS) +
 		get_nr_dirty_inodes();
 }
 
...
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&wb->list_lock);
 }
+EXPORT_SYMBOL(inode_io_list_del);
 
 /*
  * mark an inode as under writeback on the sb
...
 	};
 	unsigned long start_time = jiffies;
 	long write_chunk;
-	long wrote = 0;	/* count both pages and inodes */
+	long total_wrote = 0;	/* count both pages and inodes */
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
 		struct bdi_writeback *tmp_wb;
+		long wrote;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
...
 
 		wbc_detach_inode(&wbc);
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
-		wrote += write_chunk - wbc.nr_to_write;
+		wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
+		wrote = wrote < 0 ? 0 : wrote;
+		total_wrote += wrote;
 
 		if (need_resched()) {
 			/*
...
 		tmp_wb = inode_to_wb_and_lock_list(inode);
 		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY_ALL))
-			wrote++;
+			total_wrote++;
 		requeue_inode(inode, tmp_wb, &wbc);
 		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
...
 		 * bail out to wb_writeback() often enough to check
 		 * background threshold and other termination conditions.
 		 */
-		if (wrote) {
+		if (total_wrote) {
 			if (time_is_before_jiffies(start_time + HZ / 10UL))
 				break;
 			if (work->nr_pages <= 0)
 				break;
 		}
 	}
-	return wrote;
+	return total_wrote;
 }
 
 static long __writeback_inodes_wb(struct bdi_writeback *wb,
...
 __initcall(start_dirtytime_writeback);
 
 int dirtytime_interval_handler(struct ctl_table *table, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos)
+			       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
...
 
 		wb = locked_inode_to_wb_and_lock_list(inode);
 
-		WARN(bdi_cap_writeback_dirty(wb->bdi) &&
+		WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) &&
 		     !test_bit(WB_registered, &wb->state),
-		     "bdi-%s not registered\n", wb->bdi->name);
+		     "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
 
 		inode->dirtied_when = jiffies;
 		if (dirtytime)
...
 			 * to make sure background write-back happens
 			 * later.
 			 */
-			if (bdi_cap_writeback_dirty(wb->bdi) && wakeup_bdi)
+			if (wakeup_bdi &&
+			    (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
 				wb_wakeup_delayed(wb);
 			return;
 		}
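Both call sites above now test the positive BDI_CAP_WRITEBACK capability bit in place of the old bdi_cap_writeback_dirty() helper. Written out as a wrapper purely for illustration (the patch deliberately open-codes the test; the helper name below is hypothetical):

#include <linux/backing-dev.h>

/* Illustrative equivalent of the open-coded checks above. */
static inline bool wb_bdi_supports_writeback(struct bdi_writeback *wb)
{
	return wb->bdi->capabilities & BDI_CAP_WRITEBACK;
}

The mapping-level counterpart of the same change, mapping_can_writeback() replacing mapping_cap_writeback_dirty(), appears further down in this patch.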
...
 out_unlock_inode:
 	spin_unlock(&inode->i_lock);
 }
-EXPORT_SYMBOL(__mark_inode_dirty);
+EXPORT_SYMBOL_NS(__mark_inode_dirty, ANDROID_GKI_VFS_EXPORT_ONLY);
 
 /*
  * The @s_sync_lock is used to serialise concurrent sync operations
...
 static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
 				     enum wb_reason reason, bool skip_if_busy)
 {
-	DEFINE_WB_COMPLETION_ONSTACK(done);
+	struct backing_dev_info *bdi = sb->s_bdi;
+	DEFINE_WB_COMPLETION(done, bdi);
 	struct wb_writeback_work work = {
 		.sb = sb,
 		.sync_mode = WB_SYNC_NONE,
...
 		.nr_pages = nr,
 		.reason = reason,
 	};
-	struct backing_dev_info *bdi = sb->s_bdi;
 
 	if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
 		return;
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
-	wb_wait_for_completion(bdi, &done);
+	wb_wait_for_completion(&done);
 }
 
 /**
...
 	__writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
 	up_read(&sb->s_umount);
 }
-EXPORT_SYMBOL(try_to_writeback_inodes_sb);
+EXPORT_SYMBOL_NS(try_to_writeback_inodes_sb, ANDROID_GKI_VFS_EXPORT_ONLY);
 
 /**
  * sync_inodes_sb - sync sb inode pages
...
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	DEFINE_WB_COMPLETION_ONSTACK(done);
+	struct backing_dev_info *bdi = sb->s_bdi;
+	DEFINE_WB_COMPLETION(done, bdi);
 	struct wb_writeback_work work = {
 		.sb = sb,
 		.sync_mode = WB_SYNC_ALL,
...
 		.reason = WB_REASON_SYNC,
 		.for_sync = 1,
 	};
-	struct backing_dev_info *bdi = sb->s_bdi;
 
 	/*
 	 * Can't skip on !bdi_has_dirty() because we should wait for !dirty
...
 	/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
 	bdi_down_write_wb_switch_rwsem(bdi);
 	bdi_split_work_to_wbs(bdi, &work, false);
-	wb_wait_for_completion(bdi, &done);
+	wb_wait_for_completion(&done);
 	bdi_up_write_wb_switch_rwsem(bdi);
 
 	wait_sb_inodes(sb);
...
 		.range_end = LLONG_MAX,
 	};
 
-	if (!mapping_cap_writeback_dirty(inode->i_mapping))
+	if (!mapping_can_writeback(inode->i_mapping))
 		wbc.nr_to_write = 0;
 
 	might_sleep();
 	return writeback_single_inode(inode, &wbc);
 }
-EXPORT_SYMBOL(write_inode_now);
+EXPORT_SYMBOL_NS(write_inode_now, ANDROID_GKI_VFS_EXPORT_ONLY);
 
 /**
  * sync_inode - write an inode and its pages to disk.
...
 
 	return sync_inode(inode, &wbc);
 }
-EXPORT_SYMBOL(sync_inode_metadata);
+EXPORT_SYMBOL_NS(sync_inode_metadata, ANDROID_GKI_VFS_EXPORT_ONLY);
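With __mark_inode_dirty(), try_to_writeback_inodes_sb(), write_inode_now() and sync_inode_metadata() moving from EXPORT_SYMBOL() to EXPORT_SYMBOL_NS(), the symbols become namespaced: a module that calls any of them must import the ANDROID_GKI_VFS_EXPORT_ONLY namespace or modpost will complain about the unimported symbol at build time. A minimal sketch of such a consumer is below (the module itself is hypothetical; the call is commented out only to show where it would go):

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/writeback.h>

MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);

static int __init gki_vfs_demo_init(void)
{
	/* e.g. sync_inode_metadata(inode, 1); -- now resolvable */
	return 0;
}
module_init(gki_vfs_demo_init);

MODULE_LICENSE("GPL");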