| old | new | line |
|---|---|---|
| .. | .. | |
| | 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 1 | 2 | /* |
| 2 | 3 | * fs/fs-writeback.c |
| 3 | 4 | * |
| .. | .. | |
| 35 | 36 | */ |
| 36 | 37 | #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) |
| 37 | 38 | |
| 38 | | -struct wb_completion { |
| 39 | | - atomic_t cnt; |
| 40 | | -}; |
| 41 | | - |
| 42 | 39 | /* |
| 43 | 40 | * Passed into wb_writeback(), essentially a subset of writeback_control |
| 44 | 41 | */ |
| .. | .. | |
| 57 | 54 | struct list_head list; /* pending work list */ |
| 58 | 55 | struct wb_completion *done; /* set if the caller waits */ |
| 59 | 56 | }; |
| 60 | | - |
| 61 | | -/* |
| 62 | | - * If one wants to wait for one or more wb_writeback_works, each work's |
| 63 | | - * ->done should be set to a wb_completion defined using the following |
| 64 | | - * macro. Once all work items are issued with wb_queue_work(), the caller |
| 65 | | - * can wait for the completion of all using wb_wait_for_completion(). Work |
| 66 | | - * items which are waited upon aren't freed automatically on completion. |
| 67 | | - */ |
| 68 | | -#define DEFINE_WB_COMPLETION_ONSTACK(cmpl) \ |
| 69 | | - struct wb_completion cmpl = { \ |
| 70 | | - .cnt = ATOMIC_INIT(1), \ |
| 71 | | - } |
| 72 | | - |
| 73 | 57 | |
| 74 | 58 | /* |
| 75 | 59 | * If an inode is constantly having its pages dirtied, but then the |
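
The on-stack completion type and its initializer are not deleted outright; they move out of fs/fs-writeback.c so that code outside this file (the writeback-by-id path added further down, and ultimately its callers) can issue works and wait on them. A sketch of what the relocated definitions presumably look like in include/linux/backing-dev-defs.h; the key change is that the completion now records which wait queue to wake rather than hard-coding the bdi's:

```c
/* Sketch only: assumed shape of the relocated wb_completion helpers. */
struct wb_completion {
	atomic_t		cnt;
	wait_queue_head_t	*waitq;
};

#define __WB_COMPLETION_INIT(_waitq)	\
	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }

/* Default to the bdi's wait queue, which the old code used implicitly. */
#define WB_COMPLETION_INIT(bdi)		__WB_COMPLETION_INIT(&(bdi)->wb_waitq)

#define DEFINE_WB_COMPLETION(cmpl, bdi)	\
	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
```
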
| old | new | line |
|---|---|---|
| .. | .. | |
| 181 | 165 | |
| 182 | 166 | if (work->auto_free) |
| 183 | 167 | kfree(work); |
| 184 | | - if (done && atomic_dec_and_test(&done->cnt)) |
| 185 | | - wake_up_all(&wb->bdi->wb_waitq); |
| | 168 | + if (done) { |
| | 169 | + wait_queue_head_t *waitq = done->waitq; |
| | 170 | + |
| | 171 | + /* @done can't be accessed after the following dec */ |
| | 172 | + if (atomic_dec_and_test(&done->cnt)) |
| | 173 | + wake_up_all(waitq); |
| | 174 | + } |
| 186 | 175 | } |
| 187 | 176 | |
| 188 | 177 | static void wb_queue_work(struct bdi_writeback *wb, |
| .. | .. | |
| 206 | 195 | |
| 207 | 196 | /** |
| 208 | 197 | * wb_wait_for_completion - wait for completion of bdi_writeback_works |
| 209 | | - * @bdi: bdi work items were issued to |
| 210 | 198 | * @done: target wb_completion |
| 211 | 199 | * |
| 212 | 200 | * Wait for one or more work items issued to @bdi with their ->done field |
| 213 | | - * set to @done, which should have been defined with |
| 214 | | - * DEFINE_WB_COMPLETION_ONSTACK(). This function returns after all such |
| 215 | | - * work items are completed. Work items which are waited upon aren't freed |
| | 201 | + * set to @done, which should have been initialized with |
| | 202 | + * DEFINE_WB_COMPLETION(). This function returns after all such work items |
| | 203 | + * are completed. Work items which are waited upon aren't freed |
| 216 | 204 | * automatically on completion. |
| 217 | 205 | */ |
| 218 | | -static void wb_wait_for_completion(struct backing_dev_info *bdi, |
| 219 | | - struct wb_completion *done) |
| | 206 | +void wb_wait_for_completion(struct wb_completion *done) |
| 220 | 207 | { |
| 221 | 208 | atomic_dec(&done->cnt); /* put down the initial count */ |
| 222 | | - wait_event(bdi->wb_waitq, !atomic_read(&done->cnt)); |
| | 209 | + wait_event(*done->waitq, !atomic_read(&done->cnt)); |
| 223 | 210 | } |
| 224 | 211 | |
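
Because the completion now carries its own wait queue, wb_wait_for_completion() no longer needs the bdi argument, and the completion side (in the hunk above) snapshots done->waitq before the final atomic_dec(): the waiter may free, or unwind the stack frame holding, the wb_completion the moment the count reaches zero. A hypothetical caller, assuming the helpers sketched earlier, looks roughly like this; wb_queue_work() takes a count on @done for every queued work and finish_writeback_work() drops it:

```c
/* Hypothetical caller: queue several works on a bdi and wait for all of them. */
static void issue_and_wait(struct backing_dev_info *bdi,
			   struct wb_writeback_work *works, int nr)
{
	DEFINE_WB_COMPLETION(done, bdi);	/* cnt = 1, waitq = &bdi->wb_waitq */
	int i;

	for (i = 0; i < nr; i++) {
		works[i].done = &done;		/* each queued work holds a count */
		wb_queue_work(&bdi->wb, &works[i]);
	}

	/* Drop the initial count and sleep until every work has finished. */
	wb_wait_for_completion(&done);
}
```
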
| old | new | line |
|---|---|---|
| 225 | 212 | #ifdef CONFIG_CGROUP_WRITEBACK |
| 226 | 213 | |
| 227 | | -/* parameters for foreign inode detection, see wb_detach_inode() */ |
| | 214 | +/* |
| | 215 | + * Parameters for foreign inode detection, see wbc_detach_inode() to see |
| | 216 | + * how they're used. |
| | 217 | + * |
| | 218 | + * These paramters are inherently heuristical as the detection target |
| | 219 | + * itself is fuzzy. All we want to do is detaching an inode from the |
| | 220 | + * current owner if it's being written to by some other cgroups too much. |
| | 221 | + * |
| | 222 | + * The current cgroup writeback is built on the assumption that multiple |
| | 223 | + * cgroups writing to the same inode concurrently is very rare and a mode |
| | 224 | + * of operation which isn't well supported. As such, the goal is not |
| | 225 | + * taking too long when a different cgroup takes over an inode while |
| | 226 | + * avoiding too aggressive flip-flops from occasional foreign writes. |
| | 227 | + * |
| | 228 | + * We record, very roughly, 2s worth of IO time history and if more than |
| | 229 | + * half of that is foreign, trigger the switch. The recording is quantized |
| | 230 | + * to 16 slots. To avoid tiny writes from swinging the decision too much, |
| | 231 | + * writes smaller than 1/8 of avg size are ignored. |
| | 232 | + */ |
| 228 | 233 | #define WB_FRN_TIME_SHIFT 13 /* 1s = 2^13, upto 8 secs w/ 16bit */ |
| 229 | 234 | #define WB_FRN_TIME_AVG_SHIFT 3 /* avg = avg * 7/8 + new * 1/8 */ |
| 230 | | -#define WB_FRN_TIME_CUT_DIV 2 /* ignore rounds < avg / 2 */ |
| | 235 | +#define WB_FRN_TIME_CUT_DIV 8 /* ignore rounds < avg / 8 */ |
| 231 | 236 | #define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT)) /* 2s */ |
| 232 | 237 | |
| 233 | 238 | #define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */ |
| .. | .. | |
| 237 | 242 | /* if foreign slots >= 8, switch */ |
| 238 | 243 | #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) |
| 239 | 244 | /* one round can affect upto 5 slots */ |
| | 245 | +#define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */ |
| 240 | 246 | |
| 241 | 247 | static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); |
| 242 | 248 | static struct workqueue_struct *isw_wq; |
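
To put numbers on the comment above: with WB_FRN_TIME_SHIFT = 13 one recorded time unit is 2^-13 s, so WB_FRN_TIME_PERIOD = 2 * 2^13 units is the roughly 2 s history window. Spread over WB_FRN_HIST_SLOTS = 16 slots, each slot stands for about 128 ms of IO time, and the "foreign slots >= 8" rule means a switch fires once about 1 s of the recorded 2 s is attributed to a foreign cgroup. Raising WB_FRN_TIME_CUT_DIV from 2 to 8 matches the new comment: a round is now ignored only if it is smaller than 1/8 of the running average rather than 1/2, so only genuinely tiny writes are excluded from the history. The new WB_FRN_MAX_IN_FLIGHT cap of 1024 queued switches is what replaces the wb_switch_rwsem-based throttling removed in inode_switch_wbs() below.
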
| old | new | line |
|---|---|---|
| .. | .. | |
| 352 | 358 | struct address_space *mapping = inode->i_mapping; |
| 353 | 359 | struct bdi_writeback *old_wb = inode->i_wb; |
| 354 | 360 | struct bdi_writeback *new_wb = isw->new_wb; |
| 355 | | - struct radix_tree_iter iter; |
| | 361 | + XA_STATE(xas, &mapping->i_pages, 0); |
| | 362 | + struct page *page; |
| 356 | 363 | bool switched = false; |
| 357 | | - void **slot; |
| 358 | 364 | |
| 359 | 365 | /* |
| 360 | 366 | * If @inode switches cgwb membership while sync_inodes_sb() is |
| .. | .. | |
| 389 | 395 | if (unlikely(inode->i_state & I_FREEING)) |
| 390 | 396 | goto skip_switch; |
| 391 | 397 | |
| | 398 | + trace_inode_switch_wbs(inode, old_wb, new_wb); |
| | 399 | + |
| 392 | 400 | /* |
| 393 | 401 | * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points |
| 394 | 402 | * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to |
| 395 | 403 | * pages actually under writeback. |
| 396 | 404 | */ |
| 397 | | - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0, |
| 398 | | - PAGECACHE_TAG_DIRTY) { |
| 399 | | - struct page *page = radix_tree_deref_slot_protected(slot, |
| 400 | | - &mapping->i_pages.xa_lock); |
| 401 | | - if (likely(page) && PageDirty(page)) { |
| | 405 | + xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) { |
| | 406 | + if (PageDirty(page)) { |
| 402 | 407 | dec_wb_stat(old_wb, WB_RECLAIMABLE); |
| 403 | 408 | inc_wb_stat(new_wb, WB_RECLAIMABLE); |
| 404 | 409 | } |
| 405 | 410 | } |
| 406 | 411 | |
| 407 | | - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0, |
| 408 | | - PAGECACHE_TAG_WRITEBACK) { |
| 409 | | - struct page *page = radix_tree_deref_slot_protected(slot, |
| 410 | | - &mapping->i_pages.xa_lock); |
| 411 | | - if (likely(page)) { |
| 412 | | - WARN_ON_ONCE(!PageWriteback(page)); |
| 413 | | - dec_wb_stat(old_wb, WB_WRITEBACK); |
| 414 | | - inc_wb_stat(new_wb, WB_WRITEBACK); |
| 415 | | - } |
| | 412 | + xas_set(&xas, 0); |
| | 413 | + xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { |
| | 414 | + WARN_ON_ONCE(!PageWriteback(page)); |
| | 415 | + dec_wb_stat(old_wb, WB_WRITEBACK); |
| | 416 | + inc_wb_stat(new_wb, WB_WRITEBACK); |
| 416 | 417 | } |
| 417 | 418 | |
| 418 | 419 | wb_get(new_wb); |
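
The stat-transfer loops above are converted from the old radix-tree API to XArray iteration: an XA_STATE cursor is declared once, xas_for_each_marked() walks only the entries carrying the given mark (so the old likely(page) checks around radix_tree_deref_slot_protected() go away), and xas_set(&xas, 0) rewinds the same cursor before the second walk. As a standalone sketch of the idiom, with hypothetical accounting helpers rather than the wb_stat calls used above:

```c
/* Sketch: walk a mapping's pages by xarray mark (assumes xa_lock is held). */
static void transfer_stats(struct address_space *mapping)
{
	XA_STATE(xas, &mapping->i_pages, 0);	/* cursor starting at index 0 */
	struct page *page;

	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY)
		account_dirty_page(page);	/* hypothetical helper */

	xas_set(&xas, 0);			/* rewind before reusing the cursor */
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK)
		account_writeback_page(page);	/* hypothetical helper */
}
```
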
| old | new | line |
|---|---|---|
| .. | .. | |
| 496 | 497 | if (inode->i_state & I_WB_SWITCH) |
| 497 | 498 | return; |
| 498 | 499 | |
| 499 | | - /* |
| 500 | | - * Avoid starting new switches while sync_inodes_sb() is in |
| 501 | | - * progress. Otherwise, if the down_write protected issue path |
| 502 | | - * blocks heavily, we might end up starting a large number of |
| 503 | | - * switches which will block on the rwsem. |
| 504 | | - */ |
| 505 | | - if (!down_read_trylock(&bdi->wb_switch_rwsem)) |
| | 500 | + /* avoid queueing a new switch if too many are already in flight */ |
| | 501 | + if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT) |
| 506 | 502 | return; |
| 507 | 503 | |
| 508 | 504 | isw = kzalloc(sizeof(*isw), GFP_ATOMIC); |
| 509 | 505 | if (!isw) |
| 510 | | - goto out_unlock; |
| | 506 | + return; |
| | 507 | + |
| | 508 | + atomic_inc(&isw_nr_in_flight); |
| 511 | 509 | |
| 512 | 510 | /* find and pin the new wb */ |
| 513 | 511 | rcu_read_lock(); |
| .. | .. | |
| 544 | 542 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. |
| 545 | 543 | */ |
| 546 | 544 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); |
| 547 | | - |
| 548 | | - atomic_inc(&isw_nr_in_flight); |
| 549 | | - |
| 550 | | - goto out_unlock; |
| | 545 | + return; |
| 551 | 546 | |
| 552 | 547 | out_free: |
| | 548 | + atomic_dec(&isw_nr_in_flight); |
| 553 | 549 | if (isw->new_wb) |
| 554 | 550 | wb_put(isw->new_wb); |
| 555 | 551 | kfree(isw); |
| 556 | | -out_unlock: |
| 557 | | - up_read(&bdi->wb_switch_rwsem); |
| 558 | 552 | } |
| 559 | 553 | |
| 560 | 554 | /** |
| .. | .. | |
| 598 | 592 | if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) |
| 599 | 593 | inode_switch_wbs(inode, wbc->wb_id); |
| 600 | 594 | } |
| | 595 | +EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode); |
| 601 | 596 | |
| 602 | 597 | /** |
| 603 | 598 | * wbc_detach_inode - disassociate wbc from inode and perform foreign detection |
| .. | .. | |
| 695 | 690 | if (wbc->wb_id != max_id) |
| 696 | 691 | history \|= (1U << slots) - 1; |
| 697 | 692 | |
| | 693 | + if (history) |
| | 694 | + trace_inode_foreign_history(inode, wbc, history); |
| | 695 | + |
| 698 | 696 | /* |
| 699 | 697 | * Switch if the current wb isn't the consistent winner. |
| 700 | 698 | * If there are multiple closely competing dirtiers, the |
| .. | .. | |
| 717 | 715 | wb_put(wbc->wb); |
| 718 | 716 | wbc->wb = NULL; |
| 719 | 717 | } |
| | 718 | +EXPORT_SYMBOL_GPL(wbc_detach_inode); |
| 720 | 719 | |
| 721 | 720 | /** |
| 722 | | - * wbc_account_io - account IO issued during writeback |
| | 721 | + * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership |
| 723 | 722 | * @wbc: writeback_control of the writeback in progress |
| 724 | 723 | * @page: page being written out |
| 725 | 724 | * @bytes: number of bytes being written out |
| .. | .. | |
| 728 | 727 | * controlled by @wbc. Keep the book for foreign inode detection. See |
| 729 | 728 | * wbc_detach_inode(). |
| 730 | 729 | */ |
| 731 | | -void wbc_account_io(struct writeback_control *wbc, struct page *page, |
| 732 | | - size_t bytes) |
| | 730 | +void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, |
| | 731 | + size_t bytes) |
| 733 | 732 | { |
| 734 | 733 | struct cgroup_subsys_state *css; |
| 735 | 734 | int id; |
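
The rename from wbc_account_io() to wbc_account_cgroup_owner() describes what the function actually does: it does not account IO as such, it feeds the foreign-owner detection above so the inode can eventually be handed to the cgroup doing most of the writing. The calling convention for filesystem write-out paths is unchanged; a minimal sketch of such a caller (hypothetical helper name):

```c
/* Hypothetical write-out helper charging the page to its cgroup owner. */
static void fs_write_one_page(struct writeback_control *wbc, struct page *page)
{
	/* Record which cgroup owns the data for foreign-inode detection. */
	wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);

	/* ... build and submit the bio for @page ... */
}
```

The wbc->no_cgroup_owner check added in the next hunk gives callers a way to opt out of this accounting entirely.
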
| old | new | line |
|---|---|---|
| .. | .. | |
| 740 | 739 | * behind a slow cgroup. Ultimately, we want pageout() to kick off |
| 741 | 740 | * regular writeback instead of writing things out itself. |
| 742 | 741 | */ |
| 743 | | - if (!wbc->wb) |
| | 742 | + if (!wbc->wb \|\| wbc->no_cgroup_owner) |
| 744 | 743 | return; |
| 745 | 744 | |
| 746 | 745 | css = mem_cgroup_css_from_page(page); |
| .. | .. | |
| 766 | 765 | else |
| 767 | 766 | wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); |
| 768 | 767 | } |
| 769 | | -EXPORT_SYMBOL_GPL(wbc_account_io); |
| | 768 | +EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner); |
| 770 | 769 | |
| 771 | 770 | /** |
| 772 | 771 | * inode_congested - test whether an inode is congested |
| .. | .. | |
| 856 | 855 | restart: |
| 857 | 856 | rcu_read_lock(); |
| 858 | 857 | list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) { |
| 859 | | - DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done); |
| | 858 | + DEFINE_WB_COMPLETION(fallback_work_done, bdi); |
| 860 | 859 | struct wb_writeback_work fallback_work; |
| 861 | 860 | struct wb_writeback_work *work; |
| 862 | 861 | long nr_pages; |
| .. | .. | |
| 903 | 902 | last_wb = wb; |
| 904 | 903 | |
| 905 | 904 | rcu_read_unlock(); |
| 906 | | - wb_wait_for_completion(bdi, &fallback_work_done); |
| | 905 | + wb_wait_for_completion(&fallback_work_done); |
| 907 | 906 | goto restart; |
| 908 | 907 | } |
| 909 | 908 | rcu_read_unlock(); |
| 910 | 909 | |
| 911 | 910 | if (last_wb) |
| 912 | 911 | wb_put(last_wb); |
| | 912 | +} |
| | 913 | + |
| | 914 | +/** |
| | 915 | + * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs |
| | 916 | + * @bdi_id: target bdi id |
| | 917 | + * @memcg_id: target memcg css id |
| | 918 | + * @nr: number of pages to write, 0 for best-effort dirty flushing |
| | 919 | + * @reason: reason why some writeback work initiated |
| | 920 | + * @done: target wb_completion |
| | 921 | + * |
| | 922 | + * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id |
| | 923 | + * with the specified parameters. |
| | 924 | + */ |
| | 925 | +int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr, |
| | 926 | + enum wb_reason reason, struct wb_completion *done) |
| | 927 | +{ |
| | 928 | + struct backing_dev_info *bdi; |
| | 929 | + struct cgroup_subsys_state *memcg_css; |
| | 930 | + struct bdi_writeback *wb; |
| | 931 | + struct wb_writeback_work *work; |
| | 932 | + int ret; |
| | 933 | + |
| | 934 | + /* lookup bdi and memcg */ |
| | 935 | + bdi = bdi_get_by_id(bdi_id); |
| | 936 | + if (!bdi) |
| | 937 | + return -ENOENT; |
| | 938 | + |
| | 939 | + rcu_read_lock(); |
| | 940 | + memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys); |
| | 941 | + if (memcg_css && !css_tryget(memcg_css)) |
| | 942 | + memcg_css = NULL; |
| | 943 | + rcu_read_unlock(); |
| | 944 | + if (!memcg_css) { |
| | 945 | + ret = -ENOENT; |
| | 946 | + goto out_bdi_put; |
| | 947 | + } |
| | 948 | + |
| | 949 | + /* |
| | 950 | + * And find the associated wb. If the wb isn't there already |
| | 951 | + * there's nothing to flush, don't create one. |
| | 952 | + */ |
| | 953 | + wb = wb_get_lookup(bdi, memcg_css); |
| | 954 | + if (!wb) { |
| | 955 | + ret = -ENOENT; |
| | 956 | + goto out_css_put; |
| | 957 | + } |
| | 958 | + |
| | 959 | + /* |
| | 960 | + * If @nr is zero, the caller is attempting to write out most of |
| | 961 | + * the currently dirty pages. Let's take the current dirty page |
| | 962 | + * count and inflate it by 25% which should be large enough to |
| | 963 | + * flush out most dirty pages while avoiding getting livelocked by |
| | 964 | + * concurrent dirtiers. |
| | 965 | + */ |
| | 966 | + if (!nr) { |
| | 967 | + unsigned long filepages, headroom, dirty, writeback; |
| | 968 | + |
| | 969 | + mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty, |
| | 970 | + &writeback); |
| | 971 | + nr = dirty * 10 / 8; |
| | 972 | + } |
| | 973 | + |
| | 974 | + /* issue the writeback work */ |
| | 975 | + work = kzalloc(sizeof(*work), GFP_NOWAIT \| __GFP_NOWARN); |
| | 976 | + if (work) { |
| | 977 | + work->nr_pages = nr; |
| | 978 | + work->sync_mode = WB_SYNC_NONE; |
| | 979 | + work->range_cyclic = 1; |
| | 980 | + work->reason = reason; |
| | 981 | + work->done = done; |
| | 982 | + work->auto_free = 1; |
| | 983 | + wb_queue_work(wb, work); |
| | 984 | + ret = 0; |
| | 985 | + } else { |
| | 986 | + ret = -ENOMEM; |
| | 987 | + } |
| | 988 | + |
| | 989 | + wb_put(wb); |
| | 990 | +out_css_put: |
| | 991 | + css_put(memcg_css); |
| | 992 | +out_bdi_put: |
| | 993 | + bdi_put(bdi); |
| | 994 | + return ret; |
| 913 | 995 | } |
| 914 | 996 | |
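
cgroup_writeback_by_id() is the new entry point for callers that only hold numeric IDs and therefore no references to the bdi or the memcg. A hedged sketch of such a caller: the reason constant is an assumption (upstream's foreign-flush path uses WB_REASON_FOREIGN_FLUSH; substitute whatever wb_reason applies in this tree), and passing a NULL completion makes the call fire-and-forget, since finish_writeback_work() only touches ->done when it is set:

```c
/*
 * Hypothetical caller: kick off best-effort writeback for a (bdi, memcg)
 * pair known only by IDs. nr == 0 asks for "most of the dirty pages",
 * i.e. the current dirty count inflated by 25% as computed above.
 */
static int kick_cgroup_flush(u64 bdi_id, int memcg_id)
{
	return cgroup_writeback_by_id(bdi_id, memcg_id, 0,
				      WB_REASON_FOREIGN_FLUSH, NULL);
}
```
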
| old | new | line |
|---|---|---|
| 915 | 997 | /** |
| .. | .. | |
| 995 | 1077 | static unsigned long get_nr_dirty_pages(void) |
| 996 | 1078 | { |
| 997 | 1079 | return global_node_page_state(NR_FILE_DIRTY) + |
| 998 | | - global_node_page_state(NR_UNSTABLE_NFS) + |
| 999 | 1080 | get_nr_dirty_inodes(); |
| 1000 | 1081 | } |
| 1001 | 1082 | |
| .. | .. | |
| 1053 | 1134 | spin_unlock(&inode->i_lock); |
| 1054 | 1135 | spin_unlock(&wb->list_lock); |
| 1055 | 1136 | } |
| | 1137 | +EXPORT_SYMBOL(inode_io_list_del); |
| 1056 | 1138 | |
| 1057 | 1139 | /* |
| 1058 | 1140 | * mark an inode as under writeback on the sb |
| .. | .. | |
| 1568 | 1650 | }; |
| 1569 | 1651 | unsigned long start_time = jiffies; |
| 1570 | 1652 | long write_chunk; |
| 1571 | | - long wrote = 0; /* count both pages and inodes */ |
| | 1653 | + long total_wrote = 0; /* count both pages and inodes */ |
| 1572 | 1654 | |
| 1573 | 1655 | while (!list_empty(&wb->b_io)) { |
| 1574 | 1656 | struct inode *inode = wb_inode(wb->b_io.prev); |
| 1575 | 1657 | struct bdi_writeback *tmp_wb; |
| | 1658 | + long wrote; |
| 1576 | 1659 | |
| 1577 | 1660 | if (inode->i_sb != sb) { |
| 1578 | 1661 | if (work->sb) { |
| .. | .. | |
| 1648 | 1731 | |
| 1649 | 1732 | wbc_detach_inode(&wbc); |
| 1650 | 1733 | work->nr_pages -= write_chunk - wbc.nr_to_write; |
| 1651 | | - wrote += write_chunk - wbc.nr_to_write; |
| | 1734 | + wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; |
| | 1735 | + wrote = wrote < 0 ? 0 : wrote; |
| | 1736 | + total_wrote += wrote; |
| 1652 | 1737 | |
| 1653 | 1738 | if (need_resched()) { |
| 1654 | 1739 | /* |
| .. | .. | |
| 1670 | 1755 | tmp_wb = inode_to_wb_and_lock_list(inode); |
| 1671 | 1756 | spin_lock(&inode->i_lock); |
| 1672 | 1757 | if (!(inode->i_state & I_DIRTY_ALL)) |
| 1673 | | - wrote++; |
| | 1758 | + total_wrote++; |
| 1674 | 1759 | requeue_inode(inode, tmp_wb, &wbc); |
| 1675 | 1760 | inode_sync_complete(inode); |
| 1676 | 1761 | spin_unlock(&inode->i_lock); |
| .. | .. | |
| 1684 | 1769 | * bail out to wb_writeback() often enough to check |
| 1685 | 1770 | * background threshold and other termination conditions. |
| 1686 | 1771 | */ |
| 1687 | | - if (wrote) { |
| | 1772 | + if (total_wrote) { |
| 1688 | 1773 | if (time_is_before_jiffies(start_time + HZ / 10UL)) |
| 1689 | 1774 | break; |
| 1690 | 1775 | if (work->nr_pages <= 0) |
| 1691 | 1776 | break; |
| 1692 | 1777 | } |
| 1693 | 1778 | } |
| 1694 | | - return wrote; |
| | 1779 | + return total_wrote; |
| 1695 | 1780 | } |
| 1696 | 1781 | |
| 1697 | 1782 | static long __writeback_inodes_wb(struct bdi_writeback *wb, |
| .. | .. | |
| 2110 | 2195 | __initcall(start_dirtytime_writeback); |
| 2111 | 2196 | |
| 2112 | 2197 | int dirtytime_interval_handler(struct ctl_table *table, int write, |
| 2113 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
| | 2198 | + void *buffer, size_t *lenp, loff_t *ppos) |
| 2114 | 2199 | { |
| 2115 | 2200 | int ret; |
| 2116 | 2201 | |
| .. | .. | |
| 2222 | 2307 | |
| 2223 | 2308 | wb = locked_inode_to_wb_and_lock_list(inode); |
| 2224 | 2309 | |
| 2225 | | - WARN(bdi_cap_writeback_dirty(wb->bdi) && |
| | 2310 | + WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) && |
| 2226 | 2311 | !test_bit(WB_registered, &wb->state), |
| 2227 | | - "bdi-%s not registered\n", wb->bdi->name); |
| | 2312 | + "bdi-%s not registered\n", bdi_dev_name(wb->bdi)); |
| 2228 | 2313 | |
| 2229 | 2314 | inode->dirtied_when = jiffies; |
| 2230 | 2315 | if (dirtytime) |
| .. | .. | |
| 2247 | 2332 | * to make sure background write-back happens |
| 2248 | 2333 | * later. |
| 2249 | 2334 | */ |
| 2250 | | - if (bdi_cap_writeback_dirty(wb->bdi) && wakeup_bdi) |
| | 2335 | + if (wakeup_bdi && |
| | 2336 | + (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) |
| 2251 | 2337 | wb_wakeup_delayed(wb); |
| 2252 | 2338 | return; |
| 2253 | 2339 | } |
| .. | .. | |
| 2255 | 2341 | out_unlock_inode: |
| 2256 | 2342 | spin_unlock(&inode->i_lock); |
| 2257 | 2343 | } |
| 2258 | | -EXPORT_SYMBOL(__mark_inode_dirty); |
| | 2344 | +EXPORT_SYMBOL_NS(__mark_inode_dirty, ANDROID_GKI_VFS_EXPORT_ONLY); |
| 2259 | 2345 | |
| 2260 | 2346 | /* |
| 2261 | 2347 | * The @s_sync_lock is used to serialise concurrent sync operations |
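
Switching these exports from EXPORT_SYMBOL() to EXPORT_SYMBOL_NS() places them in the ANDROID_GKI_VFS_EXPORT_ONLY symbol namespace: the symbols remain exported, but any module that uses them has to import that namespace explicitly, otherwise modpost warns at build time and the module loader can refuse to load the module. For example, a module calling __mark_inode_dirty() (or the other namespaced exports below) would need:

```c
#include <linux/module.h>

/* Declare use of the namespaced VFS exports. */
MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);
```
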
| old | new | line |
|---|---|---|
| .. | .. | |
| 2354 | 2440 | static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, |
| 2355 | 2441 | enum wb_reason reason, bool skip_if_busy) |
| 2356 | 2442 | { |
| 2357 | | - DEFINE_WB_COMPLETION_ONSTACK(done); |
| | 2443 | + struct backing_dev_info *bdi = sb->s_bdi; |
| | 2444 | + DEFINE_WB_COMPLETION(done, bdi); |
| 2358 | 2445 | struct wb_writeback_work work = { |
| 2359 | 2446 | .sb = sb, |
| 2360 | 2447 | .sync_mode = WB_SYNC_NONE, |
| .. | .. | |
| 2363 | 2450 | .nr_pages = nr, |
| 2364 | 2451 | .reason = reason, |
| 2365 | 2452 | }; |
| 2366 | | - struct backing_dev_info *bdi = sb->s_bdi; |
| 2367 | | - |
| 2368 | 2454 | if (!bdi_has_dirty_io(bdi) \|\| bdi == &noop_backing_dev_info) |
| 2369 | 2455 | return; |
| 2370 | 2456 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
| 2371 | 2457 | |
| 2372 | 2458 | bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy); |
| 2373 | | - wb_wait_for_completion(bdi, &done); |
| | 2459 | + wb_wait_for_completion(&done); |
| 2374 | 2460 | } |
| 2375 | 2461 | |
| 2376 | 2462 | /** |
| .. | .. | |
| 2421 | 2507 | __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true); |
| 2422 | 2508 | up_read(&sb->s_umount); |
| 2423 | 2509 | } |
| 2424 | | -EXPORT_SYMBOL(try_to_writeback_inodes_sb); |
| | 2510 | +EXPORT_SYMBOL_NS(try_to_writeback_inodes_sb, ANDROID_GKI_VFS_EXPORT_ONLY); |
| 2425 | 2511 | |
| 2426 | 2512 | /** |
| 2427 | 2513 | * sync_inodes_sb - sync sb inode pages |
| .. | .. | |
| 2432 | 2518 | */ |
| 2433 | 2519 | void sync_inodes_sb(struct super_block *sb) |
| 2434 | 2520 | { |
| 2435 | | - DEFINE_WB_COMPLETION_ONSTACK(done); |
| | 2521 | + struct backing_dev_info *bdi = sb->s_bdi; |
| | 2522 | + DEFINE_WB_COMPLETION(done, bdi); |
| 2436 | 2523 | struct wb_writeback_work work = { |
| 2437 | 2524 | .sb = sb, |
| 2438 | 2525 | .sync_mode = WB_SYNC_ALL, |
| .. | .. | |
| 2442 | 2529 | .reason = WB_REASON_SYNC, |
| 2443 | 2530 | .for_sync = 1, |
| 2444 | 2531 | }; |
| 2445 | | - struct backing_dev_info *bdi = sb->s_bdi; |
| 2446 | 2532 | |
| .. | .. | |
| 2448 | 2534 | * Can't skip on !bdi_has_dirty() because we should wait for !dirty |
| .. | .. | |
| 2456 | 2542 | /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ |
| 2457 | 2543 | bdi_down_write_wb_switch_rwsem(bdi); |
| 2458 | 2544 | bdi_split_work_to_wbs(bdi, &work, false); |
| 2459 | | - wb_wait_for_completion(bdi, &done); |
| | 2545 | + wb_wait_for_completion(&done); |
| 2460 | 2546 | bdi_up_write_wb_switch_rwsem(bdi); |
| 2461 | 2547 | |
| 2462 | 2548 | wait_sb_inodes(sb); |
| .. | .. | |
| 2482 | 2568 | .range_end = LLONG_MAX, |
| 2483 | 2569 | }; |
| 2484 | 2570 | |
| 2485 | | - if (!mapping_cap_writeback_dirty(inode->i_mapping)) |
| | 2571 | + if (!mapping_can_writeback(inode->i_mapping)) |
| 2486 | 2572 | wbc.nr_to_write = 0; |
| 2487 | 2573 | |
| 2488 | 2574 | might_sleep(); |
| 2489 | 2575 | return writeback_single_inode(inode, &wbc); |
| 2490 | 2576 | } |
| 2491 | | -EXPORT_SYMBOL(write_inode_now); |
| | 2577 | +EXPORT_SYMBOL_NS(write_inode_now, ANDROID_GKI_VFS_EXPORT_ONLY); |
| 2492 | 2578 | |
| 2493 | 2579 | /** |
| 2494 | 2580 | * sync_inode - write an inode and its pages to disk. |
| .. | .. | |
| 2525 | 2611 | |
| 2526 | 2612 | return sync_inode(inode, &wbc); |
| 2527 | 2613 | } |
| 2528 | | -EXPORT_SYMBOL(sync_inode_metadata); |
| | 2614 | +EXPORT_SYMBOL_NS(sync_inode_metadata, ANDROID_GKI_VFS_EXPORT_ONLY); |