2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/page_io.c
@@ -25,25 +25,23 @@
 #include <linux/psi.h>
 #include <linux/uio.h>
 #include <linux/sched/task.h>
-#include <asm/pgtable.h>
+#include <trace/hooks/mm.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
 				struct page *page, bio_end_io_t end_io)
 {
-	int i, nr = hpage_nr_pages(page);
 	struct bio *bio;
 
-	bio = bio_alloc(gfp_flags, nr);
+	bio = bio_alloc(gfp_flags, 1);
 	if (bio) {
 		struct block_device *bdev;
 
 		bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
 		bio_set_dev(bio, bdev);
+		bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
 		bio->bi_end_io = end_io;
 
-		for (i = 0; i < nr; i++)
-			bio_add_page(bio, page + i, PAGE_SIZE, 0);
-		VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
+		bio_add_page(bio, page, thp_size(page), 0);
 	}
 	return bio;
 }
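The added bi_sector shift converts a page-granular swap offset into the 512-byte sectors the block layer works in. A minimal, runnable user-space sketch of the same arithmetic, assuming 4 KiB pages (in the kernel, PAGE_SHIFT is per-architecture):

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumed: 4 KiB pages */
#define SECTOR_SHIFT	9	/* block-layer sectors are 512 bytes */

int main(void)
{
	unsigned long long page_off = 3;	/* swap offset, in pages */

	/* Same conversion as bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9. */
	unsigned long long sector = page_off << (PAGE_SHIFT - SECTOR_SHIFT);

	printf("page offset %llu -> sector %llu\n", page_off, sector);
	return 0;	/* prints: page offset 3 -> sector 24 */
}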
@@ -63,62 +61,13 @@
 	 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
 	 */
 	set_page_dirty(page);
-	pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
-		 MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-		 (unsigned long long)bio->bi_iter.bi_sector);
+	pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
+			     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
+			     (unsigned long long)bio->bi_iter.bi_sector);
 	ClearPageReclaim(page);
 	}
 	end_page_writeback(page);
 	bio_put(bio);
-}
-
-static void swap_slot_free_notify(struct page *page)
-{
-	struct swap_info_struct *sis;
-	struct gendisk *disk;
-	swp_entry_t entry;
-
-	/*
-	 * There is no guarantee that the page is in swap cache - the software
-	 * suspend code (at least) uses end_swap_bio_read() against a non-
-	 * swapcache page. So we must check PG_swapcache before proceeding with
-	 * this optimization.
-	 */
-	if (unlikely(!PageSwapCache(page)))
-		return;
-
-	sis = page_swap_info(page);
-	if (!(sis->flags & SWP_BLKDEV))
-		return;
-
-	/*
-	 * The swap subsystem performs lazy swap slot freeing,
-	 * expecting that the page will be swapped out again.
-	 * So we can avoid an unnecessary write if the page
-	 * isn't redirtied.
-	 * This is good for real swap storage because we can
-	 * reduce unnecessary I/O and enhance wear-leveling
-	 * if an SSD is used as the swap device.
-	 * But if an in-memory swap device (e.g. zram) is used,
-	 * this causes a duplicated copy between uncompressed
-	 * data in VM-owned memory and compressed data in
-	 * zram-owned memory. So let's free zram-owned memory
-	 * and make the VM-owned decompressed page *dirty*,
-	 * so the page should be swapped out somewhere again if
-	 * we again wish to reclaim it.
-	 */
-	disk = sis->bdev->bd_disk;
-	entry.val = page_private(page);
-	if (disk->fops->swap_slot_free_notify &&
-			__swap_count(sis, entry) == 1) {
-		unsigned long offset;
-
-		offset = swp_offset(entry);
-
-		SetPageDirty(page);
-		disk->fops->swap_slot_free_notify(sis->bdev,
-				offset);
-	}
 }
 
 static void end_swap_bio_read(struct bio *bio)
@@ -129,20 +78,21 @@
 	if (bio->bi_status) {
 		SetPageError(page);
 		ClearPageUptodate(page);
-		pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
-			 MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-			 (unsigned long long)bio->bi_iter.bi_sector);
+		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
+				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
+				     (unsigned long long)bio->bi_iter.bi_sector);
 		goto out;
 	}
 
 	SetPageUptodate(page);
-	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
 	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
-	wake_up_process(waiter);
-	put_task_struct(waiter);
+	if (waiter) {
+		blk_wake_io_task(waiter);
+		put_task_struct(waiter);
+	}
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
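Two behavioral changes land in end_swap_bio_read() above: bio->bi_private may now be NULL, since only synchronous reads attach a waiter (see the swap_readpage() hunks below), and the wake-up goes through blk_wake_io_task() instead of a bare wake_up_process(). The distinction matters for polled I/O, where the waiting task completes the bio itself. Paraphrased from memory, the block-layer helper is roughly:

/* Sketch of blk_wake_io_task(), paraphrased from the block layer. */
static inline void blk_wake_io_task(struct task_struct *waiter)
{
	/*
	 * A polling waiter completes the I/O itself, so just marking it
	 * runnable is enough; anyone else needs a real wake-up.
	 */
	if (waiter == current)
		__set_current_state(TASK_RUNNING);
	else
		wake_up_process(waiter);
}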
@@ -165,7 +115,7 @@
 	blocks_per_page = PAGE_SIZE >> blkbits;
 
 	/*
-	 * Map all the blocks into the extent list. This code doesn't try
+	 * Map all the blocks into the extent tree. This code doesn't try
 	 * to be very smart.
 	 */
 	probe_block = 0;
@@ -178,8 +128,9 @@
 
 		cond_resched();
 
-		first_block = bmap(inode, probe_block);
-		if (first_block == 0)
+		first_block = probe_block;
+		ret = bmap(inode, &first_block);
+		if (ret || !first_block)
 			goto bad_bmap;
 
 		/*
@@ -194,9 +145,11 @@
 					block_in_page++) {
 			sector_t block;
 
-			block = bmap(inode, probe_block + block_in_page);
-			if (block == 0)
+			block = probe_block + block_in_page;
+			ret = bmap(inode, &block);
+			if (ret || !block)
 				goto bad_bmap;
+
 			if (block != first_block + block_in_page) {
 				/* Discontiguity */
 				probe_block++;
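Both bmap() call sites above follow the reworked kernel API: the old sector_t bmap(inode, block) returned 0 for holes and for filesystems without ->bmap alike, while int bmap(struct inode *inode, sector_t *block) takes the logical block in *block, writes the physical block back through the pointer, and returns a negative errno when the lookup itself fails. A sketch of the calling convention; lookup_phys_block() is a hypothetical helper, not part of this patch:

/* Hypothetical helper showing the in/out convention of the new bmap(). */
static int lookup_phys_block(struct inode *inode, sector_t logical,
			     sector_t *physical)
{
	sector_t block = logical;	/* in: logical block number */
	int ret = bmap(inode, &block);	/* out: physical block, 0 on a hole */

	if (ret)			/* e.g. no ->bmap on this filesystem */
		return ret;
	if (!block)			/* a hole: nothing mapped here */
		return -ENXIO;
	*physical = block;
	return 0;
}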
@@ -251,6 +204,16 @@
 		unlock_page(page);
 		goto out;
 	}
+	/*
+	 * Arch code may have to preserve more data than just the page
+	 * contents, e.g. memory tags.
+	 */
+	ret = arch_prepare_to_swap(page);
+	if (ret) {
+		set_page_dirty(page);
+		unlock_page(page);
+		goto out;
+	}
 	if (frontswap_store(page) == 0) {
 		set_page_writeback(page);
 		unlock_page(page);
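arch_prepare_to_swap() lets the architecture stash metadata that lives outside the page contents (arm64 uses it to save MTE memory tags) before the page is written out; a non-zero return re-dirties the page so the write is retried later. For reference, a sketch of the generic no-op fallback, assuming the usual override-guard pattern (the guard macro name is from memory):

/* Generic fallback: most architectures have nothing extra to preserve. */
#ifndef __HAVE_ARCH_PREPARE_TO_SWAP
static inline int arch_prepare_to_swap(struct page *page)
{
	return 0;
}
#endif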
@@ -268,8 +231,25 @@
 	if (unlikely(PageTransHuge(page)))
 		count_vm_event(THP_SWPOUT);
 #endif
-	count_vm_events(PSWPOUT, hpage_nr_pages(page));
+	count_vm_events(PSWPOUT, thp_nr_pages(page));
 }
+
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+{
+	struct cgroup_subsys_state *css;
+
+	if (!page->mem_cgroup)
+		return;
+
+	rcu_read_lock();
+	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+	bio_associate_blkg_from_css(bio, css);
+	rcu_read_unlock();
+}
+#else
+#define bio_associate_blkg_from_page(bio, page) do { } while (0)
+#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		bio_end_io_t end_write_func)
@@ -277,9 +257,10 @@
 	struct bio *bio;
 	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
+	bool skip = false;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-	if (sis->flags & SWP_FILE) {
+	if (data_race(sis->flags & SWP_FS_OPS)) {
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
@@ -290,7 +271,7 @@
 		};
 		struct iov_iter from;
 
-		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
+		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 
@@ -298,6 +279,7 @@
 		unlock_page(page);
 		ret = mapping->a_ops->direct_IO(&kiocb, &from);
 		if (ret == PAGE_SIZE) {
+			trace_android_vh_count_pswpout(sis);
 			count_vm_event(PSWPOUT);
 			ret = 0;
 		} else {
@@ -320,29 +302,30 @@
 		return ret;
 	}
 
-	ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev),
-			page, wbc);
+	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
 	if (!ret) {
-		count_swpout_vm_event(page);
+		trace_android_vh_count_swpout_vm_event(sis, page, &skip);
+		if (!skip)
+			count_swpout_vm_event(page);
 		return 0;
 	}
 
-	ret = 0;
 	bio = get_swap_bio(GFP_NOIO, page, end_write_func);
 	if (bio == NULL) {
 		set_page_dirty(page);
 		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-	bio_associate_blkcg_from_page(bio, page);
-	count_swpout_vm_event(page);
+	bio_associate_blkg_from_page(bio, page);
+	trace_android_vh_count_swpout_vm_event(sis, page, &skip);
+	if (!skip)
+		count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
 	submit_bio(bio);
-out:
-	return ret;
+
+	return 0;
 }
 
 int swap_readpage(struct page *page, bool synchronous)
@@ -371,25 +354,25 @@
 		goto out;
 	}
 
-	if (sis->flags & SWP_FILE) {
+	if (data_race(sis->flags & SWP_FS_OPS)) {
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
 
 		ret = mapping->a_ops->readpage(swap_file, page);
-		if (!ret)
+		if (!ret) {
+			trace_android_vh_count_pswpin(sis);
 			count_vm_event(PSWPIN);
+		}
 		goto out;
 	}
 
-	ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page);
-	if (!ret) {
-		if (trylock_page(page)) {
-			swap_slot_free_notify(page);
-			unlock_page(page);
+	if (sis->flags & SWP_SYNCHRONOUS_IO) {
+		ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
+		if (!ret) {
+			trace_android_vh_count_pswpin(sis);
+			count_vm_event(PSWPIN);
+			goto out;
 		}
-
-		count_vm_event(PSWPIN);
-		goto out;
 	}
 
 	ret = 0;
@@ -404,9 +387,13 @@
 	 * Keep this task valid during swap readpage because the oom killer may
 	 * attempt to access it in the page fault retry time check.
	 */
-	get_task_struct(current);
-	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+	if (synchronous) {
+		bio->bi_opf |= REQ_HIPRI;
+		get_task_struct(current);
+		bio->bi_private = current;
+	}
+	trace_android_vh_count_pswpin(sis);
 	count_vm_event(PSWPIN);
 	bio_get(bio);
 	qc = submit_bio(bio);
@@ -415,8 +402,8 @@
 		if (!READ_ONCE(bio->bi_private))
 			break;
 
-		if (!blk_poll(disk->queue, qc))
-			break;
+		if (!blk_poll(disk->queue, qc, true))
+			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
 	bio_put(bio);
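Taken together with the previous hunk, polling is now opt-in: only a synchronous swap-in tags its bio with REQ_HIPRI, which is what makes the request visible to blk_poll(), and an unproductive poll pass now sleeps instead of abandoning the wait. Reassembled from the two hunks above, with explanatory comments added:

	while (synchronous) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio->bi_private))	/* completion cleared it */
			break;

		/*
		 * Spin on the completion queue; if nothing completed, yield
		 * via blk_io_schedule() (a hang-check-safe io_schedule())
		 * rather than busy-looping or giving up.
		 */
		if (!blk_poll(disk->queue, qc, true))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);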
@@ -430,7 +417,7 @@
 {
 	struct swap_info_struct *sis = page_swap_info(page);
 
-	if (sis->flags & SWP_FILE) {
+	if (data_race(sis->flags & SWP_FS_OPS)) {
 		struct address_space *mapping = sis->swap_file->f_mapping;
 
 		VM_BUG_ON_PAGE(!PageSwapCache(page), page);