```diff
@@ -25,25 +25,23 @@
 #include <linux/psi.h>
 #include <linux/uio.h>
 #include <linux/sched/task.h>
-#include <asm/pgtable.h>
+#include <trace/hooks/mm.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
 				struct page *page, bio_end_io_t end_io)
 {
-	int i, nr = hpage_nr_pages(page);
 	struct bio *bio;
 
-	bio = bio_alloc(gfp_flags, nr);
+	bio = bio_alloc(gfp_flags, 1);
 	if (bio) {
 		struct block_device *bdev;
 
 		bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
 		bio_set_dev(bio, bdev);
+		bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
 		bio->bi_end_io = end_io;
 
-		for (i = 0; i < nr; i++)
-			bio_add_page(bio, page + i, PAGE_SIZE, 0);
-		VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
+		bio_add_page(bio, page, thp_size(page), 0);
 	}
 	return bio;
 }
```
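A side note on the added `bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;` line: the shift converts a page-granularity offset into the 512-byte sectors the block layer addresses (a sector is 1 << 9 bytes). A minimal standalone sketch of that arithmetic, assuming 4 KiB pages (the offset value is made up for illustration):

```c
#include <stdio.h>

#define PAGE_SHIFT   12		/* assumed: 4 KiB pages */
#define SECTOR_SHIFT  9		/* block-layer sectors are 512 bytes */

int main(void)
{
	/* hypothetical swap offset, counted in pages */
	unsigned long long page_off = 3;
	unsigned long long sector = page_off << (PAGE_SHIFT - SECTOR_SHIFT);

	/* page 3 starts at byte 3 * 4096 = 12288, i.e. sector 12288 / 512 = 24 */
	printf("page offset %llu -> sector %llu\n", page_off, sector);
	return 0;
}
```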
```diff
@@ -63,62 +61,13 @@
 		 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
 		 */
 		set_page_dirty(page);
-		pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
-			 MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-			 (unsigned long long)bio->bi_iter.bi_sector);
+		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
+				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
+				     (unsigned long long)bio->bi_iter.bi_sector);
 		ClearPageReclaim(page);
 	}
 	end_page_writeback(page);
 	bio_put(bio);
-}
-
-static void swap_slot_free_notify(struct page *page)
-{
-	struct swap_info_struct *sis;
-	struct gendisk *disk;
-	swp_entry_t entry;
-
-	/*
-	 * There is no guarantee that the page is in swap cache - the software
-	 * suspend code (at least) uses end_swap_bio_read() against a non-
-	 * swapcache page. So we must check PG_swapcache before proceeding with
-	 * this optimization.
-	 */
-	if (unlikely(!PageSwapCache(page)))
-		return;
-
-	sis = page_swap_info(page);
-	if (!(sis->flags & SWP_BLKDEV))
-		return;
-
-	/*
-	 * The swap subsystem performs lazy swap slot freeing,
-	 * expecting that the page will be swapped out again.
-	 * So we can avoid an unnecessary write if the page
-	 * isn't redirtied.
-	 * This is good for real swap storage because we can
-	 * reduce unnecessary I/O and enhance wear-leveling
-	 * if an SSD is used as the swap device.
-	 * But if in-memory swap device (eg zram) is used,
-	 * this causes a duplicated copy between uncompressed
-	 * data in VM-owned memory and compressed data in
-	 * zram-owned memory. So let's free zram-owned memory
-	 * and make the VM-owned decompressed page *dirty*,
-	 * so the page should be swapped out somewhere again if
-	 * we again wish to reclaim it.
-	 */
-	disk = sis->bdev->bd_disk;
-	entry.val = page_private(page);
-	if (disk->fops->swap_slot_free_notify &&
-			__swap_count(sis, entry) == 1) {
-		unsigned long offset;
-
-		offset = swp_offset(entry);
-
-		SetPageDirty(page);
-		disk->fops->swap_slot_free_notify(sis->bdev,
-				offset);
-	}
 }
 
 static void end_swap_bio_read(struct bio *bio)
```
```diff
@@ -129,20 +78,21 @@
 	if (bio->bi_status) {
 		SetPageError(page);
 		ClearPageUptodate(page);
-		pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
-			 MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-			 (unsigned long long)bio->bi_iter.bi_sector);
+		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
+				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
+				     (unsigned long long)bio->bi_iter.bi_sector);
 		goto out;
 	}
 
 	SetPageUptodate(page);
-	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
 	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
-	wake_up_process(waiter);
-	put_task_struct(waiter);
+	if (waiter) {
+		blk_wake_io_task(waiter);
+		put_task_struct(waiter);
+	}
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
```
```diff
@@ -165,7 +115,7 @@
 	blocks_per_page = PAGE_SIZE >> blkbits;
 
 	/*
-	 * Map all the blocks into the extent list. This code doesn't try
+	 * Map all the blocks into the extent tree. This code doesn't try
 	 * to be very smart.
 	 */
 	probe_block = 0;
```
```diff
@@ -178,8 +128,9 @@
 
 		cond_resched();
 
-		first_block = bmap(inode, probe_block);
-		if (first_block == 0)
+		first_block = probe_block;
+		ret = bmap(inode, &first_block);
+		if (ret || !first_block)
 			goto bad_bmap;
 
 		/*
```
```diff
@@ -194,9 +145,11 @@
 				block_in_page++) {
 			sector_t block;
 
-			block = bmap(inode, probe_block + block_in_page);
-			if (block == 0)
+			block = probe_block + block_in_page;
+			ret = bmap(inode, &block);
+			if (ret || !block)
 				goto bad_bmap;
+
 			if (block != first_block + block_in_page) {
 				/* Discontiguity */
 				probe_block++;
```
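Both `bmap()` call sites in this function now follow the same two-step convention: the caller seeds a `sector_t` with the logical block, `bmap()` reports failure through its return value, and a result of 0 still means a hole, which a swapfile must not contain. A self-contained sketch of that `ret || !block` check, with a stubbed `my_bmap()` standing in for the kernel helper (the stub and its fake mapping are invented for illustration):

```c
#include <stdio.h>

typedef unsigned long long sector_t;

/* Stub standing in for the kernel's bmap(): returns 0 on success and
 * writes the physical block through *block, with 0 meaning "hole". */
static int my_bmap(sector_t *block)
{
	if (*block == 7)
		*block = 0;		/* pretend logical block 7 is a hole */
	else
		*block += 1000;		/* pretend mapping: phys = logical + 1000 */
	return 0;			/* this stub never lacks a ->bmap */
}

int main(void)
{
	for (sector_t logical = 6; logical <= 8; logical++) {
		sector_t block = logical;	/* in: logical, out: physical */
		int ret = my_bmap(&block);

		if (ret || !block) {		/* error or hole: bail like bad_bmap */
			printf("block %llu: bad_bmap\n", logical);
			continue;
		}
		printf("block %llu -> %llu\n", logical, block);
	}
	return 0;
}
```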
```diff
@@ -251,6 +204,16 @@
 		unlock_page(page);
 		goto out;
 	}
+	/*
+	 * Arch code may have to preserve more data than just the page
+	 * contents, e.g. memory tags.
+	 */
+	ret = arch_prepare_to_swap(page);
+	if (ret) {
+		set_page_dirty(page);
+		unlock_page(page);
+		goto out;
+	}
 	if (frontswap_store(page) == 0) {
 		set_page_writeback(page);
 		unlock_page(page);
```
```diff
@@ -268,8 +231,25 @@
 	if (unlikely(PageTransHuge(page)))
 		count_vm_event(THP_SWPOUT);
 #endif
-	count_vm_events(PSWPOUT, hpage_nr_pages(page));
+	count_vm_events(PSWPOUT, thp_nr_pages(page));
 }
+
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+{
+	struct cgroup_subsys_state *css;
+
+	if (!page->mem_cgroup)
+		return;
+
+	rcu_read_lock();
+	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+	bio_associate_blkg_from_css(bio, css);
+	rcu_read_unlock();
+}
+#else
+#define bio_associate_blkg_from_page(bio, page) do { } while (0)
+#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		bio_end_io_t end_write_func)
```
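When memcg or blk-cgroup is compiled out, the `#else` branch above turns `bio_associate_blkg_from_page()` into a no-op macro. The `do { } while (0)` shape is the standard idiom for that: it consumes the trailing semicolon and behaves as a single statement inside un-braced `if`/`else`. A tiny standalone illustration (the `feature_hook()` macro is hypothetical, not from this patch):

```c
#include <stdio.h>

/* #define HAVE_FEATURE 1    -- flip on to compile in the real hook */

#ifdef HAVE_FEATURE
#define feature_hook(x) printf("hook(%d)\n", (x))
#else
#define feature_hook(x) do { } while (0)	/* still a single statement */
#endif

int main(void)
{
	int enabled = 1;

	if (enabled)
		feature_hook(42);	/* safe without braces around the branch */
	else
		printf("skipped\n");
	return 0;
}
```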
```diff
@@ -277,9 +257,10 @@
 	struct bio *bio;
 	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
+	bool skip = false;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-	if (sis->flags & SWP_FILE) {
+	if (data_race(sis->flags & SWP_FS_OPS)) {
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
```
```diff
@@ -290,7 +271,7 @@
 		};
 		struct iov_iter from;
 
-		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
+		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 
```
```diff
@@ -298,6 +279,7 @@
 		unlock_page(page);
 		ret = mapping->a_ops->direct_IO(&kiocb, &from);
 		if (ret == PAGE_SIZE) {
+			trace_android_vh_count_pswpout(sis);
 			count_vm_event(PSWPOUT);
 			ret = 0;
 		} else {
```
```diff
@@ -320,29 +302,30 @@
 		return ret;
 	}
 
-	ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev),
-			page, wbc);
+	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
 	if (!ret) {
-		count_swpout_vm_event(page);
+		trace_android_vh_count_swpout_vm_event(sis, page, &skip);
+		if (!skip)
+			count_swpout_vm_event(page);
 		return 0;
 	}
 
-	ret = 0;
 	bio = get_swap_bio(GFP_NOIO, page, end_write_func);
 	if (bio == NULL) {
 		set_page_dirty(page);
 		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-	bio_associate_blkcg_from_page(bio, page);
-	count_swpout_vm_event(page);
+	bio_associate_blkg_from_page(bio, page);
+	trace_android_vh_count_swpout_vm_event(sis, page, &skip);
+	if (!skip)
+		count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
 	submit_bio(bio);
-out:
-	return ret;
+
+	return 0;
 }
 
 int swap_readpage(struct page *page, bool synchronous)
```
```diff
@@ -371,25 +354,25 @@
 		goto out;
 	}
 
-	if (sis->flags & SWP_FILE) {
+	if (data_race(sis->flags & SWP_FS_OPS)) {
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
 
 		ret = mapping->a_ops->readpage(swap_file, page);
-		if (!ret)
+		if (!ret) {
+			trace_android_vh_count_pswpin(sis);
 			count_vm_event(PSWPIN);
+		}
 		goto out;
 	}
 
-	ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page);
-	if (!ret) {
-		if (trylock_page(page)) {
-			swap_slot_free_notify(page);
-			unlock_page(page);
+	if (sis->flags & SWP_SYNCHRONOUS_IO) {
+		ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
+		if (!ret) {
+			trace_android_vh_count_pswpin(sis);
+			count_vm_event(PSWPIN);
+			goto out;
 		}
-
-		count_vm_event(PSWPIN);
-		goto out;
 	}
 
 	ret = 0;
```
```diff
@@ -404,9 +387,13 @@
 	 * Keep this task valid during swap readpage because the oom killer may
 	 * attempt to access it in the page fault retry time check.
 	 */
-	get_task_struct(current);
-	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+	if (synchronous) {
+		bio->bi_opf |= REQ_HIPRI;
+		get_task_struct(current);
+		bio->bi_private = current;
+	}
+	trace_android_vh_count_pswpin(sis);
 	count_vm_event(PSWPIN);
 	bio_get(bio);
 	qc = submit_bio(bio);
```
```diff
@@ -415,8 +402,8 @@
 		if (!READ_ONCE(bio->bi_private))
 			break;
 
-		if (!blk_poll(disk->queue, qc))
-			break;
+		if (!blk_poll(disk->queue, qc, true))
+			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
 	bio_put(bio);
```
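The synchronous-read path above parks the task in `TASK_UNINTERRUPTIBLE` and relies on `end_swap_bio_read()` clearing `bio->bi_private` before waking it; each loop iteration either reaps completions via `blk_poll()` or yields with `blk_io_schedule()`. A toy userspace model of that handshake, using C11 atomics and a thread in place of the IRQ-side completion (everything here is illustrative, not kernel API):

```c
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static _Atomic(void *) bi_private;	/* non-NULL while the "bio" is in flight */

/* completion side: stands in for end_swap_bio_read() */
static void *end_io(void *arg)
{
	usleep(1000);				/* pretend the device takes a while */
	atomic_store(&bi_private, NULL);	/* publish completion, then "wake" */
	return NULL;
}

int main(void)
{
	pthread_t t;
	int task = 1;				/* stands in for 'current' */

	atomic_store(&bi_private, &task);	/* like bio->bi_private = current */
	pthread_create(&t, NULL, end_io, NULL);

	while (atomic_load(&bi_private))	/* like READ_ONCE(bio->bi_private) */
		sched_yield();			/* stand-in for blk_poll()/blk_io_schedule() */

	pthread_join(t, NULL);
	printf("read completed\n");
	return 0;
}
```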
```diff
@@ -430,7 +417,7 @@
 {
 	struct swap_info_struct *sis = page_swap_info(page);
 
-	if (sis->flags & SWP_FILE) {
+	if (data_race(sis->flags & SWP_FS_OPS)) {
 		struct address_space *mapping = sis->swap_file->f_mapping;
 
 		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
```