| .. | .. |
|---|
| 25 | 25 | #include <linux/psi.h> |
|---|
| 26 | 26 | #include <linux/uio.h> |
|---|
| 27 | 27 | #include <linux/sched/task.h> |
|---|
| 28 | | -#include <asm/pgtable.h> |
|---|
| 28 | +#include <trace/hooks/mm.h> |
|---|
| 29 | 29 | |
|---|
| 30 | 30 | static struct bio *get_swap_bio(gfp_t gfp_flags, |
|---|
| 31 | 31 | struct page *page, bio_end_io_t end_io) |
|---|
| 32 | 32 | { |
|---|
| 33 | | - int i, nr = hpage_nr_pages(page); |
|---|
| 34 | 33 | struct bio *bio; |
|---|
| 35 | 34 | |
|---|
| 36 | | - bio = bio_alloc(gfp_flags, nr); |
|---|
| 35 | + bio = bio_alloc(gfp_flags, 1); |
|---|
| 37 | 36 | if (bio) { |
|---|
| 38 | 37 | struct block_device *bdev; |
|---|
| 39 | 38 | |
|---|
| 40 | 39 | bio->bi_iter.bi_sector = map_swap_page(page, &bdev); |
|---|
| 41 | 40 | bio_set_dev(bio, bdev); |
|---|
| 41 | + bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; |
|---|
| 42 | 42 | bio->bi_end_io = end_io; |
|---|
| 43 | 43 | |
|---|
| 44 | | - for (i = 0; i < nr; i++) |
|---|
| 45 | | - bio_add_page(bio, page + i, PAGE_SIZE, 0); |
|---|
| 46 | | - VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr); |
|---|
| 44 | + bio_add_page(bio, page, thp_size(page), 0); |
|---|
| 47 | 45 | } |
|---|
| 48 | 46 | return bio; |
|---|
| 49 | 47 | } |
|---|
| .. | .. |
|---|
| 63 | 61 | * Also clear PG_reclaim to avoid rotate_reclaimable_page() |
|---|
| 64 | 62 | */ |
|---|
| 65 | 63 | set_page_dirty(page); |
|---|
| 66 | | - pr_alert("Write-error on swap-device (%u:%u:%llu)\n", |
|---|
| 67 | | - MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), |
|---|
| 68 | | - (unsigned long long)bio->bi_iter.bi_sector); |
|---|
| 64 | + pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", |
|---|
| 65 | + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), |
|---|
| 66 | + (unsigned long long)bio->bi_iter.bi_sector); |
|---|
| 69 | 67 | ClearPageReclaim(page); |
|---|
| 70 | 68 | } |
|---|
| 71 | 69 | end_page_writeback(page); |
|---|
| 72 | 70 | bio_put(bio); |
|---|
| 73 | | -} |
|---|
| 74 | | - |
|---|
| 75 | | -static void swap_slot_free_notify(struct page *page) |
|---|
| 76 | | -{ |
|---|
| 77 | | - struct swap_info_struct *sis; |
|---|
| 78 | | - struct gendisk *disk; |
|---|
| 79 | | - swp_entry_t entry; |
|---|
| 80 | | - |
|---|
| 81 | | - /* |
|---|
| 82 | | - * There is no guarantee that the page is in swap cache - the software |
|---|
| 83 | | - * suspend code (at least) uses end_swap_bio_read() against a non- |
|---|
| 84 | | - * swapcache page. So we must check PG_swapcache before proceeding with |
|---|
| 85 | | - * this optimization. |
|---|
| 86 | | - */ |
|---|
| 87 | | - if (unlikely(!PageSwapCache(page))) |
|---|
| 88 | | - return; |
|---|
| 89 | | - |
|---|
| 90 | | - sis = page_swap_info(page); |
|---|
| 91 | | - if (!(sis->flags & SWP_BLKDEV)) |
|---|
| 92 | | - return; |
|---|
| 93 | | - |
|---|
| 94 | | - /* |
|---|
| 95 | | - * The swap subsystem performs lazy swap slot freeing, |
|---|
| 96 | | - * expecting that the page will be swapped out again. |
|---|
| 97 | | - * So we can avoid an unnecessary write if the page |
|---|
| 98 | | - * isn't redirtied. |
|---|
| 99 | | - * This is good for real swap storage because we can |
|---|
| 100 | | - * reduce unnecessary I/O and enhance wear-leveling |
|---|
| 101 | | - * if an SSD is used as the as swap device. |
|---|
| 102 | | - * But if in-memory swap device (eg zram) is used, |
|---|
| 103 | | - * this causes a duplicated copy between uncompressed |
|---|
| 104 | | - * data in VM-owned memory and compressed data in |
|---|
| 105 | | - * zram-owned memory. So let's free zram-owned memory |
|---|
| 106 | | - * and make the VM-owned decompressed page *dirty*, |
|---|
| 107 | | - * so the page should be swapped out somewhere again if |
|---|
| 108 | | - * we again wish to reclaim it. |
|---|
| 109 | | - */ |
|---|
| 110 | | - disk = sis->bdev->bd_disk; |
|---|
| 111 | | - entry.val = page_private(page); |
|---|
| 112 | | - if (disk->fops->swap_slot_free_notify && |
|---|
| 113 | | - __swap_count(sis, entry) == 1) { |
|---|
| 114 | | - unsigned long offset; |
|---|
| 115 | | - |
|---|
| 116 | | - offset = swp_offset(entry); |
|---|
| 117 | | - |
|---|
| 118 | | - SetPageDirty(page); |
|---|
| 119 | | - disk->fops->swap_slot_free_notify(sis->bdev, |
|---|
| 120 | | - offset); |
|---|
| 121 | | - } |
|---|
| 122 | 71 | } |
|---|
| 123 | 72 | |
|---|
| 124 | 73 | static void end_swap_bio_read(struct bio *bio) |
|---|
| .. | .. |
|---|
| 129 | 78 | if (bio->bi_status) { |
|---|
| 130 | 79 | SetPageError(page); |
|---|
| 131 | 80 | ClearPageUptodate(page); |
|---|
| 132 | | - pr_alert("Read-error on swap-device (%u:%u:%llu)\n", |
|---|
| 133 | | - MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), |
|---|
| 134 | | - (unsigned long long)bio->bi_iter.bi_sector); |
|---|
| 81 | + pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n", |
|---|
| 82 | + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), |
|---|
| 83 | + (unsigned long long)bio->bi_iter.bi_sector); |
|---|
| 135 | 84 | goto out; |
|---|
| 136 | 85 | } |
|---|
| 137 | 86 | |
|---|
| 138 | 87 | SetPageUptodate(page); |
|---|
| 139 | | - swap_slot_free_notify(page); |
|---|
| 140 | 88 | out: |
|---|
| 141 | 89 | unlock_page(page); |
|---|
| 142 | 90 | WRITE_ONCE(bio->bi_private, NULL); |
|---|
| 143 | 91 | bio_put(bio); |
|---|
| 144 | | - wake_up_process(waiter); |
|---|
| 145 | | - put_task_struct(waiter); |
|---|
| 92 | + if (waiter) { |
|---|
| 93 | + blk_wake_io_task(waiter); |
|---|
| 94 | + put_task_struct(waiter); |
|---|
| 95 | + } |
|---|
| 146 | 96 | } |
|---|
| 147 | 97 | |
|---|
| 148 | 98 | int generic_swapfile_activate(struct swap_info_struct *sis, |
|---|
| .. | .. |
|---|
| 165 | 115 | blocks_per_page = PAGE_SIZE >> blkbits; |
|---|
| 166 | 116 | |
|---|
| 167 | 117 | /* |
|---|
| 168 | | - * Map all the blocks into the extent list. This code doesn't try |
|---|
| 118 | + * Map all the blocks into the extent tree. This code doesn't try |
|---|
| 169 | 119 | * to be very smart. |
|---|
| 170 | 120 | */ |
|---|
| 171 | 121 | probe_block = 0; |
|---|
| .. | .. |
|---|
| 178 | 128 | |
|---|
| 179 | 129 | cond_resched(); |
|---|
| 180 | 130 | |
|---|
| 181 | | - first_block = bmap(inode, probe_block); |
|---|
| 182 | | - if (first_block == 0) |
|---|
| 131 | + first_block = probe_block; |
|---|
| 132 | + ret = bmap(inode, &first_block); |
|---|
| 133 | + if (ret || !first_block) |
|---|
| 183 | 134 | goto bad_bmap; |
|---|
| 184 | 135 | |
|---|
| 185 | 136 | /* |
|---|
| .. | .. |
|---|
| 194 | 145 | block_in_page++) { |
|---|
| 195 | 146 | sector_t block; |
|---|
| 196 | 147 | |
|---|
| 197 | | - block = bmap(inode, probe_block + block_in_page); |
|---|
| 198 | | - if (block == 0) |
|---|
| 148 | + block = probe_block + block_in_page; |
|---|
| 149 | + ret = bmap(inode, &block); |
|---|
| 150 | + if (ret || !block) |
|---|
| 199 | 151 | goto bad_bmap; |
|---|
| 152 | + |
|---|
| 200 | 153 | if (block != first_block + block_in_page) { |
|---|
| 201 | 154 | /* Discontiguity */ |
|---|
| 202 | 155 | probe_block++; |
|---|
| .. | .. |
|---|
| 251 | 204 | unlock_page(page); |
|---|
| 252 | 205 | goto out; |
|---|
| 253 | 206 | } |
|---|
| 207 | + /* |
|---|
| 208 | + * Arch code may have to preserve more data than just the page |
|---|
| 209 | + * contents, e.g. memory tags. |
|---|
| 210 | + */ |
|---|
| 211 | + ret = arch_prepare_to_swap(page); |
|---|
| 212 | + if (ret) { |
|---|
| 213 | + set_page_dirty(page); |
|---|
| 214 | + unlock_page(page); |
|---|
| 215 | + goto out; |
|---|
| 216 | + } |
|---|
| 254 | 217 | if (frontswap_store(page) == 0) { |
|---|
| 255 | 218 | set_page_writeback(page); |
|---|
| 256 | 219 | unlock_page(page); |
|---|
| .. | .. |
|---|
| 268 | 231 | if (unlikely(PageTransHuge(page))) |
|---|
| 269 | 232 | count_vm_event(THP_SWPOUT); |
|---|
| 270 | 233 | #endif |
|---|
| 271 | | - count_vm_events(PSWPOUT, hpage_nr_pages(page)); |
|---|
| 234 | + count_vm_events(PSWPOUT, thp_nr_pages(page)); |
|---|
| 272 | 235 | } |
|---|
| 236 | + |
|---|
| 237 | +#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) |
|---|
| 238 | +static void bio_associate_blkg_from_page(struct bio *bio, struct page *page) |
|---|
| 239 | +{ |
|---|
| 240 | + struct cgroup_subsys_state *css; |
|---|
| 241 | + |
|---|
| 242 | + if (!page->mem_cgroup) |
|---|
| 243 | + return; |
|---|
| 244 | + |
|---|
| 245 | + rcu_read_lock(); |
|---|
| 246 | + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); |
|---|
| 247 | + bio_associate_blkg_from_css(bio, css); |
|---|
| 248 | + rcu_read_unlock(); |
|---|
| 249 | +} |
|---|
| 250 | +#else |
|---|
| 251 | +#define bio_associate_blkg_from_page(bio, page) do { } while (0) |
|---|
| 252 | +#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */ |
|---|
| 273 | 253 | |
|---|
| 274 | 254 | int __swap_writepage(struct page *page, struct writeback_control *wbc, |
|---|
| 275 | 255 | bio_end_io_t end_write_func) |
|---|
| .. | .. |
|---|
| 277 | 257 | struct bio *bio; |
|---|
| 278 | 258 | int ret; |
|---|
| 279 | 259 | struct swap_info_struct *sis = page_swap_info(page); |
|---|
| 260 | + bool skip = false; |
|---|
| 280 | 261 | |
|---|
| 281 | 262 | VM_BUG_ON_PAGE(!PageSwapCache(page), page); |
|---|
| 282 | | - if (sis->flags & SWP_FILE) { |
|---|
| 263 | + if (data_race(sis->flags & SWP_FS_OPS)) { |
|---|
| 283 | 264 | struct kiocb kiocb; |
|---|
| 284 | 265 | struct file *swap_file = sis->swap_file; |
|---|
| 285 | 266 | struct address_space *mapping = swap_file->f_mapping; |
|---|
| .. | .. |
|---|
| 290 | 271 | }; |
|---|
| 291 | 272 | struct iov_iter from; |
|---|
| 292 | 273 | |
|---|
| 293 | | - iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE); |
|---|
| 274 | + iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); |
|---|
| 294 | 275 | init_sync_kiocb(&kiocb, swap_file); |
|---|
| 295 | 276 | kiocb.ki_pos = page_file_offset(page); |
|---|
| 296 | 277 | |
|---|
| .. | .. |
|---|
| 298 | 279 | unlock_page(page); |
|---|
| 299 | 280 | ret = mapping->a_ops->direct_IO(&kiocb, &from); |
|---|
| 300 | 281 | if (ret == PAGE_SIZE) { |
|---|
| 282 | + trace_android_vh_count_pswpout(sis); |
|---|
| 301 | 283 | count_vm_event(PSWPOUT); |
|---|
| 302 | 284 | ret = 0; |
|---|
| 303 | 285 | } else { |
|---|
| .. | .. |
|---|
| 320 | 302 | return ret; |
|---|
| 321 | 303 | } |
|---|
| 322 | 304 | |
|---|
| 323 | | - ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), |
|---|
| 324 | | - page, wbc); |
|---|
| 305 | + ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); |
|---|
| 325 | 306 | if (!ret) { |
|---|
| 326 | | - count_swpout_vm_event(page); |
|---|
| 307 | + trace_android_vh_count_swpout_vm_event(sis, page, &skip); |
|---|
| 308 | + if (!skip) |
|---|
| 309 | + count_swpout_vm_event(page); |
|---|
| 327 | 310 | return 0; |
|---|
| 328 | 311 | } |
|---|
| 329 | 312 | |
|---|
| 330 | | - ret = 0; |
|---|
| 331 | 313 | bio = get_swap_bio(GFP_NOIO, page, end_write_func); |
|---|
| 332 | 314 | if (bio == NULL) { |
|---|
| 333 | 315 | set_page_dirty(page); |
|---|
| 334 | 316 | unlock_page(page); |
|---|
| 335 | | - ret = -ENOMEM; |
|---|
| 336 | | - goto out; |
|---|
| 317 | + return -ENOMEM; |
|---|
| 337 | 318 | } |
|---|
| 338 | 319 | bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); |
|---|
| 339 | | - bio_associate_blkcg_from_page(bio, page); |
|---|
| 340 | | - count_swpout_vm_event(page); |
|---|
| 320 | + bio_associate_blkg_from_page(bio, page); |
|---|
| 321 | + trace_android_vh_count_swpout_vm_event(sis, page, &skip); |
|---|
| 322 | + if (!skip) |
|---|
| 323 | + count_swpout_vm_event(page); |
|---|
| 341 | 324 | set_page_writeback(page); |
|---|
| 342 | 325 | unlock_page(page); |
|---|
| 343 | 326 | submit_bio(bio); |
|---|
| 344 | | -out: |
|---|
| 345 | | - return ret; |
|---|
| 327 | + |
|---|
| 328 | + return 0; |
|---|
| 346 | 329 | } |
|---|
| 347 | 330 | |
|---|
| 348 | 331 | int swap_readpage(struct page *page, bool synchronous) |
|---|
| .. | .. |
|---|
| 371 | 354 | goto out; |
|---|
| 372 | 355 | } |
|---|
| 373 | 356 | |
|---|
| 374 | | - if (sis->flags & SWP_FILE) { |
|---|
| 357 | + if (data_race(sis->flags & SWP_FS_OPS)) { |
|---|
| 375 | 358 | struct file *swap_file = sis->swap_file; |
|---|
| 376 | 359 | struct address_space *mapping = swap_file->f_mapping; |
|---|
| 377 | 360 | |
|---|
| 378 | 361 | ret = mapping->a_ops->readpage(swap_file, page); |
|---|
| 379 | | - if (!ret) |
|---|
| 362 | + if (!ret) { |
|---|
| 363 | + trace_android_vh_count_pswpin(sis); |
|---|
| 380 | 364 | count_vm_event(PSWPIN); |
|---|
| 365 | + } |
|---|
| 381 | 366 | goto out; |
|---|
| 382 | 367 | } |
|---|
| 383 | 368 | |
|---|
| 384 | | - ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page); |
|---|
| 385 | | - if (!ret) { |
|---|
| 386 | | - if (trylock_page(page)) { |
|---|
| 387 | | - swap_slot_free_notify(page); |
|---|
| 388 | | - unlock_page(page); |
|---|
| 369 | + if (sis->flags & SWP_SYNCHRONOUS_IO) { |
|---|
| 370 | + ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); |
|---|
| 371 | + if (!ret) { |
|---|
| 372 | + trace_android_vh_count_pswpin(sis); |
|---|
| 373 | + count_vm_event(PSWPIN); |
|---|
| 374 | + goto out; |
|---|
| 389 | 375 | } |
|---|
| 390 | | - |
|---|
| 391 | | - count_vm_event(PSWPIN); |
|---|
| 392 | | - goto out; |
|---|
| 393 | 376 | } |
|---|
| 394 | 377 | |
|---|
| 395 | 378 | ret = 0; |
|---|
| .. | .. |
|---|
| 404 | 387 | * Keep this task valid during swap readpage because the oom killer may |
|---|
| 405 | 388 | * attempt to access it in the page fault retry time check. |
|---|
| 406 | 389 | */ |
|---|
| 407 | | - get_task_struct(current); |
|---|
| 408 | | - bio->bi_private = current; |
|---|
| 409 | 390 | bio_set_op_attrs(bio, REQ_OP_READ, 0); |
|---|
| 391 | + if (synchronous) { |
|---|
| 392 | + bio->bi_opf |= REQ_HIPRI; |
|---|
| 393 | + get_task_struct(current); |
|---|
| 394 | + bio->bi_private = current; |
|---|
| 395 | + } |
|---|
| 396 | + trace_android_vh_count_pswpin(sis); |
|---|
| 410 | 397 | count_vm_event(PSWPIN); |
|---|
| 411 | 398 | bio_get(bio); |
|---|
| 412 | 399 | qc = submit_bio(bio); |
|---|
| .. | .. |
|---|
| 415 | 402 | if (!READ_ONCE(bio->bi_private)) |
|---|
| 416 | 403 | break; |
|---|
| 417 | 404 | |
|---|
| 418 | | - if (!blk_poll(disk->queue, qc)) |
|---|
| 419 | | - break; |
|---|
| 405 | + if (!blk_poll(disk->queue, qc, true)) |
|---|
| 406 | + blk_io_schedule(); |
|---|
| 420 | 407 | } |
|---|
| 421 | 408 | __set_current_state(TASK_RUNNING); |
|---|
| 422 | 409 | bio_put(bio); |
|---|
| .. | .. |
|---|
| 430 | 417 | { |
|---|
| 431 | 418 | struct swap_info_struct *sis = page_swap_info(page); |
|---|
| 432 | 419 | |
|---|
| 433 | | - if (sis->flags & SWP_FILE) { |
|---|
| 420 | + if (data_race(sis->flags & SWP_FS_OPS)) { |
|---|
| 434 | 421 | struct address_space *mapping = sis->swap_file->f_mapping; |
|---|
| 435 | 422 | |
|---|
| 436 | 423 | VM_BUG_ON_PAGE(!PageSwapCache(page), page); |
|---|