.. | .. |
19 | 19 | #include <linux/rbtree.h> |
20 | 20 | #include <linux/stacktrace.h> |
21 | 21 | |
| 22 | +#include <trace/hooks/mm.h> |
| 23 | + |
22 | 24 | #define DM_MSG_PREFIX "bufio" |
23 | 25 | |
24 | 26 | /* |
.. | .. |
66 | 68 | |
67 | 69 | /* |
68 | 70 | * Linking of buffers: |
69 | | - * All buffers are linked to cache_hash with their hash_list field. |
| 71 | + * All buffers are linked to buffer_tree with their node field. |
70 | 72 | * |
71 | 73 | * Clean buffers that are not being written (B_WRITING not set) |
72 | 74 | * are linked to lru[LIST_CLEAN] with their lru_list field. |
.. | .. |
108 | 110 | int async_write_error; |
109 | 111 | |
110 | 112 | struct list_head client_list; |
| 113 | + |
111 | 114 | struct shrinker shrinker; |
| 115 | + struct work_struct shrink_work; |
| 116 | + atomic_long_t need_shrink; |
112 | 117 | }; |
113 | 118 | |
114 | 119 | /* |
.. | .. |
153 | 158 | void (*end_io)(struct dm_buffer *, blk_status_t); |
154 | 159 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
155 | 160 | #define MAX_STACK 10 |
156 | | - struct stack_trace stack_trace; |
| 161 | + unsigned int stack_len; |
157 | 162 | unsigned long stack_entries[MAX_STACK]; |
158 | 163 | #endif |
159 | 164 | }; |
.. | .. |
238 | 243 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
239 | 244 | static void buffer_record_stack(struct dm_buffer *b) |
240 | 245 | { |
241 | | - b->stack_trace.nr_entries = 0; |
242 | | - b->stack_trace.max_entries = MAX_STACK; |
243 | | - b->stack_trace.entries = b->stack_entries; |
244 | | - b->stack_trace.skip = 2; |
245 | | - save_stack_trace(&b->stack_trace); |
| 246 | + b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2); |
246 | 247 | } |
247 | 248 | #endif |
248 | 249 | |
.. | .. |
260 | 261 | if (b->block == block) |
261 | 262 | return b; |
262 | 263 | |
263 | | - n = (b->block < block) ? n->rb_left : n->rb_right; |
| 264 | + n = block < b->block ? n->rb_left : n->rb_right; |
264 | 265 | } |
265 | 266 | |
266 | 267 | return NULL; |
| 268 | +} |
| 269 | + |
| 270 | +static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block) |
| 271 | +{ |
| 272 | + struct rb_node *n = c->buffer_tree.rb_node; |
| 273 | + struct dm_buffer *b; |
| 274 | + struct dm_buffer *best = NULL; |
| 275 | + |
| 276 | + while (n) { |
| 277 | + b = container_of(n, struct dm_buffer, node); |
| 278 | + |
| 279 | + if (b->block == block) |
| 280 | + return b; |
| 281 | + |
| 282 | + if (block <= b->block) { |
| 283 | + n = n->rb_left; |
| 284 | + best = b; |
| 285 | + } else { |
| 286 | + n = n->rb_right; |
| 287 | + } |
| 288 | + } |
| 289 | + |
| 290 | + return best; |
267 | 291 | } |
268 | 292 | |
269 | 293 | static void __insert(struct dm_bufio_client *c, struct dm_buffer *b) |
.. | .. |
280 | 304 | } |
281 | 305 | |
282 | 306 | parent = *new; |
283 | | - new = (found->block < b->block) ? |
284 | | - &((*new)->rb_left) : &((*new)->rb_right); |
| 307 | + new = b->block < found->block ? |
| 308 | + &found->node.rb_left : &found->node.rb_right; |
285 | 309 | } |
286 | 310 | |
287 | 311 | rb_link_node(&b->node, parent, new); |
.. | .. |
404 | 428 | */ |
405 | 429 | if (gfp_mask & __GFP_NORETRY) { |
406 | 430 | unsigned noio_flag = memalloc_noio_save(); |
407 | | - void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); |
| 431 | + void *ptr = __vmalloc(c->block_size, gfp_mask); |
408 | 432 | |
409 | 433 | memalloc_noio_restore(noio_flag); |
410 | 434 | return ptr; |
411 | 435 | } |
412 | 436 | |
413 | | - return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); |
| 437 | + return __vmalloc(c->block_size, gfp_mask); |
414 | 438 | } |
415 | 439 | |
416 | 440 | /* |
.. | .. |
459 | 483 | } |
460 | 484 | |
461 | 485 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
462 | | - memset(&b->stack_trace, 0, sizeof(b->stack_trace)); |
| 486 | + b->stack_len = 0; |
463 | 487 | #endif |
464 | 488 | return b; |
465 | 489 | } |
.. | .. |
476 | 500 | } |
477 | 501 | |
478 | 502 | /* |
479 | | - * Link buffer to the hash list and clean or dirty queue. |
| 503 | + * Link buffer to the buffer tree and clean or dirty queue. |
480 | 504 | */ |
481 | 505 | static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) |
482 | 506 | { |
.. | .. |
493 | 517 | } |
494 | 518 | |
495 | 519 | /* |
496 | | - * Unlink buffer from the hash list and dirty or clean queue. |
| 520 | + * Unlink buffer from the buffer tree and dirty or clean queue. |
497 | 521 | */ |
498 | 522 | static void __unlink_buffer(struct dm_buffer *b) |
499 | 523 | { |
.. | .. |
635 | 659 | submit_bio(bio); |
636 | 660 | } |
637 | 661 | |
| 662 | +static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block) |
| 663 | +{ |
| 664 | + sector_t sector; |
| 665 | + |
| 666 | + if (likely(c->sectors_per_block_bits >= 0)) |
| 667 | + sector = block << c->sectors_per_block_bits; |
| 668 | + else |
| 669 | + sector = block * (c->block_size >> SECTOR_SHIFT); |
| 670 | + sector += c->start; |
| 671 | + |
| 672 | + return sector; |
| 673 | +} |
| 674 | + |
638 | 675 | static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t)) |
639 | 676 | { |
640 | 677 | unsigned n_sectors; |
.. | .. |
643 | 680 | |
644 | 681 | b->end_io = end_io; |
645 | 682 | |
646 | | - if (likely(b->c->sectors_per_block_bits >= 0)) |
647 | | - sector = b->block << b->c->sectors_per_block_bits; |
648 | | - else |
649 | | - sector = b->block * (b->c->block_size >> SECTOR_SHIFT); |
650 | | - sector += b->c->start; |
| 683 | + sector = block_to_sector(b->c, b->block); |
651 | 684 | |
652 | 685 | if (rw != REQ_OP_WRITE) { |
653 | 686 | n_sectors = b->c->block_size >> SECTOR_SHIFT; |
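The new block_to_sector() helper above centralizes the block-to-sector conversion that submit_io() previously open-coded (and that dm_bufio_issue_discard() further down reuses): when the block size is a power of two, sectors_per_block_bits holds the shift; otherwise the block number is multiplied by the number of 512-byte sectors per block, and the client's start offset is added. A minimal userspace sketch of the same arithmetic follows; the struct and field names are illustrative stand-ins, not the real kernel structures.

```c
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

/* Simplified stand-in for the relevant dm_bufio_client fields. */
struct client_model {
	unsigned block_size;			/* block size in bytes */
	signed char sectors_per_block_bits;	/* log2(sectors per block), or -1 */
	uint64_t start;				/* start offset in sectors */
};

static uint64_t block_to_sector_model(const struct client_model *c, uint64_t block)
{
	uint64_t sector;

	if (c->sectors_per_block_bits >= 0)
		sector = block << c->sectors_per_block_bits;	   /* power-of-two fast path */
	else
		sector = block * (c->block_size >> SECTOR_SHIFT); /* generic multiply */
	return sector + c->start;
}

int main(void)
{
	/* 4096-byte blocks: 8 sectors per block, so bits = 3. */
	struct client_model c = { .block_size = 4096, .sectors_per_block_bits = 3, .start = 2048 };

	printf("block 10 -> sector %llu\n",
	       (unsigned long long)block_to_sector_model(&c, 10)); /* 10*8 + 2048 = 2128 */
	return 0;
}
```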
.. | .. |
972 | 1005 | |
973 | 1006 | /* |
974 | 1007 | * We've had a period where the mutex was unlocked, so need to |
975 | | - * recheck the hash table. |
| 1008 | + * recheck the buffer tree. |
976 | 1009 | */ |
977 | 1010 | b = __find(c, block); |
978 | 1011 | if (b) { |
.. | .. |
1306 | 1339 | EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); |
1307 | 1340 | |
1308 | 1341 | /* |
1309 | | - * Use dm-io to send and empty barrier flush the device. |
| 1342 | + * Use dm-io to send an empty barrier to flush the device. |
1310 | 1343 | */ |
1311 | 1344 | int dm_bufio_issue_flush(struct dm_bufio_client *c) |
1312 | 1345 | { |
.. | .. |
1330 | 1363 | EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); |
1331 | 1364 | |
1332 | 1365 | /* |
| 1366 | + * Use dm-io to send a discard request to flush the device. |
| 1367 | + */ |
| 1368 | +int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count) |
| 1369 | +{ |
| 1370 | + struct dm_io_request io_req = { |
| 1371 | + .bi_op = REQ_OP_DISCARD, |
| 1372 | + .bi_op_flags = REQ_SYNC, |
| 1373 | + .mem.type = DM_IO_KMEM, |
| 1374 | + .mem.ptr.addr = NULL, |
| 1375 | + .client = c->dm_io, |
| 1376 | + }; |
| 1377 | + struct dm_io_region io_reg = { |
| 1378 | + .bdev = c->bdev, |
| 1379 | + .sector = block_to_sector(c, block), |
| 1380 | + .count = block_to_sector(c, count), |
| 1381 | + }; |
| 1382 | + |
| 1383 | + BUG_ON(dm_bufio_in_request()); |
| 1384 | + |
| 1385 | + return dm_io(&io_req, 1, &io_reg, NULL); |
| 1386 | +} |
| 1387 | +EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); |
| 1388 | + |
| 1389 | +/* |
1333 | 1390 | * We first delete any other buffer that may be at that new location. |
1334 | 1391 | * |
1335 | 1392 | * Then, we write the buffer to the original location if it was dirty. |
1336 | 1393 | * |
1337 | 1394 | * Then, if we are the only one who is holding the buffer, relink the buffer |
1338 | | - * in the hash queue for the new location. |
| 1395 | + * in the buffer tree for the new location. |
1339 | 1396 | * |
1340 | 1397 | * If there was someone else holding the buffer, we write it to the new |
1341 | 1398 | * location but not relink it, because that other user needs to have the buffer |
.. | .. |
1405 | 1462 | } |
1406 | 1463 | EXPORT_SYMBOL_GPL(dm_bufio_release_move); |
1407 | 1464 | |
| 1465 | +static void forget_buffer_locked(struct dm_buffer *b) |
| 1466 | +{ |
| 1467 | + if (likely(!b->hold_count) && likely(!b->state)) { |
| 1468 | + __unlink_buffer(b); |
| 1469 | + __free_buffer_wake(b); |
| 1470 | + } |
| 1471 | +} |
| 1472 | + |
1408 | 1473 | /* |
1409 | 1474 | * Free the given buffer. |
1410 | 1475 | * |
.. | .. |
1418 | 1483 | dm_bufio_lock(c); |
1419 | 1484 | |
1420 | 1485 | b = __find(c, block); |
1421 | | - if (b && likely(!b->hold_count) && likely(!b->state)) { |
1422 | | - __unlink_buffer(b); |
1423 | | - __free_buffer_wake(b); |
1424 | | - } |
| 1486 | + if (b) |
| 1487 | + forget_buffer_locked(b); |
1425 | 1488 | |
1426 | 1489 | dm_bufio_unlock(c); |
1427 | 1490 | } |
1428 | 1491 | EXPORT_SYMBOL_GPL(dm_bufio_forget); |
| 1492 | + |
| 1493 | +void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks) |
| 1494 | +{ |
| 1495 | + struct dm_buffer *b; |
| 1496 | + sector_t end_block = block + n_blocks; |
| 1497 | + |
| 1498 | + while (block < end_block) { |
| 1499 | + dm_bufio_lock(c); |
| 1500 | + |
| 1501 | + b = __find_next(c, block); |
| 1502 | + if (b) { |
| 1503 | + block = b->block + 1; |
| 1504 | + forget_buffer_locked(b); |
| 1505 | + } |
| 1506 | + |
| 1507 | + dm_bufio_unlock(c); |
| 1508 | + |
| 1509 | + if (!b) |
| 1510 | + break; |
| 1511 | + } |
| 1512 | + |
| 1513 | +} |
| 1514 | +EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers); |
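dm_bufio_forget_buffers() does not require every block in the range to be cached: on each pass it asks __find_next() for the buffer at the current position or, failing that, the cached buffer with the smallest larger block number, frees it if it is idle, and advances past it; the loop stops once no further buffer is found or the position passes the end of the range. A rough userspace sketch of that "find lowest key >= cursor, then advance" walk over a sorted array (standing in for the rbtree; the names are illustrative, not kernel APIs):

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical sorted "cache": block numbers of currently cached buffers. */
static const unsigned long cached[] = { 3, 7, 8, 20, 21 };
static const size_t n_cached = sizeof(cached) / sizeof(cached[0]);

/* Return the index of the first entry >= key, or n_cached if none (lower bound). */
static size_t find_next(unsigned long key)
{
	size_t lo = 0, hi = n_cached;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (cached[mid] < key)
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

int main(void)
{
	unsigned long block = 5, end_block = 25;	/* forget blocks [5, 25) */

	while (block < end_block) {
		size_t i = find_next(block);

		if (i == n_cached)
			break;			/* nothing cached at or after block */
		printf("forget cached block %lu\n", cached[i]);
		block = cached[i] + 1;		/* advance past the buffer we handled */
	}
	return 0;
}
```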
1429 | 1515 | |
1430 | 1516 | void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n) |
1431 | 1517 | { |
.. | .. |
1509 | 1595 | DMERR("leaked buffer %llx, hold count %u, list %d", |
1510 | 1596 | (unsigned long long)b->block, b->hold_count, i); |
1511 | 1597 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
1512 | | - print_stack_trace(&b->stack_trace, 1); |
1513 | | - b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */ |
| 1598 | + stack_trace_print(b->stack_entries, b->stack_len, 1); |
| 1599 | + /* mark unclaimed to avoid BUG_ON below */ |
| 1600 | + b->hold_count = 0; |
1514 | 1601 | #endif |
1515 | 1602 | } |
1516 | 1603 | |
.. | .. |
1562 | 1649 | return retain_bytes; |
1563 | 1650 | } |
1564 | 1651 | |
1565 | | -static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, |
1566 | | - gfp_t gfp_mask) |
| 1652 | +static void __scan(struct dm_bufio_client *c) |
1567 | 1653 | { |
1568 | 1654 | int l; |
1569 | 1655 | struct dm_buffer *b, *tmp; |
.. | .. |
1574 | 1660 | |
1575 | 1661 | for (l = 0; l < LIST_SIZE; l++) { |
1576 | 1662 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { |
1577 | | - if (__try_evict_buffer(b, gfp_mask)) |
| 1663 | + if (count - freed <= retain_target) |
| 1664 | + atomic_long_set(&c->need_shrink, 0); |
| 1665 | + if (!atomic_long_read(&c->need_shrink)) |
| 1666 | + return; |
| 1667 | + if (__try_evict_buffer(b, GFP_KERNEL)) { |
| 1668 | + atomic_long_dec(&c->need_shrink); |
1578 | 1669 | freed++; |
1579 | | - if (!--nr_to_scan || ((count - freed) <= retain_target)) |
1580 | | - return freed; |
| 1670 | + } |
1581 | 1671 | cond_resched(); |
1582 | 1672 | } |
1583 | 1673 | } |
1584 | | - return freed; |
1585 | 1674 | } |
1586 | 1675 | |
1587 | | -static unsigned long |
1588 | | -dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 1676 | +static void shrink_work(struct work_struct *w) |
| 1677 | +{ |
| 1678 | + struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work); |
| 1679 | + |
| 1680 | + dm_bufio_lock(c); |
| 1681 | + __scan(c); |
| 1682 | + dm_bufio_unlock(c); |
| 1683 | +} |
| 1684 | + |
| 1685 | +static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
1589 | 1686 | { |
1590 | 1687 | struct dm_bufio_client *c; |
1591 | | - unsigned long freed; |
| 1688 | + bool bypass = false; |
| 1689 | + |
| 1690 | + trace_android_vh_dm_bufio_shrink_scan_bypass( |
| 1691 | + dm_bufio_current_allocated, |
| 1692 | + &bypass); |
| 1693 | + if (bypass) |
| 1694 | + return 0; |
1592 | 1695 | |
1593 | 1696 | c = container_of(shrink, struct dm_bufio_client, shrinker); |
1594 | | - if (sc->gfp_mask & __GFP_FS) |
1595 | | - dm_bufio_lock(c); |
1596 | | - else if (!dm_bufio_trylock(c)) |
1597 | | - return SHRINK_STOP; |
| 1697 | + atomic_long_add(sc->nr_to_scan, &c->need_shrink); |
| 1698 | + queue_work(dm_bufio_wq, &c->shrink_work); |
1598 | 1699 | |
1599 | | - freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); |
1600 | | - dm_bufio_unlock(c); |
1601 | | - return freed; |
| 1700 | + return sc->nr_to_scan; |
1602 | 1701 | } |
1603 | 1702 | |
1604 | | -static unsigned long |
1605 | | -dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
| 1703 | +static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
1606 | 1704 | { |
1607 | 1705 | struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); |
1608 | 1706 | unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) + |
1609 | 1707 | READ_ONCE(c->n_buffers[LIST_DIRTY]); |
1610 | 1708 | unsigned long retain_target = get_retain_buffers(c); |
| 1709 | + unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); |
1611 | 1710 | |
1612 | | - return (count < retain_target) ? 0 : (count - retain_target); |
| 1711 | + if (unlikely(count < retain_target)) |
| 1712 | + count = 0; |
| 1713 | + else |
| 1714 | + count -= retain_target; |
| 1715 | + |
| 1716 | + if (unlikely(count < queued_for_cleanup)) |
| 1717 | + count = 0; |
| 1718 | + else |
| 1719 | + count -= queued_for_cleanup; |
| 1720 | + |
| 1721 | + return count; |
1613 | 1722 | } |
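With this rework the shrinker callbacks no longer evict buffers directly: dm_bufio_shrink_scan() only adds sc->nr_to_scan to need_shrink and queues shrink_work on dm_bufio_wq, so eviction always runs in process context with GFP_KERNEL, and dm_bufio_shrink_count() reports what remains after subtracting both the retain target and the work already queued, using saturating subtraction. A small standalone sketch of that accounting, with made-up numbers (not the kernel helpers):

```c
#include <stdio.h>

/* Saturating subtraction, mirroring the two "if (count < x) count = 0; else count -= x" steps. */
static unsigned long sat_sub(unsigned long a, unsigned long b)
{
	return a < b ? 0 : a - b;
}

/* Model of the shrink_count accounting: how many buffers are still reportable. */
static unsigned long shrink_count_model(unsigned long n_clean, unsigned long n_dirty,
					unsigned long retain_target,
					unsigned long queued_for_cleanup)
{
	unsigned long count = n_clean + n_dirty;

	count = sat_sub(count, retain_target);	    /* never shrink below the retain target */
	count = sat_sub(count, queued_for_cleanup); /* don't re-report work already queued */
	return count;
}

int main(void)
{
	/* 1000 buffers cached, keep at least 256, 300 already queued for the worker. */
	printf("reportable: %lu\n", shrink_count_model(900, 100, 256, 300)); /* 444 */
	/* Everything already covered by the retain target plus queued work. */
	printf("reportable: %lu\n", shrink_count_model(200, 50, 256, 300));  /* 0 */
	return 0;
}
```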
1614 | 1723 | |
1615 | 1724 | /* |
.. | .. |
1700 | 1809 | __free_buffer_wake(b); |
1701 | 1810 | } |
1702 | 1811 | |
| 1812 | + INIT_WORK(&c->shrink_work, shrink_work); |
| 1813 | + atomic_long_set(&c->need_shrink, 0); |
| 1814 | + |
1703 | 1815 | c->shrinker.count_objects = dm_bufio_shrink_count; |
1704 | 1816 | c->shrinker.scan_objects = dm_bufio_shrink_scan; |
1705 | 1817 | c->shrinker.seeks = 1; |
.. | .. |
1745 | 1857 | drop_buffers(c); |
1746 | 1858 | |
1747 | 1859 | unregister_shrinker(&c->shrinker); |
| 1860 | + flush_work(&c->shrink_work); |
1748 | 1861 | |
1749 | 1862 | mutex_lock(&dm_bufio_clients_lock); |
1750 | 1863 | |
.. | .. |
1905 | 2018 | { |
1906 | 2019 | unsigned long max_age_hz = get_max_age_hz(); |
1907 | 2020 | struct dm_bufio_client *c; |
| 2021 | + bool bypass = false; |
| 2022 | + |
| 2023 | + trace_android_vh_cleanup_old_buffers_bypass( |
| 2024 | + dm_bufio_current_allocated, |
| 2025 | + &max_age_hz, |
| 2026 | + &bypass); |
| 2027 | + if (bypass) |
| 2028 | + return; |
1908 | 2029 | |
1909 | 2030 | mutex_lock(&dm_bufio_clients_lock); |
1910 | 2031 | |
.. | .. |
1941 | 2062 | dm_bufio_allocated_vmalloc = 0; |
1942 | 2063 | dm_bufio_current_allocated = 0; |
1943 | 2064 | |
1944 | | - mem = (__u64)mult_frac(totalram_pages - totalhigh_pages, |
| 2065 | + mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(), |
1945 | 2066 | DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT; |
1946 | 2067 | |
1947 | 2068 | if (mem > ULONG_MAX) |
---|