| .. | .. |
| 66 | 66 | |
| 67 | 67 | /* |
| 68 | 68 | * Linking of buffers: |
| 69 | | - * All buffers are linked to cache_hash with their hash_list field. |
| 69 | + * All buffers are linked to buffer_tree with their node field. |
| 70 | 70 | * |
| 71 | 71 | * Clean buffers that are not being written (B_WRITING not set) |
| 72 | 72 | * are linked to lru[LIST_CLEAN] with their lru_list field. |
| .. | .. |
| 108 | 108 | int async_write_error; |
| 109 | 109 | |
| 110 | 110 | struct list_head client_list; |
| 111 | + |
| 111 | 112 | struct shrinker shrinker; |
| 113 | + struct work_struct shrink_work; |
| 114 | + atomic_long_t need_shrink; |
| 112 | 115 | }; |
| 113 | 116 | |
| 114 | 117 | /* |
| .. | .. |
| 153 | 156 | void (*end_io)(struct dm_buffer *, blk_status_t); |
| 154 | 157 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
| 155 | 158 | #define MAX_STACK 10 |
| 156 | | - struct stack_trace stack_trace; |
| 159 | + unsigned int stack_len; |
| 157 | 160 | unsigned long stack_entries[MAX_STACK]; |
| 158 | 161 | #endif |
| 159 | 162 | }; |
| .. | .. |
| 238 | 241 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
| 239 | 242 | static void buffer_record_stack(struct dm_buffer *b) |
| 240 | 243 | { |
| 241 | | - b->stack_trace.nr_entries = 0; |
| 242 | | - b->stack_trace.max_entries = MAX_STACK; |
| 243 | | - b->stack_trace.entries = b->stack_entries; |
| 244 | | - b->stack_trace.skip = 2; |
| 245 | | - save_stack_trace(&b->stack_trace); |
| 244 | + b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2); |
| 246 | 245 | } |
| 247 | 246 | #endif |
| 248 | 247 | |
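Both halves of the stack-trace change (here and in the leak report further down) move from the old `struct stack_trace` bundle to the flat API from `<linux/stacktrace.h>`: `stack_trace_save()` fills a caller-supplied array and returns the entry count, and `stack_trace_print()` later consumes that same pair. A minimal sketch of the pairing; the wrapper function itself is invented for illustration:

```c
#include <linux/stacktrace.h>

#define MAX_STACK 10

/* Hypothetical helper: capture the current stack, skipping the two
 * innermost frames as buffer_record_stack() does, then dump it. */
static void demo_record_and_print(void)
{
	unsigned long entries[MAX_STACK];
	unsigned int len = stack_trace_save(entries, MAX_STACK, 2);

	stack_trace_print(entries, len, 1);	/* final arg = indent spaces */
}
```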
| .. | .. |
| 260 | 259 | if (b->block == block) |
| 261 | 260 | return b; |
| 262 | 261 | |
| 263 | | - n = (b->block < block) ? n->rb_left : n->rb_right; |
| 262 | + n = block < b->block ? n->rb_left : n->rb_right; |
| 264 | 263 | } |
| 265 | 264 | |
| 266 | 265 | return NULL; |
| 266 | +} |
| 267 | + |
| 268 | +static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block) |
| 269 | +{ |
| 270 | + struct rb_node *n = c->buffer_tree.rb_node; |
| 271 | + struct dm_buffer *b; |
| 272 | + struct dm_buffer *best = NULL; |
| 273 | + |
| 274 | + while (n) { |
| 275 | + b = container_of(n, struct dm_buffer, node); |
| 276 | + |
| 277 | + if (b->block == block) |
| 278 | + return b; |
| 279 | + |
| 280 | + if (block <= b->block) { |
| 281 | + n = n->rb_left; |
| 282 | + best = b; |
| 283 | + } else { |
| 284 | + n = n->rb_right; |
| 285 | + } |
| 286 | + } |
| 287 | + |
| 288 | + return best; |
| 267 | 289 | } |
| 268 | 290 | |
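`__find_next()` is a ceiling lookup: an exact hit returns immediately, and otherwise the last node at which the search went left (`block <= b->block`) is the smallest cached block at or above the target. The same best-so-far bookkeeping works on any ordered structure; here is a runnable userspace sketch of it over a sorted array (data invented for illustration):

```c
#include <stdio.h>

/* Return the smallest key >= target, or -1 if none: the array analogue
 * of __find_next() descending the rbtree and remembering "best". */
static long find_next(const long *keys, int n, long target)
{
	int lo = 0, hi = n - 1;
	long best = -1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (keys[mid] == target)
			return keys[mid];
		if (target < keys[mid]) {
			best = keys[mid];	/* candidate; keep looking left */
			hi = mid - 1;
		} else {
			lo = mid + 1;
		}
	}
	return best;
}

int main(void)
{
	long keys[] = { 2, 5, 9, 14 };

	printf("%ld\n", find_next(keys, 4, 6));		/* 9 */
	printf("%ld\n", find_next(keys, 4, 9));		/* 9 */
	printf("%ld\n", find_next(keys, 4, 15));	/* -1 */
	return 0;
}
```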
| 269 | 291 | static void __insert(struct dm_bufio_client *c, struct dm_buffer *b) |
| .. | .. |
| 280 | 302 | } |
| 281 | 303 | |
| 282 | 304 | parent = *new; |
| 283 | | - new = (found->block < b->block) ? |
| 284 | | - &((*new)->rb_left) : &((*new)->rb_right); |
| 305 | + new = b->block < found->block ? |
| 306 | + &found->node.rb_left : &found->node.rb_right; |
| 285 | 307 | } |
| 286 | 308 | |
| 287 | 309 | rb_link_node(&b->node, parent, new); |
| .. | .. |
| 404 | 426 | */ |
| 405 | 427 | if (gfp_mask & __GFP_NORETRY) { |
| 406 | 428 | unsigned noio_flag = memalloc_noio_save(); |
| 407 | | - void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); |
| 429 | + void *ptr = __vmalloc(c->block_size, gfp_mask); |
| 408 | 430 | |
| 409 | 431 | memalloc_noio_restore(noio_flag); |
| 410 | 432 | return ptr; |
| 411 | 433 | } |
| 412 | 434 | |
| 413 | | - return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); |
| 435 | + return __vmalloc(c->block_size, gfp_mask); |
| 414 | 436 | } |
| 415 | 437 | |
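The `__vmalloc()` calls lose their third argument because the allocator no longer takes a `pgprot_t` in recent kernels (all mappings are `PAGE_KERNEL`), so the call shrinks to size plus gfp mask. The NOIO behaviour never came from that argument anyway; it comes from the scoped pattern shown above, sketched here with assumed variable names:

```c
/* Every allocation between save and restore behaves as if GFP_NOIO,
 * preventing reclaim from recursing into the I/O path we are part of. */
unsigned noio_flag = memalloc_noio_save();
void *ptr = __vmalloc(size, gfp_mask);	/* no pgprot argument anymore */
memalloc_noio_restore(noio_flag);
```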
| 416 | 438 | /* |
| .. | .. |
| 459 | 481 | } |
| 460 | 482 | |
| 461 | 483 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
| 462 | | - memset(&b->stack_trace, 0, sizeof(b->stack_trace)); |
| 484 | + b->stack_len = 0; |
| 463 | 485 | #endif |
| 464 | 486 | return b; |
| 465 | 487 | } |
| .. | .. |
| 476 | 498 | } |
| 477 | 499 | |
| 478 | 500 | /* |
| 479 | | - * Link buffer to the hash list and clean or dirty queue. |
| 501 | + * Link buffer to the buffer tree and clean or dirty queue. |
| 480 | 502 | */ |
| 481 | 503 | static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) |
| 482 | 504 | { |
| .. | .. |
| 493 | 515 | } |
| 494 | 516 | |
| 495 | 517 | /* |
| 496 | | - * Unlink buffer from the hash list and dirty or clean queue. |
| 518 | + * Unlink buffer from the buffer tree and dirty or clean queue. |
| 497 | 519 | */ |
| 498 | 520 | static void __unlink_buffer(struct dm_buffer *b) |
| 499 | 521 | { |
| .. | .. |
| 635 | 657 | submit_bio(bio); |
| 636 | 658 | } |
| 637 | 659 | |
| 660 | +static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block) |
| 661 | +{ |
| 662 | + sector_t sector; |
| 663 | + |
| 664 | + if (likely(c->sectors_per_block_bits >= 0)) |
| 665 | + sector = block << c->sectors_per_block_bits; |
| 666 | + else |
| 667 | + sector = block * (c->block_size >> SECTOR_SHIFT); |
| 668 | + sector += c->start; |
| 669 | + |
| 670 | + return sector; |
| 671 | +} |
| 672 | + |
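`block_to_sector()` factors out the conversion that `submit_io()` (next hunk) used to open-code: a shift when the block size is a power of two, a multiply otherwise, then the client's start offset. A runnable userspace restatement with made-up block sizes:

```c
#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9			/* 512-byte sectors, as in the kernel */

int main(void)
{
	uint64_t start = 0;		/* hypothetical c->start */

	/* 4096-byte blocks: sectors_per_block_bits == 3, block 10 -> sector 80 */
	printf("%llu\n", (unsigned long long)((10ULL << 3) + start));

	/* 3072-byte blocks: not a power of two, so 10 * (3072 >> 9) == 60 */
	printf("%llu\n", (unsigned long long)((10ULL * (3072 >> SECTOR_SHIFT)) + start));
	return 0;
}
```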
| 638 | 673 | static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t)) |
| 639 | 674 | { |
| 640 | 675 | unsigned n_sectors; |
| .. | .. |
| 643 | 678 | |
| 644 | 679 | b->end_io = end_io; |
| 645 | 680 | |
| 646 | | - if (likely(b->c->sectors_per_block_bits >= 0)) |
| 647 | | - sector = b->block << b->c->sectors_per_block_bits; |
| 648 | | - else |
| 649 | | - sector = b->block * (b->c->block_size >> SECTOR_SHIFT); |
| 650 | | - sector += b->c->start; |
| 681 | + sector = block_to_sector(b->c, b->block); |
| 651 | 682 | |
| 652 | 683 | if (rw != REQ_OP_WRITE) { |
| 653 | 684 | n_sectors = b->c->block_size >> SECTOR_SHIFT; |
| .. | .. |
| 972 | 1003 | |
| 973 | 1004 | /* |
| 974 | 1005 | * We've had a period where the mutex was unlocked, so need to |
| 975 | | - * recheck the hash table. |
| 1006 | + * recheck the buffer tree. |
| 976 | 1007 | */ |
| 977 | 1008 | b = __find(c, block); |
| 978 | 1009 | if (b) { |
| .. | .. |
| 1306 | 1337 | EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); |
| 1307 | 1338 | |
| 1308 | 1339 | /* |
| 1309 | | - * Use dm-io to send and empty barrier flush the device. |
| 1340 | + * Use dm-io to send an empty barrier to flush the device. |
| 1310 | 1341 | */ |
| 1311 | 1342 | int dm_bufio_issue_flush(struct dm_bufio_client *c) |
| 1312 | 1343 | { |
| .. | .. |
| 1330 | 1361 | EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); |
| 1331 | 1362 | |
| 1332 | 1363 | /* |
| 1364 | + * Use dm-io to send a discard request to the device. |
| 1365 | + */ |
| 1366 | +int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count) |
| 1367 | +{ |
| 1368 | + struct dm_io_request io_req = { |
| 1369 | + .bi_op = REQ_OP_DISCARD, |
| 1370 | + .bi_op_flags = REQ_SYNC, |
| 1371 | + .mem.type = DM_IO_KMEM, |
| 1372 | + .mem.ptr.addr = NULL, |
| 1373 | + .client = c->dm_io, |
| 1374 | + }; |
| 1375 | + struct dm_io_region io_reg = { |
| 1376 | + .bdev = c->bdev, |
| 1377 | + .sector = block_to_sector(c, block), |
| 1378 | + .count = block_to_sector(c, count), |
| 1379 | + }; |
| 1380 | + |
| 1381 | + BUG_ON(dm_bufio_in_request()); |
| 1382 | + |
| 1383 | + return dm_io(&io_req, 1, &io_reg, NULL); |
| 1384 | +} |
| 1385 | +EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); |
| 1386 | + |
| 1387 | +/* |
| 1333 | 1388 | * We first delete any other buffer that may be at that new location. |
| 1334 | 1389 | * |
| 1335 | 1390 | * Then, we write the buffer to the original location if it was dirty. |
| 1336 | 1391 | * |
| 1337 | 1392 | * Then, if we are the only one who is holding the buffer, relink the buffer |
| 1338 | | - * in the hash queue for the new location. |
| 1393 | + * in the buffer tree for the new location. |
| 1339 | 1394 | * |
| 1340 | 1395 | * If there was someone else holding the buffer, we write it to the new |
| 1341 | 1396 | * location but not relink it, because that other user needs to have the buffer |
| .. | .. |
| 1405 | 1460 | } |
| 1406 | 1461 | EXPORT_SYMBOL_GPL(dm_bufio_release_move); |
| 1407 | 1462 | |
| 1463 | +static void forget_buffer_locked(struct dm_buffer *b) |
| 1464 | +{ |
| 1465 | + if (likely(!b->hold_count) && likely(!b->state)) { |
| 1466 | + __unlink_buffer(b); |
| 1467 | + __free_buffer_wake(b); |
| 1468 | + } |
| 1469 | +} |
| 1470 | + |
| 1408 | 1471 | /* |
| 1409 | 1472 | * Free the given buffer. |
| 1410 | 1473 | * |
| .. | .. |
| 1418 | 1481 | dm_bufio_lock(c); |
| 1419 | 1482 | |
| 1420 | 1483 | b = __find(c, block); |
| 1421 | | - if (b && likely(!b->hold_count) && likely(!b->state)) { |
| 1422 | | - __unlink_buffer(b); |
| 1423 | | - __free_buffer_wake(b); |
| 1424 | | - } |
| 1484 | + if (b) |
| 1485 | + forget_buffer_locked(b); |
| 1425 | 1486 | |
| 1426 | 1487 | dm_bufio_unlock(c); |
| 1427 | 1488 | } |
| 1428 | 1489 | EXPORT_SYMBOL_GPL(dm_bufio_forget); |
| 1490 | + |
| 1491 | +void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks) |
| 1492 | +{ |
| 1493 | + struct dm_buffer *b; |
| 1494 | + sector_t end_block = block + n_blocks; |
| 1495 | + |
| 1496 | + while (block < end_block) { |
| 1497 | + dm_bufio_lock(c); |
| 1498 | + |
| 1499 | + b = __find_next(c, block); |
| 1500 | + if (b) { |
| 1501 | + block = b->block + 1; |
| 1502 | + forget_buffer_locked(b); |
| 1503 | + } |
| 1504 | + |
| 1505 | + dm_bufio_unlock(c); |
| 1506 | + |
| 1507 | + if (!b) |
| 1508 | + break; |
| 1509 | + } |
| 1510 | + |
| 1511 | +} |
| 1512 | +EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers); |
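Together with `dm_bufio_issue_discard()` above, this lets a caller both punch out a range on the media and drop any cached copies, so stale data cannot be served afterwards. A hypothetical caller sketch; only the two exported functions are from this patch, the wrapper is invented:

```c
/* Hypothetical: invalidate n_blocks starting at block, on disk and in cache. */
static int invalidate_blocks(struct dm_bufio_client *c,
			     sector_t block, sector_t n_blocks)
{
	int r = dm_bufio_issue_discard(c, block, n_blocks);

	if (r)
		return r;

	/* Only unheld, idle buffers are dropped; held ones stay cached. */
	dm_bufio_forget_buffers(c, block, n_blocks);
	return 0;
}
```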
| 1429 | 1513 | |
| 1430 | 1514 | void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n) |
| 1431 | 1515 | { |
| .. | .. |
| 1509 | 1593 | DMERR("leaked buffer %llx, hold count %u, list %d", |
| 1510 | 1594 | (unsigned long long)b->block, b->hold_count, i); |
| 1511 | 1595 | #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING |
| 1512 | | - print_stack_trace(&b->stack_trace, 1); |
| 1513 | | - b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */ |
| 1596 | + stack_trace_print(b->stack_entries, b->stack_len, 1); |
| 1597 | + /* mark unclaimed to avoid BUG_ON below */ |
| 1598 | + b->hold_count = 0; |
| 1514 | 1599 | #endif |
| 1515 | 1600 | } |
| 1516 | 1601 | |
| .. | .. |
| 1562 | 1647 | return retain_bytes; |
| 1563 | 1648 | } |
| 1564 | 1649 | |
| 1565 | | -static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, |
| 1566 | | - gfp_t gfp_mask) |
| 1650 | +static void __scan(struct dm_bufio_client *c) |
| 1567 | 1651 | { |
| 1568 | 1652 | int l; |
| 1569 | 1653 | struct dm_buffer *b, *tmp; |
| .. | .. |
| 1574 | 1658 | |
| 1575 | 1659 | for (l = 0; l < LIST_SIZE; l++) { |
| 1576 | 1660 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { |
| 1577 | | - if (__try_evict_buffer(b, gfp_mask)) |
| 1661 | + if (count - freed <= retain_target) |
| 1662 | + atomic_long_set(&c->need_shrink, 0); |
| 1663 | + if (!atomic_long_read(&c->need_shrink)) |
| 1664 | + return; |
| 1665 | + if (__try_evict_buffer(b, GFP_KERNEL)) { |
| 1666 | + atomic_long_dec(&c->need_shrink); |
| 1578 | 1667 | freed++; |
| 1579 | | - if (!--nr_to_scan || ((count - freed) <= retain_target)) |
| 1580 | | - return freed; |
| 1668 | + } |
| 1581 | 1669 | cond_resched(); |
| 1582 | 1670 | } |
| 1583 | 1671 | } |
| 1584 | | - return freed; |
| 1585 | 1672 | } |
| 1586 | 1673 | |
| 1587 | | -static unsigned long |
| 1588 | | -dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 1674 | +static void shrink_work(struct work_struct *w) |
| 1675 | +{ |
| 1676 | + struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work); |
| 1677 | + |
| 1678 | + dm_bufio_lock(c); |
| 1679 | + __scan(c); |
| 1680 | + dm_bufio_unlock(c); |
| 1681 | +} |
| 1682 | + |
| 1683 | +static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 1589 | 1684 | { |
| 1590 | 1685 | struct dm_bufio_client *c; |
| 1591 | | - unsigned long freed; |
| 1592 | 1686 | |
| 1593 | 1687 | c = container_of(shrink, struct dm_bufio_client, shrinker); |
| 1594 | | - if (sc->gfp_mask & __GFP_FS) |
| 1595 | | - dm_bufio_lock(c); |
| 1596 | | - else if (!dm_bufio_trylock(c)) |
| 1597 | | - return SHRINK_STOP; |
| 1688 | + atomic_long_add(sc->nr_to_scan, &c->need_shrink); |
| 1689 | + queue_work(dm_bufio_wq, &c->shrink_work); |
| 1598 | 1690 | |
| 1599 | | - freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); |
| 1600 | | - dm_bufio_unlock(c); |
| 1601 | | - return freed; |
| 1691 | + return sc->nr_to_scan; |
| 1602 | 1692 | } |
| 1603 | 1693 | |
| 1604 | | -static unsigned long |
| 1605 | | -dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
| 1694 | +static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
| 1606 | 1695 | { |
| 1607 | 1696 | struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); |
| 1608 | 1697 | unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) + |
| 1609 | 1698 | READ_ONCE(c->n_buffers[LIST_DIRTY]); |
| 1610 | 1699 | unsigned long retain_target = get_retain_buffers(c); |
| 1700 | + unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); |
| 1611 | 1701 | |
| 1612 | | - return (count < retain_target) ? 0 : (count - retain_target); |
| 1702 | + if (unlikely(count < retain_target)) |
| 1703 | + count = 0; |
| 1704 | + else |
| 1705 | + count -= retain_target; |
| 1706 | + |
| 1707 | + if (unlikely(count < queued_for_cleanup)) |
| 1708 | + count = 0; |
| 1709 | + else |
| 1710 | + count -= queued_for_cleanup; |
| 1711 | + |
| 1712 | + return count; |
| 1613 | 1713 | } |
| 1614 | 1714 | |
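The rework turns the shrinker into pure accounting: `dm_bufio_shrink_scan()` adds the request to `need_shrink` and queues `shrink_work`, which evicts in process context where taking the mutex and using GFP_KERNEL are safe, and `dm_bufio_shrink_count()` subtracts already-queued work so the core does not re-request it. A runnable single-threaded sketch of that accounting (buffer counts invented):

```c
#include <stdio.h>

static long need_shrink;			/* c->need_shrink */
static long n_buffers = 100;			/* cached buffers */
static const long retain_target = 40;		/* get_retain_buffers() */

static void shrink_scan(long nr_to_scan)
{
	need_shrink += nr_to_scan;	/* atomic_long_add() in the kernel */
	/* queue_work() would schedule shrink_work() here */
}

static void shrink_work(void)
{
	while (need_shrink > 0 && n_buffers > retain_target) {
		n_buffers--;		/* a successful __try_evict_buffer() */
		need_shrink--;
	}
	if (n_buffers <= retain_target)
		need_shrink = 0;	/* mirror __scan()'s early reset */
}

static long shrink_count(void)
{
	long count = n_buffers > retain_target ? n_buffers - retain_target : 0;

	return count > need_shrink ? count - need_shrink : 0;
}

int main(void)
{
	shrink_scan(80);
	printf("countable before work: %ld\n", shrink_count());	/* 0 */
	shrink_work();
	printf("buffers=%ld pending=%ld\n", n_buffers, need_shrink);	/* 40 0 */
	return 0;
}
```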
| 1615 | 1715 | /* |
| .. | .. |
| 1700 | 1800 | __free_buffer_wake(b); |
| 1701 | 1801 | } |
| 1702 | 1802 | |
| 1803 | + INIT_WORK(&c->shrink_work, shrink_work); |
| 1804 | + atomic_long_set(&c->need_shrink, 0); |
| 1805 | + |
| 1703 | 1806 | c->shrinker.count_objects = dm_bufio_shrink_count; |
| 1704 | 1807 | c->shrinker.scan_objects = dm_bufio_shrink_scan; |
| 1705 | 1808 | c->shrinker.seeks = 1; |
| .. | .. |
| 1745 | 1848 | drop_buffers(c); |
| 1746 | 1849 | |
| 1747 | 1850 | unregister_shrinker(&c->shrinker); |
| 1851 | + flush_work(&c->shrink_work); |
| 1748 | 1852 | |
| 1749 | 1853 | mutex_lock(&dm_bufio_clients_lock); |
| 1750 | 1854 | |
| .. | .. |
| 1941 | 2045 | dm_bufio_allocated_vmalloc = 0; |
| 1942 | 2046 | dm_bufio_current_allocated = 0; |
| 1943 | 2047 | |
| 1944 | | - mem = (__u64)mult_frac(totalram_pages - totalhigh_pages, |
| 2048 | + mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(), |
| 1945 | 2049 | DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT; |
| 1946 | 2050 | |
| 1947 | 2051 | if (mem > ULONG_MAX) |
|---|