2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/md/dm-bufio.c
@@ -19,6 +19,8 @@
 #include <linux/rbtree.h>
 #include <linux/stacktrace.h>
 
+#include <trace/hooks/mm.h>
+
 #define DM_MSG_PREFIX "bufio"
 
 /*
@@ -66,7 +68,7 @@
 
 /*
  * Linking of buffers:
- * All buffers are linked to cache_hash with their hash_list field.
+ * All buffers are linked to buffer_tree with their node field.
  *
  * Clean buffers that are not being written (B_WRITING not set)
  * are linked to lru[LIST_CLEAN] with their lru_list field.
@@ -108,7 +110,10 @@
 	int async_write_error;
 
 	struct list_head client_list;
+
 	struct shrinker shrinker;
+	struct work_struct shrink_work;
+	atomic_long_t need_shrink;
 };
 
 /*
@@ -153,7 +158,7 @@
 	void (*end_io)(struct dm_buffer *, blk_status_t);
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 #define MAX_STACK 10
-	struct stack_trace stack_trace;
+	unsigned int stack_len;
 	unsigned long stack_entries[MAX_STACK];
 #endif
 };
@@ -238,11 +243,7 @@
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 static void buffer_record_stack(struct dm_buffer *b)
 {
-	b->stack_trace.nr_entries = 0;
-	b->stack_trace.max_entries = MAX_STACK;
-	b->stack_trace.entries = b->stack_entries;
-	b->stack_trace.skip = 2;
-	save_stack_trace(&b->stack_trace);
+	b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2);
 }
 #endif
 
@@ -260,10 +261,33 @@
 		if (b->block == block)
 			return b;
 
-		n = (b->block < block) ? n->rb_left : n->rb_right;
+		n = block < b->block ? n->rb_left : n->rb_right;
 	}
 
 	return NULL;
+}
+
+static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block)
+{
+	struct rb_node *n = c->buffer_tree.rb_node;
+	struct dm_buffer *b;
+	struct dm_buffer *best = NULL;
+
+	while (n) {
+		b = container_of(n, struct dm_buffer, node);
+
+		if (b->block == block)
+			return b;
+
+		if (block <= b->block) {
+			n = n->rb_left;
+			best = b;
+		} else {
+			n = n->rb_right;
+		}
+	}
+
+	return best;
 }
 
 static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
@@ -280,8 +304,8 @@
 		}
 
 		parent = *new;
-		new = (found->block < b->block) ?
-			&((*new)->rb_left) : &((*new)->rb_right);
+		new = b->block < found->block ?
+			&found->node.rb_left : &found->node.rb_right;
 	}
 
 	rb_link_node(&b->node, parent, new);
@@ -404,13 +428,13 @@
 	 */
 	if (gfp_mask & __GFP_NORETRY) {
 		unsigned noio_flag = memalloc_noio_save();
-		void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+		void *ptr = __vmalloc(c->block_size, gfp_mask);
 
 		memalloc_noio_restore(noio_flag);
 		return ptr;
 	}
 
-	return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+	return __vmalloc(c->block_size, gfp_mask);
 }
 
 /*
@@ -459,7 +483,7 @@
 	}
 
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
-	memset(&b->stack_trace, 0, sizeof(b->stack_trace));
+	b->stack_len = 0;
 #endif
 	return b;
 }
@@ -476,7 +500,7 @@
 }
 
 /*
- * Link buffer to the hash list and clean or dirty queue.
+ * Link buffer to the buffer tree and clean or dirty queue.
  */
 static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
 {
@@ -493,7 +517,7 @@
 }
 
 /*
- * Unlink buffer from the hash list and dirty or clean queue.
+ * Unlink buffer from the buffer tree and dirty or clean queue.
  */
 static void __unlink_buffer(struct dm_buffer *b)
 {
@@ -635,6 +659,19 @@
 	submit_bio(bio);
 }
 
+static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block)
+{
+	sector_t sector;
+
+	if (likely(c->sectors_per_block_bits >= 0))
+		sector = block << c->sectors_per_block_bits;
+	else
+		sector = block * (c->block_size >> SECTOR_SHIFT);
+	sector += c->start;
+
+	return sector;
+}
+
 static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
 {
 	unsigned n_sectors;
@@ -643,11 +680,7 @@
 
 	b->end_io = end_io;
 
-	if (likely(b->c->sectors_per_block_bits >= 0))
-		sector = b->block << b->c->sectors_per_block_bits;
-	else
-		sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
-	sector += b->c->start;
+	sector = block_to_sector(b->c, b->block);
 
 	if (rw != REQ_OP_WRITE) {
 		n_sectors = b->c->block_size >> SECTOR_SHIFT;
@@ -972,7 +1005,7 @@
 
 	/*
 	 * We've had a period where the mutex was unlocked, so need to
-	 * recheck the hash table.
+	 * recheck the buffer tree.
 	 */
 	b = __find(c, block);
 	if (b) {
@@ -1306,7 +1339,7 @@
 EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
 
 /*
- * Use dm-io to send and empty barrier flush the device.
+ * Use dm-io to send an empty barrier to flush the device.
  */
 int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
@@ -1330,12 +1363,36 @@
 EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
 
 /*
+ * Use dm-io to send a discard request to flush the device.
+ */
+int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count)
+{
+	struct dm_io_request io_req = {
+		.bi_op = REQ_OP_DISCARD,
+		.bi_op_flags = REQ_SYNC,
+		.mem.type = DM_IO_KMEM,
+		.mem.ptr.addr = NULL,
+		.client = c->dm_io,
+	};
+	struct dm_io_region io_reg = {
+		.bdev = c->bdev,
+		.sector = block_to_sector(c, block),
+		.count = block_to_sector(c, count),
+	};
+
+	BUG_ON(dm_bufio_in_request());
+
+	return dm_io(&io_req, 1, &io_reg, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
+
+/*
  * We first delete any other buffer that may be at that new location.
  *
  * Then, we write the buffer to the original location if it was dirty.
  *
  * Then, if we are the only one who is holding the buffer, relink the buffer
- * in the hash queue for the new location.
+ * in the buffer tree for the new location.
  *
  * If there was someone else holding the buffer, we write it to the new
  * location but not relink it, because that other user needs to have the buffer
@@ -1405,6 +1462,14 @@
 }
 EXPORT_SYMBOL_GPL(dm_bufio_release_move);
 
+static void forget_buffer_locked(struct dm_buffer *b)
+{
+	if (likely(!b->hold_count) && likely(!b->state)) {
+		__unlink_buffer(b);
+		__free_buffer_wake(b);
+	}
+}
+
 /*
  * Free the given buffer.
  *
@@ -1418,14 +1483,35 @@
 	dm_bufio_lock(c);
 
 	b = __find(c, block);
-	if (b && likely(!b->hold_count) && likely(!b->state)) {
-		__unlink_buffer(b);
-		__free_buffer_wake(b);
-	}
+	if (b)
+		forget_buffer_locked(b);
 
 	dm_bufio_unlock(c);
 }
 EXPORT_SYMBOL_GPL(dm_bufio_forget);
+
+void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks)
+{
+	struct dm_buffer *b;
+	sector_t end_block = block + n_blocks;
+
+	while (block < end_block) {
+		dm_bufio_lock(c);
+
+		b = __find_next(c, block);
+		if (b) {
+			block = b->block + 1;
+			forget_buffer_locked(b);
+		}
+
+		dm_bufio_unlock(c);
+
+		if (!b)
+			break;
+	}
+
+}
+EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers);
 
 void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
 {
@@ -1509,8 +1595,9 @@
 		DMERR("leaked buffer %llx, hold count %u, list %d",
 		      (unsigned long long)b->block, b->hold_count, i);
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
-		print_stack_trace(&b->stack_trace, 1);
-		b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */
+		stack_trace_print(b->stack_entries, b->stack_len, 1);
+		/* mark unclaimed to avoid BUG_ON below */
+		b->hold_count = 0;
 #endif
 	}
 
@@ -1562,8 +1649,7 @@
 	return retain_bytes;
 }
 
-static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
-			    gfp_t gfp_mask)
+static void __scan(struct dm_bufio_client *c)
 {
 	int l;
 	struct dm_buffer *b, *tmp;
@@ -1574,42 +1660,65 @@
 
 	for (l = 0; l < LIST_SIZE; l++) {
 		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
-			if (__try_evict_buffer(b, gfp_mask))
+			if (count - freed <= retain_target)
+				atomic_long_set(&c->need_shrink, 0);
+			if (!atomic_long_read(&c->need_shrink))
+				return;
+			if (__try_evict_buffer(b, GFP_KERNEL)) {
+				atomic_long_dec(&c->need_shrink);
 				freed++;
-			if (!--nr_to_scan || ((count - freed) <= retain_target))
-				return freed;
+			}
 			cond_resched();
 		}
 	}
-	return freed;
 }
 
-static unsigned long
-dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+static void shrink_work(struct work_struct *w)
+{
+	struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);
+
+	dm_bufio_lock(c);
+	__scan(c);
+	dm_bufio_unlock(c);
+}
+
+static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct dm_bufio_client *c;
-	unsigned long freed;
+	bool bypass = false;
+
+	trace_android_vh_dm_bufio_shrink_scan_bypass(
+			dm_bufio_current_allocated,
+			&bypass);
+	if (bypass)
+		return 0;
 
 	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_FS)
-		dm_bufio_lock(c);
-	else if (!dm_bufio_trylock(c))
-		return SHRINK_STOP;
+	atomic_long_add(sc->nr_to_scan, &c->need_shrink);
+	queue_work(dm_bufio_wq, &c->shrink_work);
 
-	freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
-	dm_bufio_unlock(c);
-	return freed;
+	return sc->nr_to_scan;
 }
 
-static unsigned long
-dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
 	unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
 			      READ_ONCE(c->n_buffers[LIST_DIRTY]);
 	unsigned long retain_target = get_retain_buffers(c);
+	unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
 
-	return (count < retain_target) ? 0 : (count - retain_target);
+	if (unlikely(count < retain_target))
+		count = 0;
+	else
+		count -= retain_target;
+
+	if (unlikely(count < queued_for_cleanup))
+		count = 0;
+	else
+		count -= queued_for_cleanup;
+
+	return count;
 }
 
 /*
@@ -1700,6 +1809,9 @@
 		__free_buffer_wake(b);
 	}
 
+	INIT_WORK(&c->shrink_work, shrink_work);
+	atomic_long_set(&c->need_shrink, 0);
+
 	c->shrinker.count_objects = dm_bufio_shrink_count;
 	c->shrinker.scan_objects = dm_bufio_shrink_scan;
 	c->shrinker.seeks = 1;
@@ -1745,6 +1857,7 @@
 	drop_buffers(c);
 
 	unregister_shrinker(&c->shrinker);
+	flush_work(&c->shrink_work);
 
 	mutex_lock(&dm_bufio_clients_lock);
 
@@ -1905,6 +2018,14 @@
 {
 	unsigned long max_age_hz = get_max_age_hz();
 	struct dm_bufio_client *c;
+	bool bypass = false;
+
+	trace_android_vh_cleanup_old_buffers_bypass(
+				dm_bufio_current_allocated,
+				&max_age_hz,
+				&bypass);
+	if (bypass)
+		return;
 
 	mutex_lock(&dm_bufio_clients_lock);
 
@@ -1941,7 +2062,7 @@
 	dm_bufio_allocated_vmalloc = 0;
 	dm_bufio_current_allocated = 0;
 
-	mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
+	mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
 			       DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
 
 	if (mem > ULONG_MAX)
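
For reference, a minimal sketch (not part of the patch above) of how a device-mapper target could combine the two interfaces this change exports, dm_bufio_issue_discard() and dm_bufio_forget_buffers(). The wrapper name and calling context are hypothetical; only the two dm-bufio calls and the no-request-context restriction come from the patch.

/*
 * Illustrative sketch only -- not part of the patch.
 * Discard a range of blocks through a dm-bufio client and then drop any
 * cached buffers for that range, so later reads go back to the device.
 */
#include <linux/dm-bufio.h>

static int example_discard_and_forget(struct dm_bufio_client *c,
				      sector_t block, sector_t n_blocks)
{
	int r;

	/* Must not be called from request-processing context (BUG_ON in the patch). */
	r = dm_bufio_issue_discard(c, block, n_blocks);
	if (r)
		return r;

	/* Invalidate cached copies of the discarded blocks. */
	dm_bufio_forget_buffers(c, block, n_blocks);
	return 0;
}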