From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] dm cache: backport upstream fixes and discard passdown support

Bring kernel/drivers/md/dm-cache-target.c in line with a batch of
upstream dm-cache changes (this modifies the existing file rather than
adding a new one):

- Replace spin_lock_irqsave()/spin_unlock_irqrestore() with
  spin_lock_irq()/spin_unlock_irq() in functions that are only called
  from process context with interrupts enabled.
- Add a discard_passdown feature (on by default) that remaps discards
  to the origin device, a no_discard_passdown feature argument to turn
  it off, and disable_passdown_if_not_supported() to fall back when the
  origin device cannot honour discards; discard limits are inherited
  from the origin device while passdown is active.
- Replace generic_make_request() with submit_bio_noacct(), and the
  open-coded dm_device_name(dm_table_get_md(...)) with
  dm_table_device_name().
- Remove the congested_fn/dm_target_callbacks machinery, the
  split_discard_bios flag and the stale ARM gcc-4.x __always_inline
  workaround for block_div(), all of which are gone upstream.
- Abort the metadata transaction before setting the 'needs_check'
  flag, so that the abort cannot clear a freshly set flag.
- Cancel the waker delayed work before destroying the workqueue to
  avoid a use-after-free in destroy().
- Add cond_resched() to the deferred-bio, requeue and migration loops.
- Factor status feature reporting into emit_flags(), report
  no_discard_passdown there, and bump the target version to 2.2.0.

Illustrative sketches of the locking idiom, the new table line and the
changed status output follow the '---' marker below.
---
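Reviewer notes (everything between '---' and the diff is dropped by
git-am):

The irqsave-to-irq conversions assume every converted function runs in
process context (workqueues, target callbacks) with interrupts enabled,
never under a caller that already holds IRQs disabled. A minimal
contrast of the two idioms, using names from this patch (illustrative
fragment, not compilable on its own):

    unsigned long flags;

    /* Caller's IRQ state unknown: save it, restore it afterwards. */
    spin_lock_irqsave(&iot->lock, flags);
    iot->in_flight += len;
    spin_unlock_irqrestore(&iot->lock, flags);

    /* Caller known to have IRQs enabled: the unconditional (and
     * cheaper) disable/enable pair is sufficient. */
    spin_lock_irq(&iot->lock);
    iot->in_flight += len;
    spin_unlock_irq(&iot->lock);
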
 kernel/drivers/md/dm-cache-target.c |  248 ++++++++++++++++++++++++++++---------------------
 1 file changed, 141 insertions(+), 107 deletions(-)
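
The new no_discard_passdown feature argument is parsed alongside
writethrough/passthrough/metadata2 in parse_features() (the feature-arg
ceiling is raised from 2 to 3). A sketch of a cache table line using
it; device names and sizes are made up for illustration:

    dmsetup create cached --table \
        "0 41943040 cache /dev/mapper/meta /dev/mapper/fast /dev/mapper/slow 512 1 no_discard_passdown default 0"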

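emit_flags() also changes the status line: the feature count is now
computed as (metadata_version == 2) + !discard_passdown + 1, so a
metadata2 writeback cache with passdown disabled would report a
feature field like the following (surrounding status fields elided,
fragment illustrative only):

    ... 3 metadata2 writeback no_discard_passdown ...

where the old code could emit at most "2 metadata2 writeback".
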
diff --git a/kernel/drivers/md/dm-cache-target.c b/kernel/drivers/md/dm-cache-target.c
index 2ddd575..f98ad43 100644
--- a/kernel/drivers/md/dm-cache-target.c
+++ b/kernel/drivers/md/dm-cache-target.c
@@ -74,22 +74,19 @@
 static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
 {
 	bool r;
-	unsigned long flags;
 
-	spin_lock_irqsave(&iot->lock, flags);
+	spin_lock_irq(&iot->lock);
 	r = __iot_idle_for(iot, jifs);
-	spin_unlock_irqrestore(&iot->lock, flags);
+	spin_unlock_irq(&iot->lock);
 
 	return r;
 }
 
 static void iot_io_begin(struct io_tracker *iot, sector_t len)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&iot->lock, flags);
+	spin_lock_irq(&iot->lock);
 	iot->in_flight += len;
-	spin_unlock_irqrestore(&iot->lock, flags);
+	spin_unlock_irq(&iot->lock);
 }
 
 static void __iot_io_end(struct io_tracker *iot, sector_t len)
@@ -172,7 +169,6 @@
 {
 	struct batcher *b = container_of(_ws, struct batcher, commit_work);
 	blk_status_t r;
-	unsigned long flags;
 	struct list_head work_items;
 	struct work_struct *ws, *tmp;
 	struct continuation *k;
@@ -186,12 +182,12 @@
 	 * We have to grab these before the commit_op to avoid a race
 	 * condition.
 	 */
-	spin_lock_irqsave(&b->lock, flags);
+	spin_lock_irq(&b->lock);
 	list_splice_init(&b->work_items, &work_items);
 	bio_list_merge(&bios, &b->bios);
 	bio_list_init(&b->bios);
 	b->commit_scheduled = false;
-	spin_unlock_irqrestore(&b->lock, flags);
+	spin_unlock_irq(&b->lock);
 
 	r = b->commit_op(b->commit_context);
 
@@ -238,13 +234,12 @@
 
 static void continue_after_commit(struct batcher *b, struct continuation *k)
 {
-	unsigned long flags;
 	bool commit_scheduled;
 
-	spin_lock_irqsave(&b->lock, flags);
+	spin_lock_irq(&b->lock);
 	commit_scheduled = b->commit_scheduled;
 	list_add_tail(&k->ws.entry, &b->work_items);
-	spin_unlock_irqrestore(&b->lock, flags);
+	spin_unlock_irq(&b->lock);
 
 	if (commit_scheduled)
 		async_commit(b);
@@ -255,13 +250,12 @@
  */
 static void issue_after_commit(struct batcher *b, struct bio *bio)
 {
-       unsigned long flags;
        bool commit_scheduled;
 
-       spin_lock_irqsave(&b->lock, flags);
+       spin_lock_irq(&b->lock);
        commit_scheduled = b->commit_scheduled;
        bio_list_add(&b->bios, bio);
-       spin_unlock_irqrestore(&b->lock, flags);
+       spin_unlock_irq(&b->lock);
 
        if (commit_scheduled)
 	       async_commit(b);
@@ -273,12 +267,11 @@
 static void schedule_commit(struct batcher *b)
 {
 	bool immediate;
-	unsigned long flags;
 
-	spin_lock_irqsave(&b->lock, flags);
+	spin_lock_irq(&b->lock);
 	immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
 	b->commit_scheduled = true;
-	spin_unlock_irqrestore(&b->lock, flags);
+	spin_unlock_irq(&b->lock);
 
 	if (immediate)
 		async_commit(b);
@@ -353,6 +346,7 @@
 	enum cache_metadata_mode mode;
 	enum cache_io_mode io_mode;
 	unsigned metadata_version;
+	bool discard_passdown:1;
 };
 
 struct cache_stats {
@@ -426,8 +420,6 @@
 	struct bio_list deferred_bios;
 
 	struct rw_semaphore quiesce_lock;
-
-	struct dm_target_callbacks callbacks;
 
 	/*
 	 * origin_blocks entries, discarded if set.
@@ -629,23 +621,19 @@
 
 static void defer_bio(struct cache *cache, struct bio *bio)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	bio_list_add(&cache->deferred_bios, bio);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 
 	wake_deferred_bio_worker(cache);
 }
 
 static void defer_bios(struct cache *cache, struct bio_list *bios)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	bio_list_merge(&cache->deferred_bios, bios);
 	bio_list_init(bios);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 
 	wake_deferred_bio_worker(cache);
 }
@@ -724,10 +712,6 @@
 	return cache->sectors_per_block_shift >= 0;
 }
 
-/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
-#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
-__always_inline
-#endif
 static dm_block_t block_div(dm_block_t b, uint32_t n)
 {
 	do_div(b, n);
@@ -755,33 +739,27 @@
 
 static void set_discard(struct cache *cache, dm_dblock_t b)
 {
-	unsigned long flags;
-
 	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
 	atomic_inc(&cache->stats.discard_count);
 
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	set_bit(from_dblock(b), cache->discard_bitset);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 }
 
 static void clear_discard(struct cache *cache, dm_dblock_t b)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	clear_bit(from_dblock(b), cache->discard_bitset);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 }
 
 static bool is_discarded(struct cache *cache, dm_dblock_t b)
 {
 	int r;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	r = test_bit(from_dblock(b), cache->discard_bitset);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 
 	return r;
 }
@@ -789,12 +767,10 @@
 static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
 {
 	int r;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
 		     cache->discard_bitset);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 
 	return r;
 }
@@ -826,17 +802,16 @@
 
 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 {
-	unsigned long flags;
 	struct per_bio_data *pb;
 
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
 	    bio_op(bio) != REQ_OP_DISCARD) {
 		pb = get_per_bio_data(bio);
 		pb->tick = true;
 		cache->need_tick_bio = false;
 	}
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 }
 
 static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
@@ -905,7 +880,7 @@
 static void accounted_request(struct cache *cache, struct bio *bio)
 {
 	accounted_begin(cache, bio);
-	generic_make_request(bio);
+	submit_bio_noacct(bio);
 }
 
 static void issue_op(struct bio *bio, void *context)
@@ -946,7 +921,7 @@
 
 static const char *cache_device_name(struct cache *cache)
 {
-	return dm_device_name(dm_table_get_md(cache->ti->table));
+	return dm_table_device_name(cache->ti->table);
 }
 
 static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
@@ -1010,14 +985,14 @@
 	if (get_cache_mode(cache) >= CM_READ_ONLY)
 		return;
 
-	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
-		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
-		set_cache_mode(cache, CM_FAIL);
-	}
-
 	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
 	if (dm_cache_metadata_abort(cache->cmd)) {
 		DMERR("%s: failed to abort metadata transaction", dev_name);
+		set_cache_mode(cache, CM_FAIL);
+	}
+
+	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
 		set_cache_mode(cache, CM_FAIL);
 	}
 }
@@ -1811,7 +1786,7 @@
 	bool commit_needed;
 
 	if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
-		generic_make_request(bio);
+		submit_bio_noacct(bio);
 
 	return commit_needed;
 }
@@ -1875,7 +1850,11 @@
 		b = to_dblock(from_dblock(b) + 1);
 	}
 
-	bio_endio(bio);
+	if (cache->features.discard_passdown) {
+		remap_to_origin(cache, bio);
+		submit_bio_noacct(bio);
+	} else
+		bio_endio(bio);
 
 	return false;
 }
@@ -1884,17 +1863,16 @@
 {
 	struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
 
-	unsigned long flags;
 	bool commit_needed = false;
 	struct bio_list bios;
 	struct bio *bio;
 
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&cache->lock, flags);
+	spin_lock_irq(&cache->lock);
 	bio_list_merge(&bios, &cache->deferred_bios);
 	bio_list_init(&cache->deferred_bios);
-	spin_unlock_irqrestore(&cache->lock, flags);
+	spin_unlock_irq(&cache->lock);
 
 	while ((bio = bio_list_pop(&bios))) {
 		if (bio->bi_opf & REQ_PREFLUSH)
@@ -1905,6 +1883,7 @@
 
 		else
 			commit_needed = process_bio(cache, bio) || commit_needed;
+		cond_resched();
 	}
 
 	if (commit_needed)
@@ -1927,6 +1906,7 @@
 	while ((bio = bio_list_pop(&bios))) {
 		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
+		cond_resched();
 	}
 }
 
@@ -1967,6 +1947,8 @@
 		r = mg_start(cache, op, NULL);
 		if (r)
 			break;
+
+		cond_resched();
 	}
 }
 
@@ -1987,6 +1969,7 @@
 	if (cache->prison)
 		dm_bio_prison_destroy_v2(cache->prison);
 
+	cancel_delayed_work_sync(&cache->waker);
 	if (cache->wq)
 		destroy_workqueue(cache->wq);
 
@@ -2209,13 +2192,14 @@
 	cf->mode = CM_WRITE;
 	cf->io_mode = CM_IO_WRITEBACK;
 	cf->metadata_version = 1;
+	cf->discard_passdown = true;
 }
 
 static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
 			  char **error)
 {
 	static const struct dm_arg _args[] = {
-		{0, 2, "Invalid number of cache feature arguments"},
+		{0, 3, "Invalid number of cache feature arguments"},
 	};
 
 	int r, mode_ctr = 0;
@@ -2249,6 +2233,9 @@
 
 		else if (!strcasecmp(arg, "metadata2"))
 			cf->metadata_version = 2;
+
+		else if (!strcasecmp(arg, "no_discard_passdown"))
+			cf->discard_passdown = false;
 
 		else {
 			*error = "Unrecognised cache feature requested";
@@ -2435,20 +2422,6 @@
 	cache->cache_size = size;
 }
 
-static int is_congested(struct dm_dev *dev, int bdi_bits)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-	return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
-static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-	struct cache *cache = container_of(cb, struct cache, callbacks);
-
-	return is_congested(cache->origin_dev, bdi_bits) ||
-		is_congested(cache->cache_dev, bdi_bits);
-}
-
 #define DEFAULT_MIGRATION_THRESHOLD 2048
 
 static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2472,7 +2445,6 @@
 
 	ti->num_discard_bios = 1;
 	ti->discards_supported = true;
-	ti->split_discard_bios = false;
 
 	ti->per_io_data_size = sizeof(struct per_bio_data);
 
@@ -2483,9 +2455,6 @@
 		if (r)
 			goto bad;
 	}
-
-	cache->callbacks.congested_fn = cache_is_congested;
-	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
 
 	cache->metadata_dev = ca->metadata_dev;
 	cache->origin_dev = ca->origin_dev;
@@ -3096,6 +3065,39 @@
 	do_waker(&cache->waker.work);
 }
 
+static void emit_flags(struct cache *cache, char *result,
+		       unsigned maxlen, ssize_t *sz_ptr)
+{
+	ssize_t sz = *sz_ptr;
+	struct cache_features *cf = &cache->features;
+	unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
+
+	DMEMIT("%u ", count);
+
+	if (cf->metadata_version == 2)
+		DMEMIT("metadata2 ");
+
+	if (writethrough_mode(cache))
+		DMEMIT("writethrough ");
+
+	else if (passthrough_mode(cache))
+		DMEMIT("passthrough ");
+
+	else if (writeback_mode(cache))
+		DMEMIT("writeback ");
+
+	else {
+		DMEMIT("unknown ");
+		DMERR("%s: internal error: unknown io mode: %d",
+		      cache_device_name(cache), (int) cf->io_mode);
+	}
+
+	if (!cf->discard_passdown)
+		DMEMIT("no_discard_passdown ");
+
+	*sz_ptr = sz;
+}
+
 /*
  * Status format:
  *
@@ -3162,25 +3164,7 @@
 		       (unsigned) atomic_read(&cache->stats.promotion),
 		       (unsigned long) atomic_read(&cache->nr_dirty));
 
-		if (cache->features.metadata_version == 2)
-			DMEMIT("2 metadata2 ");
-		else
-			DMEMIT("1 ");
-
-		if (writethrough_mode(cache))
-			DMEMIT("writethrough ");
-
-		else if (passthrough_mode(cache))
-			DMEMIT("passthrough ");
-
-		else if (writeback_mode(cache))
-			DMEMIT("writeback ");
-
-		else {
-			DMERR("%s: internal error: unknown io mode: %d",
-			      cache_device_name(cache), (int) cache->features.io_mode);
-			goto err;
-		}
+		emit_flags(cache, result, maxlen, &sz);
 
 		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
 
@@ -3409,14 +3393,62 @@
 	return r;
 }
 
+static bool origin_dev_supports_discard(struct block_device *origin_bdev)
+{
+	struct request_queue *q = bdev_get_queue(origin_bdev);
+
+	return q && blk_queue_discard(q);
+}
+
+/*
+ * If discard_passdown was enabled verify that the origin device
+ * supports discards.  Disable discard_passdown if not.
+ */
+static void disable_passdown_if_not_supported(struct cache *cache)
+{
+	struct block_device *origin_bdev = cache->origin_dev->bdev;
+	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+	const char *reason = NULL;
+	char buf[BDEVNAME_SIZE];
+
+	if (!cache->features.discard_passdown)
+		return;
+
+	if (!origin_dev_supports_discard(origin_bdev))
+		reason = "discard unsupported";
+
+	else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
+		reason = "max discard sectors smaller than a block";
+
+	if (reason) {
+		DMWARN("Origin device (%s) %s: Disabling discard passdown.",
+		       bdevname(origin_bdev, buf), reason);
+		cache->features.discard_passdown = false;
+	}
+}
+
 static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
 {
+	struct block_device *origin_bdev = cache->origin_dev->bdev;
+	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+
+	if (!cache->features.discard_passdown) {
+		/* No passdown is done so setting own virtual limits */
+		limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+						    cache->origin_sectors);
+		limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
+		return;
+	}
+
 	/*
-	 * FIXME: these limits may be incompatible with the cache device
+	 * cache_iterate_devices() is stacking both origin and fast device limits
+	 * but discards aren't passed to fast device, so inherit origin's limits.
 	 */
-	limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
-					    cache->origin_sectors);
-	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
+	limits->max_discard_sectors = origin_limits->max_discard_sectors;
+	limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
+	limits->discard_granularity = origin_limits->discard_granularity;
+	limits->discard_alignment = origin_limits->discard_alignment;
+	limits->discard_misaligned = origin_limits->discard_misaligned;
 }
 
 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -3433,6 +3465,8 @@
 		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
 		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
 	}
+
+	disable_passdown_if_not_supported(cache);
 	set_discard_limits(cache, limits);
 }
 
@@ -3440,7 +3474,7 @@
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {2, 0, 0},
+	.version = {2, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,

--
Gitblit v1.6.2