2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/md/dm-zoned-reclaim.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -12,7 +13,6 @@
 
 struct dmz_reclaim {
 	struct dmz_metadata *metadata;
-	struct dmz_dev *dev;
 
 	struct delayed_work work;
 	struct workqueue_struct *wq;
@@ -20,6 +20,8 @@
 	struct dm_kcopyd_client *kc;
 	struct dm_kcopyd_throttle kc_throttle;
 	int kc_err;
+
+	int dev_idx;
 
 	unsigned long flags;
 
@@ -43,13 +45,13 @@
  * Percentage of unmapped (free) random zones below which reclaim starts
  * even if the target is busy.
  */
-#define DMZ_RECLAIM_LOW_UNMAP_RND 30
+#define DMZ_RECLAIM_LOW_UNMAP_ZONES 30
 
 /*
  * Percentage of unmapped (free) random zones above which reclaim will
  * stop if the target is busy.
  */
-#define DMZ_RECLAIM_HIGH_UNMAP_RND 50
+#define DMZ_RECLAIM_HIGH_UNMAP_ZONES 50
 
 /*
  * Align a sequential zone write pointer to chunk_block.
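
Note: the rename reflects that these watermarks now cover cache zones as well as random zones; the 30/50 values are unchanged. A minimal userspace sketch of the busy-path decision they drive (the harness and helper name below are illustrative, not driver code):

```c
#include <stdbool.h>
#include <stdio.h>

#define DMZ_RECLAIM_LOW_UNMAP_ZONES	30
#define DMZ_RECLAIM_HIGH_UNMAP_ZONES	50

/* Busy-path decision: below LOW, reclaim runs even under load; at or
 * above HIGH it is skipped; the band in between is left to the idle
 * path only.
 */
static bool busy_should_reclaim(unsigned int p_unmap)
{
	if (p_unmap >= DMZ_RECLAIM_HIGH_UNMAP_ZONES)
		return false;
	return p_unmap <= DMZ_RECLAIM_LOW_UNMAP_ZONES;
}

int main(void)
{
	unsigned int p;

	for (p = 0; p <= 100; p += 10)
		printf("%3u%% unmapped -> %s\n", p,
		       busy_should_reclaim(p) ? "reclaim" : "skip");
	return 0;
}
```
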
@@ -58,6 +60,7 @@
 				sector_t block)
 {
 	struct dmz_metadata *zmd = zrc->metadata;
+	struct dmz_dev *dev = zone->dev;
 	sector_t wp_block = zone->wp_block;
 	unsigned int nr_blocks;
 	int ret;
@@ -73,15 +76,15 @@
 	 * pointer and the requested position.
 	 */
 	nr_blocks = block - wp_block;
-	ret = blkdev_issue_zeroout(zrc->dev->bdev,
+	ret = blkdev_issue_zeroout(dev->bdev,
 				   dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
 				   dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
 	if (ret) {
-		dmz_dev_err(zrc->dev,
+		dmz_dev_err(dev,
 			    "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
-			    dmz_id(zmd, zone), (unsigned long long)wp_block,
+			    zone->id, (unsigned long long)wp_block,
 			    (unsigned long long)block, nr_blocks, ret);
-		dmz_check_bdev(zrc->dev);
+		dmz_check_bdev(dev);
 		return ret;
 	}
 
@@ -115,7 +118,6 @@
 			    struct dm_zone *src_zone, struct dm_zone *dst_zone)
 {
 	struct dmz_metadata *zmd = zrc->metadata;
-	struct dmz_dev *dev = zrc->dev;
 	struct dm_io_region src, dst;
 	sector_t block = 0, end_block;
 	sector_t nr_blocks;
@@ -127,7 +129,7 @@
 	if (dmz_is_seq(src_zone))
 		end_block = src_zone->wp_block;
 	else
-		end_block = dev->zone_nr_blocks;
+		end_block = dmz_zone_nr_blocks(zmd);
 	src_zone_block = dmz_start_block(zmd, src_zone);
 	dst_zone_block = dmz_start_block(zmd, dst_zone);
 
@@ -135,8 +137,13 @@
 		set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
 
 	while (block < end_block) {
-		if (dev->flags & DMZ_BDEV_DYING)
+		if (src_zone->dev->flags & DMZ_BDEV_DYING)
 			return -EIO;
+		if (dst_zone->dev->flags & DMZ_BDEV_DYING)
+			return -EIO;
+
+		if (dmz_reclaim_should_terminate(src_zone))
+			return -EINTR;
 
 		/* Get a valid region from the source zone */
 		ret = dmz_first_valid_block(zmd, src_zone, &block);
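
Note: with source and destination zones now possibly on different devices, the copy loop has to check both endpoints before each region, plus the new termination request. A hedged userspace sketch of the pattern (the struct names and flag value are stand-ins, not the driver's types):

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for struct dmz_dev / struct dm_zone. */
struct zone_dev { unsigned long flags; };
struct zone { struct zone_dev *dev; };

#define BDEV_DYING (1UL << 0)

/* Model of the per-iteration abort checks in dmz_reclaim_copy(). */
static int copy_step(struct zone *src, struct zone *dst, bool must_terminate)
{
	if (src->dev->flags & BDEV_DYING)
		return -EIO;	/* source device failed */
	if (dst->dev->flags & BDEV_DYING)
		return -EIO;	/* destination device failed */
	if (must_terminate)
		return -EINTR;	/* reclaim was asked to stop */
	/* ... copy one valid region here ... */
	return 0;
}

int main(void)
{
	struct zone_dev sd = { 0 }, dd = { BDEV_DYING };
	struct zone src = { &sd }, dst = { &dd };

	printf("copy_step -> %d (expect %d, i.e. -EIO)\n",
	       copy_step(&src, &dst, false), -EIO);
	return 0;
}
```
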
@@ -155,11 +162,11 @@
 			return ret;
 		}
 
-		src.bdev = dev->bdev;
+		src.bdev = src_zone->dev->bdev;
 		src.sector = dmz_blk2sect(src_zone_block + block);
 		src.count = dmz_blk2sect(nr_blocks);
 
-		dst.bdev = dev->bdev;
+		dst.bdev = dst_zone->dev->bdev;
 		dst.sector = dmz_blk2sect(dst_zone_block + block);
 		dst.count = src.count;
 
@@ -193,10 +200,10 @@
 	struct dmz_metadata *zmd = zrc->metadata;
 	int ret;
 
-	dmz_dev_debug(zrc->dev,
-		      "Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
-		      dzone->chunk, dmz_id(zmd, bzone), dmz_weight(bzone),
-		      dmz_id(zmd, dzone), dmz_weight(dzone));
+	DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
+		dmz_metadata_label(zmd), zrc->dev_idx,
+		dzone->chunk, bzone->id, dmz_weight(bzone),
+		dzone->id, dmz_weight(dzone));
 
 	/* Flush data zone into the buffer zone */
 	ret = dmz_reclaim_copy(zrc, bzone, dzone);
@@ -209,7 +216,7 @@
 	ret = dmz_merge_valid_blocks(zmd, bzone, dzone, chunk_block);
 	if (ret == 0) {
 		/* Free the buffer zone */
-		dmz_invalidate_blocks(zmd, bzone, 0, zrc->dev->zone_nr_blocks);
+		dmz_invalidate_blocks(zmd, bzone, 0, dmz_zone_nr_blocks(zmd));
 		dmz_lock_map(zmd);
 		dmz_unmap_zone(zmd, bzone);
 		dmz_unlock_zone_reclaim(dzone);
@@ -232,10 +239,10 @@
 	struct dmz_metadata *zmd = zrc->metadata;
 	int ret = 0;
 
-	dmz_dev_debug(zrc->dev,
-		      "Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
-		      chunk, dmz_id(zmd, dzone), dmz_weight(dzone),
-		      dmz_id(zmd, bzone), dmz_weight(bzone));
+	DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
+		dmz_metadata_label(zmd), zrc->dev_idx,
+		chunk, dzone->id, dmz_weight(dzone),
+		bzone->id, dmz_weight(bzone));
 
 	/* Flush data zone into the buffer zone */
 	ret = dmz_reclaim_copy(zrc, dzone, bzone);
@@ -251,7 +258,7 @@
 		 * Free the data zone and remap the chunk to
 		 * the buffer zone.
 		 */
-		dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks);
+		dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd));
 		dmz_lock_map(zmd);
 		dmz_unmap_zone(zmd, bzone);
 		dmz_unmap_zone(zmd, dzone);
@@ -276,18 +283,26 @@
 	struct dm_zone *szone = NULL;
 	struct dmz_metadata *zmd = zrc->metadata;
 	int ret;
+	int alloc_flags = DMZ_ALLOC_SEQ;
 
-	/* Get a free sequential zone */
+	/* Get a free random or sequential zone */
 	dmz_lock_map(zmd);
-	szone = dmz_alloc_zone(zmd, DMZ_ALLOC_RECLAIM);
+again:
+	szone = dmz_alloc_zone(zmd, zrc->dev_idx,
+			       alloc_flags | DMZ_ALLOC_RECLAIM);
+	if (!szone && alloc_flags == DMZ_ALLOC_SEQ && dmz_nr_cache_zones(zmd)) {
+		alloc_flags = DMZ_ALLOC_RND;
+		goto again;
+	}
 	dmz_unlock_map(zmd);
 	if (!szone)
 		return -ENOSPC;
 
-	dmz_dev_debug(zrc->dev,
-		      "Chunk %u, move rnd zone %u (weight %u) to seq zone %u",
-		      chunk, dmz_id(zmd, dzone), dmz_weight(dzone),
-		      dmz_id(zmd, szone));
+	DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
+		dmz_metadata_label(zmd), zrc->dev_idx, chunk,
+		dmz_is_cache(dzone) ? "cache" : "rnd",
+		dzone->id, dmz_weight(dzone),
+		dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
 
 	/* Flush the random data zone into the sequential zone */
 	ret = dmz_reclaim_copy(zrc, dzone, szone);
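
Note: the allocation now prefers a sequential target zone and retries with random zones only when cache zones exist, mirroring the `again:` loop above. A userspace sketch of the retry-with-relaxed-flags pattern (the pools and flag names are invented for illustration):

```c
#include <stdio.h>

enum alloc_flags { ALLOC_SEQ, ALLOC_RND };

/* Illustrative free-zone pools. */
static int free_seq_zones;
static int free_rnd_zones;
static int nr_cache_zones = 1;	/* pretend a cache device exists */

static int alloc_zone(enum alloc_flags flags)
{
	if (flags == ALLOC_SEQ && free_seq_zones > 0) {
		free_seq_zones--;
		return 1;
	}
	if (flags == ALLOC_RND && free_rnd_zones > 0) {
		free_rnd_zones--;
		return 1;
	}
	return 0;
}

/* Model of the "again:" fallback in dmz_reclaim_rnd_data(). */
static int alloc_target_zone(void)
{
	enum alloc_flags flags = ALLOC_SEQ;
again:
	if (alloc_zone(flags))
		return 0;
	if (flags == ALLOC_SEQ && nr_cache_zones) {
		flags = ALLOC_RND;
		goto again;
	}
	return -1;	/* -ENOSPC in the driver */
}

int main(void)
{
	free_seq_zones = 0;
	free_rnd_zones = 2;
	printf("alloc_target_zone -> %d (0 = fell back to rnd)\n",
	       alloc_target_zone());
	return 0;
}
```
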
@@ -305,7 +320,7 @@
 		dmz_unlock_map(zmd);
 	} else {
 		/* Free the data zone and remap the chunk */
-		dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks);
+		dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd));
 		dmz_lock_map(zmd);
 		dmz_unmap_zone(zmd, dzone);
 		dmz_unlock_zone_reclaim(dzone);
@@ -336,6 +351,14 @@
 }
 
 /*
+ * Test if the target device is idle.
+ */
+static inline int dmz_target_idle(struct dmz_reclaim *zrc)
+{
+	return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
+}
+
+/*
  * Find a candidate zone for reclaim and process it.
  */
 static int dmz_do_reclaim(struct dmz_reclaim *zrc)
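
Note: `dmz_target_idle()` is only moved up in the file so the reworked `dmz_get_zone_for_reclaim()` call can use it; the test itself is unchanged. A userspace analogue of the jiffies comparison (seconds stand in for jiffies, constants are sample values):

```c
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define IDLE_PERIOD_SEC 5	/* stand-in for DMZ_IDLE_PERIOD (jiffies) */

/* Analogue of time_is_before_jiffies(atime + DMZ_IDLE_PERIOD): the
 * target counts as idle once the last-access time plus one idle period
 * lies in the past.
 */
static bool target_idle(time_t atime)
{
	return atime + IDLE_PERIOD_SEC < time(NULL);
}

int main(void)
{
	time_t now = time(NULL);

	printf("just accessed: %s\n", target_idle(now) ? "idle" : "busy");
	printf("accessed 10s ago: %s\n",
	       target_idle(now - 10) ? "idle" : "busy");
	return 0;
}
```
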
@@ -347,13 +370,17 @@
 	int ret;
 
 	/* Get a data zone */
-	dzone = dmz_get_zone_for_reclaim(zmd);
-	if (!dzone)
+	dzone = dmz_get_zone_for_reclaim(zmd, zrc->dev_idx,
+					 dmz_target_idle(zrc));
+	if (!dzone) {
+		DMDEBUG("(%s/%u): No zone found to reclaim",
+			dmz_metadata_label(zmd), zrc->dev_idx);
 		return -EBUSY;
+	}
+	rzone = dzone;
 
 	start = jiffies;
-
-	if (dmz_is_rnd(dzone)) {
+	if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) {
 		if (!dmz_weight(dzone)) {
 			/* Empty zone */
 			dmz_reclaim_empty(zrc, dzone);
@@ -365,8 +392,6 @@
 			 */
 			ret = dmz_reclaim_rnd_data(zrc, dzone);
 		}
-		rzone = dzone;
-
 	} else {
 		struct dm_zone *bzone = dzone->bzone;
 		sector_t chunk_block = 0;
@@ -389,59 +414,86 @@
 			 * be later reclaimed.
 			 */
 			ret = dmz_reclaim_seq_data(zrc, dzone);
-			rzone = dzone;
 		}
 	}
 out:
 	if (ret) {
+		if (ret == -EINTR)
+			DMDEBUG("(%s/%u): reclaim zone %u interrupted",
+				dmz_metadata_label(zmd), zrc->dev_idx,
+				rzone->id);
+		else
+			DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
+				dmz_metadata_label(zmd), zrc->dev_idx,
+				rzone->id, ret);
 		dmz_unlock_zone_reclaim(dzone);
 		return ret;
 	}
 
 	ret = dmz_flush_metadata(zrc->metadata);
 	if (ret) {
-		dmz_dev_debug(zrc->dev,
-			      "Metadata flush for zone %u failed, err %d\n",
-			      dmz_id(zmd, rzone), ret);
+		DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
+			dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
 		return ret;
 	}
 
-	dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms",
-		      dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start));
+	DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
+		dmz_metadata_label(zmd), zrc->dev_idx,
+		rzone->id, jiffies_to_msecs(jiffies - start));
 	return 0;
 }
 
-/*
- * Test if the target device is idle.
- */
-static inline int dmz_target_idle(struct dmz_reclaim *zrc)
+static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
 {
-	return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
+	struct dmz_metadata *zmd = zrc->metadata;
+	unsigned int nr_cache = dmz_nr_cache_zones(zmd);
+	unsigned int nr_unmap, nr_zones;
+
+	if (nr_cache) {
+		nr_zones = nr_cache;
+		nr_unmap = dmz_nr_unmap_cache_zones(zmd);
+	} else {
+		nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
+		nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
+	}
+	if (nr_unmap <= 1)
+		return 0;
+	return nr_unmap * 100 / nr_zones;
 }
 
 /*
  * Test if reclaim is necessary.
  */
-static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
+static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
 {
-	struct dmz_metadata *zmd = zrc->metadata;
-	unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
-	unsigned int nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
-	unsigned int p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
+	unsigned int nr_reclaim;
+
+	nr_reclaim = dmz_nr_rnd_zones(zrc->metadata, zrc->dev_idx);
+
+	if (dmz_nr_cache_zones(zrc->metadata)) {
+		/*
+		 * The first device in a multi-device
+		 * setup only contains cache zones, so
+		 * never start reclaim there.
+		 */
+		if (zrc->dev_idx == 0)
+			return false;
+		nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
+	}
 
 	/* Reclaim when idle */
-	if (dmz_target_idle(zrc) && nr_unmap_rnd < nr_rnd)
+	if (dmz_target_idle(zrc) && nr_reclaim)
 		return true;
 
-	/* If there are still plenty of random zones, do not reclaim */
-	if (p_unmap_rnd >= DMZ_RECLAIM_HIGH_UNMAP_RND)
+	/* If there are still plenty of cache zones, do not reclaim */
+	if (p_unmap >= DMZ_RECLAIM_HIGH_UNMAP_ZONES)
		return false;
 
 	/*
-	 * If the percentage of unmappped random zones is low,
+	 * If the percentage of unmapped cache zones is low,
 	 * reclaim even if the target is busy.
 	 */
-	return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
+	return p_unmap <= DMZ_RECLAIM_LOW_UNMAP_ZONES;
 }
 
 /*
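
Note: `dmz_reclaim_percentage()` centralizes the percentage computation that previously lived inline in `dmz_reclaim_work()`, measuring the cache pool when one exists and otherwise the per-device random-zone pool, and guarding the division when almost nothing is unmapped. A worked userspace sketch (the pool sizes are made up):

```c
#include <stdio.h>

/* Model of dmz_reclaim_percentage(): measure the cache pool if it
 * exists, otherwise the per-device random-zone pool.
 */
static unsigned int reclaim_percentage(unsigned int nr_cache,
				       unsigned int nr_unmap_cache,
				       unsigned int nr_rnd,
				       unsigned int nr_unmap_rnd)
{
	unsigned int nr_zones, nr_unmap;

	if (nr_cache) {
		nr_zones = nr_cache;
		nr_unmap = nr_unmap_cache;
	} else {
		nr_zones = nr_rnd;
		nr_unmap = nr_unmap_rnd;
	}
	if (nr_unmap <= 1)
		return 0;	/* nothing worth reclaiming, avoid div issues */
	return nr_unmap * 100 / nr_zones;
}

int main(void)
{
	/* Cache setup: 64 cache zones, 16 unmapped -> 25%. */
	printf("%u%%\n", reclaim_percentage(64, 16, 0, 0));
	/* No cache: 128 random zones, 96 unmapped -> 75%. */
	printf("%u%%\n", reclaim_percentage(0, 0, 128, 96));
	return 0;
}
```
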
@@ -451,14 +503,14 @@
 {
 	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
 	struct dmz_metadata *zmd = zrc->metadata;
-	unsigned int nr_rnd, nr_unmap_rnd;
-	unsigned int p_unmap_rnd;
+	unsigned int p_unmap;
 	int ret;
 
-	if (dmz_bdev_is_dying(zrc->dev))
+	if (dmz_dev_is_dying(zmd))
 		return;
 
-	if (!dmz_should_reclaim(zrc)) {
+	p_unmap = dmz_reclaim_percentage(zrc);
+	if (!dmz_should_reclaim(zrc, p_unmap)) {
 		mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
 		return;
 	}
@@ -469,27 +521,26 @@
 	 * and slower if there are still some free random zones to avoid
 	 * as much as possible to negatively impact the user workload.
 	 */
-	nr_rnd = dmz_nr_rnd_zones(zmd);
-	nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
-	p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
-	if (dmz_target_idle(zrc) || p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) {
+	if (dmz_target_idle(zrc) || p_unmap < DMZ_RECLAIM_LOW_UNMAP_ZONES / 2) {
 		/* Idle or very low percentage: go fast */
 		zrc->kc_throttle.throttle = 100;
 	} else {
 		/* Busy but we still have some random zone: throttle */
-		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_rnd / 2);
+		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
 	}
 
-	dmz_dev_debug(zrc->dev,
-		      "Reclaim (%u): %s, %u%% free rnd zones (%u/%u)",
-		      zrc->kc_throttle.throttle,
-		      (dmz_target_idle(zrc) ? "Idle" : "Busy"),
-		      p_unmap_rnd, nr_unmap_rnd, nr_rnd);
+	DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
+		dmz_metadata_label(zmd), zrc->dev_idx,
+		zrc->kc_throttle.throttle,
+		(dmz_target_idle(zrc) ? "Idle" : "Busy"),
+		p_unmap, dmz_nr_unmap_cache_zones(zmd),
+		dmz_nr_cache_zones(zmd),
+		dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx),
+		dmz_nr_rnd_zones(zmd, zrc->dev_idx));
 
 	ret = dmz_do_reclaim(zrc);
-	if (ret) {
-		dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
-		if (!dmz_check_bdev(zrc->dev))
+	if (ret && ret != -EINTR) {
+		if (!dmz_check_dev(zmd))
 			return;
 	}
 
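
Note: the throttle formula is unchanged apart from its input: full speed when idle or when the free percentage drops below half the low watermark, otherwise capped at 75% and easing off as free space grows. A worked sketch of the arithmetic (the helper names are illustrative):

```c
#include <stdbool.h>
#include <stdio.h>

#define LOW_UNMAP_ZONES 30	/* DMZ_RECLAIM_LOW_UNMAP_ZONES */

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Model of the throttle selection in dmz_reclaim_work(). */
static unsigned int reclaim_throttle(bool idle, unsigned int p_unmap)
{
	if (idle || p_unmap < LOW_UNMAP_ZONES / 2)
		return 100;	/* idle or very low free zones: go fast */
	return min_u(75U, 100U - p_unmap / 2);
}

int main(void)
{
	printf("idle            -> %u%%\n", reclaim_throttle(true, 60));
	printf("busy, 10%% free  -> %u%%\n", reclaim_throttle(false, 10));
	printf("busy, 20%% free  -> %u%%\n", reclaim_throttle(false, 20));
	printf("busy, 60%% free  -> %u%%\n", reclaim_throttle(false, 60));
	return 0;
}
```

With 10% free the first branch fires (10 < 15) and reclaim runs at 100%; at 20% free it is capped at 75%; at 60% free it eases down to 70%.
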
@@ -499,8 +550,8 @@
 /*
  * Initialize reclaim.
  */
-int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd,
-		    struct dmz_reclaim **reclaim)
+int dmz_ctr_reclaim(struct dmz_metadata *zmd,
+		    struct dmz_reclaim **reclaim, int idx)
 {
 	struct dmz_reclaim *zrc;
 	int ret;
@@ -509,9 +560,9 @@
 	if (!zrc)
 		return -ENOMEM;
 
-	zrc->dev = dev;
 	zrc->metadata = zmd;
 	zrc->atime = jiffies;
+	zrc->dev_idx = idx;
 
 	/* Reclaim kcopyd client */
 	zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
@@ -523,8 +574,8 @@
 
 	/* Reclaim work */
 	INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
-	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM,
-					  dev->name);
+	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
+					  dmz_metadata_label(zmd), idx);
 	if (!zrc->wq) {
 		ret = -ENOMEM;
 		goto err;
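
Note: with one reclaim instance per device, the workqueue name gains the device index so the per-device workers can be told apart. A trivial sketch of the resulting name (label and index are sample values):

```c
#include <stdio.h>

int main(void)
{
	char name[32];
	const char *label = "dmz-test";	/* sample dmz_metadata_label() value */
	int idx = 1;			/* sample device index */

	/* Mirrors alloc_ordered_workqueue("dmz_rwq_%s_%d", ...). */
	snprintf(name, sizeof(name), "dmz_rwq_%s_%d", label, idx);
	printf("%s\n", name);	/* -> dmz_rwq_dmz-test_1 */
	return 0;
}
```
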
@@ -582,7 +633,8 @@
  */
 void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
 {
-	if (dmz_should_reclaim(zrc))
+	unsigned int p_unmap = dmz_reclaim_percentage(zrc);
+
+	if (dmz_should_reclaim(zrc, p_unmap))
 		mod_delayed_work(zrc->wq, &zrc->work, 0);
 }
-