| .. | .. |
|---|
| 26 | 26 | #include <linux/wait.h> |
|---|
| 27 | 27 | #include <linux/pr.h> |
|---|
| 28 | 28 | #include <linux/refcount.h> |
|---|
| 29 | +#include <linux/part_stat.h> |
|---|
| 29 | 30 | #include <linux/blk-crypto.h> |
|---|
| 30 | 31 | #include <linux/keyslot-manager.h> |
|---|
| 31 | 32 | |
|---|
| .. | .. |
|---|
| 148 | 149 | #define DM_NUMA_NODE NUMA_NO_NODE |
|---|
| 149 | 150 | static int dm_numa_node = DM_NUMA_NODE; |
|---|
| 150 | 151 | |
|---|
| 152 | +#define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE) |
|---|
| 153 | +static int swap_bios = DEFAULT_SWAP_BIOS; |
|---|
| 154 | +static int get_swap_bios(void) |
|---|
| 155 | +{ |
|---|
| 156 | + int latch = READ_ONCE(swap_bios); |
|---|
| 157 | + if (unlikely(latch <= 0)) |
|---|
| 158 | + latch = DEFAULT_SWAP_BIOS; |
|---|
| 159 | + return latch; |
|---|
| 160 | +} |
|---|
| 161 | + |
|---|
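Note on the hunk above: `DEFAULT_SWAP_BIOS` expresses the new swap-bio throttle as 8 MiB worth of pages, so with the common 4 KiB `PAGE_SIZE` (an assumption, not something the patch fixes) the default works out to `8 * 1048576 / 4096 = 2048` in-flight swap bios per device. `get_swap_bios()` re-reads the `swap_bios` module parameter with `READ_ONCE()` so it can be retuned at runtime, and quietly falls back to the default when the parameter is zero or negative.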
| 151 | 162 | /* |
|---|
| 152 | 163 | * For mempools pre-allocation at the table loading time. |
|---|
| 153 | 164 | */ |
|---|
| .. | .. |
|---|
| 161 | 172 | refcount_t count; |
|---|
| 162 | 173 | struct dm_dev dm_dev; |
|---|
| 163 | 174 | }; |
|---|
| 164 | | - |
|---|
| 165 | | -static struct kmem_cache *_rq_tio_cache; |
|---|
| 166 | | -static struct kmem_cache *_rq_cache; |
|---|
| 167 | 175 | |
|---|
| 168 | 176 | /* |
|---|
| 169 | 177 | * Bio-based DM's mempools' reserved IOs set by the user. |
|---|
| .. | .. |
|---|
| 226 | 234 | |
|---|
| 227 | 235 | static int __init local_init(void) |
|---|
| 228 | 236 | { |
|---|
| 229 | | - int r = -ENOMEM; |
|---|
| 230 | | - |
|---|
| 231 | | - _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); |
|---|
| 232 | | - if (!_rq_tio_cache) |
|---|
| 233 | | - return r; |
|---|
| 234 | | - |
|---|
| 235 | | - _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request), |
|---|
| 236 | | - __alignof__(struct request), 0, NULL); |
|---|
| 237 | | - if (!_rq_cache) |
|---|
| 238 | | - goto out_free_rq_tio_cache; |
|---|
| 237 | + int r; |
|---|
| 239 | 238 | |
|---|
| 240 | 239 | r = dm_uevent_init(); |
|---|
| 241 | 240 | if (r) |
|---|
| 242 | | - goto out_free_rq_cache; |
|---|
| 241 | + return r; |
|---|
| 243 | 242 | |
|---|
| 244 | 243 | deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); |
|---|
| 245 | 244 | if (!deferred_remove_workqueue) { |
|---|
| .. | .. |
|---|
| 261 | 260 | destroy_workqueue(deferred_remove_workqueue); |
|---|
| 262 | 261 | out_uevent_exit: |
|---|
| 263 | 262 | dm_uevent_exit(); |
|---|
| 264 | | -out_free_rq_cache: |
|---|
| 265 | | - kmem_cache_destroy(_rq_cache); |
|---|
| 266 | | -out_free_rq_tio_cache: |
|---|
| 267 | | - kmem_cache_destroy(_rq_tio_cache); |
|---|
| 268 | 263 | |
|---|
| 269 | 264 | return r; |
|---|
| 270 | 265 | } |
|---|
| 271 | 266 | |
|---|
| 272 | 267 | static void local_exit(void) |
|---|
| 273 | 268 | { |
|---|
| 274 | | - flush_scheduled_work(); |
|---|
| 275 | 269 | destroy_workqueue(deferred_remove_workqueue); |
|---|
| 276 | 270 | |
|---|
| 277 | | - kmem_cache_destroy(_rq_cache); |
|---|
| 278 | | - kmem_cache_destroy(_rq_tio_cache); |
|---|
| 279 | 271 | unregister_blkdev(_major, _name); |
|---|
| 280 | 272 | dm_uevent_exit(); |
|---|
| 281 | 273 | |
|---|
| .. | .. |
|---|
| 440 | 432 | dm_deferred_remove(); |
|---|
| 441 | 433 | } |
|---|
| 442 | 434 | |
|---|
| 443 | | -sector_t dm_get_size(struct mapped_device *md) |
|---|
| 444 | | -{ |
|---|
| 445 | | - return get_capacity(md->disk); |
|---|
| 446 | | -} |
|---|
| 447 | | - |
|---|
| 448 | | -struct request_queue *dm_get_md_queue(struct mapped_device *md) |
|---|
| 449 | | -{ |
|---|
| 450 | | - return md->queue; |
|---|
| 451 | | -} |
|---|
| 452 | | - |
|---|
| 453 | | -struct dm_stats *dm_get_stats(struct mapped_device *md) |
|---|
| 454 | | -{ |
|---|
| 455 | | - return &md->stats; |
|---|
| 456 | | -} |
|---|
| 457 | | - |
|---|
| 458 | 435 | static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
|---|
| 459 | 436 | { |
|---|
| 460 | 437 | struct mapped_device *md = bdev->bd_disk->private_data; |
|---|
| 461 | 438 | |
|---|
| 462 | 439 | return dm_get_geometry(md, geo); |
|---|
| 463 | 440 | } |
|---|
| 441 | + |
|---|
| 442 | +#ifdef CONFIG_BLK_DEV_ZONED |
|---|
| 443 | +int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data) |
|---|
| 444 | +{ |
|---|
| 445 | + struct dm_report_zones_args *args = data; |
|---|
| 446 | + sector_t sector_diff = args->tgt->begin - args->start; |
|---|
| 447 | + |
|---|
| 448 | + /* |
|---|
| 449 | + * Ignore zones beyond the target range. |
|---|
| 450 | + */ |
|---|
| 451 | + if (zone->start >= args->start + args->tgt->len) |
|---|
| 452 | + return 0; |
|---|
| 453 | + |
|---|
| 454 | + /* |
|---|
| 455 | + * Remap the start sector and write pointer position of the zone |
|---|
| 456 | + * to match its position in the target range. |
|---|
| 457 | + */ |
|---|
| 458 | + zone->start += sector_diff; |
|---|
| 459 | + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { |
|---|
| 460 | + if (zone->cond == BLK_ZONE_COND_FULL) |
|---|
| 461 | + zone->wp = zone->start + zone->len; |
|---|
| 462 | + else if (zone->cond == BLK_ZONE_COND_EMPTY) |
|---|
| 463 | + zone->wp = zone->start; |
|---|
| 464 | + else |
|---|
| 465 | + zone->wp += sector_diff; |
|---|
| 466 | + } |
|---|
| 467 | + |
|---|
| 468 | + args->next_sector = zone->start + zone->len; |
|---|
| 469 | + return args->orig_cb(zone, args->zone_idx++, args->orig_data); |
|---|
| 470 | +} |
|---|
| 471 | +EXPORT_SYMBOL_GPL(dm_report_zones_cb); |
|---|
| 472 | + |
|---|
| 473 | +static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, |
|---|
| 474 | + unsigned int nr_zones, report_zones_cb cb, void *data) |
|---|
| 475 | +{ |
|---|
| 476 | + struct mapped_device *md = disk->private_data; |
|---|
| 477 | + struct dm_table *map; |
|---|
| 478 | + int srcu_idx, ret; |
|---|
| 479 | + struct dm_report_zones_args args = { |
|---|
| 480 | + .next_sector = sector, |
|---|
| 481 | + .orig_data = data, |
|---|
| 482 | + .orig_cb = cb, |
|---|
| 483 | + }; |
|---|
| 484 | + |
|---|
| 485 | + if (dm_suspended_md(md)) |
|---|
| 486 | + return -EAGAIN; |
|---|
| 487 | + |
|---|
| 488 | + map = dm_get_live_table(md, &srcu_idx); |
|---|
| 489 | + if (!map) { |
|---|
| 490 | + ret = -EIO; |
|---|
| 491 | + goto out; |
|---|
| 492 | + } |
|---|
| 493 | + |
|---|
| 494 | + do { |
|---|
| 495 | + struct dm_target *tgt; |
|---|
| 496 | + |
|---|
| 497 | + tgt = dm_table_find_target(map, args.next_sector); |
|---|
| 498 | + if (WARN_ON_ONCE(!tgt->type->report_zones)) { |
|---|
| 499 | + ret = -EIO; |
|---|
| 500 | + goto out; |
|---|
| 501 | + } |
|---|
| 502 | + |
|---|
| 503 | + args.tgt = tgt; |
|---|
| 504 | + ret = tgt->type->report_zones(tgt, &args, |
|---|
| 505 | + nr_zones - args.zone_idx); |
|---|
| 506 | + if (ret < 0) |
|---|
| 507 | + goto out; |
|---|
| 508 | + } while (args.zone_idx < nr_zones && |
|---|
| 509 | + args.next_sector < get_capacity(disk)); |
|---|
| 510 | + |
|---|
| 511 | + ret = args.zone_idx; |
|---|
| 512 | +out: |
|---|
| 513 | + dm_put_live_table(md, srcu_idx); |
|---|
| 514 | + return ret; |
|---|
| 515 | +} |
|---|
| 516 | +#else |
|---|
| 517 | +#define dm_blk_report_zones NULL |
|---|
| 518 | +#endif /* CONFIG_BLK_DEV_ZONED */ |
|---|
| 464 | 519 | |
|---|
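The new `dm_report_zones_cb()` expects the calling target to record where its mapping begins on the backing device in `args->start`; the callback then shifts every zone by `ti->begin - args->start` so the report is expressed in mapped-device sectors. Below is a minimal sketch of how a simple remapping target might wire its `->report_zones` hook to this helper; the target name, context struct and fields are illustrative assumptions (not part of this patch), and it assumes the callback-style `blkdev_report_zones()` signature consistent with the `report_zones_cb` type used in this diff.

```c
/* Hypothetical remapping target: forward the report and let DM remap it. */
static int example_report_zones(struct dm_target *ti,
		struct dm_report_zones_args *args, unsigned int nr_zones)
{
	struct example_ctx *ec = ti->private;	/* assumed target context */
	/* Sector on the backing device corresponding to args->next_sector. */
	sector_t sector = ec->start + dm_target_offset(ti, args->next_sector);

	/* Tell dm_report_zones_cb() where this mapping starts on the backing device. */
	args->start = ec->start;
	return blkdev_report_zones(ec->dev->bdev, sector, nr_zones,
				   dm_report_zones_cb, args);
}
```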
| 465 | 520 | static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx, |
|---|
| 466 | 521 | struct block_device **bdev) |
|---|
| .. | .. |
|---|
| 531 | 586 | return r; |
|---|
| 532 | 587 | } |
|---|
| 533 | 588 | |
|---|
| 534 | | -static void start_io_acct(struct dm_io *io); |
|---|
| 589 | +u64 dm_start_time_ns_from_clone(struct bio *bio) |
|---|
| 590 | +{ |
|---|
| 591 | + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); |
|---|
| 592 | + struct dm_io *io = tio->io; |
|---|
| 593 | + |
|---|
| 594 | + return jiffies_to_nsecs(io->start_time); |
|---|
| 595 | +} |
|---|
| 596 | +EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); |
|---|
| 597 | + |
|---|
| 598 | +static void start_io_acct(struct dm_io *io) |
|---|
| 599 | +{ |
|---|
| 600 | + struct mapped_device *md = io->md; |
|---|
| 601 | + struct bio *bio = io->orig_bio; |
|---|
| 602 | + |
|---|
| 603 | + io->start_time = bio_start_io_acct(bio); |
|---|
| 604 | + if (unlikely(dm_stats_used(&md->stats))) |
|---|
| 605 | + dm_stats_account_io(&md->stats, bio_data_dir(bio), |
|---|
| 606 | + bio->bi_iter.bi_sector, bio_sectors(bio), |
|---|
| 607 | + false, 0, &io->stats_aux); |
|---|
| 608 | +} |
|---|
| 609 | + |
|---|
| 610 | +static void end_io_acct(struct mapped_device *md, struct bio *bio, |
|---|
| 611 | + unsigned long start_time, struct dm_stats_aux *stats_aux) |
|---|
| 612 | +{ |
|---|
| 613 | + unsigned long duration = jiffies - start_time; |
|---|
| 614 | + |
|---|
| 615 | + if (unlikely(dm_stats_used(&md->stats))) |
|---|
| 616 | + dm_stats_account_io(&md->stats, bio_data_dir(bio), |
|---|
| 617 | + bio->bi_iter.bi_sector, bio_sectors(bio), |
|---|
| 618 | + true, duration, stats_aux); |
|---|
| 619 | + |
|---|
| 620 | + smp_wmb(); |
|---|
| 621 | + |
|---|
| 622 | + bio_end_io_acct(bio, start_time); |
|---|
| 623 | + |
|---|
| 624 | + /* nudge anyone waiting on suspend queue */ |
|---|
| 625 | + if (unlikely(wq_has_sleeper(&md->wait))) |
|---|
| 626 | + wake_up(&md->wait); |
|---|
| 627 | +} |
|---|
| 535 | 628 | |
|---|
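With this rework, per-bio accounting goes through the generic block helpers: `bio_start_io_acct()` returns the start time stored in `io->start_time`, and `bio_end_io_acct()` handles completion-side accounting, replacing the `generic_start_io_acct()`/`generic_end_io_acct()` calls and the `md->pending[]`/`part0.in_flight[]` bookkeeping that the hunks further down delete. The wake-up of `md->wait` now keys off `wq_has_sleeper()` rather than a private pending count, since in-flight tracking lives in the block layer; the `smp_wmb()` is presumably there to publish the accounting updates before any suspend waiter proceeds.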
| 536 | 629 | static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) |
|---|
| 537 | 630 | { |
|---|
| .. | .. |
|---|
| 595 | 688 | if (tio->inside_dm_io) |
|---|
| 596 | 689 | return; |
|---|
| 597 | 690 | bio_put(&tio->clone); |
|---|
| 598 | | -} |
|---|
| 599 | | - |
|---|
| 600 | | -int md_in_flight(struct mapped_device *md) |
|---|
| 601 | | -{ |
|---|
| 602 | | - return atomic_read(&md->pending[READ]) + |
|---|
| 603 | | - atomic_read(&md->pending[WRITE]); |
|---|
| 604 | | -} |
|---|
| 605 | | - |
|---|
| 606 | | -static void start_io_acct(struct dm_io *io) |
|---|
| 607 | | -{ |
|---|
| 608 | | - struct mapped_device *md = io->md; |
|---|
| 609 | | - struct bio *bio = io->orig_bio; |
|---|
| 610 | | - int rw = bio_data_dir(bio); |
|---|
| 611 | | - |
|---|
| 612 | | - io->start_time = jiffies; |
|---|
| 613 | | - |
|---|
| 614 | | - generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), |
|---|
| 615 | | - &dm_disk(md)->part0); |
|---|
| 616 | | - |
|---|
| 617 | | - atomic_set(&dm_disk(md)->part0.in_flight[rw], |
|---|
| 618 | | - atomic_inc_return(&md->pending[rw])); |
|---|
| 619 | | - |
|---|
| 620 | | - if (unlikely(dm_stats_used(&md->stats))) |
|---|
| 621 | | - dm_stats_account_io(&md->stats, bio_data_dir(bio), |
|---|
| 622 | | - bio->bi_iter.bi_sector, bio_sectors(bio), |
|---|
| 623 | | - false, 0, &io->stats_aux); |
|---|
| 624 | | -} |
|---|
| 625 | | - |
|---|
| 626 | | -static void end_io_acct(struct dm_io *io) |
|---|
| 627 | | -{ |
|---|
| 628 | | - struct mapped_device *md = io->md; |
|---|
| 629 | | - struct bio *bio = io->orig_bio; |
|---|
| 630 | | - unsigned long duration = jiffies - io->start_time; |
|---|
| 631 | | - int pending; |
|---|
| 632 | | - int rw = bio_data_dir(bio); |
|---|
| 633 | | - |
|---|
| 634 | | - generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, |
|---|
| 635 | | - io->start_time); |
|---|
| 636 | | - |
|---|
| 637 | | - if (unlikely(dm_stats_used(&md->stats))) |
|---|
| 638 | | - dm_stats_account_io(&md->stats, bio_data_dir(bio), |
|---|
| 639 | | - bio->bi_iter.bi_sector, bio_sectors(bio), |
|---|
| 640 | | - true, duration, &io->stats_aux); |
|---|
| 641 | | - |
|---|
| 642 | | - /* |
|---|
| 643 | | - * After this is decremented the bio must not be touched if it is |
|---|
| 644 | | - * a flush. |
|---|
| 645 | | - */ |
|---|
| 646 | | - pending = atomic_dec_return(&md->pending[rw]); |
|---|
| 647 | | - atomic_set(&dm_disk(md)->part0.in_flight[rw], pending); |
|---|
| 648 | | - pending += atomic_read(&md->pending[rw^0x1]); |
|---|
| 649 | | - |
|---|
| 650 | | - /* nudge anyone waiting on suspend queue */ |
|---|
| 651 | | - if (!pending) |
|---|
| 652 | | - wake_up(&md->wait); |
|---|
| 653 | 691 | } |
|---|
| 654 | 692 | |
|---|
| 655 | 693 | /* |
|---|
| .. | .. |
|---|
| 748 | 786 | } |
|---|
| 749 | 787 | |
|---|
| 750 | 788 | static struct table_device *find_table_device(struct list_head *l, dev_t dev, |
|---|
| 751 | | - fmode_t mode) { |
|---|
| 789 | + fmode_t mode) |
|---|
| 790 | +{ |
|---|
| 752 | 791 | struct table_device *td; |
|---|
| 753 | 792 | |
|---|
| 754 | 793 | list_for_each_entry(td, l, list) |
|---|
| .. | .. |
|---|
| 759 | 798 | } |
|---|
| 760 | 799 | |
|---|
| 761 | 800 | int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, |
|---|
| 762 | | - struct dm_dev **result) { |
|---|
| 801 | + struct dm_dev **result) |
|---|
| 802 | +{ |
|---|
| 763 | 803 | int r; |
|---|
| 764 | 804 | struct table_device *td; |
|---|
| 765 | 805 | |
|---|
| .. | .. |
|---|
| 864 | 904 | blk_status_t io_error; |
|---|
| 865 | 905 | struct bio *bio; |
|---|
| 866 | 906 | struct mapped_device *md = io->md; |
|---|
| 907 | + unsigned long start_time = 0; |
|---|
| 908 | + struct dm_stats_aux stats_aux; |
|---|
| 867 | 909 | |
|---|
| 868 | 910 | /* Push-back supersedes any I/O errors */ |
|---|
| 869 | 911 | if (unlikely(error)) { |
|---|
| .. | .. |
|---|
| 890 | 932 | |
|---|
| 891 | 933 | io_error = io->status; |
|---|
| 892 | 934 | bio = io->orig_bio; |
|---|
| 893 | | - end_io_acct(io); |
|---|
| 935 | + start_time = io->start_time; |
|---|
| 936 | + stats_aux = io->stats_aux; |
|---|
| 894 | 937 | free_io(md, io); |
|---|
| 938 | + end_io_acct(md, bio, start_time, &stats_aux); |
|---|
| 895 | 939 | |
|---|
| 896 | 940 | if (io_error == BLK_STS_DM_REQUEUE) |
|---|
| 897 | 941 | return; |
|---|
| .. | .. |
|---|
| 937 | 981 | limits->max_write_zeroes_sectors = 0; |
|---|
| 938 | 982 | } |
|---|
| 939 | 983 | |
|---|
| 984 | +static bool swap_bios_limit(struct dm_target *ti, struct bio *bio) |
|---|
| 985 | +{ |
|---|
| 986 | + return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios); |
|---|
| 987 | +} |
|---|
| 988 | + |
|---|
| 940 | 989 | static void clone_endio(struct bio *bio) |
|---|
| 941 | 990 | { |
|---|
| 942 | 991 | blk_status_t error = bio->bi_status; |
|---|
| .. | .. |
|---|
| 944 | 993 | struct dm_io *io = tio->io; |
|---|
| 945 | 994 | struct mapped_device *md = tio->io->md; |
|---|
| 946 | 995 | dm_endio_fn endio = tio->ti->type->end_io; |
|---|
| 996 | + struct bio *orig_bio = io->orig_bio; |
|---|
| 947 | 997 | |
|---|
| 948 | | - if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { |
|---|
| 998 | + if (unlikely(error == BLK_STS_TARGET)) { |
|---|
| 949 | 999 | if (bio_op(bio) == REQ_OP_DISCARD && |
|---|
| 950 | 1000 | !bio->bi_disk->queue->limits.max_discard_sectors) |
|---|
| 951 | 1001 | disable_discard(md); |
|---|
| .. | .. |
|---|
| 957 | 1007 | disable_write_zeroes(md); |
|---|
| 958 | 1008 | } |
|---|
| 959 | 1009 | |
|---|
| 1010 | + /* |
|---|
| 1011 | + * For zone-append bios, get the offset within the zone of the written |
|---|
| 1012 | + * sector and add that to the original bio's sector position. |

|---|
| 1013 | + */ |
|---|
| 1014 | + if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) { |
|---|
| 1015 | + sector_t written_sector = bio->bi_iter.bi_sector; |
|---|
| 1016 | + struct request_queue *q = orig_bio->bi_disk->queue; |
|---|
| 1017 | + u64 mask = (u64)blk_queue_zone_sectors(q) - 1; |
|---|
| 1018 | + |
|---|
| 1019 | + orig_bio->bi_iter.bi_sector += written_sector & mask; |
|---|
| 1020 | + } |
|---|
| 1021 | + |
|---|
| 960 | 1022 | if (endio) { |
|---|
| 961 | 1023 | int r = endio(tio->ti, bio, &error); |
|---|
| 962 | 1024 | switch (r) { |
|---|
| 963 | 1025 | case DM_ENDIO_REQUEUE: |
|---|
| 964 | 1026 | error = BLK_STS_DM_REQUEUE; |
|---|
| 965 | | - /*FALLTHRU*/ |
|---|
| 1027 | + fallthrough; |
|---|
| 966 | 1028 | case DM_ENDIO_DONE: |
|---|
| 967 | 1029 | break; |
|---|
| 968 | 1030 | case DM_ENDIO_INCOMPLETE: |
|---|
| .. | .. |
|---|
| 974 | 1036 | } |
|---|
| 975 | 1037 | } |
|---|
| 976 | 1038 | |
|---|
| 1039 | + if (unlikely(swap_bios_limit(tio->ti, bio))) { |
|---|
| 1040 | + struct mapped_device *md = io->md; |
|---|
| 1041 | + up(&md->swap_bios_semaphore); |
|---|
| 1042 | + } |
|---|
| 1043 | + |
|---|
| 977 | 1044 | free_tio(tio); |
|---|
| 978 | 1045 | dec_pending(io, error); |
|---|
| 979 | 1046 | } |
|---|
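Two additions to `clone_endio()` above are worth unpacking. For `REQ_OP_ZONE_APPEND`, the backing device reports the sector it actually wrote in `bio->bi_iter.bi_sector`; since zone sizes are a power of two, `written_sector & (zone_sectors - 1)` is the offset inside the zone, which is added to the original bio's sector so the caller sees the write location in mapped-device terms. As a worked example (illustrative numbers): with 524288-sector zones and a completion sector of 1048600, the mask yields an in-zone offset of 24, so the original bio's sector advances by 24. The `up(&md->swap_bios_semaphore)` simply returns the throttle slot taken in `__map_bio()` (see the later hunk) once a swap bio completes.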
| .. | .. |
|---|
| 982 | 1049 | * Return maximum size of I/O possible at the supplied sector up to the current |
|---|
| 983 | 1050 | * target boundary. |
|---|
| 984 | 1051 | */ |
|---|
| 985 | | -static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) |
|---|
| 1052 | +static inline sector_t max_io_len_target_boundary(struct dm_target *ti, |
|---|
| 1053 | + sector_t target_offset) |
|---|
| 986 | 1054 | { |
|---|
| 987 | | - sector_t target_offset = dm_target_offset(ti, sector); |
|---|
| 988 | | - |
|---|
| 989 | 1055 | return ti->len - target_offset; |
|---|
| 990 | 1056 | } |
|---|
| 991 | 1057 | |
|---|
| 992 | | -static sector_t max_io_len(sector_t sector, struct dm_target *ti) |
|---|
| 1058 | +static sector_t max_io_len(struct dm_target *ti, sector_t sector) |
|---|
| 993 | 1059 | { |
|---|
| 994 | | - sector_t len = max_io_len_target_boundary(sector, ti); |
|---|
| 995 | | - sector_t offset, max_len; |
|---|
| 1060 | + sector_t target_offset = dm_target_offset(ti, sector); |
|---|
| 1061 | + sector_t len = max_io_len_target_boundary(ti, target_offset); |
|---|
| 1062 | + sector_t max_len; |
|---|
| 996 | 1063 | |
|---|
| 997 | 1064 | /* |
|---|
| 998 | | - * Does the target need to split even further? |
|---|
| 1065 | + * Does the target need to split IO even further? |
|---|
| 1066 | + * - varied (per target) IO splitting is a tenet of DM; this |
|---|
| 1067 | + * explains why stacked chunk_sectors based splitting via |
|---|
| 1068 | + * blk_max_size_offset() isn't possible here. So pass in |
|---|
| 1069 | + * ti->max_io_len to override stacked chunk_sectors. |
|---|
| 999 | 1070 | */ |
|---|
| 1000 | 1071 | if (ti->max_io_len) { |
|---|
| 1001 | | - offset = dm_target_offset(ti, sector); |
|---|
| 1002 | | - if (unlikely(ti->max_io_len & (ti->max_io_len - 1))) |
|---|
| 1003 | | - max_len = sector_div(offset, ti->max_io_len); |
|---|
| 1004 | | - else |
|---|
| 1005 | | - max_len = offset & (ti->max_io_len - 1); |
|---|
| 1006 | | - max_len = ti->max_io_len - max_len; |
|---|
| 1007 | | - |
|---|
| 1072 | + max_len = blk_max_size_offset(ti->table->md->queue, |
|---|
| 1073 | + target_offset, ti->max_io_len); |
|---|
| 1008 | 1074 | if (len > max_len) |
|---|
| 1009 | 1075 | len = max_len; |
|---|
| 1010 | 1076 | } |
|---|
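The open-coded power-of-two/`sector_div()` logic in `max_io_len()` is replaced by `blk_max_size_offset()`, with `ti->max_io_len` passed in to override any stacked `chunk_sectors`, as the comment explains. Rough worked example (assuming a power-of-two `max_io_len` and ignoring the queue's own `max_sectors` cap): with `ti->max_io_len == 128` and `target_offset == 100`, the distance to the next 128-sector boundary is 28, so the I/O is clamped to `min(len, 28)` sectors, exactly what the removed code computed by hand.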
| .. | .. |
|---|
| 1039 | 1105 | return NULL; |
|---|
| 1040 | 1106 | |
|---|
| 1041 | 1107 | ti = dm_table_find_target(map, sector); |
|---|
| 1042 | | - if (!dm_target_is_valid(ti)) |
|---|
| 1108 | + if (!ti) |
|---|
| 1043 | 1109 | return NULL; |
|---|
| 1044 | 1110 | |
|---|
| 1045 | 1111 | return ti; |
|---|
| .. | .. |
|---|
| 1060 | 1126 | goto out; |
|---|
| 1061 | 1127 | if (!ti->type->direct_access) |
|---|
| 1062 | 1128 | goto out; |
|---|
| 1063 | | - len = max_io_len(sector, ti) / PAGE_SECTORS; |
|---|
| 1129 | + len = max_io_len(ti, sector) / PAGE_SECTORS; |
|---|
| 1064 | 1130 | if (len < 1) |
|---|
| 1065 | 1131 | goto out; |
|---|
| 1066 | 1132 | nr_pages = min(len, nr_pages); |
|---|
| 1067 | 1133 | ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); |
|---|
| 1068 | 1134 | |
|---|
| 1069 | 1135 | out: |
|---|
| 1136 | + dm_put_live_table(md, srcu_idx); |
|---|
| 1137 | + |
|---|
| 1138 | + return ret; |
|---|
| 1139 | +} |
|---|
| 1140 | + |
|---|
| 1141 | +static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev, |
|---|
| 1142 | + int blocksize, sector_t start, sector_t len) |
|---|
| 1143 | +{ |
|---|
| 1144 | + struct mapped_device *md = dax_get_private(dax_dev); |
|---|
| 1145 | + struct dm_table *map; |
|---|
| 1146 | + bool ret = false; |
|---|
| 1147 | + int srcu_idx; |
|---|
| 1148 | + |
|---|
| 1149 | + map = dm_get_live_table(md, &srcu_idx); |
|---|
| 1150 | + if (!map) |
|---|
| 1151 | + goto out; |
|---|
| 1152 | + |
|---|
| 1153 | + ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize); |
|---|
| 1154 | + |
|---|
| 1155 | +out: |
|---|
| 1070 | 1156 | dm_put_live_table(md, srcu_idx); |
|---|
| 1071 | 1157 | |
|---|
| 1072 | 1158 | return ret; |
|---|
| .. | .. |
|---|
| 1120 | 1206 | return ret; |
|---|
| 1121 | 1207 | } |
|---|
| 1122 | 1208 | |
|---|
| 1209 | +static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, |
|---|
| 1210 | + size_t nr_pages) |
|---|
| 1211 | +{ |
|---|
| 1212 | + struct mapped_device *md = dax_get_private(dax_dev); |
|---|
| 1213 | + sector_t sector = pgoff * PAGE_SECTORS; |
|---|
| 1214 | + struct dm_target *ti; |
|---|
| 1215 | + int ret = -EIO; |
|---|
| 1216 | + int srcu_idx; |
|---|
| 1217 | + |
|---|
| 1218 | + ti = dm_dax_get_live_target(md, sector, &srcu_idx); |
|---|
| 1219 | + |
|---|
| 1220 | + if (!ti) |
|---|
| 1221 | + goto out; |
|---|
| 1222 | + if (WARN_ON(!ti->type->dax_zero_page_range)) { |
|---|
| 1223 | + /* |
|---|
| 1224 | + * ->zero_page_range() is a mandatory dax operation. If we are |
|---|
| 1225 | + * here, something is wrong. |
|---|
| 1226 | + */ |
|---|
| 1227 | + goto out; |
|---|
| 1228 | + } |
|---|
| 1229 | + ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages); |
|---|
| 1230 | + out: |
|---|
| 1231 | + dm_put_live_table(md, srcu_idx); |
|---|
| 1232 | + |
|---|
| 1233 | + return ret; |
|---|
| 1234 | +} |
|---|
| 1235 | + |
|---|
| 1123 | 1236 | /* |
|---|
| 1124 | 1237 | * A target may call dm_accept_partial_bio only from the map routine. It is |
|---|
| 1125 | | - * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET. |
|---|
| 1238 | + * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management |
|---|
| 1239 | + * operations and REQ_OP_ZONE_APPEND (zone append writes). |
|---|
| 1126 | 1240 | * |
|---|
| 1127 | 1241 | * dm_accept_partial_bio informs the dm that the target only wants to process |
|---|
| 1128 | 1242 | * additional n_sectors sectors of the bio and the rest of the data should be |
|---|
| .. | .. |
|---|
| 1152 | 1266 | { |
|---|
| 1153 | 1267 | struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); |
|---|
| 1154 | 1268 | unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; |
|---|
| 1269 | + |
|---|
| 1155 | 1270 | BUG_ON(bio->bi_opf & REQ_PREFLUSH); |
|---|
| 1271 | + BUG_ON(op_is_zone_mgmt(bio_op(bio))); |
|---|
| 1272 | + BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND); |
|---|
| 1156 | 1273 | BUG_ON(bi_size > *tio->len_ptr); |
|---|
| 1157 | 1274 | BUG_ON(n_sectors > bi_size); |
|---|
| 1275 | + |
|---|
| 1158 | 1276 | *tio->len_ptr -= bi_size - n_sectors; |
|---|
| 1159 | 1277 | bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; |
|---|
| 1160 | 1278 | } |
|---|
| 1161 | 1279 | EXPORT_SYMBOL_GPL(dm_accept_partial_bio); |
|---|
| 1162 | 1280 | |
|---|
| 1163 | | -/* |
|---|
| 1164 | | - * The zone descriptors obtained with a zone report indicate zone positions |
|---|
| 1165 | | - * within the target backing device, regardless of that device is a partition |
|---|
| 1166 | | - * and regardless of the target mapping start sector on the device or partition. |
|---|
| 1167 | | - * The zone descriptors start sector and write pointer position must be adjusted |
|---|
| 1168 | | - * to match their relative position within the dm device. |
|---|
| 1169 | | - * A target may call dm_remap_zone_report() after completion of a |
|---|
| 1170 | | - * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the |
|---|
| 1171 | | - * backing device. |
|---|
| 1172 | | - */ |
|---|
| 1173 | | -void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) |
|---|
| 1281 | +static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) |
|---|
| 1174 | 1282 | { |
|---|
| 1175 | | -#ifdef CONFIG_BLK_DEV_ZONED |
|---|
| 1176 | | - struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); |
|---|
| 1177 | | - struct bio *report_bio = tio->io->orig_bio; |
|---|
| 1178 | | - struct blk_zone_report_hdr *hdr = NULL; |
|---|
| 1179 | | - struct blk_zone *zone; |
|---|
| 1180 | | - unsigned int nr_rep = 0; |
|---|
| 1181 | | - unsigned int ofst; |
|---|
| 1182 | | - sector_t part_offset; |
|---|
| 1183 | | - struct bio_vec bvec; |
|---|
| 1184 | | - struct bvec_iter iter; |
|---|
| 1185 | | - void *addr; |
|---|
| 1186 | | - |
|---|
| 1187 | | - if (bio->bi_status) |
|---|
| 1188 | | - return; |
|---|
| 1189 | | - |
|---|
| 1190 | | - /* |
|---|
| 1191 | | - * bio sector was incremented by the request size on completion. Taking |
|---|
| 1192 | | - * into account the original request sector, the target start offset on |
|---|
| 1193 | | - * the backing device and the target mapping offset (ti->begin), the |
|---|
| 1194 | | - * start sector of the backing device. The partition offset is always 0 |
|---|
| 1195 | | - * if the target uses a whole device. |
|---|
| 1196 | | - */ |
|---|
| 1197 | | - part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio)); |
|---|
| 1198 | | - |
|---|
| 1199 | | - /* |
|---|
| 1200 | | - * Remap the start sector of the reported zones. For sequential zones, |
|---|
| 1201 | | - * also remap the write pointer position. |
|---|
| 1202 | | - */ |
|---|
| 1203 | | - bio_for_each_segment(bvec, report_bio, iter) { |
|---|
| 1204 | | - addr = kmap_atomic(bvec.bv_page); |
|---|
| 1205 | | - |
|---|
| 1206 | | - /* Remember the report header in the first page */ |
|---|
| 1207 | | - if (!hdr) { |
|---|
| 1208 | | - hdr = addr; |
|---|
| 1209 | | - ofst = sizeof(struct blk_zone_report_hdr); |
|---|
| 1210 | | - } else |
|---|
| 1211 | | - ofst = 0; |
|---|
| 1212 | | - |
|---|
| 1213 | | - /* Set zones start sector */ |
|---|
| 1214 | | - while (hdr->nr_zones && ofst < bvec.bv_len) { |
|---|
| 1215 | | - zone = addr + ofst; |
|---|
| 1216 | | - zone->start -= part_offset; |
|---|
| 1217 | | - if (zone->start >= start + ti->len) { |
|---|
| 1218 | | - hdr->nr_zones = 0; |
|---|
| 1219 | | - break; |
|---|
| 1220 | | - } |
|---|
| 1221 | | - zone->start = zone->start + ti->begin - start; |
|---|
| 1222 | | - if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { |
|---|
| 1223 | | - if (zone->cond == BLK_ZONE_COND_FULL) |
|---|
| 1224 | | - zone->wp = zone->start + zone->len; |
|---|
| 1225 | | - else if (zone->cond == BLK_ZONE_COND_EMPTY) |
|---|
| 1226 | | - zone->wp = zone->start; |
|---|
| 1227 | | - else |
|---|
| 1228 | | - zone->wp = zone->wp + ti->begin - start - part_offset; |
|---|
| 1229 | | - } |
|---|
| 1230 | | - ofst += sizeof(struct blk_zone); |
|---|
| 1231 | | - hdr->nr_zones--; |
|---|
| 1232 | | - nr_rep++; |
|---|
| 1233 | | - } |
|---|
| 1234 | | - |
|---|
| 1235 | | - if (addr != hdr) |
|---|
| 1236 | | - kunmap_atomic(addr); |
|---|
| 1237 | | - |
|---|
| 1238 | | - if (!hdr->nr_zones) |
|---|
| 1239 | | - break; |
|---|
| 1283 | + mutex_lock(&md->swap_bios_lock); |
|---|
| 1284 | + while (latch < md->swap_bios) { |
|---|
| 1285 | + cond_resched(); |
|---|
| 1286 | + down(&md->swap_bios_semaphore); |
|---|
| 1287 | + md->swap_bios--; |
|---|
| 1240 | 1288 | } |
|---|
| 1241 | | - |
|---|
| 1242 | | - if (hdr) { |
|---|
| 1243 | | - hdr->nr_zones = nr_rep; |
|---|
| 1244 | | - kunmap_atomic(hdr); |
|---|
| 1289 | + while (latch > md->swap_bios) { |
|---|
| 1290 | + cond_resched(); |
|---|
| 1291 | + up(&md->swap_bios_semaphore); |
|---|
| 1292 | + md->swap_bios++; |
|---|
| 1245 | 1293 | } |
|---|
| 1246 | | - |
|---|
| 1247 | | - bio_advance(report_bio, report_bio->bi_iter.bi_size); |
|---|
| 1248 | | - |
|---|
| 1249 | | -#else /* !CONFIG_BLK_DEV_ZONED */ |
|---|
| 1250 | | - bio->bi_status = BLK_STS_NOTSUPP; |
|---|
| 1251 | | -#endif |
|---|
| 1294 | + mutex_unlock(&md->swap_bios_lock); |
|---|
| 1252 | 1295 | } |
|---|
| 1253 | | -EXPORT_SYMBOL_GPL(dm_remap_zone_report); |
|---|
| 1254 | 1296 | |
|---|
| 1255 | 1297 | static blk_qc_t __map_bio(struct dm_target_io *tio) |
|---|
| 1256 | 1298 | { |
|---|
| .. | .. |
|---|
| 1258 | 1300 | sector_t sector; |
|---|
| 1259 | 1301 | struct bio *clone = &tio->clone; |
|---|
| 1260 | 1302 | struct dm_io *io = tio->io; |
|---|
| 1261 | | - struct mapped_device *md = io->md; |
|---|
| 1262 | 1303 | struct dm_target *ti = tio->ti; |
|---|
| 1263 | 1304 | blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 1264 | 1305 | |
|---|
| .. | .. |
|---|
| 1272 | 1313 | atomic_inc(&io->io_count); |
|---|
| 1273 | 1314 | sector = clone->bi_iter.bi_sector; |
|---|
| 1274 | 1315 | |
|---|
| 1316 | + if (unlikely(swap_bios_limit(ti, clone))) { |
|---|
| 1317 | + struct mapped_device *md = io->md; |
|---|
| 1318 | + int latch = get_swap_bios(); |
|---|
| 1319 | + if (unlikely(latch != md->swap_bios)) |
|---|
| 1320 | + __set_swap_bios_limit(md, latch); |
|---|
| 1321 | + down(&md->swap_bios_semaphore); |
|---|
| 1322 | + } |
|---|
| 1323 | + |
|---|
| 1275 | 1324 | r = ti->type->map(ti, clone); |
|---|
| 1276 | 1325 | switch (r) { |
|---|
| 1277 | 1326 | case DM_MAPIO_SUBMITTED: |
|---|
| .. | .. |
|---|
| 1280 | 1329 | /* the bio has been remapped so dispatch it */ |
|---|
| 1281 | 1330 | trace_block_bio_remap(clone->bi_disk->queue, clone, |
|---|
| 1282 | 1331 | bio_dev(io->orig_bio), sector); |
|---|
| 1283 | | - if (md->type == DM_TYPE_NVME_BIO_BASED) |
|---|
| 1284 | | - ret = direct_make_request(clone); |
|---|
| 1285 | | - else |
|---|
| 1286 | | - ret = generic_make_request(clone); |
|---|
| 1332 | + ret = submit_bio_noacct(clone); |
|---|
| 1287 | 1333 | break; |
|---|
| 1288 | 1334 | case DM_MAPIO_KILL: |
|---|
| 1335 | + if (unlikely(swap_bios_limit(ti, clone))) { |
|---|
| 1336 | + struct mapped_device *md = io->md; |
|---|
| 1337 | + up(&md->swap_bios_semaphore); |
|---|
| 1338 | + } |
|---|
| 1289 | 1339 | free_tio(tio); |
|---|
| 1290 | 1340 | dec_pending(io, BLK_STS_IOERR); |
|---|
| 1291 | 1341 | break; |
|---|
| 1292 | 1342 | case DM_MAPIO_REQUEUE: |
|---|
| 1343 | + if (unlikely(swap_bios_limit(ti, clone))) { |
|---|
| 1344 | + struct mapped_device *md = io->md; |
|---|
| 1345 | + up(&md->swap_bios_semaphore); |
|---|
| 1346 | + } |
|---|
| 1293 | 1347 | free_tio(tio); |
|---|
| 1294 | 1348 | dec_pending(io, BLK_STS_DM_REQUEUE); |
|---|
| 1295 | 1349 | break; |
|---|
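Pulling the swap-bio throttle pieces of this patch together, the control flow is roughly the following (a summary sketch of the hunks in this diff, not new code):

```c
/*
 * __map_bio():
 *   if (swap_bios_limit(ti, clone)) {            // REQ_SWAP bio on a target with
 *                                                // ti->limit_swap_bios set
 *       latch = get_swap_bios();                 // re-read the module parameter
 *       if (latch != md->swap_bios)
 *           __set_swap_bios_limit(md, latch);    // resize the semaphore up/down
 *       down(&md->swap_bios_semaphore);          // take one of md->swap_bios slots
 *   }
 *   r = ti->type->map(ti, clone);
 *   // DM_MAPIO_KILL / DM_MAPIO_REQUEUE release the slot immediately
 *
 * clone_endio():
 *   if (swap_bios_limit(tio->ti, bio))
 *       up(&md->swap_bios_semaphore);            // slot freed on completion
 */
```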
| .. | .. |
|---|
| 1314 | 1368 | sector_t sector, unsigned len) |
|---|
| 1315 | 1369 | { |
|---|
| 1316 | 1370 | struct bio *clone = &tio->clone; |
|---|
| 1371 | + int r; |
|---|
| 1317 | 1372 | |
|---|
| 1318 | 1373 | __bio_clone_fast(clone, bio); |
|---|
| 1319 | 1374 | |
|---|
| 1320 | | - bio_crypt_clone(clone, bio, GFP_NOIO); |
|---|
| 1375 | + r = bio_crypt_clone(clone, bio, GFP_NOIO); |
|---|
| 1376 | + if (r < 0) |
|---|
| 1377 | + return r; |
|---|
| 1321 | 1378 | |
|---|
| 1322 | | - if (unlikely(bio_integrity(bio) != NULL)) { |
|---|
| 1323 | | - int r; |
|---|
| 1379 | + if (bio_integrity(bio)) { |
|---|
| 1324 | 1380 | if (unlikely(!dm_target_has_integrity(tio->ti->type) && |
|---|
| 1325 | 1381 | !dm_target_passes_integrity(tio->ti->type))) { |
|---|
| 1326 | 1382 | DMWARN("%s: the target %s doesn't support integrity data.", |
|---|
| .. | .. |
|---|
| 1334 | 1390 | return r; |
|---|
| 1335 | 1391 | } |
|---|
| 1336 | 1392 | |
|---|
| 1337 | | - if (bio_op(bio) != REQ_OP_ZONE_REPORT) |
|---|
| 1338 | | - bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); |
|---|
| 1393 | + bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); |
|---|
| 1339 | 1394 | clone->bi_iter.bi_size = to_bytes(len); |
|---|
| 1340 | 1395 | |
|---|
| 1341 | | - if (unlikely(bio_integrity(bio) != NULL)) |
|---|
| 1396 | + if (bio_integrity(bio)) |
|---|
| 1342 | 1397 | bio_integrity_trim(clone); |
|---|
| 1343 | 1398 | |
|---|
| 1344 | 1399 | return 0; |
|---|
| .. | .. |
|---|
| 1417 | 1472 | { |
|---|
| 1418 | 1473 | unsigned target_nr = 0; |
|---|
| 1419 | 1474 | struct dm_target *ti; |
|---|
| 1475 | + struct bio flush_bio; |
|---|
| 1476 | + |
|---|
| 1477 | + /* |
|---|
| 1478 | + * Use an on-stack bio for this; it's safe since we don't |
|---|
| 1479 | + * need to reference it after submit. It's just used as |
|---|
| 1480 | + * the basis for the clone(s). |
|---|
| 1481 | + */ |
|---|
| 1482 | + bio_init(&flush_bio, NULL, 0); |
|---|
| 1483 | + flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; |
|---|
| 1484 | + ci->bio = &flush_bio; |
|---|
| 1485 | + ci->sector_count = 0; |
|---|
| 1486 | + |
|---|
| 1487 | + /* |
|---|
| 1488 | + * Empty flush uses a statically initialized bio, as the base for |
|---|
| 1489 | + * cloning. However, blkg association requires that a bdev is |
|---|
| 1490 | + * associated with a gendisk, which doesn't happen until the bdev is |
|---|
| 1491 | + * opened. So, blkg association is done at issue time of the flush |
|---|
| 1492 | + * rather than when the device is created in alloc_dev(). |
|---|
| 1493 | + */ |
|---|
| 1494 | + bio_set_dev(ci->bio, ci->io->md->bdev); |
|---|
| 1420 | 1495 | |
|---|
| 1421 | 1496 | BUG_ON(bio_has_data(ci->bio)); |
|---|
| 1422 | 1497 | while ((ti = dm_table_get_target(ci->map, target_nr++))) |
|---|
| 1423 | 1498 | __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); |
|---|
| 1424 | 1499 | |
|---|
| 1500 | + bio_uninit(ci->bio); |
|---|
| 1425 | 1501 | return 0; |
|---|
| 1426 | 1502 | } |
|---|
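`__send_empty_flush()` now builds its base bio on the stack instead of reusing the long-lived `md->flush_bio` that `alloc_dev()` used to initialize (that initialization is removed in a later hunk). Besides simplifying the mapped_device setup, this lets the blkg association happen at flush-issue time, as the comment notes, and the matching `bio_uninit()` releases whatever state `bio_init()`/`bio_set_dev()` attached to the on-stack bio before it goes out of scope.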
| 1427 | 1503 | |
|---|
| .. | .. |
|---|
| 1444 | 1520 | return 0; |
|---|
| 1445 | 1521 | } |
|---|
| 1446 | 1522 | |
|---|
| 1447 | | -typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); |
|---|
| 1448 | | - |
|---|
| 1449 | | -static unsigned get_num_discard_bios(struct dm_target *ti) |
|---|
| 1450 | | -{ |
|---|
| 1451 | | - return ti->num_discard_bios; |
|---|
| 1452 | | -} |
|---|
| 1453 | | - |
|---|
| 1454 | | -static unsigned get_num_secure_erase_bios(struct dm_target *ti) |
|---|
| 1455 | | -{ |
|---|
| 1456 | | - return ti->num_secure_erase_bios; |
|---|
| 1457 | | -} |
|---|
| 1458 | | - |
|---|
| 1459 | | -static unsigned get_num_write_same_bios(struct dm_target *ti) |
|---|
| 1460 | | -{ |
|---|
| 1461 | | - return ti->num_write_same_bios; |
|---|
| 1462 | | -} |
|---|
| 1463 | | - |
|---|
| 1464 | | -static unsigned get_num_write_zeroes_bios(struct dm_target *ti) |
|---|
| 1465 | | -{ |
|---|
| 1466 | | - return ti->num_write_zeroes_bios; |
|---|
| 1467 | | -} |
|---|
| 1468 | | - |
|---|
| 1469 | | -typedef bool (*is_split_required_fn)(struct dm_target *ti); |
|---|
| 1470 | | - |
|---|
| 1471 | | -static bool is_split_required_for_discard(struct dm_target *ti) |
|---|
| 1472 | | -{ |
|---|
| 1473 | | - return ti->split_discard_bios; |
|---|
| 1474 | | -} |
|---|
| 1475 | | - |
|---|
| 1476 | 1523 | static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, |
|---|
| 1477 | | - get_num_bios_fn get_num_bios, |
|---|
| 1478 | | - is_split_required_fn is_split_required) |
|---|
| 1524 | + unsigned num_bios) |
|---|
| 1479 | 1525 | { |
|---|
| 1480 | 1526 | unsigned len; |
|---|
| 1481 | | - unsigned num_bios; |
|---|
| 1482 | 1527 | |
|---|
| 1483 | 1528 | /* |
|---|
| 1484 | 1529 | * Even though the device advertised support for this type of |
|---|
| .. | .. |
|---|
| 1486 | 1531 | * reconfiguration might also have changed that since the |
|---|
| 1487 | 1532 | * check was performed. |
|---|
| 1488 | 1533 | */ |
|---|
| 1489 | | - num_bios = get_num_bios ? get_num_bios(ti) : 0; |
|---|
| 1490 | 1534 | if (!num_bios) |
|---|
| 1491 | 1535 | return -EOPNOTSUPP; |
|---|
| 1492 | 1536 | |
|---|
| 1493 | | - if (is_split_required && !is_split_required(ti)) |
|---|
| 1494 | | - len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); |
|---|
| 1495 | | - else |
|---|
| 1496 | | - len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); |
|---|
| 1537 | + len = min_t(sector_t, ci->sector_count, |
|---|
| 1538 | + max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); |
|---|
| 1497 | 1539 | |
|---|
| 1498 | 1540 | __send_duplicate_bios(ci, ti, num_bios, &len); |
|---|
| 1499 | 1541 | |
|---|
| .. | .. |
|---|
| 1503 | 1545 | return 0; |
|---|
| 1504 | 1546 | } |
|---|
| 1505 | 1547 | |
|---|
| 1506 | | -static int __send_discard(struct clone_info *ci, struct dm_target *ti) |
|---|
| 1548 | +static bool is_abnormal_io(struct bio *bio) |
|---|
| 1507 | 1549 | { |
|---|
| 1508 | | - return __send_changing_extent_only(ci, ti, get_num_discard_bios, |
|---|
| 1509 | | - is_split_required_for_discard); |
|---|
| 1510 | | -} |
|---|
| 1550 | + bool r = false; |
|---|
| 1511 | 1551 | |
|---|
| 1512 | | -static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) |
|---|
| 1513 | | -{ |
|---|
| 1514 | | - return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL); |
|---|
| 1515 | | -} |
|---|
| 1552 | + switch (bio_op(bio)) { |
|---|
| 1553 | + case REQ_OP_DISCARD: |
|---|
| 1554 | + case REQ_OP_SECURE_ERASE: |
|---|
| 1555 | + case REQ_OP_WRITE_SAME: |
|---|
| 1556 | + case REQ_OP_WRITE_ZEROES: |
|---|
| 1557 | + r = true; |
|---|
| 1558 | + break; |
|---|
| 1559 | + } |
|---|
| 1516 | 1560 | |
|---|
| 1517 | | -static int __send_write_same(struct clone_info *ci, struct dm_target *ti) |
|---|
| 1518 | | -{ |
|---|
| 1519 | | - return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL); |
|---|
| 1520 | | -} |
|---|
| 1521 | | - |
|---|
| 1522 | | -static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) |
|---|
| 1523 | | -{ |
|---|
| 1524 | | - return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL); |
|---|
| 1561 | + return r; |
|---|
| 1525 | 1562 | } |
|---|
| 1526 | 1563 | |
|---|
| 1527 | 1564 | static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, |
|---|
| 1528 | 1565 | int *result) |
|---|
| 1529 | 1566 | { |
|---|
| 1530 | 1567 | struct bio *bio = ci->bio; |
|---|
| 1568 | + unsigned num_bios = 0; |
|---|
| 1531 | 1569 | |
|---|
| 1532 | | - if (bio_op(bio) == REQ_OP_DISCARD) |
|---|
| 1533 | | - *result = __send_discard(ci, ti); |
|---|
| 1534 | | - else if (bio_op(bio) == REQ_OP_SECURE_ERASE) |
|---|
| 1535 | | - *result = __send_secure_erase(ci, ti); |
|---|
| 1536 | | - else if (bio_op(bio) == REQ_OP_WRITE_SAME) |
|---|
| 1537 | | - *result = __send_write_same(ci, ti); |
|---|
| 1538 | | - else if (bio_op(bio) == REQ_OP_WRITE_ZEROES) |
|---|
| 1539 | | - *result = __send_write_zeroes(ci, ti); |
|---|
| 1540 | | - else |
|---|
| 1570 | + switch (bio_op(bio)) { |
|---|
| 1571 | + case REQ_OP_DISCARD: |
|---|
| 1572 | + num_bios = ti->num_discard_bios; |
|---|
| 1573 | + break; |
|---|
| 1574 | + case REQ_OP_SECURE_ERASE: |
|---|
| 1575 | + num_bios = ti->num_secure_erase_bios; |
|---|
| 1576 | + break; |
|---|
| 1577 | + case REQ_OP_WRITE_SAME: |
|---|
| 1578 | + num_bios = ti->num_write_same_bios; |
|---|
| 1579 | + break; |
|---|
| 1580 | + case REQ_OP_WRITE_ZEROES: |
|---|
| 1581 | + num_bios = ti->num_write_zeroes_bios; |
|---|
| 1582 | + break; |
|---|
| 1583 | + default: |
|---|
| 1541 | 1584 | return false; |
|---|
| 1585 | + } |
|---|
| 1542 | 1586 | |
|---|
| 1587 | + *result = __send_changing_extent_only(ci, ti, num_bios); |
|---|
| 1543 | 1588 | return true; |
|---|
| 1544 | 1589 | } |
|---|
| 1545 | 1590 | |
|---|
| .. | .. |
|---|
| 1548 | 1593 | */ |
|---|
| 1549 | 1594 | static int __split_and_process_non_flush(struct clone_info *ci) |
|---|
| 1550 | 1595 | { |
|---|
| 1551 | | - struct bio *bio = ci->bio; |
|---|
| 1552 | 1596 | struct dm_target *ti; |
|---|
| 1553 | 1597 | unsigned len; |
|---|
| 1554 | 1598 | int r; |
|---|
| 1555 | 1599 | |
|---|
| 1556 | 1600 | ti = dm_table_find_target(ci->map, ci->sector); |
|---|
| 1557 | | - if (!dm_target_is_valid(ti)) |
|---|
| 1601 | + if (!ti) |
|---|
| 1558 | 1602 | return -EIO; |
|---|
| 1559 | 1603 | |
|---|
| 1560 | | - if (unlikely(__process_abnormal_io(ci, ti, &r))) |
|---|
| 1604 | + if (__process_abnormal_io(ci, ti, &r)) |
|---|
| 1561 | 1605 | return r; |
|---|
| 1562 | 1606 | |
|---|
| 1563 | | - if (bio_op(bio) == REQ_OP_ZONE_REPORT) |
|---|
| 1564 | | - len = ci->sector_count; |
|---|
| 1565 | | - else |
|---|
| 1566 | | - len = min_t(sector_t, max_io_len(ci->sector, ti), |
|---|
| 1567 | | - ci->sector_count); |
|---|
| 1607 | + len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); |
|---|
| 1568 | 1608 | |
|---|
| 1569 | 1609 | r = __clone_and_map_data_bio(ci, ti, ci->sector, &len); |
|---|
| 1570 | 1610 | if (r < 0) |
|---|
| .. | .. |
|---|
| 1584 | 1624 | ci->sector = bio->bi_iter.bi_sector; |
|---|
| 1585 | 1625 | } |
|---|
| 1586 | 1626 | |
|---|
| 1627 | +#define __dm_part_stat_sub(part, field, subnd) \ |
|---|
| 1628 | + (part_stat_get(part, field) -= (subnd)) |
|---|
| 1629 | + |
|---|
| 1587 | 1630 | /* |
|---|
| 1588 | 1631 | * Entry point to split a bio into clones and submit them to the targets. |
|---|
| 1589 | 1632 | */ |
|---|
| .. | .. |
|---|
| 1594 | 1637 | blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 1595 | 1638 | int error = 0; |
|---|
| 1596 | 1639 | |
|---|
| 1597 | | - if (unlikely(!map)) { |
|---|
| 1598 | | - bio_io_error(bio); |
|---|
| 1599 | | - return ret; |
|---|
| 1600 | | - } |
|---|
| 1601 | | - |
|---|
| 1602 | | - blk_queue_split(md->queue, &bio); |
|---|
| 1603 | | - |
|---|
| 1604 | 1640 | init_clone_info(&ci, md, map, bio); |
|---|
| 1605 | 1641 | |
|---|
| 1606 | 1642 | if (bio->bi_opf & REQ_PREFLUSH) { |
|---|
| 1607 | | - ci.bio = &ci.io->md->flush_bio; |
|---|
| 1608 | | - ci.sector_count = 0; |
|---|
| 1609 | 1643 | error = __send_empty_flush(&ci); |
|---|
| 1610 | 1644 | /* dec_pending submits any data associated with flush */ |
|---|
| 1611 | | - } else if (bio_op(bio) == REQ_OP_ZONE_RESET) { |
|---|
| 1645 | + } else if (op_is_zone_mgmt(bio_op(bio))) { |
|---|
| 1612 | 1646 | ci.bio = bio; |
|---|
| 1613 | 1647 | ci.sector_count = 0; |
|---|
| 1614 | 1648 | error = __split_and_process_non_flush(&ci); |
|---|
| .. | .. |
|---|
| 1619 | 1653 | error = __split_and_process_non_flush(&ci); |
|---|
| 1620 | 1654 | if (current->bio_list && ci.sector_count && !error) { |
|---|
| 1621 | 1655 | /* |
|---|
| 1622 | | - * Remainder must be passed to generic_make_request() |
|---|
| 1656 | + * Remainder must be passed to submit_bio_noacct() |
|---|
| 1623 | 1657 | * so that it gets handled *after* bios already submitted |
|---|
| 1624 | 1658 | * have been completely processed. |
|---|
| 1625 | 1659 | * We take a clone of the original to store in |
|---|
| 1626 | 1660 | * ci.io->orig_bio to be used by end_io_acct() and |
|---|
| 1627 | 1661 | * for dec_pending to use for completion handling. |
|---|
| 1628 | | - * As this path is not used for REQ_OP_ZONE_REPORT, |
|---|
| 1629 | | - * the usage of io->orig_bio in dm_remap_zone_report() |
|---|
| 1630 | | - * won't be affected by this reassignment. |
|---|
| 1631 | 1662 | */ |
|---|
| 1632 | 1663 | struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, |
|---|
| 1633 | 1664 | GFP_NOIO, &md->queue->bio_split); |
|---|
| 1634 | 1665 | ci.io->orig_bio = b; |
|---|
| 1666 | + |
|---|
| 1667 | + /* |
|---|
| 1668 | + * Adjust IO stats for each split, otherwise upon queue |
|---|
| 1669 | + * reentry there will be redundant IO accounting. |
|---|
| 1670 | + * NOTE: this is a stop-gap fix, a proper fix involves |
|---|
| 1671 | + * significant refactoring of DM core's bio splitting |
|---|
| 1672 | + * (by eliminating DM's splitting and just using bio_split) |
|---|
| 1673 | + */ |
|---|
| 1674 | + part_stat_lock(); |
|---|
| 1675 | + __dm_part_stat_sub(&dm_disk(md)->part0, |
|---|
| 1676 | + sectors[op_stat_group(bio_op(bio))], ci.sector_count); |
|---|
| 1677 | + part_stat_unlock(); |
|---|
| 1678 | + |
|---|
| 1635 | 1679 | bio_chain(b, bio); |
|---|
| 1636 | | - ret = generic_make_request(bio); |
|---|
| 1680 | + trace_block_split(md->queue, b, bio->bi_iter.bi_sector); |
|---|
| 1681 | + ret = submit_bio_noacct(bio); |
|---|
| 1637 | 1682 | break; |
|---|
| 1638 | 1683 | } |
|---|
| 1639 | 1684 | } |
|---|
| .. | .. |
|---|
| 1644 | 1689 | return ret; |
|---|
| 1645 | 1690 | } |
|---|
| 1646 | 1691 | |
|---|
| 1647 | | -/* |
|---|
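The `__dm_part_stat_sub()` fixup above keeps the disk statistics honest across DM's split-and-resubmit: when only part of a bio is mapped, the remainder is re-submitted via `submit_bio_noacct()` and will be accounted again on re-entry, so the not-yet-processed `ci.sector_count` sectors are subtracted from `part0` first. Worked example (illustrative numbers): a 2048-sector bio of which the first 512 sectors are cloned leaves `ci.sector_count == 1536`; those 1536 sectors are subtracted now and counted once more when the chained remainder re-enters the queue, netting out to a single accounting of the full 2048 sectors.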
| 1648 | | - * Optimized variant of __split_and_process_bio that leverages the |
|---|
| 1649 | | - * fact that targets that use it do _not_ have a need to split bios. |
|---|
| 1650 | | - */ |
|---|
| 1651 | | -static blk_qc_t __process_bio(struct mapped_device *md, |
|---|
| 1652 | | - struct dm_table *map, struct bio *bio) |
|---|
| 1692 | +static blk_qc_t dm_submit_bio(struct bio *bio) |
|---|
| 1653 | 1693 | { |
|---|
| 1654 | | - struct clone_info ci; |
|---|
| 1655 | | - blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 1656 | | - int error = 0; |
|---|
| 1657 | | - |
|---|
| 1658 | | - if (unlikely(!map)) { |
|---|
| 1659 | | - bio_io_error(bio); |
|---|
| 1660 | | - return ret; |
|---|
| 1661 | | - } |
|---|
| 1662 | | - |
|---|
| 1663 | | - init_clone_info(&ci, md, map, bio); |
|---|
| 1664 | | - |
|---|
| 1665 | | - if (bio->bi_opf & REQ_PREFLUSH) { |
|---|
| 1666 | | - ci.bio = &ci.io->md->flush_bio; |
|---|
| 1667 | | - ci.sector_count = 0; |
|---|
| 1668 | | - error = __send_empty_flush(&ci); |
|---|
| 1669 | | - /* dec_pending submits any data associated with flush */ |
|---|
| 1670 | | - } else { |
|---|
| 1671 | | - struct dm_target *ti = md->immutable_target; |
|---|
| 1672 | | - struct dm_target_io *tio; |
|---|
| 1673 | | - |
|---|
| 1674 | | - /* |
|---|
| 1675 | | - * Defend against IO still getting in during teardown |
|---|
| 1676 | | - * - as was seen for a time with nvme-fcloop |
|---|
| 1677 | | - */ |
|---|
| 1678 | | - if (unlikely(WARN_ON_ONCE(!ti || !dm_target_is_valid(ti)))) { |
|---|
| 1679 | | - error = -EIO; |
|---|
| 1680 | | - goto out; |
|---|
| 1681 | | - } |
|---|
| 1682 | | - |
|---|
| 1683 | | - ci.bio = bio; |
|---|
| 1684 | | - ci.sector_count = bio_sectors(bio); |
|---|
| 1685 | | - if (unlikely(__process_abnormal_io(&ci, ti, &error))) |
|---|
| 1686 | | - goto out; |
|---|
| 1687 | | - |
|---|
| 1688 | | - tio = alloc_tio(&ci, ti, 0, GFP_NOIO); |
|---|
| 1689 | | - ret = __clone_and_map_simple_bio(&ci, tio, NULL); |
|---|
| 1690 | | - } |
|---|
| 1691 | | -out: |
|---|
| 1692 | | - /* drop the extra reference count */ |
|---|
| 1693 | | - dec_pending(ci.io, errno_to_blk_status(error)); |
|---|
| 1694 | | - return ret; |
|---|
| 1695 | | -} |
|---|
| 1696 | | - |
|---|
| 1697 | | -typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *); |
|---|
| 1698 | | - |
|---|
| 1699 | | -static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio, |
|---|
| 1700 | | - process_bio_fn process_bio) |
|---|
| 1701 | | -{ |
|---|
| 1702 | | - struct mapped_device *md = q->queuedata; |
|---|
| 1694 | + struct mapped_device *md = bio->bi_disk->private_data; |
|---|
| 1703 | 1695 | blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 1704 | 1696 | int srcu_idx; |
|---|
| 1705 | 1697 | struct dm_table *map; |
|---|
| 1706 | 1698 | |
|---|
| 1707 | 1699 | map = dm_get_live_table(md, &srcu_idx); |
|---|
| 1708 | 1700 | |
|---|
| 1709 | | - /* if we're suspended, we have to queue this io for later */ |
|---|
| 1710 | | - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
|---|
| 1711 | | - dm_put_live_table(md, srcu_idx); |
|---|
| 1712 | | - |
|---|
| 1713 | | - if (!(bio->bi_opf & REQ_RAHEAD)) |
|---|
| 1714 | | - queue_io(md, bio); |
|---|
| 1715 | | - else |
|---|
| 1701 | + /* If suspended, or map not yet available, queue this IO for later */ |
|---|
| 1702 | + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || |
|---|
| 1703 | + unlikely(!map)) { |
|---|
| 1704 | + if (bio->bi_opf & REQ_NOWAIT) |
|---|
| 1705 | + bio_wouldblock_error(bio); |
|---|
| 1706 | + else if (bio->bi_opf & REQ_RAHEAD) |
|---|
| 1716 | 1707 | bio_io_error(bio); |
|---|
| 1717 | | - return ret; |
|---|
| 1708 | + else |
|---|
| 1709 | + queue_io(md, bio); |
|---|
| 1710 | + goto out; |
|---|
| 1718 | 1711 | } |
|---|
| 1719 | 1712 | |
|---|
| 1720 | | - ret = process_bio(md, map, bio); |
|---|
| 1713 | + /* |
|---|
| 1714 | + * Use blk_queue_split() for abnormal IO (e.g. discard, writesame, etc) |
|---|
| 1715 | + * otherwise associated queue_limits won't be imposed. |
|---|
| 1716 | + */ |
|---|
| 1717 | + if (is_abnormal_io(bio)) |
|---|
| 1718 | + blk_queue_split(&bio); |
|---|
| 1721 | 1719 | |
|---|
| 1720 | + ret = __split_and_process_bio(md, map, bio); |
|---|
| 1721 | +out: |
|---|
| 1722 | 1722 | dm_put_live_table(md, srcu_idx); |
|---|
| 1723 | 1723 | return ret; |
|---|
| 1724 | | -} |
|---|
| 1725 | | - |
|---|
| 1726 | | -/* |
|---|
| 1727 | | - * The request function that remaps the bio to one target and |
|---|
| 1728 | | - * splits off any remainder. |
|---|
| 1729 | | - */ |
|---|
| 1730 | | -static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) |
|---|
| 1731 | | -{ |
|---|
| 1732 | | - return __dm_make_request(q, bio, __split_and_process_bio); |
|---|
| 1733 | | -} |
|---|
| 1734 | | - |
|---|
| 1735 | | -static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio) |
|---|
| 1736 | | -{ |
|---|
| 1737 | | - return __dm_make_request(q, bio, __process_bio); |
|---|
| 1738 | | -} |
|---|
| 1739 | | - |
|---|
| 1740 | | -static int dm_any_congested(void *congested_data, int bdi_bits) |
|---|
| 1741 | | -{ |
|---|
| 1742 | | - int r = bdi_bits; |
|---|
| 1743 | | - struct mapped_device *md = congested_data; |
|---|
| 1744 | | - struct dm_table *map; |
|---|
| 1745 | | - |
|---|
| 1746 | | - if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
|---|
| 1747 | | - if (dm_request_based(md)) { |
|---|
| 1748 | | - /* |
|---|
| 1749 | | - * With request-based DM we only need to check the |
|---|
| 1750 | | - * top-level queue for congestion. |
|---|
| 1751 | | - */ |
|---|
| 1752 | | - r = md->queue->backing_dev_info->wb.state & bdi_bits; |
|---|
| 1753 | | - } else { |
|---|
| 1754 | | - map = dm_get_live_table_fast(md); |
|---|
| 1755 | | - if (map) |
|---|
| 1756 | | - r = dm_table_any_congested(map, bdi_bits); |
|---|
| 1757 | | - dm_put_live_table_fast(md); |
|---|
| 1758 | | - } |
|---|
| 1759 | | - } |
|---|
| 1760 | | - |
|---|
| 1761 | | - return r; |
|---|
| 1762 | 1724 | } |
|---|
| 1763 | 1725 | |
|---|
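`dm_submit_bio()` folds the old `dm_make_request()`/`__dm_make_request()`/`__process_bio()` trio into a single entry point. Two behavioral points in the new ladder: a suspended device (or one whose table is not yet loaded) fails `REQ_NOWAIT` bios with `bio_wouldblock_error()`, errors readahead, and queues everything else for later; and `blk_queue_split()` is now applied only to the "abnormal" ops classified by `is_abnormal_io()` (discard, secure erase, write-same, write-zeroes), since regular reads and writes are split against the live table by `__split_and_process_bio()` itself.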
| 1764 | 1726 | /*----------------------------------------------------------------- |
|---|
| .. | .. |
|---|
| 1811 | 1773 | } |
|---|
| 1812 | 1774 | |
|---|
| 1813 | 1775 | static const struct block_device_operations dm_blk_dops; |
|---|
| 1776 | +static const struct block_device_operations dm_rq_blk_dops; |
|---|
| 1814 | 1777 | static const struct dax_operations dm_dax_ops; |
|---|
| 1815 | 1778 | |
|---|
| 1816 | 1779 | static void dm_wq_work(struct work_struct *work); |
|---|
| 1817 | 1780 | |
|---|
| 1818 | | -static void dm_init_normal_md_queue(struct mapped_device *md) |
|---|
| 1781 | +#ifdef CONFIG_BLK_INLINE_ENCRYPTION |
|---|
| 1782 | +static void dm_queue_destroy_keyslot_manager(struct request_queue *q) |
|---|
| 1819 | 1783 | { |
|---|
| 1820 | | - md->use_blk_mq = false; |
|---|
| 1821 | | - |
|---|
| 1822 | | - /* |
|---|
| 1823 | | - * Initialize aspects of queue that aren't relevant for blk-mq |
|---|
| 1824 | | - */ |
|---|
| 1825 | | - md->queue->backing_dev_info->congested_data = md; |
|---|
| 1826 | | - md->queue->backing_dev_info->congested_fn = dm_any_congested; |
|---|
| 1784 | + dm_destroy_keyslot_manager(q->ksm); |
|---|
| 1827 | 1785 | } |
|---|
| 1828 | 1786 | |
|---|
| 1829 | | -static void dm_destroy_inline_encryption(struct request_queue *q); |
|---|
| 1787 | +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ |
|---|
| 1788 | + |
|---|
| 1789 | +static inline void dm_queue_destroy_keyslot_manager(struct request_queue *q) |
|---|
| 1790 | +{ |
|---|
| 1791 | +} |
|---|
| 1792 | +#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ |
|---|
| 1830 | 1793 | |
|---|
| 1831 | 1794 | static void cleanup_mapped_device(struct mapped_device *md) |
|---|
| 1832 | 1795 | { |
|---|
| 1833 | 1796 | if (md->wq) |
|---|
| 1834 | 1797 | destroy_workqueue(md->wq); |
|---|
| 1835 | | - if (md->kworker_task) |
|---|
| 1836 | | - kthread_stop(md->kworker_task); |
|---|
| 1837 | 1798 | bioset_exit(&md->bs); |
|---|
| 1838 | 1799 | bioset_exit(&md->io_bs); |
|---|
| 1839 | 1800 | |
|---|
| .. | .. |
|---|
| 1852 | 1813 | } |
|---|
| 1853 | 1814 | |
|---|
| 1854 | 1815 | if (md->queue) { |
|---|
| 1855 | | - dm_destroy_inline_encryption(md->queue); |
|---|
| 1816 | + dm_queue_destroy_keyslot_manager(md->queue); |
|---|
| 1856 | 1817 | blk_cleanup_queue(md->queue); |
|---|
| 1857 | 1818 | } |
|---|
| 1858 | 1819 | |
|---|
| .. | .. |
|---|
| 1866 | 1827 | mutex_destroy(&md->suspend_lock); |
|---|
| 1867 | 1828 | mutex_destroy(&md->type_lock); |
|---|
| 1868 | 1829 | mutex_destroy(&md->table_devices_lock); |
|---|
| 1830 | + mutex_destroy(&md->swap_bios_lock); |
|---|
| 1869 | 1831 | |
|---|
| 1870 | 1832 | dm_mq_cleanup_mapped_device(md); |
|---|
| 1871 | 1833 | } |
|---|
| .. | .. |
|---|
| 1876 | 1838 | static struct mapped_device *alloc_dev(int minor) |
|---|
| 1877 | 1839 | { |
|---|
| 1878 | 1840 | int r, numa_node_id = dm_get_numa_node(); |
|---|
| 1879 | | - struct dax_device *dax_dev = NULL; |
|---|
| 1880 | 1841 | struct mapped_device *md; |
|---|
| 1881 | 1842 | void *old_md; |
|---|
| 1882 | 1843 | |
|---|
| .. | .. |
|---|
| 1902 | 1863 | goto bad_io_barrier; |
|---|
| 1903 | 1864 | |
|---|
| 1904 | 1865 | md->numa_node_id = numa_node_id; |
|---|
| 1905 | | - md->use_blk_mq = dm_use_blk_mq_default(); |
|---|
| 1906 | 1866 | md->init_tio_pdu = false; |
|---|
| 1907 | 1867 | md->type = DM_TYPE_NONE; |
|---|
| 1908 | 1868 | mutex_init(&md->suspend_lock); |
|---|
| .. | .. |
|---|
| 1917 | 1877 | INIT_LIST_HEAD(&md->table_devices); |
|---|
| 1918 | 1878 | spin_lock_init(&md->uevent_lock); |
|---|
| 1919 | 1879 | |
|---|
| 1920 | | - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); |
|---|
| 1880 | + /* |
|---|
| 1881 | + * Default to bio-based until the DM table is loaded and md->type is |
|---|
| 1882 | + * established. If a request-based table is loaded, blk-mq will |
|---|
| 1883 | + * override accordingly. |
|---|
| 1884 | + */ |
|---|
| 1885 | + md->queue = blk_alloc_queue(numa_node_id); |
|---|
| 1921 | 1886 | if (!md->queue) |
|---|
| 1922 | 1887 | goto bad; |
|---|
| 1923 | | - md->queue->queuedata = md; |
|---|
| 1924 | | - /* |
|---|
| 1925 | | - * default to bio-based required ->make_request_fn until DM |
|---|
| 1926 | | - * table is loaded and md->type established. If request-based |
|---|
| 1927 | | - * table is loaded: blk-mq will override accordingly. |
|---|
| 1928 | | - */ |
|---|
| 1929 | | - blk_queue_make_request(md->queue, dm_make_request); |
|---|
| 1930 | 1888 | |
|---|
| 1931 | 1889 | md->disk = alloc_disk_node(1, md->numa_node_id); |
|---|
| 1932 | 1890 | if (!md->disk) |
|---|
| 1933 | 1891 | goto bad; |
|---|
| 1934 | 1892 | |
|---|
| 1935 | | - atomic_set(&md->pending[0], 0); |
|---|
| 1936 | | - atomic_set(&md->pending[1], 0); |
|---|
| 1937 | 1893 | init_waitqueue_head(&md->wait); |
|---|
| 1938 | 1894 | INIT_WORK(&md->work, dm_wq_work); |
|---|
| 1939 | 1895 | init_waitqueue_head(&md->eventq); |
|---|
| 1940 | 1896 | init_completion(&md->kobj_holder.completion); |
|---|
| 1941 | | - md->kworker_task = NULL; |
|---|
| 1897 | + |
|---|
| 1898 | + md->swap_bios = get_swap_bios(); |
|---|
| 1899 | + sema_init(&md->swap_bios_semaphore, md->swap_bios); |
|---|
| 1900 | + mutex_init(&md->swap_bios_lock); |
|---|
| 1942 | 1901 | |
|---|
| 1943 | 1902 | md->disk->major = _major; |
|---|
| 1944 | 1903 | md->disk->first_minor = minor; |
|---|
| .. | .. |
|---|
| 1948 | 1907 | sprintf(md->disk->disk_name, "dm-%d", minor); |
|---|
| 1949 | 1908 | |
|---|
| 1950 | 1909 | if (IS_ENABLED(CONFIG_DAX_DRIVER)) { |
|---|
| 1951 | | - dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops); |
|---|
| 1952 | | - if (!dax_dev) |
|---|
| 1910 | + md->dax_dev = alloc_dax(md, md->disk->disk_name, |
|---|
| 1911 | + &dm_dax_ops, 0); |
|---|
| 1912 | + if (IS_ERR(md->dax_dev)) { |
|---|
| 1913 | + md->dax_dev = NULL; |
|---|
| 1953 | 1914 | goto bad; |
|---|
| 1915 | + } |
|---|
| 1954 | 1916 | } |
|---|
| 1955 | | - md->dax_dev = dax_dev; |
|---|
| 1956 | 1917 | |
|---|
| 1957 | 1918 | add_disk_no_queue_reg(md->disk); |
|---|
| 1958 | 1919 | format_dev_t(md->name, MKDEV(_major, minor)); |
|---|
| .. | .. |
|---|
| 1965 | 1926 | if (!md->bdev) |
|---|
| 1966 | 1927 | goto bad; |
|---|
| 1967 | 1928 | |
|---|
| 1968 | | - bio_init(&md->flush_bio, NULL, 0); |
|---|
| 1969 | | - bio_set_dev(&md->flush_bio, md->bdev); |
|---|
| 1970 | | - md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; |
|---|
| 1971 | | - |
|---|
| 1972 | | - dm_stats_init(&md->stats); |
|---|
| 1929 | + r = dm_stats_init(&md->stats); |
|---|
| 1930 | + if (r < 0) |
|---|
| 1931 | + goto bad; |
|---|
| 1973 | 1932 | |
|---|
| 1974 | 1933 | /* Populate the mapping, nobody knows we exist yet */ |
|---|
| 1975 | 1934 | spin_lock(&_minor_lock); |
|---|
| .. | .. |
|---|
| 2072 | 2031 | } |
|---|
| 2073 | 2032 | |
|---|
| 2074 | 2033 | /* |
|---|
| 2075 | | - * Protected by md->suspend_lock obtained by dm_swap_table(). |
|---|
| 2076 | | - */ |
|---|
| 2077 | | -static void __set_size(struct mapped_device *md, sector_t size) |
|---|
| 2078 | | -{ |
|---|
| 2079 | | - lockdep_assert_held(&md->suspend_lock); |
|---|
| 2080 | | - |
|---|
| 2081 | | - set_capacity(md->disk, size); |
|---|
| 2082 | | - |
|---|
| 2083 | | - i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); |
|---|
| 2084 | | -} |
|---|
| 2085 | | - |
|---|
| 2086 | | -/* |
|---|
| 2087 | 2034 | * Returns old map, which caller must destroy. |
|---|
| 2088 | 2035 | */ |
|---|
| 2089 | 2036 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, |
|---|
| .. | .. |
|---|
| 2105 | 2052 | if (size != dm_get_size(md)) |
|---|
| 2106 | 2053 | memset(&md->geometry, 0, sizeof(md->geometry)); |
|---|
| 2107 | 2054 | |
|---|
| 2108 | | - __set_size(md, size); |
|---|
| 2055 | + set_capacity(md->disk, size); |
|---|
| 2056 | + bd_set_nr_sectors(md->bdev, size); |
|---|
| 2109 | 2057 | |
|---|
| 2110 | 2058 | dm_table_event_callback(t, event_callback, md); |
|---|
| 2111 | 2059 | |
|---|
| .. | .. |
|---|
| 2119 | 2067 | if (request_based) |
|---|
| 2120 | 2068 | dm_stop_queue(q); |
|---|
| 2121 | 2069 | |
|---|
| 2122 | | - if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) { |
|---|
| 2070 | + if (request_based) { |
|---|
| 2123 | 2071 | /* |
|---|
| 2124 | | - * Leverage the fact that request-based DM targets and |
|---|
| 2125 | | - * NVMe bio based targets are immutable singletons |
|---|
| 2126 | | - * - used to optimize both dm_request_fn and dm_mq_queue_rq; |
|---|
| 2127 | | - * and __process_bio. |
|---|
| 2072 | + * Leverage the fact that request-based DM targets are |
|---|
| 2073 | + * immutable singletons - used to optimize dm_mq_queue_rq. |
|---|
| 2128 | 2074 | */ |
|---|
| 2129 | 2075 | md->immutable_target = dm_table_get_immutable_target(t); |
|---|
| 2130 | 2076 | } |
|---|
| .. | .. |
|---|
| 2227 | 2173 | } |
|---|
| 2228 | 2174 | EXPORT_SYMBOL_GPL(dm_get_queue_limits); |
|---|
| 2229 | 2175 | |
|---|
| 2230 | | -#ifdef CONFIG_BLK_INLINE_ENCRYPTION |
|---|
| 2231 | | -struct dm_keyslot_evict_args { |
|---|
| 2232 | | - const struct blk_crypto_key *key; |
|---|
| 2233 | | - int err; |
|---|
| 2234 | | -}; |
|---|
| 2235 | | - |
|---|
| 2236 | | -static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, |
|---|
| 2237 | | - sector_t start, sector_t len, void *data) |
|---|
| 2238 | | -{ |
|---|
| 2239 | | - struct dm_keyslot_evict_args *args = data; |
|---|
| 2240 | | - int err; |
|---|
| 2241 | | - |
|---|
| 2242 | | - err = blk_crypto_evict_key(dev->bdev->bd_queue, args->key); |
|---|
| 2243 | | - if (!args->err) |
|---|
| 2244 | | - args->err = err; |
|---|
| 2245 | | - /* Always try to evict the key from all devices. */ |
|---|
| 2246 | | - return 0; |
|---|
| 2247 | | -} |
|---|
| 2248 | | - |
|---|
| 2249 | | -/* |
|---|
| 2250 | | - * When an inline encryption key is evicted from a device-mapper device, evict |
|---|
| 2251 | | - * it from all the underlying devices. |
|---|
| 2252 | | - */ |
|---|
| 2253 | | -static int dm_keyslot_evict(struct keyslot_manager *ksm, |
|---|
| 2254 | | - const struct blk_crypto_key *key, unsigned int slot) |
|---|
| 2255 | | -{ |
|---|
| 2256 | | - struct mapped_device *md = keyslot_manager_private(ksm); |
|---|
| 2257 | | - struct dm_keyslot_evict_args args = { key }; |
|---|
| 2258 | | - struct dm_table *t; |
|---|
| 2259 | | - int srcu_idx; |
|---|
| 2260 | | - int i; |
|---|
| 2261 | | - struct dm_target *ti; |
|---|
| 2262 | | - |
|---|
| 2263 | | - t = dm_get_live_table(md, &srcu_idx); |
|---|
| 2264 | | - if (!t) |
|---|
| 2265 | | - return 0; |
|---|
| 2266 | | - for (i = 0; i < dm_table_get_num_targets(t); i++) { |
|---|
| 2267 | | - ti = dm_table_get_target(t, i); |
|---|
| 2268 | | - if (!ti->type->iterate_devices) |
|---|
| 2269 | | - continue; |
|---|
| 2270 | | - ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args); |
|---|
| 2271 | | - } |
|---|
| 2272 | | - dm_put_live_table(md, srcu_idx); |
|---|
| 2273 | | - return args.err; |
|---|
| 2274 | | -} |
|---|
| 2275 | | - |
|---|
| 2276 | | -struct dm_derive_raw_secret_args { |
|---|
| 2277 | | - const u8 *wrapped_key; |
|---|
| 2278 | | - unsigned int wrapped_key_size; |
|---|
| 2279 | | - u8 *secret; |
|---|
| 2280 | | - unsigned int secret_size; |
|---|
| 2281 | | - int err; |
|---|
| 2282 | | -}; |
|---|
| 2283 | | - |
|---|
| 2284 | | -static int dm_derive_raw_secret_callback(struct dm_target *ti, |
|---|
| 2285 | | - struct dm_dev *dev, sector_t start, |
|---|
| 2286 | | - sector_t len, void *data) |
|---|
| 2287 | | -{ |
|---|
| 2288 | | - struct dm_derive_raw_secret_args *args = data; |
|---|
| 2289 | | - struct request_queue *q = dev->bdev->bd_queue; |
|---|
| 2290 | | - |
|---|
| 2291 | | - if (!args->err) |
|---|
| 2292 | | - return 0; |
|---|
| 2293 | | - |
|---|
| 2294 | | - if (!q->ksm) { |
|---|
| 2295 | | - args->err = -EOPNOTSUPP; |
|---|
| 2296 | | - return 0; |
|---|
| 2297 | | - } |
|---|
| 2298 | | - |
|---|
| 2299 | | - args->err = keyslot_manager_derive_raw_secret(q->ksm, args->wrapped_key, |
|---|
| 2300 | | - args->wrapped_key_size, |
|---|
| 2301 | | - args->secret, |
|---|
| 2302 | | - args->secret_size); |
|---|
| 2303 | | - /* Try another device in case this fails. */ |
|---|
| 2304 | | - return 0; |
|---|
| 2305 | | -} |
|---|
| 2306 | | - |
|---|
| 2307 | | -/* |
|---|
| 2308 | | - * Retrieve the raw_secret from the underlying device. Given that |
|---|
| 2309 | | - * only one raw_secret can exist for a particular wrapped_key, |
|---|
| 2310 | | - * retrieve it only from the first device that supports derive_raw_secret() |
|---|
| 2311 | | - */ |
|---|
| 2312 | | -static int dm_derive_raw_secret(struct keyslot_manager *ksm, |
|---|
| 2313 | | - const u8 *wrapped_key, |
|---|
| 2314 | | - unsigned int wrapped_key_size, |
|---|
| 2315 | | - u8 *secret, unsigned int secret_size) |
|---|
| 2316 | | -{ |
|---|
| 2317 | | - struct mapped_device *md = keyslot_manager_private(ksm); |
|---|
| 2318 | | - struct dm_derive_raw_secret_args args = { |
|---|
| 2319 | | - .wrapped_key = wrapped_key, |
|---|
| 2320 | | - .wrapped_key_size = wrapped_key_size, |
|---|
| 2321 | | - .secret = secret, |
|---|
| 2322 | | - .secret_size = secret_size, |
|---|
| 2323 | | - .err = -EOPNOTSUPP, |
|---|
| 2324 | | - }; |
|---|
| 2325 | | - struct dm_table *t; |
|---|
| 2326 | | - int srcu_idx; |
|---|
| 2327 | | - int i; |
|---|
| 2328 | | - struct dm_target *ti; |
|---|
| 2329 | | - |
|---|
| 2330 | | - t = dm_get_live_table(md, &srcu_idx); |
|---|
| 2331 | | - if (!t) |
|---|
| 2332 | | - return -EOPNOTSUPP; |
|---|
| 2333 | | - for (i = 0; i < dm_table_get_num_targets(t); i++) { |
|---|
| 2334 | | - ti = dm_table_get_target(t, i); |
|---|
| 2335 | | - if (!ti->type->iterate_devices) |
|---|
| 2336 | | - continue; |
|---|
| 2337 | | - ti->type->iterate_devices(ti, dm_derive_raw_secret_callback, |
|---|
| 2338 | | - &args); |
|---|
| 2339 | | - if (!args.err) |
|---|
| 2340 | | - break; |
|---|
| 2341 | | - } |
|---|
| 2342 | | - dm_put_live_table(md, srcu_idx); |
|---|
| 2343 | | - return args.err; |
|---|
| 2344 | | -} |
|---|
| 2345 | | - |
|---|
| 2346 | | -static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { |
|---|
| 2347 | | - .keyslot_evict = dm_keyslot_evict, |
|---|
| 2348 | | - .derive_raw_secret = dm_derive_raw_secret, |
|---|
| 2349 | | -}; |
|---|
| 2350 | | - |
|---|
| 2351 | | -static int dm_init_inline_encryption(struct mapped_device *md) |
|---|
| 2352 | | -{ |
|---|
| 2353 | | - unsigned int features; |
|---|
| 2354 | | - unsigned int mode_masks[BLK_ENCRYPTION_MODE_MAX]; |
|---|
| 2355 | | - |
|---|
| 2356 | | - /* |
|---|
| 2357 | | - * Initially declare support for all crypto settings. Anything |
|---|
| 2358 | | - * unsupported by a child device will be removed later when calculating |
|---|
| 2359 | | - * the device restrictions. |
|---|
| 2360 | | - */ |
|---|
| 2361 | | - features = BLK_CRYPTO_FEATURE_STANDARD_KEYS | |
|---|
| 2362 | | - BLK_CRYPTO_FEATURE_WRAPPED_KEYS; |
|---|
| 2363 | | - memset(mode_masks, 0xFF, sizeof(mode_masks)); |
|---|
| 2364 | | - |
|---|
| 2365 | | - md->queue->ksm = keyslot_manager_create_passthrough(NULL, |
|---|
| 2366 | | - &dm_ksm_ll_ops, |
|---|
| 2367 | | - features, |
|---|
| 2368 | | - mode_masks, md); |
|---|
| 2369 | | - if (!md->queue->ksm) |
|---|
| 2370 | | - return -ENOMEM; |
|---|
| 2371 | | - return 0; |
|---|
| 2372 | | -} |
|---|
| 2373 | | - |
|---|
| 2374 | | -static void dm_destroy_inline_encryption(struct request_queue *q) |
|---|
| 2375 | | -{ |
|---|
| 2376 | | - keyslot_manager_destroy(q->ksm); |
|---|
| 2377 | | - q->ksm = NULL; |
|---|
| 2378 | | -} |
|---|
| 2379 | | -#else /* CONFIG_BLK_INLINE_ENCRYPTION */ |
|---|
| 2380 | | -static inline int dm_init_inline_encryption(struct mapped_device *md) |
|---|
| 2381 | | -{ |
|---|
| 2382 | | - return 0; |
|---|
| 2383 | | -} |
|---|
| 2384 | | - |
|---|
| 2385 | | -static inline void dm_destroy_inline_encryption(struct request_queue *q) |
|---|
| 2386 | | -{ |
|---|
| 2387 | | -} |
|---|
| 2388 | | -#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ |
|---|
| 2389 | | - |
|---|
| 2390 | 2176 | /* |
|---|
| 2391 | 2177 | * Setup the DM device's queue based on md's type |
|---|
| 2392 | 2178 | */ |
|---|
| .. | .. |
|---|
| 2398 | 2184 | |
|---|
| 2399 | 2185 | switch (type) { |
|---|
| 2400 | 2186 | case DM_TYPE_REQUEST_BASED: |
|---|
| 2401 | | - dm_init_normal_md_queue(md); |
|---|
| 2402 | | - r = dm_old_init_request_queue(md, t); |
|---|
| 2403 | | - if (r) { |
|---|
| 2404 | | - DMERR("Cannot initialize queue for request-based mapped device"); |
|---|
| 2405 | | - return r; |
|---|
| 2406 | | - } |
|---|
| 2407 | | - break; |
|---|
| 2408 | | - case DM_TYPE_MQ_REQUEST_BASED: |
|---|
| 2187 | + md->disk->fops = &dm_rq_blk_dops; |
|---|
| 2409 | 2188 | r = dm_mq_init_request_queue(md, t); |
|---|
| 2410 | 2189 | if (r) { |
|---|
| 2411 | | - DMERR("Cannot initialize queue for request-based dm-mq mapped device"); |
|---|
| 2190 | + DMERR("Cannot initialize queue for request-based dm mapped device"); |
|---|
| 2412 | 2191 | return r; |
|---|
| 2413 | 2192 | } |
|---|
| 2414 | 2193 | break; |
|---|
| 2415 | 2194 | case DM_TYPE_BIO_BASED: |
|---|
| 2416 | 2195 | case DM_TYPE_DAX_BIO_BASED: |
|---|
| 2417 | | - dm_init_normal_md_queue(md); |
|---|
| 2418 | | - break; |
|---|
| 2419 | | - case DM_TYPE_NVME_BIO_BASED: |
|---|
| 2420 | | - dm_init_normal_md_queue(md); |
|---|
| 2421 | | - blk_queue_make_request(md->queue, dm_make_request_nvme); |
|---|
| 2422 | 2196 | break; |
|---|
| 2423 | 2197 | case DM_TYPE_NONE: |
|---|
| 2424 | 2198 | WARN_ON_ONCE(true); |
|---|
| .. | .. |
|---|
| 2430 | 2204 | DMERR("Cannot calculate initial queue limits"); |
|---|
| 2431 | 2205 | return r; |
|---|
| 2432 | 2206 | } |
|---|
| 2433 | | - |
|---|
| 2434 | | - r = dm_init_inline_encryption(md); |
|---|
| 2435 | | - if (r) { |
|---|
| 2436 | | - DMERR("Cannot initialize inline encryption"); |
|---|
| 2437 | | - return r; |
|---|
| 2438 | | - } |
|---|
| 2439 | | - |
|---|
| 2440 | 2207 | dm_table_set_restrictions(t, md->queue, &limits); |
|---|
| 2441 | 2208 | blk_register_queue(md->disk); |
|---|
| 2442 | 2209 | |
|---|
| .. | .. |
|---|
| 2516 | 2283 | |
|---|
| 2517 | 2284 | blk_set_queue_dying(md->queue); |
|---|
| 2518 | 2285 | |
|---|
| 2519 | | - if (dm_request_based(md) && md->kworker_task) |
|---|
| 2520 | | - kthread_flush_worker(&md->kworker); |
|---|
| 2521 | | - |
|---|
| 2522 | 2286 | /* |
|---|
| 2523 | 2287 | * Take suspend_lock so that presuspend and postsuspend methods |
|---|
| 2524 | 2288 | * do not race with internal suspend. |
|---|
| .. | .. |
|---|
| 2569 | 2333 | } |
|---|
| 2570 | 2334 | EXPORT_SYMBOL_GPL(dm_put); |
|---|
| 2571 | 2335 | |
|---|
| 2572 | | -static int dm_wait_for_completion(struct mapped_device *md, long task_state) |
|---|
| 2336 | +static bool md_in_flight_bios(struct mapped_device *md) |
|---|
| 2337 | +{ |
|---|
| 2338 | + int cpu; |
|---|
| 2339 | + struct hd_struct *part = &dm_disk(md)->part0; |
|---|
| 2340 | + long sum = 0; |
|---|
| 2341 | + |
|---|
| 2342 | + for_each_possible_cpu(cpu) { |
|---|
| 2343 | + sum += part_stat_local_read_cpu(part, in_flight[0], cpu); |
|---|
| 2344 | + sum += part_stat_local_read_cpu(part, in_flight[1], cpu); |
|---|
| 2345 | + } |
|---|
| 2346 | + |
|---|
| 2347 | + return sum != 0; |
|---|
| 2348 | +} |
|---|
| 2349 | + |
|---|
| 2350 | +static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state) |
|---|
| 2573 | 2351 | { |
|---|
| 2574 | 2352 | int r = 0; |
|---|
| 2575 | 2353 | DEFINE_WAIT(wait); |
|---|
| 2576 | 2354 | |
|---|
| 2577 | | - while (1) { |
|---|
| 2355 | + while (true) { |
|---|
| 2578 | 2356 | prepare_to_wait(&md->wait, &wait, task_state); |
|---|
| 2579 | 2357 | |
|---|
| 2580 | | - if (!md_in_flight(md)) |
|---|
| 2358 | + if (!md_in_flight_bios(md)) |
|---|
| 2581 | 2359 | break; |
|---|
| 2582 | 2360 | |
|---|
| 2583 | 2361 | if (signal_pending_state(task_state, current)) { |
|---|
| .. | .. |
|---|
| 2589 | 2367 | } |
|---|
| 2590 | 2368 | finish_wait(&md->wait, &wait); |
|---|
| 2591 | 2369 | |
|---|
| 2370 | + smp_rmb(); |
|---|
| 2371 | + |
|---|
| 2372 | + return r; |
|---|
| 2373 | +} |
|---|
| 2374 | + |
|---|
| 2375 | +static int dm_wait_for_completion(struct mapped_device *md, long task_state) |
|---|
| 2376 | +{ |
|---|
| 2377 | + int r = 0; |
|---|
| 2378 | + |
|---|
| 2379 | + if (!queue_is_mq(md->queue)) |
|---|
| 2380 | + return dm_wait_for_bios_completion(md, task_state); |
|---|
| 2381 | + |
|---|
| 2382 | + while (true) { |
|---|
| 2383 | + if (!blk_mq_queue_inflight(md->queue)) |
|---|
| 2384 | + break; |
|---|
| 2385 | + |
|---|
| 2386 | + if (signal_pending_state(task_state, current)) { |
|---|
| 2387 | + r = -EINTR; |
|---|
| 2388 | + break; |
|---|
| 2389 | + } |
|---|
| 2390 | + |
|---|
| 2391 | + msleep(5); |
|---|
| 2392 | + } |
|---|
| 2393 | + |
|---|
| 2592 | 2394 | return r; |
|---|
| 2593 | 2395 | } |
|---|
| 2594 | 2396 | |
|---|
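md_in_flight_bios() above sums the per-CPU in_flight counters over all possible CPUs before comparing against zero. Individual per-CPU values cannot be checked on their own: a bio can be accounted as started on one CPU and completed on another, so a single CPU's counter may be negative or positive even when nothing is in flight, and only the cross-CPU sum is meaningful. A small, self-contained userspace sketch of that accounting property (purely illustrative, not kernel code):

```c
#include <stdio.h>

#define NR_CPUS 4

/* Per-CPU in-flight deltas; only the sum across CPUs is meaningful. */
static long in_flight[NR_CPUS];

static long total_in_flight(void)
{
	long sum = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += in_flight[cpu];
	return sum;
}

int main(void)
{
	in_flight[0]++;	/* bio started while running on CPU 0 */
	in_flight[2]--;	/* the same bio completed on CPU 2 */

	/* CPU 0 reads +1 and CPU 2 reads -1, but the sum is correctly 0. */
	printf("in flight: %ld\n", total_in_flight());
	return 0;
}
```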
| .. | .. |
|---|
| 2597 | 2399 | */ |
|---|
| 2598 | 2400 | static void dm_wq_work(struct work_struct *work) |
|---|
| 2599 | 2401 | { |
|---|
| 2600 | | - struct mapped_device *md = container_of(work, struct mapped_device, |
|---|
| 2601 | | - work); |
|---|
| 2602 | | - struct bio *c; |
|---|
| 2603 | | - int srcu_idx; |
|---|
| 2604 | | - struct dm_table *map; |
|---|
| 2605 | | - |
|---|
| 2606 | | - map = dm_get_live_table(md, &srcu_idx); |
|---|
| 2402 | + struct mapped_device *md = container_of(work, struct mapped_device, work); |
|---|
| 2403 | + struct bio *bio; |
|---|
| 2607 | 2404 | |
|---|
| 2608 | 2405 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
|---|
| 2609 | 2406 | spin_lock_irq(&md->deferred_lock); |
|---|
| 2610 | | - c = bio_list_pop(&md->deferred); |
|---|
| 2407 | + bio = bio_list_pop(&md->deferred); |
|---|
| 2611 | 2408 | spin_unlock_irq(&md->deferred_lock); |
|---|
| 2612 | 2409 | |
|---|
| 2613 | | - if (!c) |
|---|
| 2410 | + if (!bio) |
|---|
| 2614 | 2411 | break; |
|---|
| 2615 | 2412 | |
|---|
| 2616 | | - if (dm_request_based(md)) |
|---|
| 2617 | | - generic_make_request(c); |
|---|
| 2618 | | - else |
|---|
| 2619 | | - __split_and_process_bio(md, map, c); |
|---|
| 2413 | + submit_bio_noacct(bio); |
|---|
| 2414 | + cond_resched(); |
|---|
| 2620 | 2415 | } |
|---|
| 2621 | | - |
|---|
| 2622 | | - dm_put_live_table(md, srcu_idx); |
|---|
| 2623 | 2416 | } |
|---|
| 2624 | 2417 | |
|---|
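The reworked dm_wq_work() above follows a common drain pattern: hold the lock only long enough to pop one deferred bio, drop it before the expensive re-submission via submit_bio_noacct(), and yield between iterations with cond_resched(). A self-contained userspace sketch of the same pattern, with a pthread mutex standing in for the spinlock and a print standing in for re-submission (illustrative only):

```c
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int payload;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *deferred;		/* stands in for md->deferred */

static void resubmit(struct node *n)	/* stands in for submit_bio_noacct() */
{
	printf("resubmitting %d\n", n->payload);
	free(n);
}

static void drain_deferred(void)
{
	for (;;) {
		struct node *n;

		pthread_mutex_lock(&lock);	/* hold the lock only to pop one item */
		n = deferred;
		if (n)
			deferred = n->next;
		pthread_mutex_unlock(&lock);

		if (!n)
			break;

		resubmit(n);	/* heavy work happens without the lock held */
		sched_yield();	/* analogous to cond_resched() between items */
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->payload = i;
		n->next = deferred;
		deferred = n;
	}
	drain_deferred();
	return 0;
}
```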
| 2625 | 2418 | static void dm_queue_flush(struct mapped_device *md) |
|---|
| .. | .. |
|---|
| 2681 | 2474 | { |
|---|
| 2682 | 2475 | int r; |
|---|
| 2683 | 2476 | |
|---|
| 2684 | | - WARN_ON(md->frozen_sb); |
|---|
| 2477 | + WARN_ON(test_bit(DMF_FROZEN, &md->flags)); |
|---|
| 2685 | 2478 | |
|---|
| 2686 | | - md->frozen_sb = freeze_bdev(md->bdev); |
|---|
| 2687 | | - if (IS_ERR(md->frozen_sb)) { |
|---|
| 2688 | | - r = PTR_ERR(md->frozen_sb); |
|---|
| 2689 | | - md->frozen_sb = NULL; |
|---|
| 2690 | | - return r; |
|---|
| 2691 | | - } |
|---|
| 2692 | | - |
|---|
| 2693 | | - set_bit(DMF_FROZEN, &md->flags); |
|---|
| 2694 | | - |
|---|
| 2695 | | - return 0; |
|---|
| 2479 | + r = freeze_bdev(md->bdev); |
|---|
| 2480 | + if (!r) |
|---|
| 2481 | + set_bit(DMF_FROZEN, &md->flags); |
|---|
| 2482 | + return r; |
|---|
| 2696 | 2483 | } |
|---|
| 2697 | 2484 | |
|---|
| 2698 | 2485 | static void unlock_fs(struct mapped_device *md) |
|---|
| 2699 | 2486 | { |
|---|
| 2700 | 2487 | if (!test_bit(DMF_FROZEN, &md->flags)) |
|---|
| 2701 | 2488 | return; |
|---|
| 2702 | | - |
|---|
| 2703 | | - thaw_bdev(md->bdev, md->frozen_sb); |
|---|
| 2704 | | - md->frozen_sb = NULL; |
|---|
| 2489 | + thaw_bdev(md->bdev); |
|---|
| 2705 | 2490 | clear_bit(DMF_FROZEN, &md->flags); |
|---|
| 2706 | 2491 | } |
|---|
| 2707 | 2492 | |
|---|
| .. | .. |
|---|
| 2731 | 2516 | if (noflush) |
|---|
| 2732 | 2517 | set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
|---|
| 2733 | 2518 | else |
|---|
| 2734 | | - pr_debug("%s: suspending with flush\n", dm_device_name(md)); |
|---|
| 2519 | + DMDEBUG("%s: suspending with flush", dm_device_name(md)); |
|---|
| 2735 | 2520 | |
|---|
| 2736 | 2521 | /* |
|---|
| 2737 | 2522 | * This gets reverted if there's an error later and the targets |
|---|
| .. | .. |
|---|
| 2756 | 2541 | /* |
|---|
| 2757 | 2542 | * Here we must make sure that no processes are submitting requests |
|---|
| 2758 | 2543 | * to target drivers i.e. no one may be executing |
|---|
| 2759 | | - * __split_and_process_bio. This is called from dm_request and |
|---|
| 2760 | | - * dm_wq_work. |
|---|
| 2544 | + * __split_and_process_bio from dm_submit_bio. |
|---|
| 2761 | 2545 | * |
|---|
| 2762 | | - * To get all processes out of __split_and_process_bio in dm_request, |
|---|
| 2546 | + * To get all processes out of __split_and_process_bio in dm_submit_bio, |
|---|
| 2763 | 2547 | * we take the write lock. To prevent any process from reentering |
|---|
| 2764 | | - * __split_and_process_bio from dm_request and quiesce the thread |
|---|
| 2765 | | - * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call |
|---|
| 2548 | + * __split_and_process_bio from dm_submit_bio and quiesce the thread |
|---|
| 2549 | + * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call |
|---|
| 2766 | 2550 | * flush_workqueue(md->wq). |
|---|
| 2767 | 2551 | */ |
|---|
| 2768 | 2552 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
|---|
| .. | .. |
|---|
| 2773 | 2557 | * Stop md->queue before flushing md->wq in case request-based |
|---|
| 2774 | 2558 | * dm defers requests to md->wq from md->queue. |
|---|
| 2775 | 2559 | */ |
|---|
| 2776 | | - if (dm_request_based(md)) { |
|---|
| 2560 | + if (dm_request_based(md)) |
|---|
| 2777 | 2561 | dm_stop_queue(md->queue); |
|---|
| 2778 | | - if (md->kworker_task) |
|---|
| 2779 | | - kthread_flush_worker(&md->kworker); |
|---|
| 2780 | | - } |
|---|
| 2781 | 2562 | |
|---|
| 2782 | 2563 | flush_workqueue(md->wq); |
|---|
| 2783 | 2564 | |
|---|
| .. | .. |
|---|
| 3133 | 2914 | |
|---|
| 3134 | 2915 | int dm_suspended(struct dm_target *ti) |
|---|
| 3135 | 2916 | { |
|---|
| 3136 | | - return dm_suspended_md(dm_table_get_md(ti->table)); |
|---|
| 2917 | + return dm_suspended_md(ti->table->md); |
|---|
| 3137 | 2918 | } |
|---|
| 3138 | 2919 | EXPORT_SYMBOL_GPL(dm_suspended); |
|---|
| 3139 | 2920 | |
|---|
| 3140 | 2921 | int dm_post_suspending(struct dm_target *ti) |
|---|
| 3141 | 2922 | { |
|---|
| 3142 | | - return dm_post_suspending_md(dm_table_get_md(ti->table)); |
|---|
| 2923 | + return dm_post_suspending_md(ti->table->md); |
|---|
| 3143 | 2924 | } |
|---|
| 3144 | 2925 | EXPORT_SYMBOL_GPL(dm_post_suspending); |
|---|
| 3145 | 2926 | |
|---|
| 3146 | 2927 | int dm_noflush_suspending(struct dm_target *ti) |
|---|
| 3147 | 2928 | { |
|---|
| 3148 | | - return __noflush_suspending(dm_table_get_md(ti->table)); |
|---|
| 2929 | + return __noflush_suspending(ti->table->md); |
|---|
| 3149 | 2930 | } |
|---|
| 3150 | 2931 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); |
|---|
| 3151 | 2932 | |
|---|
| .. | .. |
|---|
| 3164 | 2945 | switch (type) { |
|---|
| 3165 | 2946 | case DM_TYPE_BIO_BASED: |
|---|
| 3166 | 2947 | case DM_TYPE_DAX_BIO_BASED: |
|---|
| 3167 | | - case DM_TYPE_NVME_BIO_BASED: |
|---|
| 3168 | 2948 | pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); |
|---|
| 3169 | 2949 | front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); |
|---|
| 3170 | 2950 | io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio); |
|---|
| .. | .. |
|---|
| 3175 | 2955 | goto out; |
|---|
| 3176 | 2956 | break; |
|---|
| 3177 | 2957 | case DM_TYPE_REQUEST_BASED: |
|---|
| 3178 | | - case DM_TYPE_MQ_REQUEST_BASED: |
|---|
| 3179 | 2958 | pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size); |
|---|
| 3180 | 2959 | front_pad = offsetof(struct dm_rq_clone_bio_info, clone); |
|---|
| 3181 | 2960 | /* per_io_data_size is used for blk-mq pdu at queue allocation */ |
|---|
| .. | .. |
|---|
| 3233 | 3012 | if (dm_table_get_num_targets(table) != 1) |
|---|
| 3234 | 3013 | goto out; |
|---|
| 3235 | 3014 | ti = dm_table_get_target(table, 0); |
|---|
| 3015 | + |
|---|
| 3016 | + if (dm_suspended_md(md)) { |
|---|
| 3017 | + ret = -EAGAIN; |
|---|
| 3018 | + goto out; |
|---|
| 3019 | + } |
|---|
| 3236 | 3020 | |
|---|
| 3237 | 3021 | ret = -EINVAL; |
|---|
| 3238 | 3022 | if (!ti->type->iterate_devices) |
|---|
| .. | .. |
|---|
| 3373 | 3157 | }; |
|---|
| 3374 | 3158 | |
|---|
| 3375 | 3159 | static const struct block_device_operations dm_blk_dops = { |
|---|
| 3160 | + .submit_bio = dm_submit_bio, |
|---|
| 3161 | + .open = dm_blk_open, |
|---|
| 3162 | + .release = dm_blk_close, |
|---|
| 3163 | + .ioctl = dm_blk_ioctl, |
|---|
| 3164 | + .getgeo = dm_blk_getgeo, |
|---|
| 3165 | + .report_zones = dm_blk_report_zones, |
|---|
| 3166 | + .pr_ops = &dm_pr_ops, |
|---|
| 3167 | + .owner = THIS_MODULE |
|---|
| 3168 | +}; |
|---|
| 3169 | + |
|---|
| 3170 | +static const struct block_device_operations dm_rq_blk_dops = { |
|---|
| 3376 | 3171 | .open = dm_blk_open, |
|---|
| 3377 | 3172 | .release = dm_blk_close, |
|---|
| 3378 | 3173 | .ioctl = dm_blk_ioctl, |
|---|
| .. | .. |
|---|
| 3383 | 3178 | |
|---|
| 3384 | 3179 | static const struct dax_operations dm_dax_ops = { |
|---|
| 3385 | 3180 | .direct_access = dm_dax_direct_access, |
|---|
| 3181 | + .dax_supported = dm_dax_supported, |
|---|
| 3386 | 3182 | .copy_from_iter = dm_dax_copy_from_iter, |
|---|
| 3387 | 3183 | .copy_to_iter = dm_dax_copy_to_iter, |
|---|
| 3184 | + .zero_page_range = dm_dax_zero_page_range, |
|---|
| 3388 | 3185 | }; |
|---|
| 3389 | 3186 | |
|---|
| 3390 | 3187 | /* |
|---|
| .. | .. |
|---|
| 3402 | 3199 | module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); |
|---|
| 3403 | 3200 | MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); |
|---|
| 3404 | 3201 | |
|---|
| 3202 | +module_param(swap_bios, int, S_IRUGO | S_IWUSR); |
|---|
| 3203 | +MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs"); |
|---|
| 3204 | + |
|---|
| 3405 | 3205 | MODULE_DESCRIPTION(DM_NAME " driver"); |
|---|
| 3406 | 3206 | MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); |
|---|
| 3407 | 3207 | MODULE_LICENSE("GPL"); |
|---|