| .. | .. |
|---|
| 1 | 1 | /* |
|---|
| 2 | | - * dm-snapshot.c |
|---|
| 3 | | - * |
|---|
| 4 | 2 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. |
|---|
| 5 | 3 | * |
|---|
| 6 | 4 | * This file is released under the GPL. |
|---|
| .. | .. |
|---|
| 13 | 11 | #include <linux/init.h> |
|---|
| 14 | 12 | #include <linux/kdev_t.h> |
|---|
| 15 | 13 | #include <linux/list.h> |
|---|
| 14 | +#include <linux/list_bl.h> |
|---|
| 16 | 15 | #include <linux/mempool.h> |
|---|
| 17 | 16 | #include <linux/module.h> |
|---|
| 18 | 17 | #include <linux/slab.h> |
|---|
| .. | .. |
|---|
| 43 | 42 | struct dm_exception_table { |
|---|
| 44 | 43 | uint32_t hash_mask; |
|---|
| 45 | 44 | unsigned hash_shift; |
|---|
| 46 | | - struct list_head *table; |
|---|
| 45 | + struct hlist_bl_head *table; |
|---|
| 47 | 46 | }; |
|---|
| 48 | 47 | |
|---|
| 49 | 48 | struct dm_snapshot { |
|---|
| 50 | | - struct mutex lock; |
|---|
| 49 | + struct rw_semaphore lock; |
|---|
| 51 | 50 | |
|---|
| 52 | 51 | struct dm_dev *origin; |
|---|
| 53 | 52 | struct dm_dev *cow; |
|---|
| .. | .. |
|---|
| 75 | 74 | |
|---|
| 76 | 75 | atomic_t pending_exceptions_count; |
|---|
| 77 | 76 | |
|---|
| 78 | | - /* Protected by "lock" */ |
|---|
| 77 | + spinlock_t pe_allocation_lock; |
|---|
| 78 | + |
|---|
| 79 | + /* Protected by "pe_allocation_lock" */ |
|---|
| 79 | 80 | sector_t exception_start_sequence; |
|---|
| 80 | 81 | |
|---|
| 81 | 82 | /* Protected by kcopyd single-threaded callback */ |
|---|
| .. | .. |
|---|
| 130 | 131 | * - I/O error while merging |
|---|
| 131 | 132 | * => stop merging; set merge_failed; process I/O normally. |
|---|
| 132 | 133 | */ |
|---|
| 133 | | - int merge_failed; |
|---|
| 134 | + bool merge_failed:1; |
|---|
| 135 | + |
|---|
| 136 | + bool discard_zeroes_cow:1; |
|---|
| 137 | + bool discard_passdown_origin:1; |
|---|
| 134 | 138 | |
|---|
| 135 | 139 | /* |
|---|
| 136 | 140 | * Incoming bios that overlap with chunks being merged must wait |
|---|
| .. | .. |
|---|
| 461 | 465 | if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) |
|---|
| 462 | 466 | continue; |
|---|
| 463 | 467 | |
|---|
| 464 | | - mutex_lock(&s->lock); |
|---|
| 468 | + down_read(&s->lock); |
|---|
| 465 | 469 | active = s->active; |
|---|
| 466 | | - mutex_unlock(&s->lock); |
|---|
| 470 | + up_read(&s->lock); |
|---|
| 467 | 471 | |
|---|
| 468 | 472 | if (active) { |
|---|
| 469 | 473 | if (snap_src) |
|---|
| .. | .. |
|---|
| 622 | 626 | * The lowest hash_shift bits of the chunk number are ignored, allowing |
|---|
| 623 | 627 | * some consecutive chunks to be grouped together. |
|---|
| 624 | 628 | */ |
|---|
| 629 | +static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk); |
|---|
| 630 | + |
|---|
| 631 | +/* Lock to protect access to the completed and pending exception hash tables. */ |
|---|
| 632 | +struct dm_exception_table_lock { |
|---|
| 633 | + struct hlist_bl_head *complete_slot; |
|---|
| 634 | + struct hlist_bl_head *pending_slot; |
|---|
| 635 | +}; |
|---|
| 636 | + |
|---|
| 637 | +static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, |
|---|
| 638 | + struct dm_exception_table_lock *lock) |
|---|
| 639 | +{ |
|---|
| 640 | + struct dm_exception_table *complete = &s->complete; |
|---|
| 641 | + struct dm_exception_table *pending = &s->pending; |
|---|
| 642 | + |
|---|
| 643 | + lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; |
|---|
| 644 | + lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; |
|---|
| 645 | +} |
|---|
| 646 | + |
|---|
| 647 | +static void dm_exception_table_lock(struct dm_exception_table_lock *lock) |
|---|
| 648 | +{ |
|---|
| 649 | + hlist_bl_lock(lock->complete_slot); |
|---|
| 650 | + hlist_bl_lock(lock->pending_slot); |
|---|
| 651 | +} |
|---|
| 652 | + |
|---|
| 653 | +static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) |
|---|
| 654 | +{ |
|---|
| 655 | + hlist_bl_unlock(lock->pending_slot); |
|---|
| 656 | + hlist_bl_unlock(lock->complete_slot); |
|---|
| 657 | +} |
|---|
| 658 | + |
|---|
| 625 | 659 | static int dm_exception_table_init(struct dm_exception_table *et, |
|---|
| 626 | 660 | uint32_t size, unsigned hash_shift) |
|---|
| 627 | 661 | { |
|---|
| .. | .. |
|---|
| 629 | 663 | |
|---|
| 630 | 664 | et->hash_shift = hash_shift; |
|---|
| 631 | 665 | et->hash_mask = size - 1; |
|---|
| 632 | | - et->table = dm_vcalloc(size, sizeof(struct list_head)); |
|---|
| 666 | + et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head)); |
|---|
| 633 | 667 | if (!et->table) |
|---|
| 634 | 668 | return -ENOMEM; |
|---|
| 635 | 669 | |
|---|
| 636 | 670 | for (i = 0; i < size; i++) |
|---|
| 637 | | - INIT_LIST_HEAD(et->table + i); |
|---|
| 671 | + INIT_HLIST_BL_HEAD(et->table + i); |
|---|
| 638 | 672 | |
|---|
| 639 | 673 | return 0; |
|---|
| 640 | 674 | } |
|---|
| .. | .. |
|---|
| 642 | 676 | static void dm_exception_table_exit(struct dm_exception_table *et, |
|---|
| 643 | 677 | struct kmem_cache *mem) |
|---|
| 644 | 678 | { |
|---|
| 645 | | - struct list_head *slot; |
|---|
| 646 | | - struct dm_exception *ex, *next; |
|---|
| 679 | + struct hlist_bl_head *slot; |
|---|
| 680 | + struct dm_exception *ex; |
|---|
| 681 | + struct hlist_bl_node *pos, *n; |
|---|
| 647 | 682 | int i, size; |
|---|
| 648 | 683 | |
|---|
| 649 | 684 | size = et->hash_mask + 1; |
|---|
| 650 | 685 | for (i = 0; i < size; i++) { |
|---|
| 651 | 686 | slot = et->table + i; |
|---|
| 652 | 687 | |
|---|
| 653 | | - list_for_each_entry_safe (ex, next, slot, hash_list) |
|---|
| 688 | + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) |
|---|
| 654 | 689 | kmem_cache_free(mem, ex); |
|---|
| 655 | 690 | } |
|---|
| 656 | 691 | |
|---|
| .. | .. |
|---|
| 664 | 699 | |
|---|
| 665 | 700 | static void dm_remove_exception(struct dm_exception *e) |
|---|
| 666 | 701 | { |
|---|
| 667 | | - list_del(&e->hash_list); |
|---|
| 702 | + hlist_bl_del(&e->hash_list); |
|---|
| 668 | 703 | } |
|---|
| 669 | 704 | |
|---|
| 670 | 705 | /* |
|---|
| .. | .. |
|---|
| 674 | 709 | static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, |
|---|
| 675 | 710 | chunk_t chunk) |
|---|
| 676 | 711 | { |
|---|
| 677 | | - struct list_head *slot; |
|---|
| 712 | + struct hlist_bl_head *slot; |
|---|
| 713 | + struct hlist_bl_node *pos; |
|---|
| 678 | 714 | struct dm_exception *e; |
|---|
| 679 | 715 | |
|---|
| 680 | 716 | slot = &et->table[exception_hash(et, chunk)]; |
|---|
| 681 | | - list_for_each_entry (e, slot, hash_list) |
|---|
| 717 | + hlist_bl_for_each_entry(e, pos, slot, hash_list) |
|---|
| 682 | 718 | if (chunk >= e->old_chunk && |
|---|
| 683 | 719 | chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) |
|---|
| 684 | 720 | return e; |
|---|
| .. | .. |
|---|
| 725 | 761 | static void dm_insert_exception(struct dm_exception_table *eh, |
|---|
| 726 | 762 | struct dm_exception *new_e) |
|---|
| 727 | 763 | { |
|---|
| 728 | | - struct list_head *l; |
|---|
| 764 | + struct hlist_bl_head *l; |
|---|
| 765 | + struct hlist_bl_node *pos; |
|---|
| 729 | 766 | struct dm_exception *e = NULL; |
|---|
| 730 | 767 | |
|---|
| 731 | 768 | l = &eh->table[exception_hash(eh, new_e->old_chunk)]; |
|---|
| .. | .. |
|---|
| 735 | 772 | goto out; |
|---|
| 736 | 773 | |
|---|
| 737 | 774 | /* List is ordered by old_chunk */ |
|---|
| 738 | | - list_for_each_entry_reverse(e, l, hash_list) { |
|---|
| 775 | + hlist_bl_for_each_entry(e, pos, l, hash_list) { |
|---|
| 739 | 776 | /* Insert after an existing chunk? */ |
|---|
| 740 | 777 | if (new_e->old_chunk == (e->old_chunk + |
|---|
| 741 | 778 | dm_consecutive_chunk_count(e) + 1) && |
|---|
| .. | .. |
|---|
| 756 | 793 | return; |
|---|
| 757 | 794 | } |
|---|
| 758 | 795 | |
|---|
| 759 | | - if (new_e->old_chunk > e->old_chunk) |
|---|
| 796 | + if (new_e->old_chunk < e->old_chunk) |
|---|
| 760 | 797 | break; |
|---|
| 761 | 798 | } |
|---|
| 762 | 799 | |
|---|
| 763 | 800 | out: |
|---|
| 764 | | - list_add(&new_e->hash_list, e ? &e->hash_list : l); |
|---|
| 801 | + if (!e) { |
|---|
| 802 | + /* |
|---|
| 803 | + * Either the table doesn't support consecutive chunks or slot |
|---|
| 804 | + * l is empty. |
|---|
| 805 | + */ |
|---|
| 806 | + hlist_bl_add_head(&new_e->hash_list, l); |
|---|
| 807 | + } else if (new_e->old_chunk < e->old_chunk) { |
|---|
| 808 | + /* Add before an existing exception */ |
|---|
| 809 | + hlist_bl_add_before(&new_e->hash_list, &e->hash_list); |
|---|
| 810 | + } else { |
|---|
| 811 | + /* Add to l's tail: e is the last exception in this slot */ |
|---|
| 812 | + hlist_bl_add_behind(&new_e->hash_list, &e->hash_list); |
|---|
| 813 | + } |
|---|
| 765 | 814 | } |
|---|
| 766 | 815 | |
|---|
| 767 | 816 | /* |
|---|
| .. | .. |
|---|
| 770 | 819 | */ |
|---|
| 771 | 820 | static int dm_add_exception(void *context, chunk_t old, chunk_t new) |
|---|
| 772 | 821 | { |
|---|
| 822 | + struct dm_exception_table_lock lock; |
|---|
| 773 | 823 | struct dm_snapshot *s = context; |
|---|
| 774 | 824 | struct dm_exception *e; |
|---|
| 775 | 825 | |
|---|
| .. | .. |
|---|
| 782 | 832 | /* Consecutive_count is implicitly initialised to zero */ |
|---|
| 783 | 833 | e->new_chunk = new; |
|---|
| 784 | 834 | |
|---|
| 835 | + /* |
|---|
| 836 | + * Although there is no need to lock access to the exception tables |
|---|
| 837 | + * here, if we don't then hlist_bl_add_head(), called by |
|---|
| 838 | + * dm_insert_exception(), will complain about accessing the |
|---|
| 839 | + * corresponding list without locking it first. |
|---|
| 840 | + */ |
|---|
| 841 | + dm_exception_table_lock_init(s, old, &lock); |
|---|
| 842 | + |
|---|
| 843 | + dm_exception_table_lock(&lock); |
|---|
| 785 | 844 | dm_insert_exception(&s->complete, e); |
|---|
| 845 | + dm_exception_table_unlock(&lock); |
|---|
| 786 | 846 | |
|---|
| 787 | 847 | return 0; |
|---|
| 788 | 848 | } |
|---|
| .. | .. |
|---|
| 811 | 871 | { |
|---|
| 812 | 872 | /* use a fixed size of 2MB */ |
|---|
| 813 | 873 | unsigned long mem = 2 * 1024 * 1024; |
|---|
| 814 | | - mem /= sizeof(struct list_head); |
|---|
| 874 | + mem /= sizeof(struct hlist_bl_head); |
|---|
| 815 | 875 | |
|---|
| 816 | 876 | return mem; |
|---|
| 817 | 877 | } |
|---|
| .. | .. |
|---|
| 931 | 991 | int r; |
|---|
| 932 | 992 | chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; |
|---|
| 933 | 993 | |
|---|
| 934 | | - mutex_lock(&s->lock); |
|---|
| 994 | + down_write(&s->lock); |
|---|
| 935 | 995 | |
|---|
| 936 | 996 | /* |
|---|
| 937 | 997 | * Process chunks (and associated exceptions) in reverse order |
|---|
| .. | .. |
|---|
| 946 | 1006 | b = __release_queued_bios_after_merge(s); |
|---|
| 947 | 1007 | |
|---|
| 948 | 1008 | out: |
|---|
| 949 | | - mutex_unlock(&s->lock); |
|---|
| 1009 | + up_write(&s->lock); |
|---|
| 950 | 1010 | if (b) |
|---|
| 951 | 1011 | flush_bios(b); |
|---|
| 952 | 1012 | |
|---|
| .. | .. |
|---|
| 1005 | 1065 | if (linear_chunks < 0) { |
|---|
| 1006 | 1066 | DMERR("Read error in exception store: " |
|---|
| 1007 | 1067 | "shutting down merge"); |
|---|
| 1008 | | - mutex_lock(&s->lock); |
|---|
| 1009 | | - s->merge_failed = 1; |
|---|
| 1010 | | - mutex_unlock(&s->lock); |
|---|
| 1068 | + down_write(&s->lock); |
|---|
| 1069 | + s->merge_failed = true; |
|---|
| 1070 | + up_write(&s->lock); |
|---|
| 1011 | 1071 | } |
|---|
| 1012 | 1072 | goto shut; |
|---|
| 1013 | 1073 | } |
|---|
| .. | .. |
|---|
| 1048 | 1108 | previous_count = read_pending_exceptions_done_count(); |
|---|
| 1049 | 1109 | } |
|---|
| 1050 | 1110 | |
|---|
| 1051 | | - mutex_lock(&s->lock); |
|---|
| 1111 | + down_write(&s->lock); |
|---|
| 1052 | 1112 | s->first_merging_chunk = old_chunk; |
|---|
| 1053 | 1113 | s->num_merging_chunks = linear_chunks; |
|---|
| 1054 | | - mutex_unlock(&s->lock); |
|---|
| 1114 | + up_write(&s->lock); |
|---|
| 1055 | 1115 | |
|---|
| 1056 | 1116 | /* Wait until writes to all 'linear_chunks' drain */ |
|---|
| 1057 | 1117 | for (i = 0; i < linear_chunks; i++) |
|---|
| 1058 | 1118 | __check_for_conflicting_io(s, old_chunk + i); |
|---|
| 1059 | 1119 | |
|---|
| 1060 | | - dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); |
|---|
| 1120 | + dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 1 << DM_KCOPYD_SNAP_MERGE, |
|---|
| 1121 | + merge_callback, s); |
|---|
| 1061 | 1122 | return; |
|---|
| 1062 | 1123 | |
|---|
| 1063 | 1124 | shut: |
|---|
| .. | .. |
|---|
| 1109 | 1170 | return; |
|---|
| 1110 | 1171 | |
|---|
| 1111 | 1172 | shut: |
|---|
| 1112 | | - mutex_lock(&s->lock); |
|---|
| 1113 | | - s->merge_failed = 1; |
|---|
| 1173 | + down_write(&s->lock); |
|---|
| 1174 | + s->merge_failed = true; |
|---|
| 1114 | 1175 | b = __release_queued_bios_after_merge(s); |
|---|
| 1115 | | - mutex_unlock(&s->lock); |
|---|
| 1176 | + up_write(&s->lock); |
|---|
| 1116 | 1177 | error_bios(b); |
|---|
| 1117 | 1178 | |
|---|
| 1118 | 1179 | merge_shutdown(s); |
|---|
| .. | .. |
|---|
| 1134 | 1195 | clear_bit(SHUTDOWN_MERGE, &s->state_bits); |
|---|
| 1135 | 1196 | } |
|---|
| 1136 | 1197 | |
|---|
| 1198 | +static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s, |
|---|
| 1199 | + struct dm_target *ti) |
|---|
| 1200 | +{ |
|---|
| 1201 | + int r; |
|---|
| 1202 | + unsigned argc; |
|---|
| 1203 | + const char *arg_name; |
|---|
| 1204 | + |
|---|
| 1205 | + static const struct dm_arg _args[] = { |
|---|
| 1206 | + {0, 2, "Invalid number of feature arguments"}, |
|---|
| 1207 | + }; |
|---|
| 1208 | + |
|---|
| 1209 | + /* |
|---|
| 1210 | + * No feature arguments supplied. |
|---|
| 1211 | + */ |
|---|
| 1212 | + if (!as->argc) |
|---|
| 1213 | + return 0; |
|---|
| 1214 | + |
|---|
| 1215 | + r = dm_read_arg_group(_args, as, &argc, &ti->error); |
|---|
| 1216 | + if (r) |
|---|
| 1217 | + return -EINVAL; |
|---|
| 1218 | + |
|---|
| 1219 | + while (argc && !r) { |
|---|
| 1220 | + arg_name = dm_shift_arg(as); |
|---|
| 1221 | + argc--; |
|---|
| 1222 | + |
|---|
| 1223 | + if (!strcasecmp(arg_name, "discard_zeroes_cow")) |
|---|
| 1224 | + s->discard_zeroes_cow = true; |
|---|
| 1225 | + |
|---|
| 1226 | + else if (!strcasecmp(arg_name, "discard_passdown_origin")) |
|---|
| 1227 | + s->discard_passdown_origin = true; |
|---|
| 1228 | + |
|---|
| 1229 | + else { |
|---|
| 1230 | + ti->error = "Unrecognised feature requested"; |
|---|
| 1231 | + r = -EINVAL; |
|---|
| 1232 | + break; |
|---|
| 1233 | + } |
|---|
| 1234 | + } |
|---|
| 1235 | + |
|---|
| 1236 | + if (!s->discard_zeroes_cow && s->discard_passdown_origin) { |
|---|
| 1237 | + /* |
|---|
| 1238 | + * TODO: really these are disjoint.. but ti->num_discard_bios |
|---|
| 1239 | + * and dm_bio_get_target_bio_nr() require rigid constraints. |
|---|
| 1240 | + */ |
|---|
| 1241 | + ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow"; |
|---|
| 1242 | + r = -EINVAL; |
|---|
| 1243 | + } |
|---|
| 1244 | + |
|---|
| 1245 | + return r; |
|---|
| 1246 | +} |
|---|
| 1247 | + |
|---|
| 1137 | 1248 | /* |
|---|
| 1138 | | - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size> |
|---|
| 1249 | + * Construct a snapshot mapping: |
|---|
| 1250 | + * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*] |
|---|
| 1139 | 1251 | */ |
|---|
| 1140 | 1252 | static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) |
|---|
| 1141 | 1253 | { |
|---|
| 1142 | 1254 | struct dm_snapshot *s; |
|---|
| 1255 | + struct dm_arg_set as; |
|---|
| 1143 | 1256 | int i; |
|---|
| 1144 | 1257 | int r = -EINVAL; |
|---|
| 1145 | 1258 | char *origin_path, *cow_path; |
|---|
| .. | .. |
|---|
| 1147 | 1260 | unsigned args_used, num_flush_bios = 1; |
|---|
| 1148 | 1261 | fmode_t origin_mode = FMODE_READ; |
|---|
| 1149 | 1262 | |
|---|
| 1150 | | - if (argc != 4) { |
|---|
| 1151 | | - ti->error = "requires exactly 4 arguments"; |
|---|
| 1263 | + if (argc < 4) { |
|---|
| 1264 | + ti->error = "requires 4 or more arguments"; |
|---|
| 1152 | 1265 | r = -EINVAL; |
|---|
| 1153 | 1266 | goto bad; |
|---|
| 1154 | 1267 | } |
|---|
| .. | .. |
|---|
| 1164 | 1277 | r = -ENOMEM; |
|---|
| 1165 | 1278 | goto bad; |
|---|
| 1166 | 1279 | } |
|---|
| 1280 | + |
|---|
| 1281 | + as.argc = argc; |
|---|
| 1282 | + as.argv = argv; |
|---|
| 1283 | + dm_consume_args(&as, 4); |
|---|
| 1284 | + r = parse_snapshot_features(&as, s, ti); |
|---|
| 1285 | + if (r) |
|---|
| 1286 | + goto bad_features; |
|---|
| 1167 | 1287 | |
|---|
| 1168 | 1288 | origin_path = argv[0]; |
|---|
| 1169 | 1289 | argv++; |
|---|
| .. | .. |
|---|
| 1208 | 1328 | s->snapshot_overflowed = 0; |
|---|
| 1209 | 1329 | s->active = 0; |
|---|
| 1210 | 1330 | atomic_set(&s->pending_exceptions_count, 0); |
|---|
| 1331 | + spin_lock_init(&s->pe_allocation_lock); |
|---|
| 1211 | 1332 | s->exception_start_sequence = 0; |
|---|
| 1212 | 1333 | s->exception_complete_sequence = 0; |
|---|
| 1213 | 1334 | s->out_of_order_tree = RB_ROOT; |
|---|
| 1214 | | - mutex_init(&s->lock); |
|---|
| 1335 | + init_rwsem(&s->lock); |
|---|
| 1215 | 1336 | INIT_LIST_HEAD(&s->list); |
|---|
| 1216 | 1337 | spin_lock_init(&s->pe_lock); |
|---|
| 1217 | 1338 | s->state_bits = 0; |
|---|
| 1218 | | - s->merge_failed = 0; |
|---|
| 1339 | + s->merge_failed = false; |
|---|
| 1219 | 1340 | s->first_merging_chunk = 0; |
|---|
| 1220 | 1341 | s->num_merging_chunks = 0; |
|---|
| 1221 | 1342 | bio_list_init(&s->bios_queued_during_merge); |
|---|
| .. | .. |
|---|
| 1250 | 1371 | |
|---|
| 1251 | 1372 | ti->private = s; |
|---|
| 1252 | 1373 | ti->num_flush_bios = num_flush_bios; |
|---|
| 1374 | + if (s->discard_zeroes_cow) |
|---|
| 1375 | + ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1); |
|---|
| 1253 | 1376 | ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk); |
|---|
| 1254 | 1377 | |
|---|
| 1255 | 1378 | /* Add snapshot to the list of snapshots for this origin */ |
|---|
| .. | .. |
|---|
| 1298 | 1421 | |
|---|
| 1299 | 1422 | bad_read_metadata: |
|---|
| 1300 | 1423 | unregister_snapshot(s); |
|---|
| 1301 | | - |
|---|
| 1302 | 1424 | bad_load_and_register: |
|---|
| 1303 | 1425 | mempool_exit(&s->pending_pool); |
|---|
| 1304 | | - |
|---|
| 1305 | 1426 | bad_pending_pool: |
|---|
| 1306 | 1427 | dm_kcopyd_client_destroy(s->kcopyd_client); |
|---|
| 1307 | | - |
|---|
| 1308 | 1428 | bad_kcopyd: |
|---|
| 1309 | 1429 | dm_exception_table_exit(&s->pending, pending_cache); |
|---|
| 1310 | 1430 | dm_exception_table_exit(&s->complete, exception_cache); |
|---|
| 1311 | | - |
|---|
| 1312 | 1431 | bad_hash_tables: |
|---|
| 1313 | 1432 | dm_exception_store_destroy(s->store); |
|---|
| 1314 | | - |
|---|
| 1315 | 1433 | bad_store: |
|---|
| 1316 | 1434 | dm_put_device(ti, s->cow); |
|---|
| 1317 | | - |
|---|
| 1318 | 1435 | bad_cow: |
|---|
| 1319 | 1436 | dm_put_device(ti, s->origin); |
|---|
| 1320 | | - |
|---|
| 1321 | 1437 | bad_origin: |
|---|
| 1438 | +bad_features: |
|---|
| 1322 | 1439 | kfree(s); |
|---|
| 1323 | | - |
|---|
| 1324 | 1440 | bad: |
|---|
| 1325 | 1441 | return r; |
|---|
| 1326 | 1442 | } |
|---|
| .. | .. |
|---|
| 1379 | 1495 | /* Check whether exception handover must be cancelled */ |
|---|
| 1380 | 1496 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); |
|---|
| 1381 | 1497 | if (snap_src && snap_dest && (s == snap_src)) { |
|---|
| 1382 | | - mutex_lock(&snap_dest->lock); |
|---|
| 1498 | + down_write(&snap_dest->lock); |
|---|
| 1383 | 1499 | snap_dest->valid = 0; |
|---|
| 1384 | | - mutex_unlock(&snap_dest->lock); |
|---|
| 1500 | + up_write(&snap_dest->lock); |
|---|
| 1385 | 1501 | DMERR("Cancelling snapshot handover."); |
|---|
| 1386 | 1502 | } |
|---|
| 1387 | 1503 | up_read(&_origins_lock); |
|---|
| .. | .. |
|---|
| 1411 | 1527 | mempool_exit(&s->pending_pool); |
|---|
| 1412 | 1528 | |
|---|
| 1413 | 1529 | dm_exception_store_destroy(s->store); |
|---|
| 1414 | | - |
|---|
| 1415 | | - mutex_destroy(&s->lock); |
|---|
| 1416 | 1530 | |
|---|
| 1417 | 1531 | bio_uninit(&s->flush_bio); |
|---|
| 1418 | 1532 | |
|---|
| .. | .. |
|---|
| 1480 | 1594 | while (bio) { |
|---|
| 1481 | 1595 | n = bio->bi_next; |
|---|
| 1482 | 1596 | bio->bi_next = NULL; |
|---|
| 1483 | | - generic_make_request(bio); |
|---|
| 1597 | + submit_bio_noacct(bio); |
|---|
| 1484 | 1598 | bio = n; |
|---|
| 1485 | 1599 | } |
|---|
| 1486 | 1600 | } |
|---|
| .. | .. |
|---|
| 1500 | 1614 | bio->bi_next = NULL; |
|---|
| 1501 | 1615 | r = do_origin(s->origin, bio, false); |
|---|
| 1502 | 1616 | if (r == DM_MAPIO_REMAPPED) |
|---|
| 1503 | | - generic_make_request(bio); |
|---|
| 1617 | + submit_bio_noacct(bio); |
|---|
| 1504 | 1618 | bio = n; |
|---|
| 1505 | 1619 | } |
|---|
| 1506 | 1620 | } |
|---|
| .. | .. |
|---|
| 1538 | 1652 | dm_table_event(s->ti->table); |
|---|
| 1539 | 1653 | } |
|---|
| 1540 | 1654 | |
|---|
| 1655 | +static void invalidate_snapshot(struct dm_snapshot *s, int err) |
|---|
| 1656 | +{ |
|---|
| 1657 | + down_write(&s->lock); |
|---|
| 1658 | + __invalidate_snapshot(s, err); |
|---|
| 1659 | + up_write(&s->lock); |
|---|
| 1660 | +} |
|---|
| 1661 | + |
|---|
| 1541 | 1662 | static void pending_complete(void *context, int success) |
|---|
| 1542 | 1663 | { |
|---|
| 1543 | 1664 | struct dm_snap_pending_exception *pe = context; |
|---|
| .. | .. |
|---|
| 1546 | 1667 | struct bio *origin_bios = NULL; |
|---|
| 1547 | 1668 | struct bio *snapshot_bios = NULL; |
|---|
| 1548 | 1669 | struct bio *full_bio = NULL; |
|---|
| 1670 | + struct dm_exception_table_lock lock; |
|---|
| 1549 | 1671 | int error = 0; |
|---|
| 1672 | + |
|---|
| 1673 | + dm_exception_table_lock_init(s, pe->e.old_chunk, &lock); |
|---|
| 1550 | 1674 | |
|---|
| 1551 | 1675 | if (!success) { |
|---|
| 1552 | 1676 | /* Read/write error - snapshot is unusable */ |
|---|
| 1553 | | - mutex_lock(&s->lock); |
|---|
| 1554 | | - __invalidate_snapshot(s, -EIO); |
|---|
| 1677 | + invalidate_snapshot(s, -EIO); |
|---|
| 1555 | 1678 | error = 1; |
|---|
| 1679 | + |
|---|
| 1680 | + dm_exception_table_lock(&lock); |
|---|
| 1556 | 1681 | goto out; |
|---|
| 1557 | 1682 | } |
|---|
| 1558 | 1683 | |
|---|
| 1559 | 1684 | e = alloc_completed_exception(GFP_NOIO); |
|---|
| 1560 | 1685 | if (!e) { |
|---|
| 1561 | | - mutex_lock(&s->lock); |
|---|
| 1562 | | - __invalidate_snapshot(s, -ENOMEM); |
|---|
| 1686 | + invalidate_snapshot(s, -ENOMEM); |
|---|
| 1563 | 1687 | error = 1; |
|---|
| 1688 | + |
|---|
| 1689 | + dm_exception_table_lock(&lock); |
|---|
| 1564 | 1690 | goto out; |
|---|
| 1565 | 1691 | } |
|---|
| 1566 | 1692 | *e = pe->e; |
|---|
| 1567 | 1693 | |
|---|
| 1568 | | - mutex_lock(&s->lock); |
|---|
| 1694 | + down_read(&s->lock); |
|---|
| 1695 | + dm_exception_table_lock(&lock); |
|---|
| 1569 | 1696 | if (!s->valid) { |
|---|
| 1697 | + up_read(&s->lock); |
|---|
| 1570 | 1698 | free_completed_exception(e); |
|---|
| 1571 | 1699 | error = 1; |
|---|
| 1700 | + |
|---|
| 1572 | 1701 | goto out; |
|---|
| 1573 | 1702 | } |
|---|
| 1574 | 1703 | |
|---|
| 1575 | | - /* Check for conflicting reads */ |
|---|
| 1576 | | - __check_for_conflicting_io(s, pe->e.old_chunk); |
|---|
| 1577 | | - |
|---|
| 1578 | 1704 | /* |
|---|
| 1579 | | - * Add a proper exception, and remove the |
|---|
| 1580 | | - * in-flight exception from the list. |
|---|
| 1705 | + * Add a proper exception. After inserting the completed exception all |
|---|
| 1706 | + * subsequent snapshot reads to this chunk will be redirected to the |
|---|
| 1707 | + * COW device. This ensures that we do not starve. Moreover, as long |
|---|
| 1708 | + * as the pending exception exists, neither origin writes nor snapshot |
|---|
| 1709 | + * merging can overwrite the chunk in origin. |
|---|
| 1581 | 1710 | */ |
|---|
| 1582 | 1711 | dm_insert_exception(&s->complete, e); |
|---|
| 1712 | + up_read(&s->lock); |
|---|
| 1713 | + |
|---|
| 1714 | + /* Wait for conflicting reads to drain */ |
|---|
| 1715 | + if (__chunk_is_tracked(s, pe->e.old_chunk)) { |
|---|
| 1716 | + dm_exception_table_unlock(&lock); |
|---|
| 1717 | + __check_for_conflicting_io(s, pe->e.old_chunk); |
|---|
| 1718 | + dm_exception_table_lock(&lock); |
|---|
| 1719 | + } |
|---|
| 1583 | 1720 | |
|---|
| 1584 | 1721 | out: |
|---|
| 1722 | + /* Remove the in-flight exception from the list */ |
|---|
| 1585 | 1723 | dm_remove_exception(&pe->e); |
|---|
| 1724 | + |
|---|
| 1725 | + dm_exception_table_unlock(&lock); |
|---|
| 1726 | + |
|---|
| 1586 | 1727 | snapshot_bios = bio_list_get(&pe->snapshot_bios); |
|---|
| 1587 | 1728 | origin_bios = bio_list_get(&pe->origin_bios); |
|---|
| 1588 | 1729 | full_bio = pe->full_bio; |
|---|
| 1589 | 1730 | if (full_bio) |
|---|
| 1590 | 1731 | full_bio->bi_end_io = pe->full_bio_end_io; |
|---|
| 1591 | 1732 | increment_pending_exceptions_done_count(); |
|---|
| 1592 | | - |
|---|
| 1593 | | - mutex_unlock(&s->lock); |
|---|
| 1594 | 1733 | |
|---|
| 1595 | 1734 | /* Submit any pending write bios */ |
|---|
| 1596 | 1735 | if (error) { |
|---|
| .. | .. |
|---|
| 1716 | 1855 | bio->bi_end_io = full_bio_end_io; |
|---|
| 1717 | 1856 | bio->bi_private = callback_data; |
|---|
| 1718 | 1857 | |
|---|
| 1719 | | - generic_make_request(bio); |
|---|
| 1858 | + submit_bio_noacct(bio); |
|---|
| 1720 | 1859 | } |
|---|
| 1721 | 1860 | |
|---|
| 1722 | 1861 | static struct dm_snap_pending_exception * |
|---|
| .. | .. |
|---|
| 1731 | 1870 | } |
|---|
| 1732 | 1871 | |
|---|
| 1733 | 1872 | /* |
|---|
| 1873 | + * Inserts a pending exception into the pending table. |
|---|
| 1874 | + * |
|---|
| 1875 | + * NOTE: a write lock must be held on the chunk's pending exception table slot |
|---|
| 1876 | + * before calling this. |
|---|
| 1877 | + */ |
|---|
| 1878 | +static struct dm_snap_pending_exception * |
|---|
| 1879 | +__insert_pending_exception(struct dm_snapshot *s, |
|---|
| 1880 | + struct dm_snap_pending_exception *pe, chunk_t chunk) |
|---|
| 1881 | +{ |
|---|
| 1882 | + pe->e.old_chunk = chunk; |
|---|
| 1883 | + bio_list_init(&pe->origin_bios); |
|---|
| 1884 | + bio_list_init(&pe->snapshot_bios); |
|---|
| 1885 | + pe->started = 0; |
|---|
| 1886 | + pe->full_bio = NULL; |
|---|
| 1887 | + |
|---|
| 1888 | + spin_lock(&s->pe_allocation_lock); |
|---|
| 1889 | + if (s->store->type->prepare_exception(s->store, &pe->e)) { |
|---|
| 1890 | + spin_unlock(&s->pe_allocation_lock); |
|---|
| 1891 | + free_pending_exception(pe); |
|---|
| 1892 | + return NULL; |
|---|
| 1893 | + } |
|---|
| 1894 | + |
|---|
| 1895 | + pe->exception_sequence = s->exception_start_sequence++; |
|---|
| 1896 | + spin_unlock(&s->pe_allocation_lock); |
|---|
| 1897 | + |
|---|
| 1898 | + dm_insert_exception(&s->pending, &pe->e); |
|---|
| 1899 | + |
|---|
| 1900 | + return pe; |
|---|
| 1901 | +} |
|---|
| 1902 | + |
|---|
| 1903 | +/* |
|---|
| 1734 | 1904 | * Looks to see if this snapshot already has a pending exception |
|---|
| 1735 | 1905 | * for this chunk, otherwise it allocates a new one and inserts |
|---|
| 1736 | 1906 | * it into the pending table. |
|---|
| 1737 | 1907 | * |
|---|
| 1738 | | - * NOTE: a write lock must be held on snap->lock before calling |
|---|
| 1739 | | - * this. |
|---|
| 1908 | + * NOTE: a write lock must be held on the chunk's pending exception table slot |
|---|
| 1909 | + * before calling this. |
|---|
| 1740 | 1910 | */ |
|---|
| 1741 | 1911 | static struct dm_snap_pending_exception * |
|---|
| 1742 | 1912 | __find_pending_exception(struct dm_snapshot *s, |
|---|
| .. | .. |
|---|
| 1750 | 1920 | return pe2; |
|---|
| 1751 | 1921 | } |
|---|
| 1752 | 1922 | |
|---|
| 1753 | | - pe->e.old_chunk = chunk; |
|---|
| 1754 | | - bio_list_init(&pe->origin_bios); |
|---|
| 1755 | | - bio_list_init(&pe->snapshot_bios); |
|---|
| 1756 | | - pe->started = 0; |
|---|
| 1757 | | - pe->full_bio = NULL; |
|---|
| 1758 | | - |
|---|
| 1759 | | - if (s->store->type->prepare_exception(s->store, &pe->e)) { |
|---|
| 1760 | | - free_pending_exception(pe); |
|---|
| 1761 | | - return NULL; |
|---|
| 1762 | | - } |
|---|
| 1763 | | - |
|---|
| 1764 | | - pe->exception_sequence = s->exception_start_sequence++; |
|---|
| 1765 | | - |
|---|
| 1766 | | - dm_insert_exception(&s->pending, &pe->e); |
|---|
| 1767 | | - |
|---|
| 1768 | | - return pe; |
|---|
| 1923 | + return __insert_pending_exception(s, pe, chunk); |
|---|
| 1769 | 1924 | } |
|---|
| 1770 | 1925 | |
|---|
| 1771 | 1926 | static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, |
|---|
| .. | .. |
|---|
| 1778 | 1933 | (bio->bi_iter.bi_sector & s->store->chunk_mask); |
|---|
| 1779 | 1934 | } |
|---|
| 1780 | 1935 | |
|---|
| 1936 | +static void zero_callback(int read_err, unsigned long write_err, void *context) |
|---|
| 1937 | +{ |
|---|
| 1938 | + struct bio *bio = context; |
|---|
| 1939 | + struct dm_snapshot *s = bio->bi_private; |
|---|
| 1940 | + |
|---|
| 1941 | + account_end_copy(s); |
|---|
| 1942 | + bio->bi_status = write_err ? BLK_STS_IOERR : 0; |
|---|
| 1943 | + bio_endio(bio); |
|---|
| 1944 | +} |
|---|
| 1945 | + |
|---|
| 1946 | +static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, |
|---|
| 1947 | + struct bio *bio, chunk_t chunk) |
|---|
| 1948 | +{ |
|---|
| 1949 | + struct dm_io_region dest; |
|---|
| 1950 | + |
|---|
| 1951 | + dest.bdev = s->cow->bdev; |
|---|
| 1952 | + dest.sector = bio->bi_iter.bi_sector; |
|---|
| 1953 | + dest.count = s->store->chunk_size; |
|---|
| 1954 | + |
|---|
| 1955 | + account_start_copy(s); |
|---|
| 1956 | + WARN_ON_ONCE(bio->bi_private); |
|---|
| 1957 | + bio->bi_private = s; |
|---|
| 1958 | + dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); |
|---|
| 1959 | +} |
|---|
| 1960 | + |
|---|
| 1961 | +static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio) |
|---|
| 1962 | +{ |
|---|
| 1963 | + return bio->bi_iter.bi_size == |
|---|
| 1964 | + (s->store->chunk_size << SECTOR_SHIFT); |
|---|
| 1965 | +} |
|---|
| 1966 | + |
|---|
| 1781 | 1967 | static int snapshot_map(struct dm_target *ti, struct bio *bio) |
|---|
| 1782 | 1968 | { |
|---|
| 1783 | 1969 | struct dm_exception *e; |
|---|
| .. | .. |
|---|
| 1785 | 1971 | int r = DM_MAPIO_REMAPPED; |
|---|
| 1786 | 1972 | chunk_t chunk; |
|---|
| 1787 | 1973 | struct dm_snap_pending_exception *pe = NULL; |
|---|
| 1974 | + struct dm_exception_table_lock lock; |
|---|
| 1788 | 1975 | |
|---|
| 1789 | 1976 | init_tracked_chunk(bio); |
|---|
| 1790 | 1977 | |
|---|
| .. | .. |
|---|
| 1794 | 1981 | } |
|---|
| 1795 | 1982 | |
|---|
| 1796 | 1983 | chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); |
|---|
| 1984 | + dm_exception_table_lock_init(s, chunk, &lock); |
|---|
| 1797 | 1985 | |
|---|
| 1798 | 1986 | /* Full snapshots are not usable */ |
|---|
| 1799 | 1987 | /* To get here the table must be live so s->active is always set. */ |
|---|
| .. | .. |
|---|
| 1805 | 1993 | ; /* wait_for_in_progress() has slept */ |
|---|
| 1806 | 1994 | } |
|---|
| 1807 | 1995 | |
|---|
| 1808 | | - mutex_lock(&s->lock); |
|---|
| 1996 | + down_read(&s->lock); |
|---|
| 1997 | + dm_exception_table_lock(&lock); |
|---|
| 1809 | 1998 | |
|---|
| 1810 | 1999 | if (!s->valid || (unlikely(s->snapshot_overflowed) && |
|---|
| 1811 | 2000 | bio_data_dir(bio) == WRITE)) { |
|---|
| .. | .. |
|---|
| 1813 | 2002 | goto out_unlock; |
|---|
| 1814 | 2003 | } |
|---|
| 1815 | 2004 | |
|---|
| 2005 | + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { |
|---|
| 2006 | + if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) { |
|---|
| 2007 | + /* |
|---|
| 2008 | + * passdown discard to origin (without triggering |
|---|
| 2009 | + * snapshot exceptions via do_origin; doing so would |
|---|
| 2010 | + * defeat the goal of freeing space in origin that is |
|---|
| 2011 | + * implied by the "discard_passdown_origin" feature) |
|---|
| 2012 | + */ |
|---|
| 2013 | + bio_set_dev(bio, s->origin->bdev); |
|---|
| 2014 | + track_chunk(s, bio, chunk); |
|---|
| 2015 | + goto out_unlock; |
|---|
| 2016 | + } |
|---|
| 2017 | + /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */ |
|---|
| 2018 | + } |
|---|
| 2019 | + |
|---|
| 1816 | 2020 | /* If the block is already remapped - use that, else remap it */ |
|---|
| 1817 | 2021 | e = dm_lookup_exception(&s->complete, chunk); |
|---|
| 1818 | 2022 | if (e) { |
|---|
| 1819 | 2023 | remap_exception(s, e, bio, chunk); |
|---|
| 2024 | + if (unlikely(bio_op(bio) == REQ_OP_DISCARD) && |
|---|
| 2025 | + io_overlaps_chunk(s, bio)) { |
|---|
| 2026 | + dm_exception_table_unlock(&lock); |
|---|
| 2027 | + up_read(&s->lock); |
|---|
| 2028 | + zero_exception(s, e, bio, chunk); |
|---|
| 2029 | + r = DM_MAPIO_SUBMITTED; /* discard is not issued */ |
|---|
| 2030 | + goto out; |
|---|
| 2031 | + } |
|---|
| 2032 | + goto out_unlock; |
|---|
| 2033 | + } |
|---|
| 2034 | + |
|---|
| 2035 | + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { |
|---|
| 2036 | + /* |
|---|
| 2037 | + * If no exception exists, complete the discard immediately; |
|---|
| 2038 | + * otherwise it would trigger a copy-out. |
|---|
| 2039 | + */ |
|---|
| 2040 | + bio_endio(bio); |
|---|
| 2041 | + r = DM_MAPIO_SUBMITTED; |
|---|
| 1820 | 2042 | goto out_unlock; |
|---|
| 1821 | 2043 | } |
|---|
| 1822 | 2044 | |
|---|
| .. | .. |
|---|
| 1828 | 2050 | if (bio_data_dir(bio) == WRITE) { |
|---|
| 1829 | 2051 | pe = __lookup_pending_exception(s, chunk); |
|---|
| 1830 | 2052 | if (!pe) { |
|---|
| 1831 | | - mutex_unlock(&s->lock); |
|---|
| 2053 | + dm_exception_table_unlock(&lock); |
|---|
| 1832 | 2054 | pe = alloc_pending_exception(s); |
|---|
| 1833 | | - mutex_lock(&s->lock); |
|---|
| 1834 | | - |
|---|
| 1835 | | - if (!s->valid || s->snapshot_overflowed) { |
|---|
| 1836 | | - free_pending_exception(pe); |
|---|
| 1837 | | - r = DM_MAPIO_KILL; |
|---|
| 1838 | | - goto out_unlock; |
|---|
| 1839 | | - } |
|---|
| 2055 | + dm_exception_table_lock(&lock); |
|---|
| 1840 | 2056 | |
|---|
| 1841 | 2057 | e = dm_lookup_exception(&s->complete, chunk); |
|---|
| 1842 | 2058 | if (e) { |
|---|
| .. | .. |
|---|
| 1847 | 2063 | |
|---|
| 1848 | 2064 | pe = __find_pending_exception(s, pe, chunk); |
|---|
| 1849 | 2065 | if (!pe) { |
|---|
| 2066 | + dm_exception_table_unlock(&lock); |
|---|
| 2067 | + up_read(&s->lock); |
|---|
| 2068 | + |
|---|
| 2069 | + down_write(&s->lock); |
|---|
| 2070 | + |
|---|
| 1850 | 2071 | if (s->store->userspace_supports_overflow) { |
|---|
| 1851 | | - s->snapshot_overflowed = 1; |
|---|
| 1852 | | - DMERR("Snapshot overflowed: Unable to allocate exception."); |
|---|
| 2072 | + if (s->valid && !s->snapshot_overflowed) { |
|---|
| 2073 | + s->snapshot_overflowed = 1; |
|---|
| 2074 | + DMERR("Snapshot overflowed: Unable to allocate exception."); |
|---|
| 2075 | + } |
|---|
| 1853 | 2076 | } else |
|---|
| 1854 | 2077 | __invalidate_snapshot(s, -ENOMEM); |
|---|
| 2078 | + up_write(&s->lock); |
|---|
| 2079 | + |
|---|
| 1855 | 2080 | r = DM_MAPIO_KILL; |
|---|
| 1856 | | - goto out_unlock; |
|---|
| 2081 | + goto out; |
|---|
| 1857 | 2082 | } |
|---|
| 1858 | 2083 | } |
|---|
| 1859 | 2084 | |
|---|
| .. | .. |
|---|
| 1861 | 2086 | |
|---|
| 1862 | 2087 | r = DM_MAPIO_SUBMITTED; |
|---|
| 1863 | 2088 | |
|---|
| 1864 | | - if (!pe->started && |
|---|
| 1865 | | - bio->bi_iter.bi_size == |
|---|
| 1866 | | - (s->store->chunk_size << SECTOR_SHIFT)) { |
|---|
| 2089 | + if (!pe->started && io_overlaps_chunk(s, bio)) { |
|---|
| 1867 | 2090 | pe->started = 1; |
|---|
| 1868 | | - mutex_unlock(&s->lock); |
|---|
| 2091 | + |
|---|
| 2092 | + dm_exception_table_unlock(&lock); |
|---|
| 2093 | + up_read(&s->lock); |
|---|
| 2094 | + |
|---|
| 1869 | 2095 | start_full_bio(pe, bio); |
|---|
| 1870 | 2096 | goto out; |
|---|
| 1871 | 2097 | } |
|---|
| .. | .. |
|---|
| 1873 | 2099 | bio_list_add(&pe->snapshot_bios, bio); |
|---|
| 1874 | 2100 | |
|---|
| 1875 | 2101 | if (!pe->started) { |
|---|
| 1876 | | - /* this is protected by snap->lock */ |
|---|
| 2102 | + /* this is protected by the exception table lock */ |
|---|
| 1877 | 2103 | pe->started = 1; |
|---|
| 1878 | | - mutex_unlock(&s->lock); |
|---|
| 2104 | + |
|---|
| 2105 | + dm_exception_table_unlock(&lock); |
|---|
| 2106 | + up_read(&s->lock); |
|---|
| 2107 | + |
|---|
| 1879 | 2108 | start_copy(pe); |
|---|
| 1880 | 2109 | goto out; |
|---|
| 1881 | 2110 | } |
|---|
| .. | .. |
|---|
| 1885 | 2114 | } |
|---|
| 1886 | 2115 | |
|---|
| 1887 | 2116 | out_unlock: |
|---|
| 1888 | | - mutex_unlock(&s->lock); |
|---|
| 2117 | + dm_exception_table_unlock(&lock); |
|---|
| 2118 | + up_read(&s->lock); |
|---|
| 1889 | 2119 | out: |
|---|
| 1890 | 2120 | return r; |
|---|
| 1891 | 2121 | } |
|---|
| .. | .. |
|---|
| 1919 | 2149 | return DM_MAPIO_REMAPPED; |
|---|
| 1920 | 2150 | } |
|---|
| 1921 | 2151 | |
|---|
| 2152 | + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { |
|---|
| 2153 | + /* Once merging, discards no longer effect change */ |
|---|
| 2154 | + bio_endio(bio); |
|---|
| 2155 | + return DM_MAPIO_SUBMITTED; |
|---|
| 2156 | + } |
|---|
| 2157 | + |
|---|
| 1922 | 2158 | chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); |
|---|
| 1923 | 2159 | |
|---|
| 1924 | | - mutex_lock(&s->lock); |
|---|
| 2160 | + down_write(&s->lock); |
|---|
| 1925 | 2161 | |
|---|
| 1926 | 2162 | /* Full merging snapshots are redirected to the origin */ |
|---|
| 1927 | 2163 | if (!s->valid) |
|---|
| .. | .. |
|---|
| 1952 | 2188 | bio_set_dev(bio, s->origin->bdev); |
|---|
| 1953 | 2189 | |
|---|
| 1954 | 2190 | if (bio_data_dir(bio) == WRITE) { |
|---|
| 1955 | | - mutex_unlock(&s->lock); |
|---|
| 2191 | + up_write(&s->lock); |
|---|
| 1956 | 2192 | return do_origin(s->origin, bio, false); |
|---|
| 1957 | 2193 | } |
|---|
| 1958 | 2194 | |
|---|
| 1959 | 2195 | out_unlock: |
|---|
| 1960 | | - mutex_unlock(&s->lock); |
|---|
| 2196 | + up_write(&s->lock); |
|---|
| 1961 | 2197 | |
|---|
| 1962 | 2198 | return r; |
|---|
| 1963 | 2199 | } |
|---|
| .. | .. |
|---|
| 1989 | 2225 | down_read(&_origins_lock); |
|---|
| 1990 | 2226 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); |
|---|
| 1991 | 2227 | if (snap_src && snap_dest) { |
|---|
| 1992 | | - mutex_lock(&snap_src->lock); |
|---|
| 2228 | + down_read(&snap_src->lock); |
|---|
| 1993 | 2229 | if (s == snap_src) { |
|---|
| 1994 | 2230 | DMERR("Unable to resume snapshot source until " |
|---|
| 1995 | 2231 | "handover completes."); |
|---|
| .. | .. |
|---|
| 1999 | 2235 | "source is suspended."); |
|---|
| 2000 | 2236 | r = -EINVAL; |
|---|
| 2001 | 2237 | } |
|---|
| 2002 | | - mutex_unlock(&snap_src->lock); |
|---|
| 2238 | + up_read(&snap_src->lock); |
|---|
| 2003 | 2239 | } |
|---|
| 2004 | 2240 | up_read(&_origins_lock); |
|---|
| 2005 | 2241 | |
|---|
| .. | .. |
|---|
| 2045 | 2281 | |
|---|
| 2046 | 2282 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); |
|---|
| 2047 | 2283 | if (snap_src && snap_dest) { |
|---|
| 2048 | | - mutex_lock(&snap_src->lock); |
|---|
| 2049 | | - mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); |
|---|
| 2284 | + down_write(&snap_src->lock); |
|---|
| 2285 | + down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); |
|---|
| 2050 | 2286 | __handover_exceptions(snap_src, snap_dest); |
|---|
| 2051 | | - mutex_unlock(&snap_dest->lock); |
|---|
| 2052 | | - mutex_unlock(&snap_src->lock); |
|---|
| 2287 | + up_write(&snap_dest->lock); |
|---|
| 2288 | + up_write(&snap_src->lock); |
|---|
| 2053 | 2289 | } |
|---|
| 2054 | 2290 | |
|---|
| 2055 | 2291 | up_read(&_origins_lock); |
|---|
| .. | .. |
|---|
| 2064 | 2300 | /* Now we have correct chunk size, reregister */ |
|---|
| 2065 | 2301 | reregister_snapshot(s); |
|---|
| 2066 | 2302 | |
|---|
| 2067 | | - mutex_lock(&s->lock); |
|---|
| 2303 | + down_write(&s->lock); |
|---|
| 2068 | 2304 | s->active = 1; |
|---|
| 2069 | | - mutex_unlock(&s->lock); |
|---|
| 2305 | + up_write(&s->lock); |
|---|
| 2070 | 2306 | } |
|---|
| 2071 | 2307 | |
|---|
| 2072 | 2308 | static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) |
|---|
| .. | .. |
|---|
| 2102 | 2338 | { |
|---|
| 2103 | 2339 | unsigned sz = 0; |
|---|
| 2104 | 2340 | struct dm_snapshot *snap = ti->private; |
|---|
| 2341 | + unsigned num_features; |
|---|
| 2105 | 2342 | |
|---|
| 2106 | 2343 | switch (type) { |
|---|
| 2107 | 2344 | case STATUSTYPE_INFO: |
|---|
| 2108 | 2345 | |
|---|
| 2109 | | - mutex_lock(&snap->lock); |
|---|
| 2346 | + down_write(&snap->lock); |
|---|
| 2110 | 2347 | |
|---|
| 2111 | 2348 | if (!snap->valid) |
|---|
| 2112 | 2349 | DMEMIT("Invalid"); |
|---|
| .. | .. |
|---|
| 2131 | 2368 | DMEMIT("Unknown"); |
|---|
| 2132 | 2369 | } |
|---|
| 2133 | 2370 | |
|---|
| 2134 | | - mutex_unlock(&snap->lock); |
|---|
| 2371 | + up_write(&snap->lock); |
|---|
| 2135 | 2372 | |
|---|
| 2136 | 2373 | break; |
|---|
| 2137 | 2374 | |
|---|
| .. | .. |
|---|
| 2142 | 2379 | * make sense. |
|---|
| 2143 | 2380 | */ |
|---|
| 2144 | 2381 | DMEMIT("%s %s", snap->origin->name, snap->cow->name); |
|---|
| 2145 | | - snap->store->type->status(snap->store, type, result + sz, |
|---|
| 2146 | | - maxlen - sz); |
|---|
| 2382 | + sz += snap->store->type->status(snap->store, type, result + sz, |
|---|
| 2383 | + maxlen - sz); |
|---|
| 2384 | + num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin; |
|---|
| 2385 | + if (num_features) { |
|---|
| 2386 | + DMEMIT(" %u", num_features); |
|---|
| 2387 | + if (snap->discard_zeroes_cow) |
|---|
| 2388 | + DMEMIT(" discard_zeroes_cow"); |
|---|
| 2389 | + if (snap->discard_passdown_origin) |
|---|
| 2390 | + DMEMIT(" discard_passdown_origin"); |
|---|
| 2391 | + } |
|---|
| 2147 | 2392 | break; |
|---|
| 2148 | 2393 | } |
|---|
| 2149 | 2394 | } |
|---|
| .. | .. |
|---|
| 2162 | 2407 | return r; |
|---|
| 2163 | 2408 | } |
|---|
| 2164 | 2409 | |
|---|
| 2410 | +static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits) |
|---|
| 2411 | +{ |
|---|
| 2412 | + struct dm_snapshot *snap = ti->private; |
|---|
| 2413 | + |
|---|
| 2414 | + if (snap->discard_zeroes_cow) { |
|---|
| 2415 | + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; |
|---|
| 2416 | + |
|---|
| 2417 | + down_read(&_origins_lock); |
|---|
| 2418 | + |
|---|
| 2419 | + (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL); |
|---|
| 2420 | + if (snap_src && snap_dest) |
|---|
| 2421 | + snap = snap_src; |
|---|
| 2422 | + |
|---|
| 2423 | + /* All discards are split on chunk_size boundary */ |
|---|
| 2424 | + limits->discard_granularity = snap->store->chunk_size; |
|---|
| 2425 | + limits->max_discard_sectors = snap->store->chunk_size; |
|---|
| 2426 | + |
|---|
| 2427 | + up_read(&_origins_lock); |
|---|
| 2428 | + } |
|---|
| 2429 | +} |
|---|
| 2165 | 2430 | |
|---|
| 2166 | 2431 | /*----------------------------------------------------------------- |
|---|
| 2167 | 2432 | * Origin methods |
|---|
| .. | .. |
|---|
| 2183 | 2448 | int r = DM_MAPIO_REMAPPED; |
|---|
| 2184 | 2449 | struct dm_snapshot *snap; |
|---|
| 2185 | 2450 | struct dm_exception *e; |
|---|
| 2186 | | - struct dm_snap_pending_exception *pe; |
|---|
| 2451 | + struct dm_snap_pending_exception *pe, *pe2; |
|---|
| 2187 | 2452 | struct dm_snap_pending_exception *pe_to_start_now = NULL; |
|---|
| 2188 | 2453 | struct dm_snap_pending_exception *pe_to_start_last = NULL; |
|---|
| 2454 | + struct dm_exception_table_lock lock; |
|---|
| 2189 | 2455 | chunk_t chunk; |
|---|
| 2190 | 2456 | |
|---|
| 2191 | 2457 | /* Do all the snapshots on this origin */ |
|---|
| .. | .. |
|---|
| 2197 | 2463 | if (dm_target_is_snapshot_merge(snap->ti)) |
|---|
| 2198 | 2464 | continue; |
|---|
| 2199 | 2465 | |
|---|
| 2200 | | - mutex_lock(&snap->lock); |
|---|
| 2201 | | - |
|---|
| 2202 | | - /* Only deal with valid and active snapshots */ |
|---|
| 2203 | | - if (!snap->valid || !snap->active) |
|---|
| 2204 | | - goto next_snapshot; |
|---|
| 2205 | | - |
|---|
| 2206 | 2466 | /* Nothing to do if writing beyond end of snapshot */ |
|---|
| 2207 | 2467 | if (sector >= dm_table_get_size(snap->ti->table)) |
|---|
| 2208 | | - goto next_snapshot; |
|---|
| 2468 | + continue; |
|---|
| 2209 | 2469 | |
|---|
| 2210 | 2470 | /* |
|---|
| 2211 | 2471 | * Remember, different snapshots can have |
|---|
| 2212 | 2472 | * different chunk sizes. |
|---|
| 2213 | 2473 | */ |
|---|
| 2214 | 2474 | chunk = sector_to_chunk(snap->store, sector); |
|---|
| 2475 | + dm_exception_table_lock_init(snap, chunk, &lock); |
|---|
| 2215 | 2476 | |
|---|
| 2216 | | - /* |
|---|
| 2217 | | - * Check exception table to see if block |
|---|
| 2218 | | - * is already remapped in this snapshot |
|---|
| 2219 | | - * and trigger an exception if not. |
|---|
| 2220 | | - */ |
|---|
| 2221 | | - e = dm_lookup_exception(&snap->complete, chunk); |
|---|
| 2222 | | - if (e) |
|---|
| 2477 | + down_read(&snap->lock); |
|---|
| 2478 | + dm_exception_table_lock(&lock); |
|---|
| 2479 | + |
|---|
| 2480 | + /* Only deal with valid and active snapshots */ |
|---|
| 2481 | + if (!snap->valid || !snap->active) |
|---|
| 2223 | 2482 | goto next_snapshot; |
|---|
| 2224 | 2483 | |
|---|
| 2225 | 2484 | pe = __lookup_pending_exception(snap, chunk); |
|---|
| 2226 | 2485 | if (!pe) { |
|---|
| 2227 | | - mutex_unlock(&snap->lock); |
|---|
| 2228 | | - pe = alloc_pending_exception(snap); |
|---|
| 2229 | | - mutex_lock(&snap->lock); |
|---|
| 2230 | | - |
|---|
| 2231 | | - if (!snap->valid) { |
|---|
| 2232 | | - free_pending_exception(pe); |
|---|
| 2233 | | - goto next_snapshot; |
|---|
| 2234 | | - } |
|---|
| 2235 | | - |
|---|
| 2486 | + /* |
|---|
| 2487 | + * Check exception table to see if block is already |
|---|
| 2488 | + * remapped in this snapshot and trigger an exception |
|---|
| 2489 | + * if not. |
|---|
| 2490 | + */ |
|---|
| 2236 | 2491 | e = dm_lookup_exception(&snap->complete, chunk); |
|---|
| 2237 | | - if (e) { |
|---|
| 2238 | | - free_pending_exception(pe); |
|---|
| 2492 | + if (e) |
|---|
| 2239 | 2493 | goto next_snapshot; |
|---|
| 2240 | | - } |
|---|
| 2241 | 2494 | |
|---|
| 2242 | | - pe = __find_pending_exception(snap, pe, chunk); |
|---|
| 2243 | | - if (!pe) { |
|---|
| 2244 | | - __invalidate_snapshot(snap, -ENOMEM); |
|---|
| 2245 | | - goto next_snapshot; |
|---|
| 2495 | + dm_exception_table_unlock(&lock); |
|---|
| 2496 | + pe = alloc_pending_exception(snap); |
|---|
| 2497 | + dm_exception_table_lock(&lock); |
|---|
| 2498 | + |
|---|
| 2499 | + pe2 = __lookup_pending_exception(snap, chunk); |
|---|
| 2500 | + |
|---|
| 2501 | + if (!pe2) { |
|---|
| 2502 | + e = dm_lookup_exception(&snap->complete, chunk); |
|---|
| 2503 | + if (e) { |
|---|
| 2504 | + free_pending_exception(pe); |
|---|
| 2505 | + goto next_snapshot; |
|---|
| 2506 | + } |
|---|
| 2507 | + |
|---|
| 2508 | + pe = __insert_pending_exception(snap, pe, chunk); |
|---|
| 2509 | + if (!pe) { |
|---|
| 2510 | + dm_exception_table_unlock(&lock); |
|---|
| 2511 | + up_read(&snap->lock); |
|---|
| 2512 | + |
|---|
| 2513 | + invalidate_snapshot(snap, -ENOMEM); |
|---|
| 2514 | + continue; |
|---|
| 2515 | + } |
|---|
| 2516 | + } else { |
|---|
| 2517 | + free_pending_exception(pe); |
|---|
| 2518 | + pe = pe2; |
|---|
| 2246 | 2519 | } |
|---|
| 2247 | 2520 | } |
|---|
| 2248 | 2521 | |
|---|
| .. | .. |
|---|
| 2269 | 2542 | } |
|---|
| 2270 | 2543 | |
|---|
| 2271 | 2544 | next_snapshot: |
|---|
| 2272 | | - mutex_unlock(&snap->lock); |
|---|
| 2545 | + dm_exception_table_unlock(&lock); |
|---|
| 2546 | + up_read(&snap->lock); |
|---|
| 2273 | 2547 | |
|---|
| 2274 | 2548 | if (pe_to_start_now) { |
|---|
| 2275 | 2549 | start_copy(pe_to_start_now); |
|---|
| .. | .. |
|---|
| 2423 | 2697 | return do_origin(o->dev, bio, true); |
|---|
| 2424 | 2698 | } |
|---|
| 2425 | 2699 | |
|---|
| 2426 | | -static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, |
|---|
| 2427 | | - long nr_pages, void **kaddr, pfn_t *pfn) |
|---|
| 2428 | | -{ |
|---|
| 2429 | | - DMWARN("device does not support dax."); |
|---|
| 2430 | | - return -EIO; |
|---|
| 2431 | | -} |
|---|
| 2432 | | - |
|---|
| 2433 | 2700 | /* |
|---|
| 2434 | 2701 | * Set the target "max_io_len" field to the minimum of all the snapshots' |
|---|
| 2435 | 2702 | * chunk sizes. |
|---|
| .. | .. |
|---|
| 2489 | 2756 | .postsuspend = origin_postsuspend, |
|---|
| 2490 | 2757 | .status = origin_status, |
|---|
| 2491 | 2758 | .iterate_devices = origin_iterate_devices, |
|---|
| 2492 | | - .direct_access = origin_dax_direct_access, |
|---|
| 2493 | 2759 | }; |
|---|
| 2494 | 2760 | |
|---|
| 2495 | 2761 | static struct target_type snapshot_target = { |
|---|
| 2496 | 2762 | .name = "snapshot", |
|---|
| 2497 | | - .version = {1, 15, 0}, |
|---|
| 2763 | + .version = {1, 16, 0}, |
|---|
| 2498 | 2764 | .module = THIS_MODULE, |
|---|
| 2499 | 2765 | .ctr = snapshot_ctr, |
|---|
| 2500 | 2766 | .dtr = snapshot_dtr, |
|---|
| .. | .. |
|---|
| 2504 | 2770 | .resume = snapshot_resume, |
|---|
| 2505 | 2771 | .status = snapshot_status, |
|---|
| 2506 | 2772 | .iterate_devices = snapshot_iterate_devices, |
|---|
| 2773 | + .io_hints = snapshot_io_hints, |
|---|
| 2507 | 2774 | }; |
|---|
| 2508 | 2775 | |
|---|
| 2509 | 2776 | static struct target_type merge_target = { |
|---|
| 2510 | 2777 | .name = dm_snapshot_merge_target_name, |
|---|
| 2511 | | - .version = {1, 4, 0}, |
|---|
| 2778 | + .version = {1, 5, 0}, |
|---|
| 2512 | 2779 | .module = THIS_MODULE, |
|---|
| 2513 | 2780 | .ctr = snapshot_ctr, |
|---|
| 2514 | 2781 | .dtr = snapshot_dtr, |
|---|
| .. | .. |
|---|
| 2519 | 2786 | .resume = snapshot_merge_resume, |
|---|
| 2520 | 2787 | .status = snapshot_status, |
|---|
| 2521 | 2788 | .iterate_devices = snapshot_iterate_devices, |
|---|
| 2789 | + .io_hints = snapshot_io_hints, |
|---|
| 2522 | 2790 | }; |
|---|
| 2523 | 2791 | |
|---|
| 2524 | 2792 | static int __init dm_snapshot_init(void) |
|---|