.. | ..
1 | 1 | /*
2 | | - * dm-snapshot.c
3 | | - *
4 | 2 |  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
5 | 3 |  *
6 | 4 |  * This file is released under the GPL.
.. | ..
13 | 11 | #include <linux/init.h>
14 | 12 | #include <linux/kdev_t.h>
15 | 13 | #include <linux/list.h>
| 14 | +#include <linux/list_bl.h>
16 | 15 | #include <linux/mempool.h>
17 | 16 | #include <linux/module.h>
18 | 17 | #include <linux/slab.h>
.. | ..
43 | 42 | struct dm_exception_table {
44 | 43 | 	uint32_t hash_mask;
45 | 44 | 	unsigned hash_shift;
46 | | -	struct list_head *table;
| 45 | +	struct hlist_bl_head *table;
47 | 46 | };
48 | 47 |
49 | 48 | struct dm_snapshot {
50 | | -	struct mutex lock;
| 49 | +	struct rw_semaphore lock;
51 | 50 |
52 | 51 | 	struct dm_dev *origin;
53 | 52 | 	struct dm_dev *cow;
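These two one-line struct changes carry most of the patch: the snapshot-wide `struct mutex` becomes a `struct rw_semaphore`, so paths that only read snapshot state can run concurrently, and the exception-table buckets become `struct hlist_bl_head`, which packs a bit spinlock into bit 0 of the bucket's head pointer, giving per-bucket locking at no extra memory cost. A minimal, self-contained sketch of the hlist_bl pattern — the `struct entry` type, `bucket` variable, and helper names here are hypothetical, for illustration only:

	#include <linux/list_bl.h>

	/* Hypothetical entry type, for illustration only. */
	struct entry {
		unsigned long key;
		struct hlist_bl_node node;
	};

	static struct hlist_bl_head bucket; /* INIT_HLIST_BL_HEAD(&bucket) at init */

	static void add_entry(struct entry *e)
	{
		hlist_bl_lock(&bucket);		/* spins on bit 0 of bucket.first */
		hlist_bl_add_head(&e->node, &bucket);
		hlist_bl_unlock(&bucket);
	}

	static struct entry *find_entry(unsigned long key)
	{
		struct entry *e, *found = NULL;
		struct hlist_bl_node *pos;

		hlist_bl_lock(&bucket);
		hlist_bl_for_each_entry(e, pos, &bucket, node) {
			if (e->key == key) {
				found = e;
				break;
			}
		}
		hlist_bl_unlock(&bucket);
		return found;
	}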
.. | ..
75 | 74 |
76 | 75 | 	atomic_t pending_exceptions_count;
77 | 76 |
78 | | -	/* Protected by "lock" */
| 77 | +	spinlock_t pe_allocation_lock;
| 78 | +
| 79 | +	/* Protected by "pe_allocation_lock" */
79 | 80 | 	sector_t exception_start_sequence;
80 | 81 |
81 | 82 | 	/* Protected by kcopyd single-threaded callback */
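`exception_start_sequence` loses the protection of the big per-snapshot lock, which is now mostly taken for reading, and gains a dedicated spinlock instead. The allocation path (see `__insert_pending_exception()` later in this patch) then needs only a short critical section:

	spin_lock(&s->pe_allocation_lock);
	/* ... prepare_exception() ... */
	pe->exception_sequence = s->exception_start_sequence++;
	spin_unlock(&s->pe_allocation_lock);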
.. | ..
130 | 131 | 	 * - I/O error while merging
131 | 132 | 	 *    => stop merging; set merge_failed; process I/O normally.
132 | 133 | 	 */
133 | | -	int merge_failed;
| 134 | +	bool merge_failed:1;
| 135 | +
| 136 | +	bool discard_zeroes_cow:1;
| 137 | +	bool discard_passdown_origin:1;
134 | 138 |
135 | 139 | 	/*
136 | 140 | 	 * Incoming bios that overlap with chunks being merged must wait
.. | ..
461 | 465 | 		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
462 | 466 | 			continue;
463 | 467 |
464 | | -		mutex_lock(&s->lock);
| 468 | +		down_read(&s->lock);
465 | 469 | 		active = s->active;
466 | | -		mutex_unlock(&s->lock);
| 470 | +		up_read(&s->lock);
467 | 471 |
468 | 472 | 		if (active) {
469 | 473 | 			if (snap_src)
.. | ..
622 | 626 |  * The lowest hash_shift bits of the chunk number are ignored, allowing
623 | 627 |  * some consecutive chunks to be grouped together.
624 | 628 |  */
| 629 | +static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
| 630 | +
| 631 | +/* Lock to protect access to the completed and pending exception hash tables. */
| 632 | +struct dm_exception_table_lock {
| 633 | +	struct hlist_bl_head *complete_slot;
| 634 | +	struct hlist_bl_head *pending_slot;
| 635 | +};
| 636 | +
| 637 | +static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
| 638 | +					 struct dm_exception_table_lock *lock)
| 639 | +{
| 640 | +	struct dm_exception_table *complete = &s->complete;
| 641 | +	struct dm_exception_table *pending = &s->pending;
| 642 | +
| 643 | +	lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
| 644 | +	lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
| 645 | +}
| 646 | +
| 647 | +static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
| 648 | +{
| 649 | +	hlist_bl_lock(lock->complete_slot);
| 650 | +	hlist_bl_lock(lock->pending_slot);
| 651 | +}
| 652 | +
| 653 | +static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
| 654 | +{
| 655 | +	hlist_bl_unlock(lock->pending_slot);
| 656 | +	hlist_bl_unlock(lock->complete_slot);
| 657 | +}
| 658 | +
625 | 659 | static int dm_exception_table_init(struct dm_exception_table *et,
626 | 660 | 				   uint32_t size, unsigned hash_shift)
627 | 661 | {
.. | ..
629 | 663 |
630 | 664 | 	et->hash_shift = hash_shift;
631 | 665 | 	et->hash_mask = size - 1;
632 | | -	et->table = dm_vcalloc(size, sizeof(struct list_head));
| 666 | +	et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head));
633 | 667 | 	if (!et->table)
634 | 668 | 		return -ENOMEM;
635 | 669 |
636 | 670 | 	for (i = 0; i < size; i++)
637 | | -		INIT_LIST_HEAD(et->table + i);
| 671 | +		INIT_HLIST_BL_HEAD(et->table + i);
638 | 672 |
639 | 673 | 	return 0;
640 | 674 | }
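Note the fixed ordering in the helpers above: `dm_exception_table_lock()` always takes the completed-table slot before the pending-table slot, and `dm_exception_table_unlock()` releases them in reverse, so two CPUs contending on the same chunk cannot deadlock. Because both tables hash the same chunk with their own `hash_shift`/`hash_mask`, one init call pins down both slots that any operation on that chunk could touch. The caller pattern used throughout the rest of the patch is:

	struct dm_exception_table_lock lock;

	dm_exception_table_lock_init(s, chunk, &lock);

	dm_exception_table_lock(&lock);
	/* ... look up or insert exceptions for this chunk in s->complete
	 * and s->pending ... */
	dm_exception_table_unlock(&lock);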
.. | ..
642 | 676 | static void dm_exception_table_exit(struct dm_exception_table *et,
643 | 677 | 				    struct kmem_cache *mem)
644 | 678 | {
645 | | -	struct list_head *slot;
646 | | -	struct dm_exception *ex, *next;
| 679 | +	struct hlist_bl_head *slot;
| 680 | +	struct dm_exception *ex;
| 681 | +	struct hlist_bl_node *pos, *n;
647 | 682 | 	int i, size;
648 | 683 |
649 | 684 | 	size = et->hash_mask + 1;
650 | 685 | 	for (i = 0; i < size; i++) {
651 | 686 | 		slot = et->table + i;
652 | 687 |
653 | | -		list_for_each_entry_safe (ex, next, slot, hash_list)
| 688 | +		hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
654 | 689 | 			kmem_cache_free(mem, ex);
655 | 690 | 	}
656 | 691 |
.. | ..
664 | 699 |
665 | 700 | static void dm_remove_exception(struct dm_exception *e)
666 | 701 | {
667 | | -	list_del(&e->hash_list);
| 702 | +	hlist_bl_del(&e->hash_list);
668 | 703 | }
669 | 704 |
670 | 705 | /*
.. | ..
674 | 709 | static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
675 | 710 | 						chunk_t chunk)
676 | 711 | {
677 | | -	struct list_head *slot;
| 712 | +	struct hlist_bl_head *slot;
| 713 | +	struct hlist_bl_node *pos;
678 | 714 | 	struct dm_exception *e;
679 | 715 |
680 | 716 | 	slot = &et->table[exception_hash(et, chunk)];
681 | | -	list_for_each_entry (e, slot, hash_list)
| 717 | +	hlist_bl_for_each_entry(e, pos, slot, hash_list)
682 | 718 | 		if (chunk >= e->old_chunk &&
683 | 719 | 		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
684 | 720 | 			return e;
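The two-sided range test works because a single `dm_exception` can represent a run of consecutive chunks. A worked example with hypothetical values:

	/* If e->old_chunk == 5 and dm_consecutive_chunk_count(e) == 3,
	 * e covers old chunks 5 through 8, so dm_lookup_exception(et, 7)
	 * returns e. */

As the comment in `dm_add_exception()` later in the patch notes, the hlist_bl debug checks expect a slot to be locked while it is accessed, so lookups are done under the slot locks taken via `dm_exception_table_lock()`.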
.. | ..
725 | 761 | static void dm_insert_exception(struct dm_exception_table *eh,
726 | 762 | 				struct dm_exception *new_e)
727 | 763 | {
728 | | -	struct list_head *l;
| 764 | +	struct hlist_bl_head *l;
| 765 | +	struct hlist_bl_node *pos;
729 | 766 | 	struct dm_exception *e = NULL;
730 | 767 |
731 | 768 | 	l = &eh->table[exception_hash(eh, new_e->old_chunk)];
.. | ..
735 | 772 | 		goto out;
736 | 773 |
737 | 774 | 	/* List is ordered by old_chunk */
738 | | -	list_for_each_entry_reverse(e, l, hash_list) {
| 775 | +	hlist_bl_for_each_entry(e, pos, l, hash_list) {
739 | 776 | 		/* Insert after an existing chunk? */
740 | 777 | 		if (new_e->old_chunk == (e->old_chunk +
741 | 778 | 					 dm_consecutive_chunk_count(e) + 1) &&
.. | ..
756 | 793 | 			return;
757 | 794 | 		}
758 | 795 |
759 | | -		if (new_e->old_chunk > e->old_chunk)
| 796 | +		if (new_e->old_chunk < e->old_chunk)
760 | 797 | 			break;
761 | 798 | 	}
762 | 799 |
763 | 800 | out:
764 | | -	list_add(&new_e->hash_list, e ? &e->hash_list : l);
| 801 | +	if (!e) {
| 802 | +		/*
| 803 | +		 * Either the table doesn't support consecutive chunks or slot
| 804 | +		 * l is empty.
| 805 | +		 */
| 806 | +		hlist_bl_add_head(&new_e->hash_list, l);
| 807 | +	} else if (new_e->old_chunk < e->old_chunk) {
| 808 | +		/* Add before an existing exception */
| 809 | +		hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
| 810 | +	} else {
| 811 | +		/* Add to l's tail: e is the last exception in this slot */
| 812 | +		hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
| 813 | +	}
765 | 814 | }
766 | 815 |
767 | 816 | /*
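The traversal direction flips here because hlist_bl is singly linked: the old code walked the slot in reverse and stopped once it had passed the insertion point (`new_e->old_chunk > e->old_chunk`), while the new code walks forward and breaks at the first entry with a larger `old_chunk`. The three-way insert at `out:` then keeps each slot sorted in ascending `old_chunk` order: head insertion when the slot is empty (or the table does not merge consecutive chunks), `hlist_bl_add_before()` when the loop broke early, and `hlist_bl_add_behind()` when `e` is the last entry in the slot.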
.. | ..
770 | 819 |  */
771 | 820 | static int dm_add_exception(void *context, chunk_t old, chunk_t new)
772 | 821 | {
| 822 | +	struct dm_exception_table_lock lock;
773 | 823 | 	struct dm_snapshot *s = context;
774 | 824 | 	struct dm_exception *e;
.. | ..
782 | 832 | 	/* Consecutive_count is implicitly initialised to zero */
783 | 833 | 	e->new_chunk = new;
784 | 834 |
| 835 | +	/*
| 836 | +	 * Although there is no need to lock access to the exception tables
| 837 | +	 * here, if we don't then hlist_bl_add_head(), called by
| 838 | +	 * dm_insert_exception(), will complain about accessing the
| 839 | +	 * corresponding list without locking it first.
| 840 | +	 */
| 841 | +	dm_exception_table_lock_init(s, old, &lock);
| 842 | +
| 843 | +	dm_exception_table_lock(&lock);
785 | 844 | 	dm_insert_exception(&s->complete, e);
| 845 | +	dm_exception_table_unlock(&lock);
786 | 846 |
787 | 847 | 	return 0;
788 | 848 | }
.. | ..
811 | 871 | {
812 | 872 | 	/* use a fixed size of 2MB */
813 | 873 | 	unsigned long mem = 2 * 1024 * 1024;
814 | | -	mem /= sizeof(struct list_head);
| 874 | +	mem /= sizeof(struct hlist_bl_head);
815 | 875 |
816 | 876 | 	return mem;
817 | 877 | }
.. | ..
931 | 991 | 	int r;
932 | 992 | 	chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
933 | 993 |
934 | | -	mutex_lock(&s->lock);
| 994 | +	down_write(&s->lock);
935 | 995 |
936 | 996 | 	/*
937 | 997 | 	 * Process chunks (and associated exceptions) in reverse order
.. | ..
946 | 1006 | 	b = __release_queued_bios_after_merge(s);
947 | 1007 |
948 | 1008 | out:
949 | | -	mutex_unlock(&s->lock);
| 1009 | +	up_write(&s->lock);
950 | 1010 | 	if (b)
951 | 1011 | 		flush_bios(b);
952 | 1012 |
.. | ..
1005 | 1065 | 	if (linear_chunks < 0) {
1006 | 1066 | 		DMERR("Read error in exception store: "
1007 | 1067 | 		      "shutting down merge");
1008 | | -		mutex_lock(&s->lock);
1009 | | -		s->merge_failed = 1;
1010 | | -		mutex_unlock(&s->lock);
| 1068 | +		down_write(&s->lock);
| 1069 | +		s->merge_failed = true;
| 1070 | +		up_write(&s->lock);
1011 | 1071 | 	}
1012 | 1072 | 	goto shut;
1013 | 1073 | }
.. | ..
1048 | 1108 | 		previous_count = read_pending_exceptions_done_count();
1049 | 1109 | 	}
1050 | 1110 |
1051 | | -	mutex_lock(&s->lock);
| 1111 | +	down_write(&s->lock);
1052 | 1112 | 	s->first_merging_chunk = old_chunk;
1053 | 1113 | 	s->num_merging_chunks = linear_chunks;
1054 | | -	mutex_unlock(&s->lock);
| 1114 | +	up_write(&s->lock);
1055 | 1115 |
1056 | 1116 | 	/* Wait until writes to all 'linear_chunks' drain */
1057 | 1117 | 	for (i = 0; i < linear_chunks; i++)
1058 | 1118 | 		__check_for_conflicting_io(s, old_chunk + i);
1059 | 1119 |
1060 | | -	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
| 1120 | +	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 1 << DM_KCOPYD_SNAP_MERGE,
| 1121 | +		       merge_callback, s);
1061 | 1122 | 	return;
1062 | 1123 |
1063 | 1124 | shut:
.. | ..
1109 | 1170 | 	return;
1110 | 1171 |
1111 | 1172 | shut:
1112 | | -	mutex_lock(&s->lock);
1113 | | -	s->merge_failed = 1;
| 1173 | +	down_write(&s->lock);
| 1174 | +	s->merge_failed = true;
1114 | 1175 | 	b = __release_queued_bios_after_merge(s);
1115 | | -	mutex_unlock(&s->lock);
| 1176 | +	up_write(&s->lock);
1116 | 1177 | 	error_bios(b);
1117 | 1178 |
1118 | 1179 | 	merge_shutdown(s);
.. | ..
1134 | 1195 | 	clear_bit(SHUTDOWN_MERGE, &s->state_bits);
1135 | 1196 | }
1136 | 1197 |
| 1198 | +static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
| 1199 | +				   struct dm_target *ti)
| 1200 | +{
| 1201 | +	int r;
| 1202 | +	unsigned argc;
| 1203 | +	const char *arg_name;
| 1204 | +
| 1205 | +	static const struct dm_arg _args[] = {
| 1206 | +		{0, 2, "Invalid number of feature arguments"},
| 1207 | +	};
| 1208 | +
| 1209 | +	/*
| 1210 | +	 * No feature arguments supplied.
| 1211 | +	 */
| 1212 | +	if (!as->argc)
| 1213 | +		return 0;
| 1214 | +
| 1215 | +	r = dm_read_arg_group(_args, as, &argc, &ti->error);
| 1216 | +	if (r)
| 1217 | +		return -EINVAL;
| 1218 | +
| 1219 | +	while (argc && !r) {
| 1220 | +		arg_name = dm_shift_arg(as);
| 1221 | +		argc--;
| 1222 | +
| 1223 | +		if (!strcasecmp(arg_name, "discard_zeroes_cow"))
| 1224 | +			s->discard_zeroes_cow = true;
| 1225 | +
| 1226 | +		else if (!strcasecmp(arg_name, "discard_passdown_origin"))
| 1227 | +			s->discard_passdown_origin = true;
| 1228 | +
| 1229 | +		else {
| 1230 | +			ti->error = "Unrecognised feature requested";
| 1231 | +			r = -EINVAL;
| 1232 | +			break;
| 1233 | +		}
| 1234 | +	}
| 1235 | +
| 1236 | +	if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
| 1237 | +		/*
| 1238 | +		 * TODO: really these are disjoint.. but ti->num_discard_bios
| 1239 | +		 * and dm_bio_get_target_bio_nr() require rigid constraints.
| 1240 | +		 */
| 1241 | +		ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
| 1242 | +		r = -EINVAL;
| 1243 | +	}
| 1244 | +
| 1245 | +	return r;
| 1246 | +}
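With the feature parser in place, the snapshot target's constructor arguments grow an optional feature group, as documented in the comment just below. An illustrative target line (the device pair and chunk size are made-up values):

	snapshot 254:1 254:2 P 8 2 discard_zeroes_cow discard_passdown_origin

Here `2` is the feature-argument count that `dm_read_arg_group()` consumes, and the check above rejects `discard_passdown_origin` unless `discard_zeroes_cow` is given as well.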
| 1247 | +
1137 | 1248 | /*
1138 | | - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
| 1249 | + * Construct a snapshot mapping:
| 1250 | + * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
1139 | 1251 |  */
1140 | 1252 | static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1141 | 1253 | {
1142 | 1254 | 	struct dm_snapshot *s;
| 1255 | +	struct dm_arg_set as;
1143 | 1256 | 	int i;
1144 | 1257 | 	int r = -EINVAL;
1145 | 1258 | 	char *origin_path, *cow_path;
.. | ..
1147 | 1260 | 	unsigned args_used, num_flush_bios = 1;
1148 | 1261 | 	fmode_t origin_mode = FMODE_READ;
1149 | 1262 |
1150 | | -	if (argc != 4) {
1151 | | -		ti->error = "requires exactly 4 arguments";
| 1263 | +	if (argc < 4) {
| 1264 | +		ti->error = "requires 4 or more arguments";
1152 | 1265 | 		r = -EINVAL;
1153 | 1266 | 		goto bad;
1154 | 1267 | 	}
.. | ..
1164 | 1277 | 		r = -ENOMEM;
1165 | 1278 | 		goto bad;
1166 | 1279 | 	}
| 1280 | +
| 1281 | +	as.argc = argc;
| 1282 | +	as.argv = argv;
| 1283 | +	dm_consume_args(&as, 4);
| 1284 | +	r = parse_snapshot_features(&as, s, ti);
| 1285 | +	if (r)
| 1286 | +		goto bad_features;
1167 | 1287 |
1168 | 1288 | 	origin_path = argv[0];
1169 | 1289 | 	argv++;
.. | ..
1208 | 1328 | 	s->snapshot_overflowed = 0;
1209 | 1329 | 	s->active = 0;
1210 | 1330 | 	atomic_set(&s->pending_exceptions_count, 0);
| 1331 | +	spin_lock_init(&s->pe_allocation_lock);
1211 | 1332 | 	s->exception_start_sequence = 0;
1212 | 1333 | 	s->exception_complete_sequence = 0;
1213 | 1334 | 	s->out_of_order_tree = RB_ROOT;
1214 | | -	mutex_init(&s->lock);
| 1335 | +	init_rwsem(&s->lock);
1215 | 1336 | 	INIT_LIST_HEAD(&s->list);
1216 | 1337 | 	spin_lock_init(&s->pe_lock);
1217 | 1338 | 	s->state_bits = 0;
1218 | | -	s->merge_failed = 0;
| 1339 | +	s->merge_failed = false;
1219 | 1340 | 	s->first_merging_chunk = 0;
1220 | 1341 | 	s->num_merging_chunks = 0;
1221 | 1342 | 	bio_list_init(&s->bios_queued_during_merge);
.. | ..
1250 | 1371 |
1251 | 1372 | 	ti->private = s;
1252 | 1373 | 	ti->num_flush_bios = num_flush_bios;
| 1374 | +	if (s->discard_zeroes_cow)
| 1375 | +		ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
1253 | 1376 | 	ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
1254 | 1377 |
1255 | 1378 | 	/* Add snapshot to the list of snapshots for this origin */
.. | ..
1298 | 1421 |
1299 | 1422 | bad_read_metadata:
1300 | 1423 | 	unregister_snapshot(s);
1301 | | -
1302 | 1424 | bad_load_and_register:
1303 | 1425 | 	mempool_exit(&s->pending_pool);
1304 | | -
1305 | 1426 | bad_pending_pool:
1306 | 1427 | 	dm_kcopyd_client_destroy(s->kcopyd_client);
1307 | | -
1308 | 1428 | bad_kcopyd:
1309 | 1429 | 	dm_exception_table_exit(&s->pending, pending_cache);
1310 | 1430 | 	dm_exception_table_exit(&s->complete, exception_cache);
1311 | | -
1312 | 1431 | bad_hash_tables:
1313 | 1432 | 	dm_exception_store_destroy(s->store);
1314 | | -
1315 | 1433 | bad_store:
1316 | 1434 | 	dm_put_device(ti, s->cow);
1317 | | -
1318 | 1435 | bad_cow:
1319 | 1436 | 	dm_put_device(ti, s->origin);
1320 | | -
1321 | 1437 | bad_origin:
| 1438 | +bad_features:
1322 | 1439 | 	kfree(s);
1323 | | -
1324 | 1440 | bad:
1325 | 1441 | 	return r;
1326 | 1442 | }
.. | ..
1379 | 1495 | 	/* Check whether exception handover must be cancelled */
1380 | 1496 | 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
1381 | 1497 | 	if (snap_src && snap_dest && (s == snap_src)) {
1382 | | -		mutex_lock(&snap_dest->lock);
| 1498 | +		down_write(&snap_dest->lock);
1383 | 1499 | 		snap_dest->valid = 0;
1384 | | -		mutex_unlock(&snap_dest->lock);
| 1500 | +		up_write(&snap_dest->lock);
1385 | 1501 | 		DMERR("Cancelling snapshot handover.");
1386 | 1502 | 	}
1387 | 1503 | 	up_read(&_origins_lock);
.. | ..
1411 | 1527 | 	mempool_exit(&s->pending_pool);
1412 | 1528 |
1413 | 1529 | 	dm_exception_store_destroy(s->store);
1414 | | -
1415 | | -	mutex_destroy(&s->lock);
1416 | 1530 |
1417 | 1531 | 	bio_uninit(&s->flush_bio);
1418 | 1532 |
.. | ..
1480 | 1594 | 	while (bio) {
1481 | 1595 | 		n = bio->bi_next;
1482 | 1596 | 		bio->bi_next = NULL;
1483 | | -		generic_make_request(bio);
| 1597 | +		submit_bio_noacct(bio);
1484 | 1598 | 		bio = n;
1485 | 1599 | 	}
1486 | 1600 | }
.. | ..
1500 | 1614 | 		bio->bi_next = NULL;
1501 | 1615 | 		r = do_origin(s->origin, bio, false);
1502 | 1616 | 		if (r == DM_MAPIO_REMAPPED)
1503 | | -			generic_make_request(bio);
| 1617 | +			submit_bio_noacct(bio);
1504 | 1618 | 		bio = n;
1505 | 1619 | 	}
1506 | 1620 | }
.. | ..
1538 | 1652 | 		dm_table_event(s->ti->table);
1539 | 1653 | }
1540 | 1654 |
| 1655 | +static void invalidate_snapshot(struct dm_snapshot *s, int err)
| 1656 | +{
| 1657 | +	down_write(&s->lock);
| 1658 | +	__invalidate_snapshot(s, err);
| 1659 | +	up_write(&s->lock);
| 1660 | +}
| 1661 | +
1541 | 1662 | static void pending_complete(void *context, int success)
1542 | 1663 | {
1543 | 1664 | 	struct dm_snap_pending_exception *pe = context;
.. | ..
1546 | 1667 | 	struct bio *origin_bios = NULL;
1547 | 1668 | 	struct bio *snapshot_bios = NULL;
1548 | 1669 | 	struct bio *full_bio = NULL;
| 1670 | +	struct dm_exception_table_lock lock;
1549 | 1671 | 	int error = 0;
| 1672 | +
| 1673 | +	dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);
1550 | 1674 |
1551 | 1675 | 	if (!success) {
1552 | 1676 | 		/* Read/write error - snapshot is unusable */
1553 | | -		mutex_lock(&s->lock);
1554 | | -		__invalidate_snapshot(s, -EIO);
| 1677 | +		invalidate_snapshot(s, -EIO);
1555 | 1678 | 		error = 1;
| 1679 | +
| 1680 | +		dm_exception_table_lock(&lock);
1556 | 1681 | 		goto out;
1557 | 1682 | 	}
1558 | 1683 |
1559 | 1684 | 	e = alloc_completed_exception(GFP_NOIO);
1560 | 1685 | 	if (!e) {
1561 | | -		mutex_lock(&s->lock);
1562 | | -		__invalidate_snapshot(s, -ENOMEM);
| 1686 | +		invalidate_snapshot(s, -ENOMEM);
1563 | 1687 | 		error = 1;
| 1688 | +
| 1689 | +		dm_exception_table_lock(&lock);
1564 | 1690 | 		goto out;
1565 | 1691 | 	}
1566 | 1692 | 	*e = pe->e;
1567 | 1693 |
1568 | | -	mutex_lock(&s->lock);
| 1694 | +	down_read(&s->lock);
| 1695 | +	dm_exception_table_lock(&lock);
1569 | 1696 | 	if (!s->valid) {
| 1697 | +		up_read(&s->lock);
1570 | 1698 | 		free_completed_exception(e);
1571 | 1699 | 		error = 1;
| 1700 | +
1572 | 1701 | 		goto out;
1573 | 1702 | 	}
1574 | 1703 |
1575 | | -	/* Check for conflicting reads */
1576 | | -	__check_for_conflicting_io(s, pe->e.old_chunk);
1577 | | -
1578 | 1704 | 	/*
1579 | | -	 * Add a proper exception, and remove the
1580 | | -	 * in-flight exception from the list.
| 1705 | +	 * Add a proper exception. After inserting the completed exception all
| 1706 | +	 * subsequent snapshot reads to this chunk will be redirected to the
| 1707 | +	 * COW device. This ensures that we do not starve. Moreover, as long
| 1708 | +	 * as the pending exception exists, neither origin writes nor snapshot
| 1709 | +	 * merging can overwrite the chunk in origin.
1581 | 1710 | 	 */
1582 | 1711 | 	dm_insert_exception(&s->complete, e);
| 1712 | +	up_read(&s->lock);
| 1713 | +
| 1714 | +	/* Wait for conflicting reads to drain */
| 1715 | +	if (__chunk_is_tracked(s, pe->e.old_chunk)) {
| 1716 | +		dm_exception_table_unlock(&lock);
| 1717 | +		__check_for_conflicting_io(s, pe->e.old_chunk);
| 1718 | +		dm_exception_table_lock(&lock);
| 1719 | +	}
1583 | 1720 |
1584 | 1721 | out:
| 1722 | +	/* Remove the in-flight exception from the list */
1585 | 1723 | 	dm_remove_exception(&pe->e);
| 1724 | +
| 1725 | +	dm_exception_table_unlock(&lock);
| 1726 | +
1586 | 1727 | 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
1587 | 1728 | 	origin_bios = bio_list_get(&pe->origin_bios);
1588 | 1729 | 	full_bio = pe->full_bio;
1589 | 1730 | 	if (full_bio)
1590 | 1731 | 		full_bio->bi_end_io = pe->full_bio_end_io;
1591 | 1732 | 	increment_pending_exceptions_done_count();
1592 | | -
1593 | | -	mutex_unlock(&s->lock);
1594 | 1733 |
1595 | 1734 | 	/* Submit any pending write bios */
1596 | 1735 | 	if (error) {
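Two details in the rewritten completion path are worth noting. First, the completed exception is now inserted before waiting for conflicting reads rather than after, which is safe because the still-present pending exception keeps origin writes and merging away from the chunk (as the new comment explains), and it prevents a steady stream of reads to the chunk from starving the completion. Second, the slot locks are dropped around `__check_for_conflicting_io()`: that helper sleeps while tracked reads drain, and a hlist_bl slot lock is a bit spinlock that must not be held across sleeping.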
.. | ..
1716 | 1855 | 	bio->bi_end_io = full_bio_end_io;
1717 | 1856 | 	bio->bi_private = callback_data;
1718 | 1857 |
1719 | | -	generic_make_request(bio);
| 1858 | +	submit_bio_noacct(bio);
1720 | 1859 | }
1721 | 1860 |
1722 | 1861 | static struct dm_snap_pending_exception *
.. | ..
1731 | 1870 | }
1732 | 1871 |
1733 | 1872 | /*
| 1873 | + * Inserts a pending exception into the pending table.
| 1874 | + *
| 1875 | + * NOTE: a write lock must be held on the chunk's pending exception table slot
| 1876 | + * before calling this.
| 1877 | + */
| 1878 | +static struct dm_snap_pending_exception *
| 1879 | +__insert_pending_exception(struct dm_snapshot *s,
| 1880 | +			   struct dm_snap_pending_exception *pe, chunk_t chunk)
| 1881 | +{
| 1882 | +	pe->e.old_chunk = chunk;
| 1883 | +	bio_list_init(&pe->origin_bios);
| 1884 | +	bio_list_init(&pe->snapshot_bios);
| 1885 | +	pe->started = 0;
| 1886 | +	pe->full_bio = NULL;
| 1887 | +
| 1888 | +	spin_lock(&s->pe_allocation_lock);
| 1889 | +	if (s->store->type->prepare_exception(s->store, &pe->e)) {
| 1890 | +		spin_unlock(&s->pe_allocation_lock);
| 1891 | +		free_pending_exception(pe);
| 1892 | +		return NULL;
| 1893 | +	}
| 1894 | +
| 1895 | +	pe->exception_sequence = s->exception_start_sequence++;
| 1896 | +	spin_unlock(&s->pe_allocation_lock);
| 1897 | +
| 1898 | +	dm_insert_exception(&s->pending, &pe->e);
| 1899 | +
| 1900 | +	return pe;
| 1901 | +}
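Splitting `__find_pending_exception()` gives callers a building block for the allocate-outside-the-lock dance used later in `snapshot_map()` and `__origin_write()`: allocation may sleep, so the slot locks are dropped around it, and both tables must be re-checked afterwards because another CPU may have raced in. A condensed sketch of that pattern, assembled from the hunks below:

	dm_exception_table_unlock(&lock);
	pe = alloc_pending_exception(s);	/* may sleep */
	dm_exception_table_lock(&lock);

	pe2 = __lookup_pending_exception(s, chunk);
	if (pe2) {
		/* Lost the race: someone else created the pending exception. */
		free_pending_exception(pe);
		pe = pe2;
	} else if (dm_lookup_exception(&s->complete, chunk)) {
		/* The chunk was remapped while we slept. */
		free_pending_exception(pe);
	} else {
		pe = __insert_pending_exception(s, pe, chunk);	/* NULL on failure */
	}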
| 1902 | +
| 1903 | +/*
1734 | 1904 |  * Looks to see if this snapshot already has a pending exception
1735 | 1905 |  * for this chunk, otherwise it allocates a new one and inserts
1736 | 1906 |  * it into the pending table.
1737 | 1907 |  *
1738 | | - * NOTE: a write lock must be held on snap->lock before calling
1739 | | - * this.
| 1908 | + * NOTE: a write lock must be held on the chunk's pending exception table slot
| 1909 | + * before calling this.
1740 | 1910 |  */
1741 | 1911 | static struct dm_snap_pending_exception *
1742 | 1912 | __find_pending_exception(struct dm_snapshot *s,
.. | ..
1750 | 1920 | 		return pe2;
1751 | 1921 | 	}
1752 | 1922 |
1753 | | -	pe->e.old_chunk = chunk;
1754 | | -	bio_list_init(&pe->origin_bios);
1755 | | -	bio_list_init(&pe->snapshot_bios);
1756 | | -	pe->started = 0;
1757 | | -	pe->full_bio = NULL;
1758 | | -
1759 | | -	if (s->store->type->prepare_exception(s->store, &pe->e)) {
1760 | | -		free_pending_exception(pe);
1761 | | -		return NULL;
1762 | | -	}
1763 | | -
1764 | | -	pe->exception_sequence = s->exception_start_sequence++;
1765 | | -
1766 | | -	dm_insert_exception(&s->pending, &pe->e);
1767 | | -
1768 | | -	return pe;
| 1923 | +	return __insert_pending_exception(s, pe, chunk);
1769 | 1924 | }
1770 | 1925 |
1771 | 1926 | static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
.. | ..
1778 | 1933 | 		(bio->bi_iter.bi_sector & s->store->chunk_mask);
1779 | 1934 | }
1780 | 1935 |
| 1936 | +static void zero_callback(int read_err, unsigned long write_err, void *context)
| 1937 | +{
| 1938 | +	struct bio *bio = context;
| 1939 | +	struct dm_snapshot *s = bio->bi_private;
| 1940 | +
| 1941 | +	account_end_copy(s);
| 1942 | +	bio->bi_status = write_err ? BLK_STS_IOERR : 0;
| 1943 | +	bio_endio(bio);
| 1944 | +}
| 1945 | +
| 1946 | +static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
| 1947 | +			   struct bio *bio, chunk_t chunk)
| 1948 | +{
| 1949 | +	struct dm_io_region dest;
| 1950 | +
| 1951 | +	dest.bdev = s->cow->bdev;
| 1952 | +	dest.sector = bio->bi_iter.bi_sector;
| 1953 | +	dest.count = s->store->chunk_size;
| 1954 | +
| 1955 | +	account_start_copy(s);
| 1956 | +	WARN_ON_ONCE(bio->bi_private);
| 1957 | +	bio->bi_private = s;
| 1958 | +	dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
| 1959 | +}
| 1960 | +
| 1961 | +static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
| 1962 | +{
| 1963 | +	return bio->bi_iter.bi_size ==
| 1964 | +		(s->store->chunk_size << SECTOR_SHIFT);
| 1965 | +}
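`zero_exception()` implements `discard_zeroes_cow` for chunks that already have a completed exception: instead of issuing the discard anywhere, it asks kcopyd to zero the exception's chunk in the COW device and completes the original discard bio from `zero_callback()`. `bio->bi_private` is borrowed to carry the snapshot pointer across the asynchronous call, hence the `WARN_ON_ONCE()` guarding against an in-use field. `io_overlaps_chunk()` factors out the whole-chunk test that `snapshot_map()` previously open-coded for full-bio copies.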
| 1966 | +
1781 | 1967 | static int snapshot_map(struct dm_target *ti, struct bio *bio)
1782 | 1968 | {
1783 | 1969 | 	struct dm_exception *e;
.. | ..
1785 | 1971 | 	int r = DM_MAPIO_REMAPPED;
1786 | 1972 | 	chunk_t chunk;
1787 | 1973 | 	struct dm_snap_pending_exception *pe = NULL;
| 1974 | +	struct dm_exception_table_lock lock;
1788 | 1975 |
1789 | 1976 | 	init_tracked_chunk(bio);
1790 | 1977 |
.. | ..
1794 | 1981 | 	}
1795 | 1982 |
1796 | 1983 | 	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
| 1984 | +	dm_exception_table_lock_init(s, chunk, &lock);
1797 | 1985 |
1798 | 1986 | 	/* Full snapshots are not usable */
1799 | 1987 | 	/* To get here the table must be live so s->active is always set. */
.. | ..
1805 | 1993 | 		; /* wait_for_in_progress() has slept */
1806 | 1994 | 	}
1807 | 1995 |
1808 | | -	mutex_lock(&s->lock);
| 1996 | +	down_read(&s->lock);
| 1997 | +	dm_exception_table_lock(&lock);
1809 | 1998 |
1810 | 1999 | 	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
1811 | 2000 | 	    bio_data_dir(bio) == WRITE)) {
.. | ..
1813 | 2002 | 		goto out_unlock;
1814 | 2003 | 	}
1815 | 2004 |
| 2005 | +	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
| 2006 | +		if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
| 2007 | +			/*
| 2008 | +			 * passdown discard to origin (without triggering
| 2009 | +			 * snapshot exceptions via do_origin; doing so would
| 2010 | +			 * defeat the goal of freeing space in origin that is
| 2011 | +			 * implied by the "discard_passdown_origin" feature)
| 2012 | +			 */
| 2013 | +			bio_set_dev(bio, s->origin->bdev);
| 2014 | +			track_chunk(s, bio, chunk);
| 2015 | +			goto out_unlock;
| 2016 | +		}
| 2017 | +		/* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
| 2018 | +	}
| 2019 | +
1816 | 2020 | 	/* If the block is already remapped - use that, else remap it */
1817 | 2021 | 	e = dm_lookup_exception(&s->complete, chunk);
1818 | 2022 | 	if (e) {
1819 | 2023 | 		remap_exception(s, e, bio, chunk);
| 2024 | +		if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
| 2025 | +		    io_overlaps_chunk(s, bio)) {
| 2026 | +			dm_exception_table_unlock(&lock);
| 2027 | +			up_read(&s->lock);
| 2028 | +			zero_exception(s, e, bio, chunk);
| 2029 | +			r = DM_MAPIO_SUBMITTED; /* discard is not issued */
| 2030 | +			goto out;
| 2031 | +		}
| 2032 | +		goto out_unlock;
| 2033 | +	}
| 2034 | +
| 2035 | +	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
| 2036 | +		/*
| 2037 | +		 * If no exception exists, complete discard immediately
| 2038 | +		 * otherwise it'll trigger copy-out.
| 2039 | +		 */
| 2040 | +		bio_endio(bio);
| 2041 | +		r = DM_MAPIO_SUBMITTED;
1820 | 2042 | 		goto out_unlock;
1821 | 2043 | 	}
1822 | 2044 |
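A discard to the snapshot therefore ends up in one of three places: with `discard_passdown_origin`, the extra target bio (`target_bio_nr != 0`) is redirected to the origin device untouched; a discard whose chunk already has a completed exception is turned into a kcopyd zero of the COW chunk; and a discard to a chunk with no exception completes immediately, since remapping it would pointlessly trigger a copy-out.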
.. | ..
1828 | 2050 | 	if (bio_data_dir(bio) == WRITE) {
1829 | 2051 | 		pe = __lookup_pending_exception(s, chunk);
1830 | 2052 | 		if (!pe) {
1831 | | -			mutex_unlock(&s->lock);
| 2053 | +			dm_exception_table_unlock(&lock);
1832 | 2054 | 			pe = alloc_pending_exception(s);
1833 | | -			mutex_lock(&s->lock);
1834 | | -
1835 | | -			if (!s->valid || s->snapshot_overflowed) {
1836 | | -				free_pending_exception(pe);
1837 | | -				r = DM_MAPIO_KILL;
1838 | | -				goto out_unlock;
1839 | | -			}
| 2055 | +			dm_exception_table_lock(&lock);
1840 | 2056 |
1841 | 2057 | 			e = dm_lookup_exception(&s->complete, chunk);
1842 | 2058 | 			if (e) {
.. | ..
1847 | 2063 |
1848 | 2064 | 			pe = __find_pending_exception(s, pe, chunk);
1849 | 2065 | 			if (!pe) {
| 2066 | +				dm_exception_table_unlock(&lock);
| 2067 | +				up_read(&s->lock);
| 2068 | +
| 2069 | +				down_write(&s->lock);
| 2070 | +
1850 | 2071 | 				if (s->store->userspace_supports_overflow) {
1851 | | -					s->snapshot_overflowed = 1;
1852 | | -					DMERR("Snapshot overflowed: Unable to allocate exception.");
| 2072 | +					if (s->valid && !s->snapshot_overflowed) {
| 2073 | +						s->snapshot_overflowed = 1;
| 2074 | +						DMERR("Snapshot overflowed: Unable to allocate exception.");
| 2075 | +					}
1853 | 2076 | 				} else
1854 | 2077 | 					__invalidate_snapshot(s, -ENOMEM);
| 2078 | +				up_write(&s->lock);
| 2079 | +
1855 | 2080 | 				r = DM_MAPIO_KILL;
1856 | | -				goto out_unlock;
| 2081 | +				goto out;
1857 | 2082 | 			}
1858 | 2083 | 		}
1859 | 2084 |
.. | ..
1861 | 2086 |
1862 | 2087 | 		r = DM_MAPIO_SUBMITTED;
1863 | 2088 |
1864 | | -		if (!pe->started &&
1865 | | -		    bio->bi_iter.bi_size ==
1866 | | -		    (s->store->chunk_size << SECTOR_SHIFT)) {
| 2089 | +		if (!pe->started && io_overlaps_chunk(s, bio)) {
1867 | 2090 | 			pe->started = 1;
1868 | | -			mutex_unlock(&s->lock);
| 2091 | +
| 2092 | +			dm_exception_table_unlock(&lock);
| 2093 | +			up_read(&s->lock);
| 2094 | +
1869 | 2095 | 			start_full_bio(pe, bio);
1870 | 2096 | 			goto out;
1871 | 2097 | 		}
.. | ..
1873 | 2099 | 		bio_list_add(&pe->snapshot_bios, bio);
1874 | 2100 |
1875 | 2101 | 		if (!pe->started) {
1876 | | -			/* this is protected by snap->lock */
| 2102 | +			/* this is protected by the exception table lock */
1877 | 2103 | 			pe->started = 1;
1878 | | -			mutex_unlock(&s->lock);
| 2104 | +
| 2105 | +			dm_exception_table_unlock(&lock);
| 2106 | +			up_read(&s->lock);
| 2107 | +
1879 | 2108 | 			start_copy(pe);
1880 | 2109 | 			goto out;
1881 | 2110 | 		}
.. | ..
1885 | 2114 | 	}
1886 | 2115 |
1887 | 2116 | out_unlock:
1888 | | -	mutex_unlock(&s->lock);
| 2117 | +	dm_exception_table_unlock(&lock);
| 2118 | +	up_read(&s->lock);
1889 | 2119 | out:
1890 | 2120 | 	return r;
1891 | 2121 | }
.. | ..
1919 | 2149 | 		return DM_MAPIO_REMAPPED;
1920 | 2150 | 	}
1921 | 2151 |
| 2152 | +	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
| 2153 | +		/* Once merging, discards no longer effect change */
| 2154 | +		bio_endio(bio);
| 2155 | +		return DM_MAPIO_SUBMITTED;
| 2156 | +	}
| 2157 | +
1922 | 2158 | 	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
1923 | 2159 |
1924 | | -	mutex_lock(&s->lock);
| 2160 | +	down_write(&s->lock);
1925 | 2161 |
1926 | 2162 | 	/* Full merging snapshots are redirected to the origin */
1927 | 2163 | 	if (!s->valid)
.. | ..
1952 | 2188 | 	bio_set_dev(bio, s->origin->bdev);
1953 | 2189 |
1954 | 2190 | 	if (bio_data_dir(bio) == WRITE) {
1955 | | -		mutex_unlock(&s->lock);
| 2191 | +		up_write(&s->lock);
1956 | 2192 | 		return do_origin(s->origin, bio, false);
1957 | 2193 | 	}
1958 | 2194 |
1959 | 2195 | out_unlock:
1960 | | -	mutex_unlock(&s->lock);
| 2196 | +	up_write(&s->lock);
1961 | 2197 |
1962 | 2198 | 	return r;
1963 | 2199 | }
.. | ..
1989 | 2225 | 	down_read(&_origins_lock);
1990 | 2226 | 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
1991 | 2227 | 	if (snap_src && snap_dest) {
1992 | | -		mutex_lock(&snap_src->lock);
| 2228 | +		down_read(&snap_src->lock);
1993 | 2229 | 		if (s == snap_src) {
1994 | 2230 | 			DMERR("Unable to resume snapshot source until "
1995 | 2231 | 			      "handover completes.");
.. | ..
1999 | 2235 | 			      "source is suspended.");
2000 | 2236 | 			r = -EINVAL;
2001 | 2237 | 		}
2002 | | -		mutex_unlock(&snap_src->lock);
| 2238 | +		up_read(&snap_src->lock);
2003 | 2239 | 	}
2004 | 2240 | 	up_read(&_origins_lock);
2005 | 2241 |
.. | ..
2045 | 2281 |
2046 | 2282 | 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
2047 | 2283 | 	if (snap_src && snap_dest) {
2048 | | -		mutex_lock(&snap_src->lock);
2049 | | -		mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
| 2284 | +		down_write(&snap_src->lock);
| 2285 | +		down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
2050 | 2286 | 		__handover_exceptions(snap_src, snap_dest);
2051 | | -		mutex_unlock(&snap_dest->lock);
2052 | | -		mutex_unlock(&snap_src->lock);
| 2287 | +		up_write(&snap_dest->lock);
| 2288 | +		up_write(&snap_src->lock);
2053 | 2289 | 	}
2054 | 2290 |
2055 | 2291 | 	up_read(&_origins_lock);
.. | ..
2064 | 2300 | 	/* Now we have correct chunk size, reregister */
2065 | 2301 | 	reregister_snapshot(s);
2066 | 2302 |
2067 | | -	mutex_lock(&s->lock);
| 2303 | +	down_write(&s->lock);
2068 | 2304 | 	s->active = 1;
2069 | | -	mutex_unlock(&s->lock);
| 2305 | +	up_write(&s->lock);
2070 | 2306 | }
2071 | 2307 |
2072 | 2308 | static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
.. | ..
2102 | 2338 | {
2103 | 2339 | 	unsigned sz = 0;
2104 | 2340 | 	struct dm_snapshot *snap = ti->private;
| 2341 | +	unsigned num_features;
2105 | 2342 |
2106 | 2343 | 	switch (type) {
2107 | 2344 | 	case STATUSTYPE_INFO:
2108 | 2345 |
2109 | | -		mutex_lock(&snap->lock);
| 2346 | +		down_write(&snap->lock);
2110 | 2347 |
2111 | 2348 | 		if (!snap->valid)
2112 | 2349 | 			DMEMIT("Invalid");
.. | ..
2131 | 2368 | 			DMEMIT("Unknown");
2132 | 2369 | 		}
2133 | 2370 |
2134 | | -		mutex_unlock(&snap->lock);
| 2371 | +		up_write(&snap->lock);
2135 | 2372 |
2136 | 2373 | 		break;
2137 | 2374 |
.. | ..
2142 | 2379 | 		 * make sense.
2143 | 2380 | 		 */
2144 | 2381 | 		DMEMIT("%s %s", snap->origin->name, snap->cow->name);
2145 | | -		snap->store->type->status(snap->store, type, result + sz,
2146 | | -					  maxlen - sz);
| 2382 | +		sz += snap->store->type->status(snap->store, type, result + sz,
| 2383 | +						maxlen - sz);
| 2384 | +		num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
| 2385 | +		if (num_features) {
| 2386 | +			DMEMIT(" %u", num_features);
| 2387 | +			if (snap->discard_zeroes_cow)
| 2388 | +				DMEMIT(" discard_zeroes_cow");
| 2389 | +			if (snap->discard_passdown_origin)
| 2390 | +				DMEMIT(" discard_passdown_origin");
| 2391 | +		}
2147 | 2392 | 		break;
2148 | 2393 | 	}
2149 | 2394 | }
.. | ..
2162 | 2407 | 	return r;
2163 | 2408 | }
2164 | 2409 |
| 2410 | +static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
| 2411 | +{
| 2412 | +	struct dm_snapshot *snap = ti->private;
| 2413 | +
| 2414 | +	if (snap->discard_zeroes_cow) {
| 2415 | +		struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
| 2416 | +
| 2417 | +		down_read(&_origins_lock);
| 2418 | +
| 2419 | +		(void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
| 2420 | +		if (snap_src && snap_dest)
| 2421 | +			snap = snap_src;
| 2422 | +
| 2423 | +		/* All discards are split on chunk_size boundary */
| 2424 | +		limits->discard_granularity = snap->store->chunk_size;
| 2425 | +		limits->max_discard_sectors = snap->store->chunk_size;
| 2426 | +
| 2427 | +		up_read(&_origins_lock);
| 2428 | +	}
| 2429 | +}
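Setting both `discard_granularity` and `max_discard_sectors` to the chunk size means the block layer hands `snapshot_map()` discards of at most one chunk, split on chunk boundaries, which is what allows the map path to treat a discard as a single whole-chunk operation via `io_overlaps_chunk()`. During a handover the limits are taken from `snap_src`, whose exception store holds the authoritative chunk size.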
2165 | 2430 |
2166 | 2431 | /*-----------------------------------------------------------------
2167 | 2432 |  * Origin methods
.. | ..
2183 | 2448 | 	int r = DM_MAPIO_REMAPPED;
2184 | 2449 | 	struct dm_snapshot *snap;
2185 | 2450 | 	struct dm_exception *e;
2186 | | -	struct dm_snap_pending_exception *pe;
| 2451 | +	struct dm_snap_pending_exception *pe, *pe2;
2187 | 2452 | 	struct dm_snap_pending_exception *pe_to_start_now = NULL;
2188 | 2453 | 	struct dm_snap_pending_exception *pe_to_start_last = NULL;
| 2454 | +	struct dm_exception_table_lock lock;
2189 | 2455 | 	chunk_t chunk;
2190 | 2456 |
2191 | 2457 | 	/* Do all the snapshots on this origin */
.. | ..
2197 | 2463 | 		if (dm_target_is_snapshot_merge(snap->ti))
2198 | 2464 | 			continue;
2199 | 2465 |
2200 | | -		mutex_lock(&snap->lock);
2201 | | -
2202 | | -		/* Only deal with valid and active snapshots */
2203 | | -		if (!snap->valid || !snap->active)
2204 | | -			goto next_snapshot;
2205 | | -
2206 | 2466 | 		/* Nothing to do if writing beyond end of snapshot */
2207 | 2467 | 		if (sector >= dm_table_get_size(snap->ti->table))
2208 | | -			goto next_snapshot;
| 2468 | +			continue;
2209 | 2469 |
2210 | 2470 | 		/*
2211 | 2471 | 		 * Remember, different snapshots can have
2212 | 2472 | 		 * different chunk sizes.
2213 | 2473 | 		 */
2214 | 2474 | 		chunk = sector_to_chunk(snap->store, sector);
| 2475 | +		dm_exception_table_lock_init(snap, chunk, &lock);
2215 | 2476 |
2216 | | -		/*
2217 | | -		 * Check exception table to see if block
2218 | | -		 * is already remapped in this snapshot
2219 | | -		 * and trigger an exception if not.
2220 | | -		 */
2221 | | -		e = dm_lookup_exception(&snap->complete, chunk);
2222 | | -		if (e)
| 2477 | +		down_read(&snap->lock);
| 2478 | +		dm_exception_table_lock(&lock);
| 2479 | +
| 2480 | +		/* Only deal with valid and active snapshots */
| 2481 | +		if (!snap->valid || !snap->active)
2223 | 2482 | 			goto next_snapshot;
2224 | 2483 |
2225 | 2484 | 		pe = __lookup_pending_exception(snap, chunk);
2226 | 2485 | 		if (!pe) {
2227 | | -			mutex_unlock(&snap->lock);
2228 | | -			pe = alloc_pending_exception(snap);
2229 | | -			mutex_lock(&snap->lock);
2230 | | -
2231 | | -			if (!snap->valid) {
2232 | | -				free_pending_exception(pe);
2233 | | -				goto next_snapshot;
2234 | | -			}
2235 | | -
| 2486 | +			/*
| 2487 | +			 * Check exception table to see if block is already
| 2488 | +			 * remapped in this snapshot and trigger an exception
| 2489 | +			 * if not.
| 2490 | +			 */
2236 | 2491 | 			e = dm_lookup_exception(&snap->complete, chunk);
2237 | | -			if (e) {
2238 | | -				free_pending_exception(pe);
| 2492 | +			if (e)
2239 | 2493 | 				goto next_snapshot;
2240 | | -			}
2241 | 2494 |
2242 | | -			pe = __find_pending_exception(snap, pe, chunk);
2243 | | -			if (!pe) {
2244 | | -				__invalidate_snapshot(snap, -ENOMEM);
2245 | | -				goto next_snapshot;
| 2495 | +			dm_exception_table_unlock(&lock);
| 2496 | +			pe = alloc_pending_exception(snap);
| 2497 | +			dm_exception_table_lock(&lock);
| 2498 | +
| 2499 | +			pe2 = __lookup_pending_exception(snap, chunk);
| 2500 | +
| 2501 | +			if (!pe2) {
| 2502 | +				e = dm_lookup_exception(&snap->complete, chunk);
| 2503 | +				if (e) {
| 2504 | +					free_pending_exception(pe);
| 2505 | +					goto next_snapshot;
| 2506 | +				}
| 2507 | +
| 2508 | +				pe = __insert_pending_exception(snap, pe, chunk);
| 2509 | +				if (!pe) {
| 2510 | +					dm_exception_table_unlock(&lock);
| 2511 | +					up_read(&snap->lock);
| 2512 | +
| 2513 | +					invalidate_snapshot(snap, -ENOMEM);
| 2514 | +					continue;
| 2515 | +				}
| 2516 | +			} else {
| 2517 | +				free_pending_exception(pe);
| 2518 | +				pe = pe2;
2246 | 2519 | 			}
2247 | 2520 | 		}
2248 | 2521 |
.. | ..
2269 | 2542 | 	}
2270 | 2543 |
2271 | 2544 | next_snapshot:
2272 | | -		mutex_unlock(&snap->lock);
| 2545 | +		dm_exception_table_unlock(&lock);
| 2546 | +		up_read(&snap->lock);
2273 | 2547 |
2274 | 2548 | 		if (pe_to_start_now) {
2275 | 2549 | 			start_copy(pe_to_start_now);
.. | ..
2423 | 2697 | 	return do_origin(o->dev, bio, true);
2424 | 2698 | }
2425 | 2699 |
2426 | | -static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
2427 | | -		long nr_pages, void **kaddr, pfn_t *pfn)
2428 | | -{
2429 | | -	DMWARN("device does not support dax.");
2430 | | -	return -EIO;
2431 | | -}
2432 | | -
2433 | 2700 | /*
2434 | 2701 |  * Set the target "max_io_len" field to the minimum of all the snapshots'
2435 | 2702 |  * chunk sizes.
.. | ..
2489 | 2756 | 	.postsuspend = origin_postsuspend,
2490 | 2757 | 	.status = origin_status,
2491 | 2758 | 	.iterate_devices = origin_iterate_devices,
2492 | | -	.direct_access = origin_dax_direct_access,
2493 | 2759 | };
2494 | 2760 |
2495 | 2761 | static struct target_type snapshot_target = {
2496 | 2762 | 	.name = "snapshot",
2497 | | -	.version = {1, 15, 0},
| 2763 | +	.version = {1, 16, 0},
2498 | 2764 | 	.module = THIS_MODULE,
2499 | 2765 | 	.ctr = snapshot_ctr,
2500 | 2766 | 	.dtr = snapshot_dtr,
.. | ..
2504 | 2770 | 	.resume = snapshot_resume,
2505 | 2771 | 	.status = snapshot_status,
2506 | 2772 | 	.iterate_devices = snapshot_iterate_devices,
| 2773 | +	.io_hints = snapshot_io_hints,
2507 | 2774 | };
2508 | 2775 |
2509 | 2776 | static struct target_type merge_target = {
2510 | 2777 | 	.name = dm_snapshot_merge_target_name,
2511 | | -	.version = {1, 4, 0},
| 2778 | +	.version = {1, 5, 0},
2512 | 2779 | 	.module = THIS_MODULE,
2513 | 2780 | 	.ctr = snapshot_ctr,
2514 | 2781 | 	.dtr = snapshot_dtr,
.. | ..
2519 | 2786 | 	.resume = snapshot_merge_resume,
2520 | 2787 | 	.status = snapshot_status,
2521 | 2788 | 	.iterate_devices = snapshot_iterate_devices,
| 2789 | +	.io_hints = snapshot_io_hints,
2522 | 2790 | };
2523 | 2791 |
2524 | 2792 | static int __init dm_snapshot_init(void)
---|