hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/md/dm-thin-metadata.c
....@@ -28,7 +28,7 @@
2828 *
2929 * - A hierarchical btree, with 2 levels which effectively maps (thin
3030 * dev id, virtual block) -> block_time. Block time is a 64-bit
31
- * field holding the time in the low 24 bits, and block in the top 48
31
+ * field holding the time in the low 24 bits, and block in the top 40
3232 * bits.
3333 *
3434 * BTrees consist solely of btree_nodes, that fill a block. Some are
....@@ -189,6 +189,15 @@
189189 sector_t data_block_size;
190190
191191 /*
192
+ * Pre-commit callback.
193
+ *
194
+ * This allows the thin provisioning target to run a callback before
195
+ * the metadata are committed.
196
+ */
197
+ dm_pool_pre_commit_fn pre_commit_fn;
198
+ void *pre_commit_context;
199
+
200
+ /*
192201 * We reserve a section of the metadata for commit overhead.
193202 * All reported space does *not* include this.
194203 */
....@@ -200,6 +209,13 @@
200209 * operation possible in this state is the closing of the device.
201210 */
202211 bool fail_io:1;
212
+
213
+ /*
214
+ * Set once a thin-pool has been accessed through one of the interfaces
215
+ * that imply the pool is in-service (e.g. thin devices created/deleted,
216
+ * thin-pool message, metadata snapshots, etc).
217
+ */
218
+ bool in_service:1;
203219
204220 /*
205221 * Reading the space map roots can fail, so we read it into these
....@@ -363,6 +379,31 @@
363379 memcpy(&v2_le, value2_le, sizeof(v2_le));
364380
365381 return v1_le == v2_le;
382
+}
383
+
384
+/*----------------------------------------------------------------*/
385
+
386
+/*
387
+ * Variant that is used for in-core only changes or code that
388
+ * shouldn't put the pool in service on its own (e.g. commit).
389
+ */
390
+static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd)
391
+ __acquires(pmd->root_lock)
392
+{
393
+ down_write(&pmd->root_lock);
394
+}
395
+
396
+static inline void pmd_write_lock(struct dm_pool_metadata *pmd)
397
+{
398
+ pmd_write_lock_in_core(pmd);
399
+ if (unlikely(!pmd->in_service))
400
+ pmd->in_service = true;
401
+}
402
+
403
+static inline void pmd_write_unlock(struct dm_pool_metadata *pmd)
404
+ __releases(pmd->root_lock)
405
+{
406
+ up_write(&pmd->root_lock);
366407 }
367408
368409 /*----------------------------------------------------------------*/
....@@ -773,7 +814,7 @@
773814 return r;
774815
775816 if (td->open_count)
776
- td->changed = 0;
817
+ td->changed = false;
777818 else {
778819 list_del(&td->list);
779820 kfree(td);
....@@ -793,6 +834,18 @@
793834 * We need to know if the thin_disk_superblock exceeds a 512-byte sector.
794835 */
795836 BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
837
+ BUG_ON(!rwsem_is_locked(&pmd->root_lock));
838
+
839
+ if (unlikely(!pmd->in_service))
840
+ return 0;
841
+
842
+ if (pmd->pre_commit_fn) {
843
+ r = pmd->pre_commit_fn(pmd->pre_commit_context);
844
+ if (r < 0) {
845
+ DMERR("pre-commit callback failed");
846
+ return r;
847
+ }
848
+ }
796849
797850 r = __write_changed_details(pmd);
798851 if (r < 0)
....@@ -857,8 +910,11 @@
857910 pmd->time = 0;
858911 INIT_LIST_HEAD(&pmd->thin_devices);
859912 pmd->fail_io = false;
913
+ pmd->in_service = false;
860914 pmd->bdev = bdev;
861915 pmd->data_block_size = data_block_size;
916
+ pmd->pre_commit_fn = NULL;
917
+ pmd->pre_commit_context = NULL;
862918
863919 r = __create_persistent_data_objects(pmd, format_device);
864920 if (r) {
....@@ -901,13 +957,14 @@
901957 return -EBUSY;
902958 }
903959
960
+ pmd_write_lock_in_core(pmd);
904961 if (!pmd->fail_io && !dm_bm_is_read_only(pmd->bm)) {
905962 r = __commit_transaction(pmd);
906963 if (r < 0)
907964 DMWARN("%s: __commit_transaction() failed, error = %d",
908965 __func__, r);
909966 }
910
-
967
+ pmd_write_unlock(pmd);
911968 if (!pmd->fail_io)
912969 __destroy_persistent_data_objects(pmd);
913970
....@@ -994,12 +1051,11 @@
9941051 int r;
9951052 dm_block_t dev_root;
9961053 uint64_t key = dev;
997
- struct disk_device_details details_le;
9981054 struct dm_thin_device *td;
9991055 __le64 value;
10001056
10011057 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1002
- &key, &details_le);
1058
+ &key, NULL);
10031059 if (!r)
10041060 return -EEXIST;
10051061
....@@ -1036,10 +1092,10 @@
10361092 {
10371093 int r = -EINVAL;
10381094
1039
- down_write(&pmd->root_lock);
1095
+ pmd_write_lock(pmd);
10401096 if (!pmd->fail_io)
10411097 r = __create_thin(pmd, dev);
1042
- up_write(&pmd->root_lock);
1098
+ pmd_write_unlock(pmd);
10431099
10441100 return r;
10451101 }
....@@ -1055,7 +1111,7 @@
10551111 if (r)
10561112 return r;
10571113
1058
- td->changed = 1;
1114
+ td->changed = true;
10591115 td->snapshotted_time = time;
10601116
10611117 snap->mapped_blocks = td->mapped_blocks;
....@@ -1072,12 +1128,11 @@
10721128 dm_block_t origin_root;
10731129 uint64_t key = origin, dev_key = dev;
10741130 struct dm_thin_device *td;
1075
- struct disk_device_details details_le;
10761131 __le64 value;
10771132
10781133 /* check this device is unused */
10791134 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1080
- &dev_key, &details_le);
1135
+ &dev_key, NULL);
10811136 if (!r)
10821137 return -EEXIST;
10831138
....@@ -1127,10 +1182,10 @@
11271182 {
11281183 int r = -EINVAL;
11291184
1130
- down_write(&pmd->root_lock);
1185
+ pmd_write_lock(pmd);
11311186 if (!pmd->fail_io)
11321187 r = __create_snap(pmd, dev, origin);
1133
- up_write(&pmd->root_lock);
1188
+ pmd_write_unlock(pmd);
11341189
11351190 return r;
11361191 }
....@@ -1170,10 +1225,10 @@
11701225 {
11711226 int r = -EINVAL;
11721227
1173
- down_write(&pmd->root_lock);
1228
+ pmd_write_lock(pmd);
11741229 if (!pmd->fail_io)
11751230 r = __delete_device(pmd, dev);
1176
- up_write(&pmd->root_lock);
1231
+ pmd_write_unlock(pmd);
11771232
11781233 return r;
11791234 }
....@@ -1184,7 +1239,7 @@
11841239 {
11851240 int r = -EINVAL;
11861241
1187
- down_write(&pmd->root_lock);
1242
+ pmd_write_lock(pmd);
11881243
11891244 if (pmd->fail_io)
11901245 goto out;
....@@ -1198,7 +1253,7 @@
11981253 r = 0;
11991254
12001255 out:
1201
- up_write(&pmd->root_lock);
1256
+ pmd_write_unlock(pmd);
12021257
12031258 return r;
12041259 }
....@@ -1229,7 +1284,12 @@
12291284 * We commit to ensure the btree roots which we increment in a
12301285 * moment are up to date.
12311286 */
1232
- __commit_transaction(pmd);
1287
+ r = __commit_transaction(pmd);
1288
+ if (r < 0) {
1289
+ DMWARN("%s: __commit_transaction() failed, error = %d",
1290
+ __func__, r);
1291
+ return r;
1292
+ }
12331293
12341294 /*
12351295 * Copy the superblock.
....@@ -1287,10 +1347,10 @@
12871347 {
12881348 int r = -EINVAL;
12891349
1290
- down_write(&pmd->root_lock);
1350
+ pmd_write_lock(pmd);
12911351 if (!pmd->fail_io)
12921352 r = __reserve_metadata_snap(pmd);
1293
- up_write(&pmd->root_lock);
1353
+ pmd_write_unlock(pmd);
12941354
12951355 return r;
12961356 }
....@@ -1335,10 +1395,10 @@
13351395 {
13361396 int r = -EINVAL;
13371397
1338
- down_write(&pmd->root_lock);
1398
+ pmd_write_lock(pmd);
13391399 if (!pmd->fail_io)
13401400 r = __release_metadata_snap(pmd);
1341
- up_write(&pmd->root_lock);
1401
+ pmd_write_unlock(pmd);
13421402
13431403 return r;
13441404 }
....@@ -1381,19 +1441,19 @@
13811441 {
13821442 int r = -EINVAL;
13831443
1384
- down_write(&pmd->root_lock);
1444
+ pmd_write_lock_in_core(pmd);
13851445 if (!pmd->fail_io)
13861446 r = __open_device(pmd, dev, 0, td);
1387
- up_write(&pmd->root_lock);
1447
+ pmd_write_unlock(pmd);
13881448
13891449 return r;
13901450 }
13911451
13921452 int dm_pool_close_thin_device(struct dm_thin_device *td)
13931453 {
1394
- down_write(&td->pmd->root_lock);
1454
+ pmd_write_lock_in_core(td->pmd);
13951455 __close_device(td);
1396
- up_write(&td->pmd->root_lock);
1456
+ pmd_write_unlock(td->pmd);
13971457
13981458 return 0;
13991459 }
....@@ -1562,7 +1622,7 @@
15621622 if (r)
15631623 return r;
15641624
1565
- td->changed = 1;
1625
+ td->changed = true;
15661626 if (inserted)
15671627 td->mapped_blocks++;
15681628
....@@ -1574,10 +1634,10 @@
15741634 {
15751635 int r = -EINVAL;
15761636
1577
- down_write(&td->pmd->root_lock);
1637
+ pmd_write_lock(td->pmd);
15781638 if (!td->pmd->fail_io)
15791639 r = __insert(td, block, data_block);
1580
- up_write(&td->pmd->root_lock);
1640
+ pmd_write_unlock(td->pmd);
15811641
15821642 return r;
15831643 }
....@@ -1593,7 +1653,7 @@
15931653 return r;
15941654
15951655 td->mapped_blocks--;
1596
- td->changed = 1;
1656
+ td->changed = true;
15971657
15981658 return 0;
15991659 }
....@@ -1647,7 +1707,7 @@
16471707 }
16481708
16491709 td->mapped_blocks -= total_count;
1650
- td->changed = 1;
1710
+ td->changed = true;
16511711
16521712 /*
16531713 * Reinsert the mapping tree.
....@@ -1661,10 +1721,10 @@
16611721 {
16621722 int r = -EINVAL;
16631723
1664
- down_write(&td->pmd->root_lock);
1724
+ pmd_write_lock(td->pmd);
16651725 if (!td->pmd->fail_io)
16661726 r = __remove(td, block);
1667
- up_write(&td->pmd->root_lock);
1727
+ pmd_write_unlock(td->pmd);
16681728
16691729 return r;
16701730 }
....@@ -1674,10 +1734,10 @@
16741734 {
16751735 int r = -EINVAL;
16761736
1677
- down_write(&td->pmd->root_lock);
1737
+ pmd_write_lock(td->pmd);
16781738 if (!td->pmd->fail_io)
16791739 r = __remove_range(td, begin, end);
1680
- up_write(&td->pmd->root_lock);
1740
+ pmd_write_unlock(td->pmd);
16811741
16821742 return r;
16831743 }
....@@ -1700,13 +1760,13 @@
17001760 {
17011761 int r = 0;
17021762
1703
- down_write(&pmd->root_lock);
1763
+ pmd_write_lock(pmd);
17041764 for (; b != e; b++) {
17051765 r = dm_sm_inc_block(pmd->data_sm, b);
17061766 if (r)
17071767 break;
17081768 }
1709
- up_write(&pmd->root_lock);
1769
+ pmd_write_unlock(pmd);
17101770
17111771 return r;
17121772 }
....@@ -1715,13 +1775,13 @@
17151775 {
17161776 int r = 0;
17171777
1718
- down_write(&pmd->root_lock);
1778
+ pmd_write_lock(pmd);
17191779 for (; b != e; b++) {
17201780 r = dm_sm_dec_block(pmd->data_sm, b);
17211781 if (r)
17221782 break;
17231783 }
1724
- up_write(&pmd->root_lock);
1784
+ pmd_write_unlock(pmd);
17251785
17261786 return r;
17271787 }
....@@ -1769,10 +1829,10 @@
17691829 {
17701830 int r = -EINVAL;
17711831
1772
- down_write(&pmd->root_lock);
1832
+ pmd_write_lock(pmd);
17731833 if (!pmd->fail_io)
17741834 r = dm_sm_new_block(pmd->data_sm, result);
1775
- up_write(&pmd->root_lock);
1835
+ pmd_write_unlock(pmd);
17761836
17771837 return r;
17781838 }
....@@ -1781,12 +1841,16 @@
17811841 {
17821842 int r = -EINVAL;
17831843
1784
- down_write(&pmd->root_lock);
1844
+ /*
1845
+ * Care is taken to not have commit be what
1846
+ * triggers putting the thin-pool in-service.
1847
+ */
1848
+ pmd_write_lock_in_core(pmd);
17851849 if (pmd->fail_io)
17861850 goto out;
17871851
17881852 r = __commit_transaction(pmd);
1789
- if (r <= 0)
1853
+ if (r < 0)
17901854 goto out;
17911855
17921856 /*
....@@ -1794,7 +1858,7 @@
17941858 */
17951859 r = __begin_transaction(pmd);
17961860 out:
1797
- up_write(&pmd->root_lock);
1861
+ pmd_write_unlock(pmd);
17981862 return r;
17991863 }
18001864
....@@ -1810,7 +1874,7 @@
18101874 {
18111875 int r = -EINVAL;
18121876
1813
- down_write(&pmd->root_lock);
1877
+ pmd_write_lock(pmd);
18141878 if (pmd->fail_io)
18151879 goto out;
18161880
....@@ -1821,7 +1885,7 @@
18211885 pmd->fail_io = true;
18221886
18231887 out:
1824
- up_write(&pmd->root_lock);
1888
+ pmd_write_unlock(pmd);
18251889
18261890 return r;
18271891 }
....@@ -1952,10 +2016,10 @@
19522016 {
19532017 int r = -EINVAL;
19542018
1955
- down_write(&pmd->root_lock);
2019
+ pmd_write_lock(pmd);
19562020 if (!pmd->fail_io)
19572021 r = __resize_space_map(pmd->data_sm, new_count);
1958
- up_write(&pmd->root_lock);
2022
+ pmd_write_unlock(pmd);
19592023
19602024 return r;
19612025 }
....@@ -1964,29 +2028,29 @@
19642028 {
19652029 int r = -EINVAL;
19662030
1967
- down_write(&pmd->root_lock);
2031
+ pmd_write_lock(pmd);
19682032 if (!pmd->fail_io) {
19692033 r = __resize_space_map(pmd->metadata_sm, new_count);
19702034 if (!r)
19712035 __set_metadata_reserve(pmd);
19722036 }
1973
- up_write(&pmd->root_lock);
2037
+ pmd_write_unlock(pmd);
19742038
19752039 return r;
19762040 }
19772041
19782042 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
19792043 {
1980
- down_write(&pmd->root_lock);
2044
+ pmd_write_lock_in_core(pmd);
19812045 dm_bm_set_read_only(pmd->bm);
1982
- up_write(&pmd->root_lock);
2046
+ pmd_write_unlock(pmd);
19832047 }
19842048
19852049 void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
19862050 {
1987
- down_write(&pmd->root_lock);
2051
+ pmd_write_lock_in_core(pmd);
19882052 dm_bm_set_read_write(pmd->bm);
1989
- up_write(&pmd->root_lock);
2053
+ pmd_write_unlock(pmd);
19902054 }
19912055
19922056 int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
....@@ -1994,13 +2058,26 @@
19942058 dm_sm_threshold_fn fn,
19952059 void *context)
19962060 {
1997
- int r;
2061
+ int r = -EINVAL;
19982062
1999
- down_write(&pmd->root_lock);
2000
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
2001
- up_write(&pmd->root_lock);
2063
+ pmd_write_lock_in_core(pmd);
2064
+ if (!pmd->fail_io) {
2065
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
2066
+ threshold, fn, context);
2067
+ }
2068
+ pmd_write_unlock(pmd);
20022069
20032070 return r;
2071
+}
2072
+
2073
+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
2074
+ dm_pool_pre_commit_fn fn,
2075
+ void *context)
2076
+{
2077
+ pmd_write_lock_in_core(pmd);
2078
+ pmd->pre_commit_fn = fn;
2079
+ pmd->pre_commit_context = context;
2080
+ pmd_write_unlock(pmd);
20042081 }
20052082
20062083 int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
....@@ -2009,7 +2086,7 @@
20092086 struct dm_block *sblock;
20102087 struct thin_disk_superblock *disk_super;
20112088
2012
- down_write(&pmd->root_lock);
2089
+ pmd_write_lock(pmd);
20132090 if (pmd->fail_io)
20142091 goto out;
20152092
....@@ -2026,7 +2103,7 @@
20262103
20272104 dm_bm_unlock(sblock);
20282105 out:
2029
- up_write(&pmd->root_lock);
2106
+ pmd_write_unlock(pmd);
20302107 return r;
20312108 }
20322109