2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
--- a/kernel/drivers/md/dm-table.c
+++ b/kernel/drivers/md/dm-table.c
@@ -21,55 +21,12 @@
 #include <linux/blk-mq.h>
 #include <linux/mount.h>
 #include <linux/dax.h>
-#include <linux/bio.h>
-#include <linux/keyslot-manager.h>

 #define DM_MSG_PREFIX "table"

-#define MAX_DEPTH 16
 #define NODE_SIZE L1_CACHE_BYTES
 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
-
-struct dm_table {
-	struct mapped_device *md;
-	enum dm_queue_mode type;
-
-	/* btree table */
-	unsigned int depth;
-	unsigned int counts[MAX_DEPTH]; /* in nodes */
-	sector_t *index[MAX_DEPTH];
-
-	unsigned int num_targets;
-	unsigned int num_allocated;
-	sector_t *highs;
-	struct dm_target *targets;
-
-	struct target_type *immutable_target_type;
-
-	bool integrity_supported:1;
-	bool singleton:1;
-	bool all_blk_mq:1;
-	unsigned integrity_added:1;
-
-	/*
-	 * Indicates the rw permissions for the new logical
-	 * device. This should be a combination of FMODE_READ
-	 * and FMODE_WRITE.
-	 */
-	fmode_t mode;
-
-	/* a list of devices used by this table */
-	struct list_head devices;
-
-	/* events get handed up using this callback */
-	void (*event_fn)(void *);
-	void *event_context;
-
-	struct dm_md_mempools *mempools;
-
-	struct list_head target_callbacks;
-};

 /*
  * Similar to ceiling(log_size(n))
@@ -166,10 +123,8 @@

 	/*
 	 * Allocate both the target array and offset array at once.
-	 * Append an empty entry to catch sectors beyond the end of
-	 * the device.
 	 */
-	n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) +
+	n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) +
 					  sizeof(sector_t));
 	if (!n_highs)
 		return -ENOMEM;
@@ -195,7 +150,6 @@
 		return -ENOMEM;

 	INIT_LIST_HEAD(&t->devices);
-	INIT_LIST_HEAD(&t->target_callbacks);

 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
@@ -233,6 +187,8 @@
 	}
 }

+static void dm_table_destroy_keyslot_manager(struct dm_table *t);
+
 void dm_table_destroy(struct dm_table *t)
 {
 	unsigned int i;
@@ -261,6 +217,8 @@

 	dm_free_md_mempools(t->mempools);

+	dm_table_destroy_keyslot_manager(t);
+
 	kfree(t);
 }

@@ -284,7 +242,6 @@
 static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 				  sector_t start, sector_t len, void *data)
 {
-	struct request_queue *q;
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
 	sector_t dev_size =
@@ -292,22 +249,6 @@
 	unsigned short logical_block_size_sectors =
 		limits->logical_block_size >> SECTOR_SHIFT;
 	char b[BDEVNAME_SIZE];
-
-	/*
-	 * Some devices exist without request functions,
-	 * such as loop devices not yet bound to backing files.
-	 * Forbid the use of such devices.
-	 */
-	q = bdev_get_queue(bdev);
-	if (!q || !q->make_request_fn) {
-		DMWARN("%s: %s is not yet initialised: "
-		       "start=%llu, len=%llu, dev_size=%llu",
-		       dm_device_name(ti->table->md), bdevname(bdev, b),
-		       (unsigned long long)start,
-		       (unsigned long long)len,
-		       (unsigned long long)dev_size);
-		return 1;
-	}

 	if (!dev_size)
 		return 0;
@@ -383,7 +324,7 @@
  * This upgrades the mode on an already open dm_dev, being
  * careful to leave things as they were if we fail to reopen the
  * device and not to touch the existing bdev field in case
- * it is accessed concurrently inside dm_table_any_congested().
+ * it is accessed concurrently.
  */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 			struct mapped_device *md)
@@ -492,7 +433,8 @@
 		return 0;
 	}

-	if (bdev_stack_limits(limits, bdev, start) < 0)
+	if (blk_stack_limits(limits, &q->limits,
+			get_start_sect(bdev) + start) < 0)
 		DMWARN("%s: adding target device %s caused an alignment inconsistency: "
 		       "physical_block_size=%u, logical_block_size=%u, "
 		       "alignment_offset=%u, start=%llu",
@@ -501,9 +443,6 @@
 		       q->limits.logical_block_size,
 		       q->limits.alignment_offset,
 		       (unsigned long long) start << SECTOR_SHIFT);
-
-	limits->zoned = blk_queue_zoned_model(q);
-
 	return 0;
 }

@@ -673,7 +612,7 @@
 	 */
 	unsigned short remaining = 0;

-	struct dm_target *uninitialized_var(ti);
+	struct dm_target *ti;
 	struct queue_limits ti_limits;
 	unsigned i;

@@ -796,16 +735,6 @@
 		goto bad;
 	}

-#ifdef CONFIG_ARCH_ROCKCHIP
-	while (argv && (argc > 1) &&
-	       strncmp(argv[1], "PARTUUID=", 9) == 0 &&
-	       name_to_dev_t(argv[1]) == 0) {
-		DMINFO("%s: %s: Waiting for device %s ...",
-		       dm_device_name(t->md), type, argv[1]);
-		msleep(100);
-	}
-#endif
-
 	r = tgt->type->ctr(tgt, argc, argv);
 	kfree(argv);
 	if (r)
@@ -887,14 +816,12 @@
 static bool __table_type_bio_based(enum dm_queue_mode table_type)
 {
 	return (table_type == DM_TYPE_BIO_BASED ||
-		table_type == DM_TYPE_DAX_BIO_BASED ||
-		table_type == DM_TYPE_NVME_BIO_BASED);
+		table_type == DM_TYPE_DAX_BIO_BASED);
 }

 static bool __table_type_request_based(enum dm_queue_mode table_type)
 {
-	return (table_type == DM_TYPE_REQUEST_BASED ||
-		table_type == DM_TYPE_MQ_REQUEST_BASED);
+	return table_type == DM_TYPE_REQUEST_BASED;
 }

 void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
@@ -903,13 +830,29 @@
 }
 EXPORT_SYMBOL_GPL(dm_table_set_type);

-static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
-				  sector_t start, sector_t len, void *data)
+/* validate the dax capability of the target device span */
+int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+			   sector_t start, sector_t len, void *data)
 {
-	return !bdev_dax_supported(dev->bdev, PAGE_SIZE);
+	int blocksize = *(int *) data, id;
+	bool rc;
+
+	id = dax_read_lock();
+	rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
+	dax_read_unlock(id);
+
+	return rc;
 }

-static bool dm_table_supports_dax(struct dm_table *t)
+/* Check devices support synchronous DAX */
+static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
+					      sector_t start, sector_t len, void *data)
+{
+	return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
+}
+
+bool dm_table_supports_dax(struct dm_table *t,
+			   iterate_devices_callout_fn iterate_fn, int *blocksize)
 {
 	struct dm_target *ti;
 	unsigned i;
@@ -922,42 +865,34 @@
 			return false;

 		if (!ti->type->iterate_devices ||
-		    ti->type->iterate_devices(ti, device_not_dax_capable, NULL))
+		    ti->type->iterate_devices(ti, iterate_fn, blocksize))
 			return false;
 	}

 	return true;
 }

-static bool dm_table_does_not_support_partial_completion(struct dm_table *t);
-
-struct verify_rq_based_data {
-	unsigned sq_count;
-	unsigned mq_count;
-};
-
-static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev,
-			      sector_t start, sector_t len, void *data)
+static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
+				  sector_t start, sector_t len, void *data)
 {
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-	struct verify_rq_based_data *v = data;
+	struct block_device *bdev = dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);

-	if (q->mq_ops)
-		v->mq_count++;
-	else
-		v->sq_count++;
+	/* request-based cannot stack on partitions! */
+	if (bdev_is_partition(bdev))
+		return false;

-	return queue_is_rq_based(q);
+	return queue_is_mq(q);
 }

 static int dm_table_determine_type(struct dm_table *t)
 {
 	unsigned i;
 	unsigned bio_based = 0, request_based = 0, hybrid = 0;
-	struct verify_rq_based_data v = {.sq_count = 0, .mq_count = 0};
 	struct dm_target *tgt;
 	struct list_head *devices = dm_table_get_devices(t);
 	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
+	int page_size = PAGE_SIZE;

 	if (t->type != DM_TYPE_NONE) {
 		/* target already set the table's type */
@@ -966,7 +901,6 @@
 			goto verify_bio_based;
 		}
 		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
-		BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
 		goto verify_rq_based;
 	}

@@ -1002,28 +936,15 @@
 verify_bio_based:
 	/* We must use this table as bio-based */
 	t->type = DM_TYPE_BIO_BASED;
-	if (dm_table_supports_dax(t) ||
+	if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
 	    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
 		t->type = DM_TYPE_DAX_BIO_BASED;
-	} else {
-		/* Check if upgrading to NVMe bio-based is valid or required */
-		tgt = dm_table_get_immutable_target(t);
-		if (tgt && !tgt->max_io_len && dm_table_does_not_support_partial_completion(t)) {
-			t->type = DM_TYPE_NVME_BIO_BASED;
-			goto verify_rq_based; /* must be stacked directly on NVMe (blk-mq) */
-		} else if (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED) {
-			t->type = DM_TYPE_NVME_BIO_BASED;
-		}
 	}
 	return 0;
 	}

 	BUG_ON(!request_based); /* No targets in this table */

-	/*
-	 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
-	 * having a compatible target use dm_table_set_type.
-	 */
 	t->type = DM_TYPE_REQUEST_BASED;

 verify_rq_based:
@@ -1034,8 +955,7 @@
 	 * (e.g. request completion process for partial completion.)
 	 */
 	if (t->num_targets > 1) {
-		DMERR("%s DM doesn't support multiple targets",
-		      t->type == DM_TYPE_NVME_BIO_BASED ? "nvme bio-based" : "request-based");
+		DMERR("request-based DM doesn't support multiple targets");
 		return -EINVAL;
 	}

@@ -1043,11 +963,9 @@
 		int srcu_idx;
 		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);

-		/* inherit live table's type and all_blk_mq */
-		if (live_table) {
+		/* inherit live table's type */
+		if (live_table)
 			t->type = live_table->type;
-			t->all_blk_mq = live_table->all_blk_mq;
-		}
 		dm_put_live_table(t->md, srcu_idx);
 		return 0;
 	}
@@ -1063,19 +981,8 @@

 	/* Non-request-stackable devices can't be used for request-based dm */
 	if (!tgt->type->iterate_devices ||
-	    !tgt->type->iterate_devices(tgt, device_is_rq_based, &v)) {
+	    !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
 		DMERR("table load rejected: including non-request-stackable devices");
-		return -EINVAL;
-	}
-
-	if (v.sq_count && v.mq_count) {
-		DMERR("table load rejected: not all devices are blk-mq request-stackable");
-		return -EINVAL;
-	}
-	t->all_blk_mq = v.mq_count > 0;
-
-	if (!t->all_blk_mq &&
-	    (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) {
-		DMERR("table load rejected: all devices are not blk-mq request-stackable");
 		return -EINVAL;
 	}
@@ -1124,11 +1031,6 @@
 bool dm_table_request_based(struct dm_table *t)
 {
 	return __table_type_request_based(dm_table_get_type(t));
-}
-
-bool dm_table_all_blk_mq_devices(struct dm_table *t)
-{
-	return t->all_blk_mq;
 }

 static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
@@ -1312,6 +1214,278 @@
 	return 0;
 }

+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+
+struct dm_keyslot_manager {
+	struct blk_keyslot_manager ksm;
+	struct mapped_device *md;
+};
+
+static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
+				     sector_t start, sector_t len, void *data)
+{
+	const struct blk_crypto_key *key = data;
+
+	blk_crypto_evict_key(bdev_get_queue(dev->bdev), key);
+	return 0;
+}
+
+/*
+ * When an inline encryption key is evicted from a device-mapper device, evict
+ * it from all the underlying devices.
+ */
+static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
+			    const struct blk_crypto_key *key, unsigned int slot)
+{
+	struct dm_keyslot_manager *dksm = container_of(ksm,
+						       struct dm_keyslot_manager,
+						       ksm);
+	struct mapped_device *md = dksm->md;
+	struct dm_table *t;
+	int srcu_idx;
+	int i;
+	struct dm_target *ti;
+
+	t = dm_get_live_table(md, &srcu_idx);
+	if (!t)
+		return 0;
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+		if (!ti->type->iterate_devices)
+			continue;
+		ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
+					  (void *)key);
+	}
+	dm_put_live_table(md, srcu_idx);
+	return 0;
+}
+
+struct dm_derive_raw_secret_args {
+	const u8 *wrapped_key;
+	unsigned int wrapped_key_size;
+	u8 *secret;
+	unsigned int secret_size;
+	int err;
+};
+
+static int dm_derive_raw_secret_callback(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct dm_derive_raw_secret_args *args = data;
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	if (!args->err)
+		return 0;
+
+	if (!q->ksm) {
+		args->err = -EOPNOTSUPP;
+		return 0;
+	}
+
+	args->err = blk_ksm_derive_raw_secret(q->ksm, args->wrapped_key,
+					      args->wrapped_key_size,
+					      args->secret,
+					      args->secret_size);
+	/* Try another device in case this fails. */
+	return 0;
+}
+
+/*
+ * Retrieve the raw_secret from the underlying device. Given that only one
+ * raw_secret can exist for a particular wrappedkey, retrieve it only from the
+ * first device that supports derive_raw_secret().
+ */
+static int dm_derive_raw_secret(struct blk_keyslot_manager *ksm,
+				const u8 *wrapped_key,
+				unsigned int wrapped_key_size,
+				u8 *secret, unsigned int secret_size)
+{
+	struct dm_keyslot_manager *dksm = container_of(ksm,
+						       struct dm_keyslot_manager,
+						       ksm);
+	struct mapped_device *md = dksm->md;
+	struct dm_derive_raw_secret_args args = {
+		.wrapped_key = wrapped_key,
+		.wrapped_key_size = wrapped_key_size,
+		.secret = secret,
+		.secret_size = secret_size,
+		.err = -EOPNOTSUPP,
+	};
+	struct dm_table *t;
+	int srcu_idx;
+	int i;
+	struct dm_target *ti;
+
+	t = dm_get_live_table(md, &srcu_idx);
+	if (!t)
+		return -EOPNOTSUPP;
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+		if (!ti->type->iterate_devices)
+			continue;
+		ti->type->iterate_devices(ti, dm_derive_raw_secret_callback,
+					  &args);
+		if (!args.err)
+			break;
+	}
+	dm_put_live_table(md, srcu_idx);
+	return args.err;
+}
+
+
+static struct blk_ksm_ll_ops dm_ksm_ll_ops = {
+	.keyslot_evict = dm_keyslot_evict,
+	.derive_raw_secret = dm_derive_raw_secret,
+};
+
+static int device_intersect_crypto_modes(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct blk_keyslot_manager *parent = data;
+	struct blk_keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm;
+
+	blk_ksm_intersect_modes(parent, child);
+	return 0;
+}
+
+void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
+{
+	struct dm_keyslot_manager *dksm = container_of(ksm,
+						       struct dm_keyslot_manager,
+						       ksm);
+
+	if (!ksm)
+		return;
+
+	blk_ksm_destroy(ksm);
+	kfree(dksm);
+}
+
+static void dm_table_destroy_keyslot_manager(struct dm_table *t)
+{
+	dm_destroy_keyslot_manager(t->ksm);
+	t->ksm = NULL;
+}
+
+/*
+ * Constructs and initializes t->ksm with a keyslot manager that
+ * represents the common set of crypto capabilities of the devices
+ * described by the dm_table. However, if the constructed keyslot
+ * manager does not support a superset of the crypto capabilities
+ * supported by the current keyslot manager of the mapped_device,
+ * it returns an error instead, since we don't support restricting
+ * crypto capabilities on table changes. Finally, if the constructed
+ * keyslot manager doesn't actually support any crypto modes at all,
+ * it just returns NULL.
+ */
+static int dm_table_construct_keyslot_manager(struct dm_table *t)
+{
+	struct dm_keyslot_manager *dksm;
+	struct blk_keyslot_manager *ksm;
+	struct dm_target *ti;
+	unsigned int i;
+	bool ksm_is_empty = true;
+
+	dksm = kmalloc(sizeof(*dksm), GFP_KERNEL);
+	if (!dksm)
+		return -ENOMEM;
+	dksm->md = t->md;
+
+	ksm = &dksm->ksm;
+	blk_ksm_init_passthrough(ksm);
+	ksm->ksm_ll_ops = dm_ksm_ll_ops;
+	ksm->max_dun_bytes_supported = UINT_MAX;
+	memset(ksm->crypto_modes_supported, 0xFF,
+	       sizeof(ksm->crypto_modes_supported));
+	ksm->features = BLK_CRYPTO_FEATURE_STANDARD_KEYS |
+			BLK_CRYPTO_FEATURE_WRAPPED_KEYS;
+
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+
+		if (!dm_target_passes_crypto(ti->type)) {
+			blk_ksm_intersect_modes(ksm, NULL);
+			break;
+		}
+		if (!ti->type->iterate_devices)
+			continue;
+		ti->type->iterate_devices(ti, device_intersect_crypto_modes,
+					  ksm);
+	}
+
+	if (t->md->queue && !blk_ksm_is_superset(ksm, t->md->queue->ksm)) {
+		DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
+		dm_destroy_keyslot_manager(ksm);
+		return -EINVAL;
+	}
+
+	/*
+	 * If the new KSM doesn't actually support any crypto modes, we may as
+	 * well represent it with a NULL ksm.
+	 */
+	ksm_is_empty = true;
+	for (i = 0; i < ARRAY_SIZE(ksm->crypto_modes_supported); i++) {
+		if (ksm->crypto_modes_supported[i]) {
+			ksm_is_empty = false;
+			break;
+		}
+	}
+
+	if (ksm_is_empty) {
+		dm_destroy_keyslot_manager(ksm);
+		ksm = NULL;
+	}
+
+	/*
+	 * t->ksm is only set temporarily while the table is being set
+	 * up, and it gets set to NULL after the capabilities have
+	 * been transferred to the request_queue.
+	 */
+	t->ksm = ksm;
+
+	return 0;
+}
+
+static void dm_update_keyslot_manager(struct request_queue *q,
+				      struct dm_table *t)
+{
+	if (!t->ksm)
+		return;
+
+	/* Make the ksm less restrictive */
+	if (!q->ksm) {
+		blk_ksm_register(t->ksm, q);
+	} else {
+		blk_ksm_update_capabilities(q->ksm, t->ksm);
+		dm_destroy_keyslot_manager(t->ksm);
+	}
+	t->ksm = NULL;
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static int dm_table_construct_keyslot_manager(struct dm_table *t)
+{
+	return 0;
+}
+
+void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
+{
+}
+
+static void dm_table_destroy_keyslot_manager(struct dm_table *t)
+{
+}
+
+static void dm_update_keyslot_manager(struct request_queue *q,
+				      struct dm_table *t)
+{
+}
+
+#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
+
 /*
  * Prepares the table for use by building the indices,
  * setting the type, and allocating mempools.
@@ -1335,6 +1509,12 @@
 	r = dm_table_register_integrity(t);
 	if (r) {
 		DMERR("could not register integrity profile.");
+		return r;
+	}
+
+	r = dm_table_construct_keyslot_manager(t);
+	if (r) {
+		DMERR("could not construct keyslot manager.");
 		return r;
 	}

13811561 /*
13821562 * Search the btree for the correct target.
13831563 *
1384
- * Caller should check returned pointer with dm_target_is_valid()
1564
+ * Caller should check returned pointer for NULL
13851565 * to trap I/O beyond end of device.
13861566 */
13871567 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
@@ -1390,7 +1570,7 @@
 	sector_t *node;

 	if (unlikely(sector >= dm_table_get_size(t)))
-		return &t->targets[t->num_targets];
+		return NULL;

 	for (l = 0; l < t->depth; l++) {
 		n = get_child(n, k);
@@ -1489,6 +1669,13 @@
 	return !q || blk_queue_zoned_model(q) != *zoned_model;
 }

+/*
+ * Check the device zoned model based on the target feature flag. If the target
+ * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
+ * also accepted but all devices must have the same zoned model. If the target
+ * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
+ * zoned model with all zoned devices having the same zone size.
+ */
 static bool dm_table_supports_zoned_model(struct dm_table *t,
 					  enum blk_zoned_model zoned_model)
 {
@@ -1498,13 +1685,15 @@
 	for (i = 0; i < dm_table_get_num_targets(t); i++) {
 		ti = dm_table_get_target(t, i);

-		if (zoned_model == BLK_ZONED_HM &&
-		    !dm_target_supports_zoned_hm(ti->type))
-			return false;
-
-		if (!ti->type->iterate_devices ||
-		    ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model))
-			return false;
+		if (dm_target_supports_zoned_hm(ti->type)) {
+			if (!ti->type->iterate_devices ||
+			    ti->type->iterate_devices(ti, device_not_zoned_model,
+						      &zoned_model))
+				return false;
+		} else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
+			if (zoned_model == BLK_ZONED_HM)
+				return false;
+		}
 	}

 	return true;
@@ -1516,9 +1705,17 @@
 	struct request_queue *q = bdev_get_queue(dev->bdev);
 	unsigned int *zone_sectors = data;

+	if (!blk_queue_is_zoned(q))
+		return 0;
+
 	return !q || blk_queue_zone_sectors(q) != *zone_sectors;
 }

+/*
+ * Check consistency of zoned model and zone sectors across all targets. For
+ * zone sectors, if the destination device is a zoned block device, it shall
+ * have the specified zone_sectors.
+ */
 static int validate_hardware_zoned_model(struct dm_table *table,
 					 enum blk_zoned_model zoned_model,
 					 unsigned int zone_sectors)
@@ -1537,7 +1734,7 @@
 		return -EINVAL;

 	if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
-		DMERR("%s: zone sectors is not consistent across all devices",
+		DMERR("%s: zone sectors is not consistent across all zoned devices",
 		      dm_device_name(table->md));
 		return -EINVAL;
 	}
@@ -1606,22 +1803,6 @@
 		       dm_device_name(table->md),
 		       (unsigned long long) ti->begin,
 		       (unsigned long long) ti->len);
-
-		/*
-		 * FIXME: this should likely be moved to blk_stack_limits(), would
-		 * also eliminate limits->zoned stacking hack in dm_set_device_limits()
-		 */
-		if (limits->zoned == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
-			/*
-			 * By default, the stacked limits zoned model is set to
-			 * BLK_ZONED_NONE in blk_set_stacking_limits(). Update
-			 * this model using the first target model reported
-			 * that is not BLK_ZONED_NONE. This will be either the
-			 * first target device zoned model or the model reported
-			 * by the target .io_hints.
-			 */
-			limits->zoned = ti_limits.zoned;
-		}
 	}

 	/*
@@ -1674,54 +1855,6 @@
 		blk_integrity_unregister(dm_disk(t->md));
 	}
 }
-
-#ifdef CONFIG_BLK_INLINE_ENCRYPTION
-static int device_intersect_crypto_modes(struct dm_target *ti,
-					 struct dm_dev *dev, sector_t start,
-					 sector_t len, void *data)
-{
-	struct keyslot_manager *parent = data;
-	struct keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm;
-
-	keyslot_manager_intersect_modes(parent, child);
-	return 0;
-}
-
-/*
- * Update the inline crypto modes supported by 'q->ksm' to be the intersection
- * of the modes supported by all targets in the table.
- *
- * For any mode to be supported at all, all targets must have explicitly
- * declared that they can pass through inline crypto support. For a particular
- * mode to be supported, all underlying devices must also support it.
- *
- * Assume that 'q->ksm' initially declares all modes to be supported.
- */
-static void dm_calculate_supported_crypto_modes(struct dm_table *t,
-						struct request_queue *q)
-{
-	struct dm_target *ti;
-	unsigned int i;
-
-	for (i = 0; i < dm_table_get_num_targets(t); i++) {
-		ti = dm_table_get_target(t, i);
-
-		if (!ti->may_passthrough_inline_crypto) {
-			keyslot_manager_intersect_modes(q->ksm, NULL);
-			return;
-		}
-		if (!ti->type->iterate_devices)
-			continue;
-		ti->type->iterate_devices(ti, device_intersect_crypto_modes,
-					  q->ksm);
-	}
-}
-#else /* CONFIG_BLK_INLINE_ENCRYPTION */
-static inline void dm_calculate_supported_crypto_modes(struct dm_table *t,
-						       struct request_queue *q)
-{
-}
-#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */

 static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
 				sector_t start, sector_t len, void *data)
@@ -1790,28 +1923,6 @@
 	return q && !blk_queue_add_random(q);
 }

-static int queue_no_sg_merge(struct dm_target *ti, struct dm_dev *dev,
-			     sector_t start, sector_t len, void *data)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return q && test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
-}
-
-static int device_is_partial_completion(struct dm_target *ti, struct dm_dev *dev,
-					sector_t start, sector_t len, void *data)
-{
-	char b[BDEVNAME_SIZE];
-
-	/* For now, NVMe devices are the only devices of this class */
-	return (strncmp(bdevname(dev->bdev, b), "nvme", 4) != 0);
-}
-
-static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
-{
-	return !dm_table_any_dev_attr(t, device_is_partial_completion, NULL);
-}
-
 static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
 					 sector_t start, sector_t len, void *data)
 {
@@ -1860,6 +1971,33 @@

 		if (!ti->type->iterate_devices ||
 		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
+			return false;
+	}
+
+	return true;
+}
+
+static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
+				     sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !blk_queue_nowait(q);
+}
+
+static bool dm_table_supports_nowait(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!dm_target_supports_nowait(ti->type))
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
 			return false;
 	}

@@ -1933,18 +2071,24 @@
 {
 	struct request_queue *q = bdev_get_queue(dev->bdev);

-	return q && bdi_cap_stable_pages_required(q->backing_dev_info);
+	return q && blk_queue_stable_writes(q);
 }

 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
 	bool wc = false, fua = false;
+	int page_size = PAGE_SIZE;

 	/*
 	 * Copy table's limits to the DM device's request_queue
 	 */
 	q->limits = *limits;
+
+	if (dm_table_supports_nowait(t))
+		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);

 	if (!dm_table_supports_discards(t)) {
 		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
@@ -1967,8 +2111,11 @@
 	}
 	blk_queue_write_cache(q, wc, fua);

-	if (dm_table_supports_dax(t))
+	if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+		if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
+			set_dax_synchronous(t->md->dax_dev);
+	}
 	else
 		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);

@@ -1986,14 +2133,7 @@
 	if (!dm_table_supports_write_zeroes(t))
 		q->limits.max_write_zeroes_sectors = 0;

-	if (dm_table_any_dev_attr(t, queue_no_sg_merge, NULL))
-		blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
-	else
-		blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
-
 	dm_table_verify_integrity(t);
-
-	dm_calculate_supported_crypto_modes(t, q);

 	/*
 	 * Some devices don't use blk_integrity but still want stable pages
@@ -2003,9 +2143,9 @@
 	 * don't want error, zero, etc to require stable pages.
 	 */
 	if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
-		q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
 	else
-		q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
+		blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);

 	/*
 	 * Determine whether or not this queue's I/O timings contribute
@@ -2017,8 +2157,20 @@
 	    dm_table_any_dev_attr(t, device_is_not_random, NULL))
 		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);

-	/* io_pages is used for readahead */
-	q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
+	/*
+	 * For a zoned target, the number of zones should be updated for the
+	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
+	 * target, this is all that is needed.
+	 */
+#ifdef CONFIG_BLK_DEV_ZONED
+	if (blk_queue_is_zoned(q)) {
+		WARN_ON_ONCE(queue_is_mq(q));
+		q->nr_zones = blkdev_nr_zones(t->md->disk);
+	}
+#endif
+
+	dm_update_keyslot_manager(q, t);
+	blk_queue_update_readahead(q);
 }

 unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -2123,64 +2275,25 @@
 	return 0;
 }

-void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb)
-{
-	list_add(&cb->list, &t->target_callbacks);
-}
-EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks);
-
-int dm_table_any_congested(struct dm_table *t, int bdi_bits)
-{
-	struct dm_dev_internal *dd;
-	struct list_head *devices = dm_table_get_devices(t);
-	struct dm_target_callbacks *cb;
-	int r = 0;
-
-	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
-		char b[BDEVNAME_SIZE];
-
-		if (likely(q))
-			r |= bdi_congested(q->backing_dev_info, bdi_bits);
-		else
-			DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
-				     dm_device_name(t->md),
-				     bdevname(dd->dm_dev->bdev, b));
-	}
-
-	list_for_each_entry(cb, &t->target_callbacks, list)
-		if (cb->congested_fn)
-			r |= cb->congested_fn(cb, bdi_bits);
-
-	return r;
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
 	return t->md;
 }
 EXPORT_SYMBOL(dm_table_get_md);

+const char *dm_table_device_name(struct dm_table *t)
+{
+	return dm_device_name(t->md);
+}
+EXPORT_SYMBOL_GPL(dm_table_device_name);
+
 void dm_table_run_md_queue_async(struct dm_table *t)
 {
-	struct mapped_device *md;
-	struct request_queue *queue;
-	unsigned long flags;
-
 	if (!dm_table_request_based(t))
 		return;

-	md = dm_table_get_md(t);
-	queue = dm_get_md_queue(md);
-	if (queue) {
-		if (queue->mq_ops)
-			blk_mq_run_hw_queues(queue, true);
-		else {
-			spin_lock_irqsave(queue->queue_lock, flags);
-			blk_run_queue_async(queue);
-			spin_unlock_irqrestore(queue->queue_lock, flags);
-		}
-	}
+	if (t->md->queue)
+		blk_mq_run_hw_queues(t->md->queue, true);
 }
 EXPORT_SYMBOL(dm_table_run_md_queue_async);
