forked from ~ljy/RK356X_SDK_RELEASE

Author: hc
Date:   2023-12-08
Commit: 01573e231f18eb2d99162747186f59511f56b64d
File:   kernel/drivers/md/dm-table.c
@@ -21,55 +21,12 @@
 #include <linux/blk-mq.h>
 #include <linux/mount.h>
 #include <linux/dax.h>
-#include <linux/bio.h>
-#include <linux/keyslot-manager.h>
 
 #define DM_MSG_PREFIX "table"
 
-#define MAX_DEPTH 16
 #define NODE_SIZE L1_CACHE_BYTES
 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
-
-struct dm_table {
-	struct mapped_device *md;
-	enum dm_queue_mode type;
-
-	/* btree table */
-	unsigned int depth;
-	unsigned int counts[MAX_DEPTH]; /* in nodes */
-	sector_t *index[MAX_DEPTH];
-
-	unsigned int num_targets;
-	unsigned int num_allocated;
-	sector_t *highs;
-	struct dm_target *targets;
-
-	struct target_type *immutable_target_type;
-
-	bool integrity_supported:1;
-	bool singleton:1;
-	bool all_blk_mq:1;
-	unsigned integrity_added:1;
-
-	/*
-	 * Indicates the rw permissions for the new logical
-	 * device. This should be a combination of FMODE_READ
-	 * and FMODE_WRITE.
-	 */
-	fmode_t mode;
-
-	/* a list of devices used by this table */
-	struct list_head devices;
-
-	/* events get handed up using this callback */
-	void (*event_fn)(void *);
-	void *event_context;
-
-	struct dm_md_mempools *mempools;
-
-	struct list_head target_callbacks;
-};
-
 
 /*
  * Similar to ceiling(log_size(n))
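
Note: struct dm_table is not deleted outright here; the local definition is
dropped because the structure now lives in a shared header (presumably
drivers/md/dm-core.h, as in 5.10-era kernels) where dm.c and dm-rq.c can
see it. The keyslot-manager code added later in this patch dereferences
t->ksm, so the shared definition must carry at least something like the
following hypothetical fragment (everything except the ksm member is taken
from the definition removed above):

    struct dm_table {
            struct mapped_device *md;
            enum dm_queue_mode type;
            /* ... remaining fields as in the removed definition ... */
    #ifdef CONFIG_BLK_INLINE_ENCRYPTION
            struct blk_keyslot_manager *ksm;
    #endif
    };
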
@@ -166,10 +123,8 @@
 
 	/*
 	 * Allocate both the target array and offset array at once.
-	 * Append an empty entry to catch sectors beyond the end of
-	 * the device.
 	 */
-	n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) +
+	n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) +
 					  sizeof(sector_t));
 	if (!n_highs)
 		return -ENOMEM;
@@ -195,7 +150,6 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&t->devices);
-	INIT_LIST_HEAD(&t->target_callbacks);
 
 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
233187 }
234188 }
235189
190
+static void dm_table_destroy_keyslot_manager(struct dm_table *t);
191
+
236192 void dm_table_destroy(struct dm_table *t)
237193 {
238194 unsigned int i;
@@ -261,6 +217,8 @@
 
 	dm_free_md_mempools(t->mempools);
 
+	dm_table_destroy_keyslot_manager(t);
+
 	kfree(t);
 }
 
@@ -284,7 +242,6 @@
 static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 				  sector_t start, sector_t len, void *data)
 {
-	struct request_queue *q;
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
 	sector_t dev_size =
@@ -292,22 +249,6 @@
 	unsigned short logical_block_size_sectors =
 		limits->logical_block_size >> SECTOR_SHIFT;
 	char b[BDEVNAME_SIZE];
-
-	/*
-	 * Some devices exist without request functions,
-	 * such as loop devices not yet bound to backing files.
-	 * Forbid the use of such devices.
-	 */
-	q = bdev_get_queue(bdev);
-	if (!q || !q->make_request_fn) {
-		DMWARN("%s: %s is not yet initialised: "
-		       "start=%llu, len=%llu, dev_size=%llu",
-		       dm_device_name(ti->table->md), bdevname(bdev, b),
-		       (unsigned long long)start,
-		       (unsigned long long)len,
-		       (unsigned long long)dev_size);
-		return 1;
-	}
 
 	if (!dev_size)
 		return 0;
@@ -383,7 +324,7 @@
  * This upgrades the mode on an already open dm_dev, being
  * careful to leave things as they were if we fail to reopen the
  * device and not to touch the existing bdev field in case
- * it is accessed concurrently inside dm_table_any_congested().
+ * it is accessed concurrently.
  */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 			struct mapped_device *md)
@@ -492,7 +433,8 @@
 		return 0;
 	}
 
-	if (bdev_stack_limits(limits, bdev, start) < 0)
+	if (blk_stack_limits(limits, &q->limits,
+			get_start_sect(bdev) + start) < 0)
 		DMWARN("%s: adding target device %s caused an alignment inconsistency: "
 		       "physical_block_size=%u, logical_block_size=%u, "
 		       "alignment_offset=%u, start=%llu",
@@ -501,9 +443,6 @@
 		       q->limits.logical_block_size,
 		       q->limits.alignment_offset,
 		       (unsigned long long) start << SECTOR_SHIFT);
-
-	limits->zoned = blk_queue_zoned_model(q);
-
 	return 0;
 }
 
@@ -673,7 +612,7 @@
 	 */
 	unsigned short remaining = 0;
 
-	struct dm_target *uninitialized_var(ti);
+	struct dm_target *ti;
 	struct queue_limits ti_limits;
 	unsigned i;
 
@@ -796,16 +735,6 @@
 		goto bad;
 	}
 
-#ifdef CONFIG_ARCH_ROCKCHIP
-	while (argv && (argc > 1) &&
-	       strncmp(argv[1], "PARTUUID=", 9) == 0 &&
-	       name_to_dev_t(argv[1]) == 0) {
-		DMINFO("%s: %s: Waiting for device %s ...",
-		       dm_device_name(t->md), type, argv[1]);
-		msleep(100);
-	}
-#endif
-
 	r = tgt->type->ctr(tgt, argc, argv);
 	kfree(argv);
 	if (r)
@@ -887,14 +816,12 @@
 static bool __table_type_bio_based(enum dm_queue_mode table_type)
 {
 	return (table_type == DM_TYPE_BIO_BASED ||
-		table_type == DM_TYPE_DAX_BIO_BASED ||
-		table_type == DM_TYPE_NVME_BIO_BASED);
+		table_type == DM_TYPE_DAX_BIO_BASED);
 }
 
 static bool __table_type_request_based(enum dm_queue_mode table_type)
 {
-	return (table_type == DM_TYPE_REQUEST_BASED ||
-		table_type == DM_TYPE_MQ_REQUEST_BASED);
+	return table_type == DM_TYPE_REQUEST_BASED;
 }
 
 void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
@@ -903,13 +830,29 @@
 }
 EXPORT_SYMBOL_GPL(dm_table_set_type);
 
-static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
-				  sector_t start, sector_t len, void *data)
+/* validate the dax capability of the target device span */
+int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+			sector_t start, sector_t len, void *data)
 {
-	return !bdev_dax_supported(dev->bdev, PAGE_SIZE);
+	int blocksize = *(int *) data, id;
+	bool rc;
+
+	id = dax_read_lock();
+	rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
+	dax_read_unlock(id);
+
+	return rc;
 }
 
-static bool dm_table_supports_dax(struct dm_table *t)
+/* Check devices support synchronous DAX */
+static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
+					      sector_t start, sector_t len, void *data)
+{
+	return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
+}
+
+bool dm_table_supports_dax(struct dm_table *t,
+			   iterate_devices_callout_fn iterate_fn, int *blocksize)
 {
 	struct dm_target *ti;
 	unsigned i;
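
Note: dm_table_supports_dax() is now parameterized with an iterate_devices
callout and a block size, so the same table walk can answer both "is DAX
usable at this page size" and "is synchronous DAX available". The call
shape used later in this patch (see verify_bio_based and
dm_table_set_restrictions) is lifted out here for reference:

    int page_size = PAGE_SIZE;

    if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
            blk_queue_flag_set(QUEUE_FLAG_DAX, q);
            if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
                    set_dax_synchronous(t->md->dax_dev);
    }
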
@@ -922,42 +865,34 @@
 			return false;
 
 		if (!ti->type->iterate_devices ||
-		    ti->type->iterate_devices(ti, device_not_dax_capable, NULL))
+		    ti->type->iterate_devices(ti, iterate_fn, blocksize))
 			return false;
 	}
 
 	return true;
 }
 
-static bool dm_table_does_not_support_partial_completion(struct dm_table *t);
-
-struct verify_rq_based_data {
-	unsigned sq_count;
-	unsigned mq_count;
-};
-
-static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev,
-			      sector_t start, sector_t len, void *data)
+static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
+				  sector_t start, sector_t len, void *data)
 {
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-	struct verify_rq_based_data *v = data;
+	struct block_device *bdev = dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
 
-	if (q->mq_ops)
-		v->mq_count++;
-	else
-		v->sq_count++;
+	/* request-based cannot stack on partitions! */
+	if (bdev_is_partition(bdev))
+		return false;
 
-	return queue_is_rq_based(q);
+	return queue_is_mq(q);
 }
 
 static int dm_table_determine_type(struct dm_table *t)
 {
 	unsigned i;
 	unsigned bio_based = 0, request_based = 0, hybrid = 0;
-	struct verify_rq_based_data v = {.sq_count = 0, .mq_count = 0};
 	struct dm_target *tgt;
 	struct list_head *devices = dm_table_get_devices(t);
 	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
+	int page_size = PAGE_SIZE;
 
 	if (t->type != DM_TYPE_NONE) {
 		/* target already set the table's type */
@@ -966,7 +901,6 @@
 			goto verify_bio_based;
 		}
 		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
-		BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
 		goto verify_rq_based;
 	}
 
@@ -1002,28 +936,15 @@
 verify_bio_based:
 		/* We must use this table as bio-based */
 		t->type = DM_TYPE_BIO_BASED;
-		if (dm_table_supports_dax(t) ||
+		if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
 		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
 			t->type = DM_TYPE_DAX_BIO_BASED;
-		} else {
-			/* Check if upgrading to NVMe bio-based is valid or required */
-			tgt = dm_table_get_immutable_target(t);
-			if (tgt && !tgt->max_io_len && dm_table_does_not_support_partial_completion(t)) {
-				t->type = DM_TYPE_NVME_BIO_BASED;
-				goto verify_rq_based; /* must be stacked directly on NVMe (blk-mq) */
-			} else if (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED) {
-				t->type = DM_TYPE_NVME_BIO_BASED;
-			}
 		}
 		return 0;
 	}
 
 	BUG_ON(!request_based); /* No targets in this table */
 
-	/*
-	 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
-	 * having a compatible target use dm_table_set_type.
-	 */
 	t->type = DM_TYPE_REQUEST_BASED;
 
 verify_rq_based:
@@ -1034,8 +955,7 @@
 	 * (e.g. request completion process for partial completion.)
 	 */
 	if (t->num_targets > 1) {
-		DMERR("%s DM doesn't support multiple targets",
-		      t->type == DM_TYPE_NVME_BIO_BASED ? "nvme bio-based" : "request-based");
+		DMERR("request-based DM doesn't support multiple targets");
 		return -EINVAL;
 	}
 
@@ -1043,11 +963,9 @@
 		int srcu_idx;
 		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
 
-		/* inherit live table's type and all_blk_mq */
-		if (live_table) {
+		/* inherit live table's type */
+		if (live_table)
 			t->type = live_table->type;
-			t->all_blk_mq = live_table->all_blk_mq;
-		}
 		dm_put_live_table(t->md, srcu_idx);
 		return 0;
 	}
@@ -1063,19 +981,8 @@
 
 	/* Non-request-stackable devices can't be used for request-based dm */
 	if (!tgt->type->iterate_devices ||
-	    !tgt->type->iterate_devices(tgt, device_is_rq_based, &v)) {
+	    !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
 		DMERR("table load rejected: including non-request-stackable devices");
-		return -EINVAL;
-	}
-	if (v.sq_count && v.mq_count) {
-		DMERR("table load rejected: not all devices are blk-mq request-stackable");
-		return -EINVAL;
-	}
-	t->all_blk_mq = v.mq_count > 0;
-
-	if (!t->all_blk_mq &&
-	    (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) {
-		DMERR("table load rejected: all devices are not blk-mq request-stackable");
 		return -EINVAL;
 	}
 
@@ -1124,11 +1031,6 @@
 bool dm_table_request_based(struct dm_table *t)
 {
 	return __table_type_request_based(dm_table_get_type(t));
-}
-
-bool dm_table_all_blk_mq_devices(struct dm_table *t)
-{
-	return t->all_blk_mq;
 }
 
 static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
@@ -1312,6 +1214,287 @@
 	return 0;
 }
 
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+
+struct dm_keyslot_manager {
+	struct blk_keyslot_manager ksm;
+	struct mapped_device *md;
+};
+
+struct dm_keyslot_evict_args {
+	const struct blk_crypto_key *key;
+	int err;
+};
+
+static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
+				     sector_t start, sector_t len, void *data)
+{
+	struct dm_keyslot_evict_args *args = data;
+	int err;
+
+	err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key);
+	if (!args->err)
+		args->err = err;
+	/* Always try to evict the key from all devices. */
+	return 0;
+}
+
+/*
+ * When an inline encryption key is evicted from a device-mapper device, evict
+ * it from all the underlying devices.
+ */
+static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
+			    const struct blk_crypto_key *key, unsigned int slot)
+{
+	struct dm_keyslot_manager *dksm = container_of(ksm,
+						       struct dm_keyslot_manager,
+						       ksm);
+	struct mapped_device *md = dksm->md;
+	struct dm_keyslot_evict_args args = { key };
+	struct dm_table *t;
+	int srcu_idx;
+	int i;
+	struct dm_target *ti;
+
+	t = dm_get_live_table(md, &srcu_idx);
+	if (!t)
+		return 0;
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+		if (!ti->type->iterate_devices)
+			continue;
+		ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
+	}
+	dm_put_live_table(md, srcu_idx);
+	return args.err;
+}
+
+struct dm_derive_raw_secret_args {
+	const u8 *wrapped_key;
+	unsigned int wrapped_key_size;
+	u8 *secret;
+	unsigned int secret_size;
+	int err;
+};
+
+static int dm_derive_raw_secret_callback(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct dm_derive_raw_secret_args *args = data;
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	if (!args->err)
+		return 0;
+
+	if (!q->ksm) {
+		args->err = -EOPNOTSUPP;
+		return 0;
+	}
+
+	args->err = blk_ksm_derive_raw_secret(q->ksm, args->wrapped_key,
+					      args->wrapped_key_size,
+					      args->secret,
+					      args->secret_size);
+	/* Try another device in case this fails. */
+	return 0;
+}
+
+/*
+ * Retrieve the raw_secret from the underlying device. Given that only one
+ * raw_secret can exist for a particular wrappedkey, retrieve it only from the
+ * first device that supports derive_raw_secret().
+ */
+static int dm_derive_raw_secret(struct blk_keyslot_manager *ksm,
+				const u8 *wrapped_key,
+				unsigned int wrapped_key_size,
+				u8 *secret, unsigned int secret_size)
+{
+	struct dm_keyslot_manager *dksm = container_of(ksm,
+						       struct dm_keyslot_manager,
+						       ksm);
+	struct mapped_device *md = dksm->md;
+	struct dm_derive_raw_secret_args args = {
+		.wrapped_key = wrapped_key,
+		.wrapped_key_size = wrapped_key_size,
+		.secret = secret,
+		.secret_size = secret_size,
+		.err = -EOPNOTSUPP,
+	};
+	struct dm_table *t;
+	int srcu_idx;
+	int i;
+	struct dm_target *ti;
+
+	t = dm_get_live_table(md, &srcu_idx);
+	if (!t)
+		return -EOPNOTSUPP;
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+		if (!ti->type->iterate_devices)
+			continue;
+		ti->type->iterate_devices(ti, dm_derive_raw_secret_callback,
+					  &args);
+		if (!args.err)
+			break;
+	}
+	dm_put_live_table(md, srcu_idx);
+	return args.err;
+}
+
+
+static struct blk_ksm_ll_ops dm_ksm_ll_ops = {
+	.keyslot_evict = dm_keyslot_evict,
+	.derive_raw_secret = dm_derive_raw_secret,
+};
+
+static int device_intersect_crypto_modes(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct blk_keyslot_manager *parent = data;
+	struct blk_keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm;
+
+	blk_ksm_intersect_modes(parent, child);
+	return 0;
+}
+
+void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
+{
+	struct dm_keyslot_manager *dksm = container_of(ksm,
+						       struct dm_keyslot_manager,
+						       ksm);
+
+	if (!ksm)
+		return;
+
+	blk_ksm_destroy(ksm);
+	kfree(dksm);
+}
+
+static void dm_table_destroy_keyslot_manager(struct dm_table *t)
+{
+	dm_destroy_keyslot_manager(t->ksm);
+	t->ksm = NULL;
+}
+
+/*
+ * Constructs and initializes t->ksm with a keyslot manager that
+ * represents the common set of crypto capabilities of the devices
+ * described by the dm_table. However, if the constructed keyslot
+ * manager does not support a superset of the crypto capabilities
+ * supported by the current keyslot manager of the mapped_device,
+ * it returns an error instead, since we don't support restricting
+ * crypto capabilities on table changes. Finally, if the constructed
+ * keyslot manager doesn't actually support any crypto modes at all,
+ * it just returns NULL.
+ */
+static int dm_table_construct_keyslot_manager(struct dm_table *t)
+{
+	struct dm_keyslot_manager *dksm;
+	struct blk_keyslot_manager *ksm;
+	struct dm_target *ti;
+	unsigned int i;
+	bool ksm_is_empty = true;
+
+	dksm = kmalloc(sizeof(*dksm), GFP_KERNEL);
+	if (!dksm)
+		return -ENOMEM;
+	dksm->md = t->md;
+
+	ksm = &dksm->ksm;
+	blk_ksm_init_passthrough(ksm);
+	ksm->ksm_ll_ops = dm_ksm_ll_ops;
+	ksm->max_dun_bytes_supported = UINT_MAX;
+	memset(ksm->crypto_modes_supported, 0xFF,
+	       sizeof(ksm->crypto_modes_supported));
+	ksm->features = BLK_CRYPTO_FEATURE_STANDARD_KEYS |
+			BLK_CRYPTO_FEATURE_WRAPPED_KEYS;
+
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+
+		if (!dm_target_passes_crypto(ti->type)) {
+			blk_ksm_intersect_modes(ksm, NULL);
+			break;
+		}
+		if (!ti->type->iterate_devices)
+			continue;
+		ti->type->iterate_devices(ti, device_intersect_crypto_modes,
+					  ksm);
+	}
+
+	if (t->md->queue && !blk_ksm_is_superset(ksm, t->md->queue->ksm)) {
+		DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
+		dm_destroy_keyslot_manager(ksm);
+		return -EINVAL;
+	}
+
+	/*
+	 * If the new KSM doesn't actually support any crypto modes, we may as
+	 * well represent it with a NULL ksm.
+	 */
+	ksm_is_empty = true;
+	for (i = 0; i < ARRAY_SIZE(ksm->crypto_modes_supported); i++) {
+		if (ksm->crypto_modes_supported[i]) {
+			ksm_is_empty = false;
+			break;
+		}
+	}
+
+	if (ksm_is_empty) {
+		dm_destroy_keyslot_manager(ksm);
+		ksm = NULL;
+	}
+
+	/*
+	 * t->ksm is only set temporarily while the table is being set
+	 * up, and it gets set to NULL after the capabilities have
+	 * been transferred to the request_queue.
+	 */
+	t->ksm = ksm;
+
+	return 0;
+}
+
+static void dm_update_keyslot_manager(struct request_queue *q,
+				      struct dm_table *t)
+{
+	if (!t->ksm)
+		return;
+
+	/* Make the ksm less restrictive */
+	if (!q->ksm) {
+		blk_ksm_register(t->ksm, q);
+	} else {
+		blk_ksm_update_capabilities(q->ksm, t->ksm);
+		dm_destroy_keyslot_manager(t->ksm);
+	}
+	t->ksm = NULL;
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static int dm_table_construct_keyslot_manager(struct dm_table *t)
+{
+	return 0;
+}
+
+void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
+{
+}
+
+static void dm_table_destroy_keyslot_manager(struct dm_table *t)
+{
+}
+
+static void dm_update_keyslot_manager(struct request_queue *q,
+				      struct dm_table *t)
+{
+}
+
+#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
+
 /*
  * Prepares the table for use by building the indices,
  * setting the type, and allocating mempools.
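
Note: with this block in place, a table only advertises inline-encryption
capabilities if every target passes crypto through and every underlying
queue supports the mode; otherwise blk_ksm_intersect_modes(ksm, NULL) wipes
the capability mask. A target opts in via its target_type feature flags.
The fragment below is a hypothetical example assuming the
DM_TARGET_PASSES_CRYPTO feature flag (the flag that
dm_target_passes_crypto() is expected to test); it is not part of this
patch:

    static struct target_type example_target = {
            .name     = "example",
            .version  = {1, 0, 0},
            /* hypothetical: forward inline-crypto bios unchanged */
            .features = DM_TARGET_PASSES_CRYPTO,
            .module   = THIS_MODULE,
    };
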
@@ -1335,6 +1518,12 @@
 	r = dm_table_register_integrity(t);
 	if (r) {
 		DMERR("could not register integrity profile.");
+		return r;
+	}
+
+	r = dm_table_construct_keyslot_manager(t);
+	if (r) {
+		DMERR("could not construct keyslot manager.");
 		return r;
 	}
 
@@ -1381,7 +1570,7 @@
 /*
  * Search the btree for the correct target.
 *
- * Caller should check returned pointer with dm_target_is_valid()
+ * Caller should check returned pointer for NULL
  * to trap I/O beyond end of device.
 */
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
@@ -1390,7 +1579,7 @@
 	sector_t *node;
 
 	if (unlikely(sector >= dm_table_get_size(t)))
-		return &t->targets[t->num_targets];
+		return NULL;
 
 	for (l = 0; l < t->depth; l++) {
 		n = get_child(n, k);
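
Note: dm_table_find_target() no longer returns the out-of-bounds sentinel
entry (the extra slot also disappears from the dm_vcalloc() in
alloc_targets() earlier in this patch), so callers test for NULL instead of
calling dm_target_is_valid(). A sketch of the caller-side pattern, not the
exact code in dm.c:

    struct dm_target *ti;

    ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
    if (unlikely(!ti)) {
            /* I/O beyond the end of the device */
            bio_io_error(bio);
            return;
    }
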
@@ -1489,6 +1678,13 @@
 	return !q || blk_queue_zoned_model(q) != *zoned_model;
 }
 
+/*
+ * Check the device zoned model based on the target feature flag. If the target
+ * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
+ * also accepted but all devices must have the same zoned model. If the target
+ * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
+ * zoned model with all zoned devices having the same zone size.
+ */
 static bool dm_table_supports_zoned_model(struct dm_table *t,
 					  enum blk_zoned_model zoned_model)
 {
@@ -1498,13 +1694,15 @@
 	for (i = 0; i < dm_table_get_num_targets(t); i++) {
 		ti = dm_table_get_target(t, i);
 
-		if (zoned_model == BLK_ZONED_HM &&
-		    !dm_target_supports_zoned_hm(ti->type))
-			return false;
-
-		if (!ti->type->iterate_devices ||
-		    ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model))
-			return false;
+		if (dm_target_supports_zoned_hm(ti->type)) {
+			if (!ti->type->iterate_devices ||
+			    ti->type->iterate_devices(ti, device_not_zoned_model,
+						      &zoned_model))
+				return false;
+		} else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
+			if (zoned_model == BLK_ZONED_HM)
+				return false;
+		}
 	}
 
 	return true;
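
Note: the zoned-model check is now driven by per-target feature flags
rather than by a blanket BLK_ZONED_HM test. The fragment below is a
hypothetical target_type sketch assuming the DM_TARGET_ZONED_HM and
DM_TARGET_MIXED_ZONED_MODEL feature flags named in the comment added
above; it only illustrates how a target would select one of the two
branches:

    static struct target_type example_zoned_target = {
            .name     = "example-zoned",
            .version  = {1, 0, 0},
            /*
             * DM_TARGET_ZONED_HM: all devices must share one zoned model;
             * DM_TARGET_MIXED_ZONED_MODEL would instead allow mixing
             * zoned and non-zoned members of equal zone size.
             */
            .features = DM_TARGET_ZONED_HM,
            .module   = THIS_MODULE,
    };
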
@@ -1516,9 +1714,17 @@
 	struct request_queue *q = bdev_get_queue(dev->bdev);
 	unsigned int *zone_sectors = data;
 
+	if (!blk_queue_is_zoned(q))
+		return 0;
+
 	return !q || blk_queue_zone_sectors(q) != *zone_sectors;
 }
 
+/*
+ * Check consistency of zoned model and zone sectors across all targets. For
+ * zone sectors, if the destination device is a zoned block device, it shall
+ * have the specified zone_sectors.
+ */
 static int validate_hardware_zoned_model(struct dm_table *table,
 					 enum blk_zoned_model zoned_model,
 					 unsigned int zone_sectors)
15371743 return -EINVAL;
15381744
15391745 if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
1540
- DMERR("%s: zone sectors is not consistent across all devices",
1746
+ DMERR("%s: zone sectors is not consistent across all zoned devices",
15411747 dm_device_name(table->md));
15421748 return -EINVAL;
15431749 }
....@@ -1606,22 +1812,6 @@
16061812 dm_device_name(table->md),
16071813 (unsigned long long) ti->begin,
16081814 (unsigned long long) ti->len);
1609
-
1610
- /*
1611
- * FIXME: this should likely be moved to blk_stack_limits(), would
1612
- * also eliminate limits->zoned stacking hack in dm_set_device_limits()
1613
- */
1614
- if (limits->zoned == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
1615
- /*
1616
- * By default, the stacked limits zoned model is set to
1617
- * BLK_ZONED_NONE in blk_set_stacking_limits(). Update
1618
- * this model using the first target model reported
1619
- * that is not BLK_ZONED_NONE. This will be either the
1620
- * first target device zoned model or the model reported
1621
- * by the target .io_hints.
1622
- */
1623
- limits->zoned = ti_limits.zoned;
1624
- }
16251815 }
16261816
16271817 /*
@@ -1674,54 +1864,6 @@
 		blk_integrity_unregister(dm_disk(t->md));
 	}
 }
-
-#ifdef CONFIG_BLK_INLINE_ENCRYPTION
-static int device_intersect_crypto_modes(struct dm_target *ti,
-					 struct dm_dev *dev, sector_t start,
-					 sector_t len, void *data)
-{
-	struct keyslot_manager *parent = data;
-	struct keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm;
-
-	keyslot_manager_intersect_modes(parent, child);
-	return 0;
-}
-
-/*
- * Update the inline crypto modes supported by 'q->ksm' to be the intersection
- * of the modes supported by all targets in the table.
- *
- * For any mode to be supported at all, all targets must have explicitly
- * declared that they can pass through inline crypto support. For a particular
- * mode to be supported, all underlying devices must also support it.
- *
- * Assume that 'q->ksm' initially declares all modes to be supported.
- */
-static void dm_calculate_supported_crypto_modes(struct dm_table *t,
-						struct request_queue *q)
-{
-	struct dm_target *ti;
-	unsigned int i;
-
-	for (i = 0; i < dm_table_get_num_targets(t); i++) {
-		ti = dm_table_get_target(t, i);
-
-		if (!ti->may_passthrough_inline_crypto) {
-			keyslot_manager_intersect_modes(q->ksm, NULL);
-			return;
-		}
-		if (!ti->type->iterate_devices)
-			continue;
-		ti->type->iterate_devices(ti, device_intersect_crypto_modes,
-					  q->ksm);
-	}
-}
-#else /* CONFIG_BLK_INLINE_ENCRYPTION */
-static inline void dm_calculate_supported_crypto_modes(struct dm_table *t,
-						struct request_queue *q)
-{
-}
-#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
 
 static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
 				sector_t start, sector_t len, void *data)
@@ -1790,28 +1932,6 @@
 	return q && !blk_queue_add_random(q);
 }
 
-static int queue_no_sg_merge(struct dm_target *ti, struct dm_dev *dev,
-			     sector_t start, sector_t len, void *data)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return q && test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
-}
-
-static int device_is_partial_completion(struct dm_target *ti, struct dm_dev *dev,
-					sector_t start, sector_t len, void *data)
-{
-	char b[BDEVNAME_SIZE];
-
-	/* For now, NVMe devices are the only devices of this class */
-	return (strncmp(bdevname(dev->bdev, b), "nvme", 4) != 0);
-}
-
-static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
-{
-	return !dm_table_any_dev_attr(t, device_is_partial_completion, NULL);
-}
-
 static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
 					 sector_t start, sector_t len, void *data)
 {
@@ -1860,6 +1980,33 @@
 
 		if (!ti->type->iterate_devices ||
 		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
+			return false;
+	}
+
+	return true;
+}
+
+static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
+				     sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !blk_queue_nowait(q);
+}
+
+static bool dm_table_supports_nowait(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!dm_target_supports_nowait(ti->type))
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
 			return false;
 	}
 
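
Note: dm_table_supports_nowait() requires both that every target declares
nowait support (dm_target_supports_nowait()) and that every underlying
queue has QUEUE_FLAG_NOWAIT set. Its only use in this patch is in
dm_table_set_restrictions(), in the next hunk, which propagates the result
to the mapped device's queue:

    if (dm_table_supports_nowait(t))
            blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
    else
            blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
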
@@ -1933,18 +2080,24 @@
 {
 	struct request_queue *q = bdev_get_queue(dev->bdev);
 
-	return q && bdi_cap_stable_pages_required(q->backing_dev_info);
+	return q && blk_queue_stable_writes(q);
 }
 
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
 	bool wc = false, fua = false;
+	int page_size = PAGE_SIZE;
 
 	/*
 	 * Copy table's limits to the DM device's request_queue
 	 */
 	q->limits = *limits;
+
+	if (dm_table_supports_nowait(t))
+		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
 
 	if (!dm_table_supports_discards(t)) {
 		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
@@ -1967,8 +2120,11 @@
 	}
 	blk_queue_write_cache(q, wc, fua);
 
-	if (dm_table_supports_dax(t))
+	if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+		if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
+			set_dax_synchronous(t->md->dax_dev);
+	}
 	else
 		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
 
@@ -1986,14 +2142,7 @@
 	if (!dm_table_supports_write_zeroes(t))
 		q->limits.max_write_zeroes_sectors = 0;
 
-	if (dm_table_any_dev_attr(t, queue_no_sg_merge, NULL))
-		blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
-	else
-		blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
-
 	dm_table_verify_integrity(t);
-
-	dm_calculate_supported_crypto_modes(t, q);
 
 	/*
 	 * Some devices don't use blk_integrity but still want stable pages
20032152 * don't want error, zero, etc to require stable pages.
20042153 */
20052154 if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
2006
- q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
2155
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
20072156 else
2008
- q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
2157
+ blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
20092158
20102159 /*
20112160 * Determine whether or not this queue's I/O timings contribute
....@@ -2017,8 +2166,20 @@
20172166 dm_table_any_dev_attr(t, device_is_not_random, NULL))
20182167 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
20192168
2020
- /* io_pages is used for readahead */
2021
- q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
2169
+ /*
2170
+ * For a zoned target, the number of zones should be updated for the
2171
+ * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
2172
+ * target, this is all that is needed.
2173
+ */
2174
+#ifdef CONFIG_BLK_DEV_ZONED
2175
+ if (blk_queue_is_zoned(q)) {
2176
+ WARN_ON_ONCE(queue_is_mq(q));
2177
+ q->nr_zones = blkdev_nr_zones(t->md->disk);
2178
+ }
2179
+#endif
2180
+
2181
+ dm_update_keyslot_manager(q, t);
2182
+ blk_queue_update_readahead(q);
20222183 }
20232184
20242185 unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -2123,64 +2284,25 @@
 	return 0;
 }
 
-void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb)
-{
-	list_add(&cb->list, &t->target_callbacks);
-}
-EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks);
-
-int dm_table_any_congested(struct dm_table *t, int bdi_bits)
-{
-	struct dm_dev_internal *dd;
-	struct list_head *devices = dm_table_get_devices(t);
-	struct dm_target_callbacks *cb;
-	int r = 0;
-
-	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
-		char b[BDEVNAME_SIZE];
-
-		if (likely(q))
-			r |= bdi_congested(q->backing_dev_info, bdi_bits);
-		else
-			DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
-				     dm_device_name(t->md),
-				     bdevname(dd->dm_dev->bdev, b));
-	}
-
-	list_for_each_entry(cb, &t->target_callbacks, list)
-		if (cb->congested_fn)
-			r |= cb->congested_fn(cb, bdi_bits);
-
-	return r;
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
 	return t->md;
 }
 EXPORT_SYMBOL(dm_table_get_md);
 
+const char *dm_table_device_name(struct dm_table *t)
+{
+	return dm_device_name(t->md);
+}
+EXPORT_SYMBOL_GPL(dm_table_device_name);
+
 void dm_table_run_md_queue_async(struct dm_table *t)
 {
-	struct mapped_device *md;
-	struct request_queue *queue;
-	unsigned long flags;
-
 	if (!dm_table_request_based(t))
 		return;
 
-	md = dm_table_get_md(t);
-	queue = dm_get_md_queue(md);
-	if (queue) {
-		if (queue->mq_ops)
-			blk_mq_run_hw_queues(queue, true);
-		else {
-			spin_lock_irqsave(queue->queue_lock, flags);
-			blk_run_queue_async(queue);
-			spin_unlock_irqrestore(queue->queue_lock, flags);
-		}
-	}
+	if (t->md->queue)
+		blk_mq_run_hw_queues(t->md->queue, true);
 }
 EXPORT_SYMBOL(dm_table_run_md_queue_async);
 