hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/md/dm-integrity.c
....@@ -17,6 +17,7 @@
1717 #include <linux/rbtree.h>
1818 #include <linux/delay.h>
1919 #include <linux/random.h>
20
+#include <linux/reboot.h>
2021 #include <crypto/hash.h>
2122 #include <crypto/skcipher.h>
2223 #include <linux/async_tx.h>
....@@ -26,15 +27,19 @@
2627
2728 #define DEFAULT_INTERLEAVE_SECTORS 32768
2829 #define DEFAULT_JOURNAL_SIZE_FACTOR 7
30
+#define DEFAULT_SECTORS_PER_BITMAP_BIT 32768
2931 #define DEFAULT_BUFFER_SECTORS 128
3032 #define DEFAULT_JOURNAL_WATERMARK 50
3133 #define DEFAULT_SYNC_MSEC 10000
32
-#define DEFAULT_MAX_JOURNAL_SECTORS 131072
34
+#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192)
3335 #define MIN_LOG2_INTERLEAVE_SECTORS 3
3436 #define MAX_LOG2_INTERLEAVE_SECTORS 31
3537 #define METADATA_WORKQUEUE_MAX_ACTIVE 16
36
-#define RECALC_SECTORS 8192
38
+#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048)
3739 #define RECALC_WRITE_SUPER 16
40
+#define BITMAP_BLOCK_SIZE 4096 /* don't change it */
41
+#define BITMAP_FLUSH_INTERVAL (10 * HZ)
42
+#define DISCARD_FILLER 0xf6
3843
3944 /*
4045 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
....@@ -50,6 +55,8 @@
5055 #define SB_MAGIC "integrt"
5156 #define SB_VERSION_1 1
5257 #define SB_VERSION_2 2
58
+#define SB_VERSION_3 3
59
+#define SB_VERSION_4 4
5360 #define SB_SECTORS 8
5461 #define MAX_SECTORS_PER_BLOCK 8
5562
....@@ -62,12 +69,15 @@
6269 __u64 provided_data_sectors; /* userspace uses this value */
6370 __u32 flags;
6471 __u8 log2_sectors_per_block;
65
- __u8 pad[3];
72
+ __u8 log2_blocks_per_bitmap_bit;
73
+ __u8 pad[2];
6674 __u64 recalc_sector;
6775 };
6876
6977 #define SB_FLAG_HAVE_JOURNAL_MAC 0x1
7078 #define SB_FLAG_RECALCULATING 0x2
79
+#define SB_FLAG_DIRTY_BITMAP 0x4
80
+#define SB_FLAG_FIXED_PADDING 0x8
7181
7282 #define JOURNAL_ENTRY_ROUNDUP 8
7383
....@@ -82,7 +92,7 @@
8292 } s;
8393 __u64 sector;
8494 } u;
85
- commit_id_t last_bytes[0];
95
+ commit_id_t last_bytes[];
8696 /* __u8 tag[0]; */
8797 };
8898
....@@ -90,14 +100,10 @@
90100
91101 #if BITS_PER_LONG == 64
92102 #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0)
93
-#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
94
-#elif defined(CONFIG_LBDAF)
95
-#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
96
-#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
97103 #else
98
-#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32(0)); } while (0)
99
-#define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo)
104
+#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
100105 #endif
106
+#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
101107 #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1))
102108 #define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0)
103109 #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2))
....@@ -157,9 +163,18 @@
157163 struct workqueue_struct *metadata_wq;
158164 struct superblock *sb;
159165 unsigned journal_pages;
166
+ unsigned n_bitmap_blocks;
167
+
160168 struct page_list *journal;
161169 struct page_list *journal_io;
162170 struct page_list *journal_xor;
171
+ struct page_list *recalc_bitmap;
172
+ struct page_list *may_write_bitmap;
173
+ struct bitmap_block_status *bbs;
174
+ unsigned bitmap_flush_interval;
175
+ int synchronous_mode;
176
+ struct bio_list synchronous_bios;
177
+ struct delayed_work bitmap_flush_work;
163178
164179 struct crypto_skcipher *journal_crypt;
165180 struct scatterlist **journal_scatterlist;
....@@ -186,6 +201,7 @@
186201 __s8 log2_metadata_run;
187202 __u8 log2_buffer_sectors;
188203 __u8 sectors_per_block;
204
+ __u8 log2_blocks_per_bitmap_bit;
189205
190206 unsigned char mode;
191207
....@@ -238,8 +254,12 @@
238254
239255 struct completion crypto_backoff;
240256
257
+ bool wrote_to_journal;
241258 bool journal_uptodate;
242259 bool just_formatted;
260
+ bool recalculate_flag;
261
+ bool discard;
262
+ bool fix_padding;
243263 bool legacy_recalculate;
244264
245265 struct alg_spec internal_hash_alg;
....@@ -247,11 +267,13 @@
247267 struct alg_spec journal_mac_alg;
248268
249269 atomic64_t number_of_mismatches;
270
+
271
+ struct notifier_block reboot_notifier;
250272 };
251273
252274 struct dm_integrity_range {
253275 sector_t logical_sector;
254
- unsigned n_sectors;
276
+ sector_t n_sectors;
255277 bool waiting;
256278 union {
257279 struct rb_node node;
....@@ -266,7 +288,7 @@
266288 struct work_struct work;
267289
268290 struct dm_integrity_c *ic;
269
- bool write;
291
+ enum req_opf op;
270292 bool fua;
271293
272294 struct dm_integrity_range range;
....@@ -291,6 +313,16 @@
291313 struct journal_io {
292314 struct dm_integrity_range range;
293315 struct journal_completion *comp;
316
+};
317
+
318
+struct bitmap_block_status {
319
+ struct work_struct work;
320
+ struct dm_integrity_c *ic;
321
+ unsigned idx;
322
+ unsigned long *bitmap;
323
+ struct bio_list bio_queue;
324
+ spinlock_t bio_queue_lock;
325
+
294326 };
295327
296328 static struct kmem_cache *journal_io_cache;
....@@ -320,6 +352,14 @@
320352 #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
321353 #endif
322354
355
+static void dm_integrity_prepare(struct request *rq)
356
+{
357
+}
358
+
359
+static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
360
+{
361
+}
362
+
323363 /*
324364 * DM Integrity profile, protection is performed layer above (dm-crypt)
325365 */
....@@ -327,6 +367,8 @@
327367 .name = "DM-DIF-EXT-TAG",
328368 .generate_fn = NULL,
329369 .verify_fn = NULL,
370
+ .prepare_fn = dm_integrity_prepare,
371
+ .complete_fn = dm_integrity_complete,
330372 };
331373
332374 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
....@@ -436,7 +478,11 @@
436478
437479 static void sb_set_version(struct dm_integrity_c *ic)
438480 {
439
- if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
481
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
482
+ ic->sb->version = SB_VERSION_4;
483
+ else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
484
+ ic->sb->version = SB_VERSION_3;
485
+ else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
440486 ic->sb->version = SB_VERSION_2;
441487 else
442488 ic->sb->version = SB_VERSION_1;
....@@ -457,7 +503,141 @@
457503 io_loc.sector = ic->start;
458504 io_loc.count = SB_SECTORS;
459505
506
+ if (op == REQ_OP_WRITE)
507
+ sb_set_version(ic);
508
+
460509 return dm_io(&io_req, 1, &io_loc, NULL);
510
+}
511
+
512
+#define BITMAP_OP_TEST_ALL_SET 0
513
+#define BITMAP_OP_TEST_ALL_CLEAR 1
514
+#define BITMAP_OP_SET 2
515
+#define BITMAP_OP_CLEAR 3
516
+
517
+static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
518
+ sector_t sector, sector_t n_sectors, int mode)
519
+{
520
+ unsigned long bit, end_bit, this_end_bit, page, end_page;
521
+ unsigned long *data;
522
+
523
+ if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
524
+ DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
525
+ sector,
526
+ n_sectors,
527
+ ic->sb->log2_sectors_per_block,
528
+ ic->log2_blocks_per_bitmap_bit,
529
+ mode);
530
+ BUG();
531
+ }
532
+
533
+ if (unlikely(!n_sectors))
534
+ return true;
535
+
536
+ bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
537
+ end_bit = (sector + n_sectors - 1) >>
538
+ (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
539
+
540
+ page = bit / (PAGE_SIZE * 8);
541
+ bit %= PAGE_SIZE * 8;
542
+
543
+ end_page = end_bit / (PAGE_SIZE * 8);
544
+ end_bit %= PAGE_SIZE * 8;
545
+
546
+repeat:
547
+ if (page < end_page) {
548
+ this_end_bit = PAGE_SIZE * 8 - 1;
549
+ } else {
550
+ this_end_bit = end_bit;
551
+ }
552
+
553
+ data = lowmem_page_address(bitmap[page].page);
554
+
555
+ if (mode == BITMAP_OP_TEST_ALL_SET) {
556
+ while (bit <= this_end_bit) {
557
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
558
+ do {
559
+ if (data[bit / BITS_PER_LONG] != -1)
560
+ return false;
561
+ bit += BITS_PER_LONG;
562
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
563
+ continue;
564
+ }
565
+ if (!test_bit(bit, data))
566
+ return false;
567
+ bit++;
568
+ }
569
+ } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
570
+ while (bit <= this_end_bit) {
571
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
572
+ do {
573
+ if (data[bit / BITS_PER_LONG] != 0)
574
+ return false;
575
+ bit += BITS_PER_LONG;
576
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
577
+ continue;
578
+ }
579
+ if (test_bit(bit, data))
580
+ return false;
581
+ bit++;
582
+ }
583
+ } else if (mode == BITMAP_OP_SET) {
584
+ while (bit <= this_end_bit) {
585
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
586
+ do {
587
+ data[bit / BITS_PER_LONG] = -1;
588
+ bit += BITS_PER_LONG;
589
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
590
+ continue;
591
+ }
592
+ __set_bit(bit, data);
593
+ bit++;
594
+ }
595
+ } else if (mode == BITMAP_OP_CLEAR) {
596
+ if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
597
+ clear_page(data);
598
+ else while (bit <= this_end_bit) {
599
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
600
+ do {
601
+ data[bit / BITS_PER_LONG] = 0;
602
+ bit += BITS_PER_LONG;
603
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
604
+ continue;
605
+ }
606
+ __clear_bit(bit, data);
607
+ bit++;
608
+ }
609
+ } else {
610
+ BUG();
611
+ }
612
+
613
+ if (unlikely(page < end_page)) {
614
+ bit = 0;
615
+ page++;
616
+ goto repeat;
617
+ }
618
+
619
+ return true;
620
+}
621
+
622
+static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
623
+{
624
+ unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
625
+ unsigned i;
626
+
627
+ for (i = 0; i < n_bitmap_pages; i++) {
628
+ unsigned long *dst_data = lowmem_page_address(dst[i].page);
629
+ unsigned long *src_data = lowmem_page_address(src[i].page);
630
+ copy_page(dst_data, src_data);
631
+ }
632
+}
633
+
634
+static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
635
+{
636
+ unsigned bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
637
+ unsigned bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
638
+
639
+ BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
640
+ return &ic->bbs[bitmap_block];
461641 }
462642
463643 static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
....@@ -468,8 +648,8 @@
468648
469649 if (unlikely(section >= ic->journal_sections) ||
470650 unlikely(offset >= limit)) {
471
- printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n",
472
- function, section, offset, ic->journal_sections, limit);
651
+ DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)",
652
+ function, section, offset, ic->journal_sections, limit);
473653 BUG();
474654 }
475655 #endif
....@@ -541,7 +721,6 @@
541721 unsigned j, size;
542722
543723 desc->tfm = ic->journal_mac;
544
- desc->flags = 0;
545724
546725 r = crypto_shash_init(desc);
547726 if (unlikely(r)) {
....@@ -568,7 +747,12 @@
568747 }
569748 memset(result + size, 0, JOURNAL_MAC_SIZE - size);
570749 } else {
571
- __u8 digest[size];
750
+ __u8 digest[HASH_MAX_DIGESTSIZE];
751
+
752
+ if (WARN_ON(size > sizeof(digest))) {
753
+ dm_integrity_io_error(ic, "digest_size", -EINVAL);
754
+ goto err;
755
+ }
572756 r = crypto_shash_final(desc, digest);
573757 if (unlikely(r)) {
574758 dm_integrity_io_error(ic, "crypto_shash_final", r);
....@@ -765,12 +949,12 @@
765949 complete_journal_op(comp);
766950 }
767951
768
-static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
769
- unsigned n_sections, struct journal_completion *comp)
952
+static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags,
953
+ unsigned sector, unsigned n_sectors, struct journal_completion *comp)
770954 {
771955 struct dm_io_request io_req;
772956 struct dm_io_region io_loc;
773
- unsigned sector, n_sectors, pl_index, pl_offset;
957
+ unsigned pl_index, pl_offset;
774958 int r;
775959
776960 if (unlikely(dm_integrity_failed(ic))) {
....@@ -778,9 +962,6 @@
778962 complete_journal_io(-1UL, comp);
779963 return;
780964 }
781
-
782
- sector = section * ic->journal_section_sectors;
783
- n_sectors = n_sections * ic->journal_section_sectors;
784965
785966 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
786967 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
....@@ -812,6 +993,17 @@
812993 complete_journal_io(-1UL, comp);
813994 }
814995 }
996
+}
997
+
998
+static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
999
+ unsigned n_sections, struct journal_completion *comp)
1000
+{
1001
+ unsigned sector, n_sectors;
1002
+
1003
+ sector = section * ic->journal_section_sectors;
1004
+ n_sectors = n_sections * ic->journal_section_sectors;
1005
+
1006
+ rw_journal_sectors(ic, op, op_flags, sector, n_sectors, comp);
8151007 }
8161008
8171009 static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
....@@ -997,6 +1189,12 @@
9971189 } while (unlikely(new_range->waiting));
9981190 }
9991191
1192
+static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1193
+{
1194
+ if (unlikely(!add_new_range(ic, new_range, true)))
1195
+ wait_and_add_new_range(ic, new_range);
1196
+}
1197
+
10001198 static void init_journal_node(struct journal_node *node)
10011199 {
10021200 RB_CLEAR_NODE(&node->node);
....@@ -1113,6 +1311,11 @@
11131311 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
11141312 unsigned *metadata_offset, unsigned total_size, int op)
11151313 {
1314
+#define MAY_BE_FILLER 1
1315
+#define MAY_BE_HASH 2
1316
+ unsigned hash_offset = 0;
1317
+ unsigned may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
1318
+
11161319 do {
11171320 unsigned char *data, *dp;
11181321 struct dm_buffer *b;
....@@ -1124,7 +1327,7 @@
11241327 return r;
11251328
11261329 data = dm_bufio_read(ic->bufio, *metadata_block, &b);
1127
- if (unlikely(IS_ERR(data)))
1330
+ if (IS_ERR(data))
11281331 return PTR_ERR(data);
11291332
11301333 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
....@@ -1134,18 +1337,35 @@
11341337 } else if (op == TAG_WRITE) {
11351338 memcpy(dp, tag, to_copy);
11361339 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
1137
- } else {
1340
+ } else {
11381341 /* e.g.: op == TAG_CMP */
1139
- if (unlikely(memcmp(dp, tag, to_copy))) {
1140
- unsigned i;
11411342
1142
- for (i = 0; i < to_copy; i++) {
1143
- if (dp[i] != tag[i])
1144
- break;
1145
- total_size--;
1343
+ if (likely(is_power_of_2(ic->tag_size))) {
1344
+ if (unlikely(memcmp(dp, tag, to_copy)))
1345
+ if (unlikely(!ic->discard) ||
1346
+ unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) {
1347
+ goto thorough_test;
11461348 }
1147
- dm_bufio_release(b);
1148
- return total_size;
1349
+ } else {
1350
+ unsigned i, ts;
1351
+thorough_test:
1352
+ ts = total_size;
1353
+
1354
+ for (i = 0; i < to_copy; i++, ts--) {
1355
+ if (unlikely(dp[i] != tag[i]))
1356
+ may_be &= ~MAY_BE_HASH;
1357
+ if (likely(dp[i] != DISCARD_FILLER))
1358
+ may_be &= ~MAY_BE_FILLER;
1359
+ hash_offset++;
1360
+ if (unlikely(hash_offset == ic->tag_size)) {
1361
+ if (unlikely(!may_be)) {
1362
+ dm_bufio_release(b);
1363
+ return ts;
1364
+ }
1365
+ hash_offset = 0;
1366
+ may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
1367
+ }
1368
+ }
11491369 }
11501370 }
11511371 dm_bufio_release(b);
....@@ -1156,10 +1376,17 @@
11561376 (*metadata_block)++;
11571377 *metadata_offset = 0;
11581378 }
1379
+
1380
+ if (unlikely(!is_power_of_2(ic->tag_size))) {
1381
+ hash_offset = (hash_offset + to_copy) % ic->tag_size;
1382
+ }
1383
+
11591384 total_size -= to_copy;
11601385 } while (unlikely(total_size));
11611386
11621387 return 0;
1388
+#undef MAY_BE_FILLER
1389
+#undef MAY_BE_HASH
11631390 }
11641391
11651392 struct flush_request {
....@@ -1253,6 +1480,14 @@
12531480 int r = dm_integrity_failed(ic);
12541481 if (unlikely(r) && !bio->bi_status)
12551482 bio->bi_status = errno_to_blk_status(r);
1483
+ if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) {
1484
+ unsigned long flags;
1485
+ spin_lock_irqsave(&ic->endio_wait.lock, flags);
1486
+ bio_list_add(&ic->synchronous_bios, bio);
1487
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
1488
+ spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1489
+ return;
1490
+ }
12561491 bio_endio(bio);
12571492 }
12581493
....@@ -1274,7 +1509,7 @@
12741509
12751510 remove_range(ic, &dio->range);
12761511
1277
- if (unlikely(dio->write))
1512
+ if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
12781513 schedule_autocommit(ic);
12791514
12801515 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
....@@ -1315,7 +1550,6 @@
13151550 unsigned digest_size;
13161551
13171552 req->tfm = ic->internal_hash;
1318
- req->flags = 0;
13191553
13201554 r = crypto_shash_init(req);
13211555 if (unlikely(r < 0)) {
....@@ -1366,17 +1600,61 @@
13661600 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
13671601 char *checksums;
13681602 unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
1369
- char checksums_onstack[ic->tag_size + extra_space];
1370
- unsigned sectors_to_process = dio->range.n_sectors;
1371
- sector_t sector = dio->range.logical_sector;
1603
+ char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
1604
+ sector_t sector;
1605
+ unsigned sectors_to_process;
13721606
13731607 if (unlikely(ic->mode == 'R'))
13741608 goto skip_io;
13751609
1376
- checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
1377
- GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1378
- if (!checksums)
1610
+ if (likely(dio->op != REQ_OP_DISCARD))
1611
+ checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
1612
+ GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1613
+ else
1614
+ checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1615
+ if (!checksums) {
13791616 checksums = checksums_onstack;
1617
+ if (WARN_ON(extra_space &&
1618
+ digest_size > sizeof(checksums_onstack))) {
1619
+ r = -EINVAL;
1620
+ goto error;
1621
+ }
1622
+ }
1623
+
1624
+ if (unlikely(dio->op == REQ_OP_DISCARD)) {
1625
+ sector_t bi_sector = dio->bio_details.bi_iter.bi_sector;
1626
+ unsigned bi_size = dio->bio_details.bi_iter.bi_size;
1627
+ unsigned max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE;
1628
+ unsigned max_blocks = max_size / ic->tag_size;
1629
+ memset(checksums, DISCARD_FILLER, max_size);
1630
+
1631
+ while (bi_size) {
1632
+ unsigned this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1633
+ this_step_blocks = min(this_step_blocks, max_blocks);
1634
+ r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1635
+ this_step_blocks * ic->tag_size, TAG_WRITE);
1636
+ if (unlikely(r)) {
1637
+ if (likely(checksums != checksums_onstack))
1638
+ kfree(checksums);
1639
+ goto error;
1640
+ }
1641
+
1642
+ /*if (bi_size < this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block)) {
1643
+ printk("BUGG: bi_sector: %llx, bi_size: %u\n", bi_sector, bi_size);
1644
+ printk("BUGG: this_step_blocks: %u\n", this_step_blocks);
1645
+ BUG();
1646
+ }*/
1647
+ bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1648
+ bi_sector += this_step_blocks << ic->sb->log2_sectors_per_block;
1649
+ }
1650
+
1651
+ if (likely(checksums != checksums_onstack))
1652
+ kfree(checksums);
1653
+ goto skip_io;
1654
+ }
1655
+
1656
+ sector = dio->range.logical_sector;
1657
+ sectors_to_process = dio->range.n_sectors;
13801658
13811659 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
13821660 unsigned pos;
....@@ -1396,11 +1674,12 @@
13961674 kunmap_atomic(mem);
13971675
13981676 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1399
- checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
1677
+ checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
14001678 if (unlikely(r)) {
14011679 if (r > 0) {
1402
- DMERR_LIMIT("Checksum failed at sector 0x%llx",
1403
- (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
1680
+ char b[BDEVNAME_SIZE];
1681
+ DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", bio_devname(bio, b),
1682
+ (sector - ((r + ic->tag_size - 1) / ic->tag_size)));
14041683 r = -EILSEQ;
14051684 atomic64_inc(&ic->number_of_mismatches);
14061685 }
....@@ -1439,7 +1718,7 @@
14391718 tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
14401719 this_len = min(biv.bv_len, data_to_process);
14411720 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
1442
- this_len, !dio->write ? TAG_READ : TAG_WRITE);
1721
+ this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
14431722 if (unlikely(r))
14441723 goto error;
14451724 data_to_process -= this_len;
....@@ -1466,6 +1745,20 @@
14661745
14671746 dio->ic = ic;
14681747 dio->bi_status = 0;
1748
+ dio->op = bio_op(bio);
1749
+
1750
+ if (unlikely(dio->op == REQ_OP_DISCARD)) {
1751
+ if (ti->max_io_len) {
1752
+ sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector);
1753
+ unsigned log2_max_io_len = __fls(ti->max_io_len);
1754
+ sector_t start_boundary = sec >> log2_max_io_len;
1755
+ sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len;
1756
+ if (start_boundary < end_boundary) {
1757
+ sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1));
1758
+ dm_accept_partial_bio(bio, len);
1759
+ }
1760
+ }
1761
+ }
14691762
14701763 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
14711764 submit_flush_bio(ic, dio);
....@@ -1473,8 +1766,7 @@
14731766 }
14741767
14751768 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
1476
- dio->write = bio_op(bio) == REQ_OP_WRITE;
1477
- dio->fua = dio->write && bio->bi_opf & REQ_FUA;
1769
+ dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA;
14781770 if (unlikely(dio->fua)) {
14791771 /*
14801772 * Don't pass down the FUA flag because we have to flush
....@@ -1484,18 +1776,18 @@
14841776 }
14851777 if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
14861778 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
1487
- (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
1488
- (unsigned long long)ic->provided_data_sectors);
1779
+ dio->range.logical_sector, bio_sectors(bio),
1780
+ ic->provided_data_sectors);
14891781 return DM_MAPIO_KILL;
14901782 }
14911783 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
14921784 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
14931785 ic->sectors_per_block,
1494
- (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
1786
+ dio->range.logical_sector, bio_sectors(bio));
14951787 return DM_MAPIO_KILL;
14961788 }
14971789
1498
- if (ic->sectors_per_block > 1) {
1790
+ if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) {
14991791 struct bvec_iter iter;
15001792 struct bio_vec bv;
15011793 bio_for_each_segment(bv, bio, iter) {
....@@ -1516,7 +1808,8 @@
15161808 else
15171809 wanted_tag_size *= ic->tag_size;
15181810 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
1519
- DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
1811
+ DMERR("Invalid integrity data size %u, expected %u",
1812
+ bip->bip_iter.bi_size, wanted_tag_size);
15201813 return DM_MAPIO_KILL;
15211814 }
15221815 }
....@@ -1527,7 +1820,7 @@
15271820 }
15281821 }
15291822
1530
- if (unlikely(ic->mode == 'R') && unlikely(dio->write))
1823
+ if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ))
15311824 return DM_MAPIO_KILL;
15321825
15331826 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
....@@ -1557,13 +1850,13 @@
15571850 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
15581851 retry_kmap:
15591852 mem = kmap_atomic(bv.bv_page);
1560
- if (likely(dio->write))
1853
+ if (likely(dio->op == REQ_OP_WRITE))
15611854 flush_dcache_page(bv.bv_page);
15621855
15631856 do {
15641857 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);
15651858
1566
- if (unlikely(!dio->write)) {
1859
+ if (unlikely(dio->op == REQ_OP_READ)) {
15671860 struct journal_sector *js;
15681861 char *mem_ptr;
15691862 unsigned s;
....@@ -1588,12 +1881,12 @@
15881881 } while (++s < ic->sectors_per_block);
15891882 #ifdef INTERNAL_VERIFY
15901883 if (ic->internal_hash) {
1591
- char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
1884
+ char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
15921885
15931886 integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
15941887 if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
15951888 DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
1596
- (unsigned long long)logical_sector);
1889
+ logical_sector);
15971890 }
15981891 }
15991892 #endif
....@@ -1610,7 +1903,7 @@
16101903 char *tag_addr;
16111904 BUG_ON(PageHighMem(biv.bv_page));
16121905 tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
1613
- if (likely(dio->write))
1906
+ if (likely(dio->op == REQ_OP_WRITE))
16141907 memcpy(tag_ptr, tag_addr, tag_now);
16151908 else
16161909 memcpy(tag_addr, tag_ptr, tag_now);
....@@ -1618,12 +1911,12 @@
16181911 tag_ptr += tag_now;
16191912 tag_todo -= tag_now;
16201913 } while (unlikely(tag_todo)); else {
1621
- if (likely(dio->write))
1914
+ if (likely(dio->op == REQ_OP_WRITE))
16221915 memset(tag_ptr, 0, tag_todo);
16231916 }
16241917 }
16251918
1626
- if (likely(dio->write)) {
1919
+ if (likely(dio->op == REQ_OP_WRITE)) {
16271920 struct journal_sector *js;
16281921 unsigned s;
16291922
....@@ -1638,7 +1931,7 @@
16381931 if (ic->internal_hash) {
16391932 unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
16401933 if (unlikely(digest_size > ic->tag_size)) {
1641
- char checksums_onstack[digest_size];
1934
+ char checksums_onstack[HASH_MAX_DIGESTSIZE];
16421935 integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
16431936 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
16441937 } else
....@@ -1659,12 +1952,12 @@
16591952 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
16601953 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
16611954
1662
- if (unlikely(!dio->write))
1955
+ if (unlikely(dio->op == REQ_OP_READ))
16631956 flush_dcache_page(bv.bv_page);
16641957 kunmap_atomic(mem);
16651958 } while (n_sectors);
16661959
1667
- if (likely(dio->write)) {
1960
+ if (likely(dio->op == REQ_OP_WRITE)) {
16681961 smp_mb();
16691962 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
16701963 wake_up(&ic->copy_to_journal_wait);
....@@ -1696,7 +1989,10 @@
16961989 unsigned journal_section, journal_entry;
16971990 unsigned journal_read_pos;
16981991 struct completion read_comp;
1699
- bool need_sync_io = ic->internal_hash && !dio->write;
1992
+ bool discard_retried = false;
1993
+ bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ;
1994
+ if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D')
1995
+ need_sync_io = true;
17001996
17011997 if (need_sync_io && from_map) {
17021998 INIT_WORK(&dio->work, integrity_bio_wait);
....@@ -1714,13 +2010,13 @@
17142010 }
17152011 dio->range.n_sectors = bio_sectors(bio);
17162012 journal_read_pos = NOT_FOUND;
1717
- if (likely(ic->mode == 'J')) {
1718
- if (dio->write) {
2013
+ if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) {
2014
+ if (dio->op == REQ_OP_WRITE) {
17192015 unsigned next_entry, i, pos;
17202016 unsigned ws, we, range_sectors;
17212017
17222018 dio->range.n_sectors = min(dio->range.n_sectors,
1723
- ic->free_sectors << ic->sb->log2_sectors_per_block);
2019
+ (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block);
17242020 if (unlikely(!dio->range.n_sectors)) {
17252021 if (from_map)
17262022 goto offload_to_thread;
....@@ -1810,12 +2106,41 @@
18102106 }
18112107 }
18122108 }
2109
+ if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) {
2110
+ sector_t next_sector;
2111
+ unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2112
+ if (unlikely(new_pos != NOT_FOUND) ||
2113
+ unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) {
2114
+ remove_range_unlocked(ic, &dio->range);
2115
+ spin_unlock_irq(&ic->endio_wait.lock);
2116
+ queue_work(ic->commit_wq, &ic->commit_work);
2117
+ flush_workqueue(ic->commit_wq);
2118
+ queue_work(ic->writer_wq, &ic->writer_work);
2119
+ flush_workqueue(ic->writer_wq);
2120
+ discard_retried = true;
2121
+ goto lock_retry;
2122
+ }
2123
+ }
18132124 spin_unlock_irq(&ic->endio_wait.lock);
18142125
18152126 if (unlikely(journal_read_pos != NOT_FOUND)) {
18162127 journal_section = journal_read_pos / ic->journal_section_entries;
18172128 journal_entry = journal_read_pos % ic->journal_section_entries;
18182129 goto journal_read_write;
2130
+ }
2131
+
2132
+ if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) {
2133
+ if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2134
+ dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2135
+ struct bitmap_block_status *bbs;
2136
+
2137
+ bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
2138
+ spin_lock(&bbs->bio_queue_lock);
2139
+ bio_list_add(&bbs->bio_queue, bio);
2140
+ spin_unlock(&bbs->bio_queue_lock);
2141
+ queue_work(ic->writer_wq, &bbs->work);
2142
+ return;
2143
+ }
18192144 }
18202145
18212146 dio->in_flight = (atomic_t)ATOMIC_INIT(2);
....@@ -1833,14 +2158,31 @@
18332158 bio->bi_end_io = integrity_end_io;
18342159 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
18352160
1836
- generic_make_request(bio);
2161
+ if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
2162
+ integrity_metadata(&dio->work);
2163
+ dm_integrity_flush_buffers(ic, false);
2164
+
2165
+ dio->in_flight = (atomic_t)ATOMIC_INIT(1);
2166
+ dio->completion = NULL;
2167
+
2168
+ submit_bio_noacct(bio);
2169
+
2170
+ return;
2171
+ }
2172
+
2173
+ submit_bio_noacct(bio);
18372174
18382175 if (need_sync_io) {
18392176 wait_for_completion_io(&read_comp);
1840
- if (unlikely(ic->recalc_wq != NULL) &&
1841
- ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
2177
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
18422178 dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
18432179 goto skip_check;
2180
+ if (ic->mode == 'B') {
2181
+ if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector,
2182
+ dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
2183
+ goto skip_check;
2184
+ }
2185
+
18442186 if (likely(!bio->bi_status))
18452187 integrity_metadata(&dio->work);
18462188 else
....@@ -1878,8 +2220,16 @@
18782220 wraparound_section(ic, &ic->free_section);
18792221 ic->n_uncommitted_sections++;
18802222 }
1881
- WARN_ON(ic->journal_sections * ic->journal_section_entries !=
1882
- (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
2223
+ if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
2224
+ (ic->n_uncommitted_sections + ic->n_committed_sections) *
2225
+ ic->journal_section_entries + ic->free_sectors)) {
2226
+ DMCRIT("journal_sections %u, journal_section_entries %u, "
2227
+ "n_uncommitted_sections %u, n_committed_sections %u, "
2228
+ "journal_section_entries %u, free_sectors %u",
2229
+ ic->journal_sections, ic->journal_section_entries,
2230
+ ic->n_uncommitted_sections, ic->n_committed_sections,
2231
+ ic->journal_section_entries, ic->free_sectors);
2232
+ }
18832233 }
18842234
18852235 static void integrity_commit(struct work_struct *w)
....@@ -1906,6 +2256,8 @@
19062256
19072257 if (!commit_sections)
19082258 goto release_flush_bios;
2259
+
2260
+ ic->wrote_to_journal = true;
19092261
19102262 i = commit_start;
19112263 for (n = 0; n < commit_sections; n++) {
....@@ -2005,6 +2357,10 @@
20052357 dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
20062358 sec &= ~(sector_t)(ic->sectors_per_block - 1);
20072359 }
2360
+ if (unlikely(sec >= ic->provided_data_sectors)) {
2361
+ journal_entry_set_unused(je);
2362
+ continue;
2363
+ }
20082364 }
20092365 get_area_and_offset(ic, sec, &area, &offset);
20102366 restore_last_bytes(ic, access_journal_data(ic, i, j), je);
....@@ -2015,6 +2371,8 @@
20152371 break;
20162372 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
20172373 sec2 = journal_entry_get_sector(je2);
2374
+ if (unlikely(sec2 >= ic->provided_data_sectors))
2375
+ break;
20182376 get_area_and_offset(ic, sec2, &area2, &offset2);
20192377 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
20202378 break;
....@@ -2028,8 +2386,7 @@
20282386 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
20292387
20302388 spin_lock_irq(&ic->endio_wait.lock);
2031
- if (unlikely(!add_new_range(ic, &io->range, true)))
2032
- wait_and_add_new_range(ic, &io->range);
2389
+ add_new_range_and_wait(ic, &io->range);
20332390
20342391 if (likely(!from_replay)) {
20352392 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
....@@ -2073,7 +2430,7 @@
20732430 unlikely(from_replay) &&
20742431 #endif
20752432 ic->internal_hash) {
2076
- char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
2433
+ char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
20772434
20782435 integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
20792436 (char *)access_journal_data(ic, i, l), test_tag);
....@@ -2116,10 +2473,6 @@
21162473
21172474 unsigned prev_free_sectors;
21182475
2119
- /* the following test is not needed, but it tests the replay code */
2120
- if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev)
2121
- return;
2122
-
21232476 spin_lock_irq(&ic->endio_wait.lock);
21242477 write_start = ic->committed_section;
21252478 write_sections = ic->n_committed_sections;
....@@ -2152,7 +2505,6 @@
21522505 if (dm_integrity_failed(ic))
21532506 return;
21542507
2155
- sb_set_version(ic);
21562508 r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
21572509 if (unlikely(r))
21582510 dm_integrity_io_error(ic, "writing superblock", r);
....@@ -2167,10 +2519,13 @@
21672519 sector_t area, offset;
21682520 sector_t metadata_block;
21692521 unsigned metadata_offset;
2522
+ sector_t logical_sector, n_sectors;
21702523 __u8 *t;
21712524 unsigned i;
21722525 int r;
21732526 unsigned super_counter = 0;
2527
+
2528
+ DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
21742529
21752530 spin_lock_irq(&ic->endio_wait.lock);
21762531
....@@ -2180,21 +2535,50 @@
21802535 goto unlock_ret;
21812536
21822537 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
2183
- if (unlikely(range.logical_sector >= ic->provided_data_sectors))
2538
+ if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
2539
+ if (ic->mode == 'B') {
2540
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
2541
+ DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
2542
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
2543
+ }
21842544 goto unlock_ret;
2545
+ }
21852546
21862547 get_area_and_offset(ic, range.logical_sector, &area, &offset);
21872548 range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
21882549 if (!ic->meta_dev)
2189
- range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
2550
+ range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
21902551
2191
- if (unlikely(!add_new_range(ic, &range, true)))
2192
- wait_and_add_new_range(ic, &range);
2193
-
2552
+ add_new_range_and_wait(ic, &range);
21942553 spin_unlock_irq(&ic->endio_wait.lock);
2554
+ logical_sector = range.logical_sector;
2555
+ n_sectors = range.n_sectors;
2556
+
2557
+ if (ic->mode == 'B') {
2558
+ if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
2559
+ goto advance_and_next;
2560
+ }
2561
+ while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector,
2562
+ ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2563
+ logical_sector += ic->sectors_per_block;
2564
+ n_sectors -= ic->sectors_per_block;
2565
+ cond_resched();
2566
+ }
2567
+ while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block,
2568
+ ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2569
+ n_sectors -= ic->sectors_per_block;
2570
+ cond_resched();
2571
+ }
2572
+ get_area_and_offset(ic, logical_sector, &area, &offset);
2573
+ }
2574
+
2575
+ DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors);
21952576
21962577 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
21972578 recalc_write_super(ic);
2579
+ if (ic->mode == 'B') {
2580
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
2581
+ }
21982582 super_counter = 0;
21992583 }
22002584
....@@ -2209,7 +2593,7 @@
22092593 io_req.client = ic->io;
22102594 io_loc.bdev = ic->dev->bdev;
22112595 io_loc.sector = get_data_sector(ic, area, offset);
2212
- io_loc.count = range.n_sectors;
2596
+ io_loc.count = n_sectors;
22132597
22142598 r = dm_io(&io_req, 1, &io_loc, NULL);
22152599 if (unlikely(r)) {
....@@ -2218,8 +2602,8 @@
22182602 }
22192603
22202604 t = ic->recalc_tags;
2221
- for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
2222
- integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
2605
+ for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
2606
+ integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
22232607 t += ic->tag_size;
22242608 }
22252609
....@@ -2230,6 +2614,20 @@
22302614 dm_integrity_io_error(ic, "writing tags", r);
22312615 goto err;
22322616 }
2617
+
2618
+ if (ic->mode == 'B') {
2619
+ sector_t start, end;
2620
+ start = (range.logical_sector >>
2621
+ (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
2622
+ (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2623
+ end = ((range.logical_sector + range.n_sectors) >>
2624
+ (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
2625
+ (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2626
+ block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR);
2627
+ }
2628
+
2629
+advance_and_next:
2630
+ cond_resched();
22332631
22342632 spin_lock_irq(&ic->endio_wait.lock);
22352633 remove_range_unlocked(ic, &range);
....@@ -2245,6 +2643,101 @@
22452643
22462644 recalc_write_super(ic);
22472645 }
2646
+
/*
 * Work item for one bitmap block (bbs): handles the write/discard bios
 * that the I/O path parked on bbs->bio_queue because their range was not
 * yet fully set in ic->may_write_bitmap.
 *
 * Bios whose range has meanwhile become fully set are dispatched at once.
 * For the rest, the range is set in ic->journal, the single bitmap block
 * bbs->idx is written out with REQ_FUA | REQ_SYNC, and only after that
 * write the range is set in ic->may_write_bitmap and the bios are
 * dispatched. Finally the delayed bitmap flush is (re)queued.
 *
 * NOTE(review): in mode 'B' ic->journal appears to hold the bitmap image
 * that rw_journal_sectors() transfers - confirm against the constructor.
 */
2647
+static void bitmap_block_work(struct work_struct *w)
2648
+{
2649
+ struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
2650
+ struct dm_integrity_c *ic = bbs->ic;
2651
+ struct bio *bio;
2652
+ struct bio_list bio_queue; /* private snapshot of bbs->bio_queue */
2653
+ struct bio_list waiting; /* bios that need the bitmap block written first */
2654
+
2655
+ bio_list_init(&waiting);
2656
+ /* Take over everything queued so far and leave an empty queue behind. */
2657
+ spin_lock(&bbs->bio_queue_lock);
2658
+ bio_queue = bbs->bio_queue;
2659
+ bio_list_init(&bbs->bio_queue);
2660
+ spin_unlock(&bbs->bio_queue_lock);
2661
+
2662
+ while ((bio = bio_list_pop(&bio_queue))) {
2663
+ struct dm_integrity_io *dio;
2664
+
2665
+ dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2666
+
2667
+ if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2668
+ dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2669
+ remove_range(ic, &dio->range); /* NOTE(review): presumably integrity_bio_wait() takes the range again - confirm */
2670
+ INIT_WORK(&dio->work, integrity_bio_wait);
2671
+ queue_work(ic->offload_wq, &dio->work);
2672
+ } else {
2673
+ block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
2674
+ dio->range.n_sectors, BITMAP_OP_SET);
2675
+ bio_list_add(&waiting, bio);
2676
+ }
2677
+ }
2678
+
2679
+ if (bio_list_empty(&waiting))
2680
+ return;
2681
+ /* Write only this bitmap block: BITMAP_BLOCK_SIZE bytes at block index bbs->idx. */
2682
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC,
2683
+ bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
2684
+ BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
2685
+ /* The bitmap block has been written; mark the ranges writable and release the bios. */
2686
+ while ((bio = bio_list_pop(&waiting))) {
2687
+ struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2688
+
2689
+ block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2690
+ dio->range.n_sectors, BITMAP_OP_SET);
2691
+
2692
+ remove_range(ic, &dio->range);
2693
+ INIT_WORK(&dio->work, integrity_bio_wait);
2694
+ queue_work(ic->offload_wq, &dio->work);
2695
+ }
2696
+ /* Bits were set above, so schedule the periodic flush that clears them again. */
2697
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
2698
+}
2699
+
/*
 * Delayed work that clears the bitmap.
 *
 * Flushes the buffers, takes the range [0, provided_data_sectors) via
 * add_new_range_and_wait(), flushes the buffers again, clears the bits
 * below "limit" in both ic->journal and ic->may_write_bitmap, writes all
 * ic->n_bitmap_blocks bitmap blocks with REQ_FUA | REQ_SYNC, releases the
 * range and finally completes every bio queued on ic->synchronous_bios.
 *
 * "limit" is the whole device, or - while SB_FLAG_RECALCULATING is set -
 * the superblock's recalc_sector rounded down to a bitmap-bit boundary, so
 * bits at or above the recalculation position stay set.
 */
2700
+static void bitmap_flush_work(struct work_struct *work)
2701
+{
2702
+ struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
2703
+ struct dm_integrity_range range;
2704
+ sector_t limit; /* sector count: must not be unsigned long, which would truncate on 32-bit builds */
2705
+ struct bio *bio;
2706
+
2707
+ dm_integrity_flush_buffers(ic, false);
2708
+
2709
+ range.logical_sector = 0;
2710
+ range.n_sectors = ic->provided_data_sectors;
2711
+ /* Take the whole device range before touching the bitmap. */
2712
+ spin_lock_irq(&ic->endio_wait.lock);
2713
+ add_new_range_and_wait(ic, &range);
2714
+ spin_unlock_irq(&ic->endio_wait.lock);
2715
+
2716
+ dm_integrity_flush_buffers(ic, true);
2717
+
2718
+ limit = ic->provided_data_sectors;
2719
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2720
+ limit = le64_to_cpu(ic->sb->recalc_sector)
2721
+ >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
2722
+ << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2723
+ }
2724
+ /*DEBUG_print("zeroing journal\n");*/
2725
+ block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
2726
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
2727
+
2728
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
2729
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
2730
+
2731
+ spin_lock_irq(&ic->endio_wait.lock);
2732
+ remove_range_unlocked(ic, &range);
2733
+ while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) {
2734
+ bio_endio(bio);
2735
+ spin_unlock_irq(&ic->endio_wait.lock); /* drop and retake the lock after each completed bio */
2736
+ spin_lock_irq(&ic->endio_wait.lock);
2737
+ }
2738
+ spin_unlock_irq(&ic->endio_wait.lock);
2739
+}
2740
+
22482741
22492742 static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
22502743 unsigned n_sections, unsigned char commit_seq)
....@@ -2442,23 +2935,73 @@
24422935 init_journal_node(&ic->journal_tree[i]);
24432936 }
24442937
/*
 * Switch a bitmap-mode ('B') target to synchronous operation; a no-op in
 * every other mode.
 *
 * Shortens ic->bitmap_flush_interval to msecs_to_jiffies(10) + 1, sets
 * ic->synchronous_mode, and then forces one bitmap flush to run and finish
 * before returning. The readers of ic->synchronous_mode are outside this
 * view.
 */
2938
+static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic)
2939
+{
2940
+ DEBUG_print("dm_integrity_enter_synchronous_mode\n");
2941
+
2942
+ if (ic->mode == 'B') {
2943
+ ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1;
2944
+ ic->synchronous_mode = 1;
2945
+ /* Drop any pending delayed flush, queue one with zero delay and wait for commit_wq to drain. */
2946
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
2947
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
2948
+ flush_workqueue(ic->commit_wq);
2949
+ }
2950
+}
2951
+
/*
 * Reboot notifier callback (installed in dm_integrity_resume(), removed in
 * dm_integrity_postsuspend()).
 *
 * @n:    the notifier_block embedded in struct dm_integrity_c as
 *        reboot_notifier; used to recover the owning ic.
 * @code: unused.
 * @x:    unused.
 *
 * Puts the target into synchronous mode and always returns NOTIFY_DONE.
 */
2952
+static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x)
2953
+{
2954
+ struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier);
2955
+
2956
+ DEBUG_print("dm_integrity_reboot\n");
2957
+
2958
+ dm_integrity_enter_synchronous_mode(ic);
2959
+
2960
+ return NOTIFY_DONE;
2961
+}
2962
+
24452963 static void dm_integrity_postsuspend(struct dm_target *ti)
24462964 {
24472965 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2966
+ int r;
2967
+
2968
+ WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier));
24482969
24492970 del_timer_sync(&ic->autocommit_timer);
24502971
24512972 if (ic->recalc_wq)
24522973 drain_workqueue(ic->recalc_wq);
24532974
2975
+ if (ic->mode == 'B')
2976
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
2977
+
24542978 queue_work(ic->commit_wq, &ic->commit_work);
24552979 drain_workqueue(ic->commit_wq);
24562980
24572981 if (ic->mode == 'J') {
2458
- if (ic->meta_dev)
2459
- queue_work(ic->writer_wq, &ic->writer_work);
2982
+ queue_work(ic->writer_wq, &ic->writer_work);
24602983 drain_workqueue(ic->writer_wq);
24612984 dm_integrity_flush_buffers(ic, true);
2985
+ if (ic->wrote_to_journal) {
2986
+ init_journal(ic, ic->free_section,
2987
+ ic->journal_sections - ic->free_section, ic->commit_seq);
2988
+ if (ic->free_section) {
2989
+ init_journal(ic, 0, ic->free_section,
2990
+ next_commit_seq(ic->commit_seq));
2991
+ }
2992
+ }
2993
+ }
2994
+
2995
+ if (ic->mode == 'B') {
2996
+ dm_integrity_flush_buffers(ic, true);
2997
+#if 1
2998
+ /* set to 0 to test bitmap replay code */
2999
+ init_journal(ic, 0, ic->journal_sections, 0);
3000
+ ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3001
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
3002
+ if (unlikely(r))
3003
+ dm_integrity_io_error(ic, "writing superblock", r);
3004
+#endif
24623005 }
24633006
24643007 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
....@@ -2469,11 +3012,99 @@
24693012 static void dm_integrity_resume(struct dm_target *ti)
24703013 {
24713014 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
3015
+ __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3016
+ int r;
24723017
2473
- replay_journal(ic);
3018
+ DEBUG_print("resume\n");
24743019
2475
- if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3020
+ ic->wrote_to_journal = false;
3021
+
3022
+ if (ic->provided_data_sectors != old_provided_data_sectors) {
3023
+ if (ic->provided_data_sectors > old_provided_data_sectors &&
3024
+ ic->mode == 'B' &&
3025
+ ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
3026
+ rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
3027
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3028
+ block_bitmap_op(ic, ic->journal, old_provided_data_sectors,
3029
+ ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET);
3030
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
3031
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3032
+ }
3033
+
3034
+ ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
3035
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
3036
+ if (unlikely(r))
3037
+ dm_integrity_io_error(ic, "writing superblock", r);
3038
+ }
3039
+
3040
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
3041
+ DEBUG_print("resume dirty_bitmap\n");
3042
+ rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
3043
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3044
+ if (ic->mode == 'B') {
3045
+ if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
3046
+ block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
3047
+ block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
3048
+ if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors,
3049
+ BITMAP_OP_TEST_ALL_CLEAR)) {
3050
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3051
+ ic->sb->recalc_sector = cpu_to_le64(0);
3052
+ }
3053
+ } else {
3054
+ DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n",
3055
+ ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
3056
+ ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3057
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3058
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3059
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3060
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
3061
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3062
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3063
+ ic->sb->recalc_sector = cpu_to_le64(0);
3064
+ }
3065
+ } else {
3066
+ if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3067
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR))) {
3068
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3069
+ ic->sb->recalc_sector = cpu_to_le64(0);
3070
+ }
3071
+ init_journal(ic, 0, ic->journal_sections, 0);
3072
+ replay_journal(ic);
3073
+ ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3074
+ }
3075
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
3076
+ if (unlikely(r))
3077
+ dm_integrity_io_error(ic, "writing superblock", r);
3078
+ } else {
3079
+ replay_journal(ic);
3080
+ if (ic->mode == 'B') {
3081
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3082
+ ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3083
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
3084
+ if (unlikely(r))
3085
+ dm_integrity_io_error(ic, "writing superblock", r);
3086
+
3087
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3088
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3089
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3090
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
3091
+ le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
3092
+ block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
3093
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3094
+ block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3095
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3096
+ block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3097
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3098
+ }
3099
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
3100
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3101
+ }
3102
+ }
3103
+
3104
+ DEBUG_print("testing recalc: %x\n", ic->sb->flags);
3105
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
24763106 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
3107
+ DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors);
24773108 if (recalc_pos < ic->provided_data_sectors) {
24783109 queue_work(ic->recalc_wq, &ic->recalc_work);
24793110 } else if (recalc_pos > ic->provided_data_sectors) {
....@@ -2481,6 +3112,16 @@
24813112 recalc_write_super(ic);
24823113 }
24833114 }
3115
+
3116
+ ic->reboot_notifier.notifier_call = dm_integrity_reboot;
3117
+ ic->reboot_notifier.next = NULL;
3118
+ ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */
3119
+ WARN_ON(register_reboot_notifier(&ic->reboot_notifier));
3120
+
3121
+#if 0
3122
+ /* set to 1 to stress test synchronous mode */
3123
+ dm_integrity_enter_synchronous_mode(ic);
3124
+#endif
24843125 }
24853126
24863127 static void dm_integrity_status(struct dm_target *ti, status_type_t type,
....@@ -2494,9 +3135,9 @@
24943135 case STATUSTYPE_INFO:
24953136 DMEMIT("%llu %llu",
24963137 (unsigned long long)atomic64_read(&ic->number_of_mismatches),
2497
- (unsigned long long)ic->provided_data_sectors);
3138
+ ic->provided_data_sectors);
24983139 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
2499
- DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector));
3140
+ DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector));
25003141 else
25013142 DMEMIT(" -");
25023143 break;
....@@ -2505,15 +3146,21 @@
25053146 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
25063147 watermark_percentage += ic->journal_entries / 2;
25073148 do_div(watermark_percentage, ic->journal_entries);
2508
- arg_count = 5;
3149
+ arg_count = 3;
25093150 arg_count += !!ic->meta_dev;
25103151 arg_count += ic->sectors_per_block != 1;
25113152 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
3153
+ arg_count += ic->discard;
3154
+ arg_count += ic->mode == 'J';
3155
+ arg_count += ic->mode == 'J';
3156
+ arg_count += ic->mode == 'B';
3157
+ arg_count += ic->mode == 'B';
25123158 arg_count += !!ic->internal_hash_alg.alg_string;
25133159 arg_count += !!ic->journal_crypt_alg.alg_string;
25143160 arg_count += !!ic->journal_mac_alg.alg_string;
3161
+ arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
25153162 arg_count += ic->legacy_recalculate;
2516
- DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
3163
+ DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start,
25173164 ic->tag_size, ic->mode, arg_count);
25183165 if (ic->meta_dev)
25193166 DMEMIT(" meta_device:%s", ic->meta_dev->name);
....@@ -2521,11 +3168,21 @@
25213168 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
25223169 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
25233170 DMEMIT(" recalculate");
3171
+ if (ic->discard)
3172
+ DMEMIT(" allow_discards");
25243173 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
25253174 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
25263175 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
2527
- DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
2528
- DMEMIT(" commit_time:%u", ic->autocommit_msec);
3176
+ if (ic->mode == 'J') {
3177
+ DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
3178
+ DMEMIT(" commit_time:%u", ic->autocommit_msec);
3179
+ }
3180
+ if (ic->mode == 'B') {
3181
+ DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
3182
+ DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
3183
+ }
3184
+ if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
3185
+ DMEMIT(" fix_padding");
25293186 if (ic->legacy_recalculate)
25303187 DMEMIT(" legacy_recalculate");
25313188
....@@ -2596,8 +3253,14 @@
25963253 if (!ic->meta_dev) {
25973254 sector_t last_sector, last_area, last_offset;
25983255
2599
- ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
2600
- (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
3256
+ /* we have to maintain excessive padding for compatibility with existing volumes */
3257
+ __u64 metadata_run_padding =
3258
+ ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ?
3259
+ (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) :
3260
+ (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS);
3261
+
3262
+ ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
3263
+ metadata_run_padding) >> SECTOR_SHIFT;
26013264 if (!(ic->metadata_run & (ic->metadata_run - 1)))
26023265 ic->log2_metadata_run = __ffs(ic->metadata_run);
26033266 else
....@@ -2622,6 +3285,24 @@
26223285 return 0;
26233286 }
26243287
/*
 * Compute ic->provided_data_sectors.
 *
 * Without a separate metadata device the value is built greedily, one bit
 * at a time from the highest set bit of ic->meta_device_sectors down to
 * bit 3: each bit is kept only if calculate_device_limits() returns 0 with
 * it set. Bits 0-2 are never tried, so the result is a multiple of 8
 * sectors, and it may stay 0 (callers treat 0 as an error).
 *
 * With a metadata device the value is ic->data_device_sectors with the low
 * (sectors_per_block - 1) bits cleared.
 */
3288
+static void get_provided_data_sectors(struct dm_integrity_c *ic)
3289
+{
3290
+ if (!ic->meta_dev) {
3291
+ int test_bit;
3292
+ ic->provided_data_sectors = 0;
3293
+ for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
3294
+ __u64 prev_data_sectors = ic->provided_data_sectors;
3295
+
3296
+ ic->provided_data_sectors |= (sector_t)1 << test_bit;
3297
+ if (calculate_device_limits(ic)) /* candidate rejected: undo this bit */
3298
+ ic->provided_data_sectors = prev_data_sectors;
3299
+ }
3300
+ } else {
3301
+ ic->provided_data_sectors = ic->data_device_sectors;
3302
+ ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); /* rounds down to a whole block, assuming sectors_per_block is a power of two - TODO confirm */
3303
+ }
3304
+}
3305
+
26253306 static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors)
26263307 {
26273308 unsigned journal_sections;
....@@ -2640,6 +3321,8 @@
26403321 journal_sections = 1;
26413322
26423323 if (!ic->meta_dev) {
3324
+ if (ic->fix_padding)
3325
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING);
26433326 ic->sb->journal_sections = cpu_to_le32(journal_sections);
26443327 if (!interleave_sectors)
26453328 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
....@@ -2647,20 +3330,15 @@
26473330 ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
26483331 ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
26493332
2650
- ic->provided_data_sectors = 0;
2651
- for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
2652
- __u64 prev_data_sectors = ic->provided_data_sectors;
2653
-
2654
- ic->provided_data_sectors |= (sector_t)1 << test_bit;
2655
- if (calculate_device_limits(ic))
2656
- ic->provided_data_sectors = prev_data_sectors;
2657
- }
3333
+ get_provided_data_sectors(ic);
26583334 if (!ic->provided_data_sectors)
26593335 return -EINVAL;
26603336 } else {
26613337 ic->sb->log2_interleave_sectors = 0;
2662
- ic->provided_data_sectors = ic->data_device_sectors;
2663
- ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
3338
+
3339
+ get_provided_data_sectors(ic);
3340
+ if (!ic->provided_data_sectors)
3341
+ return -EINVAL;
26643342
26653343 try_smaller_buffer:
26663344 ic->sb->journal_sections = cpu_to_le32(0);
....@@ -2705,37 +3383,37 @@
27053383 blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
27063384 }
27073385
/*
 * Free a page list and every page it references.
 *
 * @pl: array of page_list entries, or NULL (then nothing is done).
 *
 * The new version takes no length: it walks the array until the first
 * entry whose .page is NULL, so the array must end with such an entry.
 * dm_integrity_alloc_page_list() provides it by allocating one extra
 * zeroed element.
 */
2708
-static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
3386
+static void dm_integrity_free_page_list(struct page_list *pl)
27093387 {
27103388 unsigned i;
27113389
27123390 if (!pl)
27133391 return;
2714
- for (i = 0; i < ic->journal_pages; i++)
2715
- if (pl[i].page)
2716
- __free_page(pl[i].page);
3392
+ for (i = 0; pl[i].page; i++) /* stops at the NULL-page terminator */
3393
+ __free_page(pl[i].page);
27173394 kvfree(pl);
27183395 }
27193396
/*
 * Allocate a page list with @n_pages pages.
 *
 * @n_pages: number of pages to allocate.
 *
 * The array has n_pages + 1 zero-initialised entries; entry i - 1 is
 * linked to entry i through .next, and the extra last entry (NULL page,
 * NULL next) terminates the list for dm_integrity_free_page_list().
 *
 * Returns the array, or NULL if the array or any page cannot be allocated
 * (pages obtained so far are freed first).
 */
2720
-static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
3397
+static struct page_list *dm_integrity_alloc_page_list(unsigned n_pages)
27213398 {
2722
- size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list);
27233399 struct page_list *pl;
27243400 unsigned i;
27253401
2726
- pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
3402
+ pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO);
27273403 if (!pl)
27283404 return NULL;
27293405
2730
- for (i = 0; i < ic->journal_pages; i++) {
3406
+ for (i = 0; i < n_pages; i++) {
27313407 pl[i].page = alloc_page(GFP_KERNEL);
27323408 if (!pl[i].page) {
2733
- dm_integrity_free_page_list(ic, pl);
3409
+ dm_integrity_free_page_list(pl); /* entry i is still zeroed, so the free loop stops here */
27343410 return NULL;
27353411 }
27363412 if (i)
27373413 pl[i - 1].next = &pl[i];
27383414 }
3415
+ pl[i].page = NULL; /* explicit terminator entry (i == n_pages here) */
3416
+ pl[i].next = NULL;
27393417
27403418 return pl;
27413419 }
....@@ -2748,7 +3426,8 @@
27483426 kvfree(sl);
27493427 }
27503428
2751
-static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl)
3429
+static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic,
3430
+ struct page_list *pl)
27523431 {
27533432 struct scatterlist **sl;
27543433 unsigned i;
....@@ -2767,7 +3446,8 @@
27673446 unsigned idx;
27683447
27693448 page_list_location(ic, i, 0, &start_index, &start_offset);
2770
- page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset);
3449
+ page_list_location(ic, i, ic->journal_section_sectors - 1,
3450
+ &end_index, &end_offset);
27713451
27723452 n_pages = (end_index - start_index + 1);
27733453
....@@ -2797,8 +3477,8 @@
27973477
/*
 * Release the strings owned by an alg_spec and reset the structure.
 *
 * @a: algorithm specification; its alg_string and key are freed with
 *     kfree_sensitive() (kzfree() before this change) and the whole
 *     structure is then zeroed, so no freed pointers remain in it.
 */
27983478 static void free_alg(struct alg_spec *a)
27993479 {
2800
- kzfree(a->alg_string);
2801
- kzfree(a->key);
3480
+ kfree_sensitive(a->alg_string);
3481
+ kfree_sensitive(a->key);
28023482 memset(a, 0, sizeof *a);
28033483 }
28043484
....@@ -2842,7 +3522,7 @@
28423522 int r;
28433523
28443524 if (a->alg_string) {
2845
- *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC);
3525
+ *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
28463526 if (IS_ERR(*hash)) {
28473527 *error = error_alg;
28483528 r = PTR_ERR(*hash);
....@@ -2881,14 +3561,14 @@
28813561 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
28823562 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
28833563 journal_desc_size = journal_pages * sizeof(struct page_list);
2884
- if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) {
3564
+ if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) {
28853565 *error = "Journal doesn't fit into memory";
28863566 r = -ENOMEM;
28873567 goto bad;
28883568 }
28893569 ic->journal_pages = journal_pages;
28903570
2891
- ic->journal = dm_integrity_alloc_page_list(ic);
3571
+ ic->journal = dm_integrity_alloc_page_list(ic->journal_pages);
28923572 if (!ic->journal) {
28933573 *error = "Could not allocate memory for journal";
28943574 r = -ENOMEM;
....@@ -2899,7 +3579,7 @@
28993579 struct journal_completion comp;
29003580
29013581 comp.ic = ic;
2902
- ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0);
3582
+ ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
29033583 if (IS_ERR(ic->journal_crypt)) {
29043584 *error = "Invalid journal cipher";
29053585 r = PTR_ERR(ic->journal_crypt);
....@@ -2920,7 +3600,7 @@
29203600 DEBUG_print("cipher %s, block size %u iv size %u\n",
29213601 ic->journal_crypt_alg.alg_string, blocksize, ivsize);
29223602
2923
- ic->journal_io = dm_integrity_alloc_page_list(ic);
3603
+ ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages);
29243604 if (!ic->journal_io) {
29253605 *error = "Could not allocate memory for journal io";
29263606 r = -ENOMEM;
....@@ -2937,14 +3617,14 @@
29373617 goto bad;
29383618 }
29393619
2940
- crypt_iv = kmalloc(ivsize, GFP_KERNEL);
3620
+ crypt_iv = kzalloc(ivsize, GFP_KERNEL);
29413621 if (!crypt_iv) {
29423622 *error = "Could not allocate iv";
29433623 r = -ENOMEM;
29443624 goto bad;
29453625 }
29463626
2947
- ic->journal_xor = dm_integrity_alloc_page_list(ic);
3627
+ ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages);
29483628 if (!ic->journal_xor) {
29493629 *error = "Could not allocate memory for journal xor";
29503630 r = -ENOMEM;
....@@ -2966,9 +3646,9 @@
29663646 sg_set_buf(&sg[i], va, PAGE_SIZE);
29673647 }
29683648 sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
2969
- memset(crypt_iv, 0x00, ivsize);
29703649
2971
- skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
3650
+ skcipher_request_set_crypt(req, sg, sg,
3651
+ PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
29723652 init_completion(&comp.comp);
29733653 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
29743654 if (do_crypt(true, req, &comp))
....@@ -3109,7 +3789,7 @@
31093789 * device
31103790 * offset from the start of the device
31113791 * tag size
3112
- * D - direct writes, J - journal writes, R - recovery mode
3792
+ * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode
31133793 * number of optional arguments
31143794 * optional arguments:
31153795 * journal_sectors
....@@ -3117,10 +3797,14 @@
31173797 * buffer_sectors
31183798 * journal_watermark
31193799 * commit_time
3800
+ * meta_device
3801
+ * block_size
3802
+ * sectors_per_bit
3803
+ * bitmap_flush_interval
31203804 * internal_hash
31213805 * journal_crypt
31223806 * journal_mac
3123
- * block_size
3807
+ * recalculate
31243808 */
31253809 static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
31263810 {
....@@ -3130,13 +3814,16 @@
31303814 unsigned extra_args;
31313815 struct dm_arg_set as;
31323816 static const struct dm_arg _args[] = {
3133
- {0, 12, "Invalid number of feature args"},
3817
+ {0, 16, "Invalid number of feature args"},
31343818 };
31353819 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
3136
- bool recalculate;
31373820 bool should_write_sb;
31383821 __u64 threshold;
31393822 unsigned long long start;
3823
+ __s8 log2_sectors_per_bitmap_bit = -1;
3824
+ __s8 log2_blocks_per_bitmap_bit;
3825
+ __u64 bits_in_journal;
3826
+ __u64 n_bitmap_bits;
31403827
31413828 #define DIRECT_ARGUMENTS 4
31423829
....@@ -3161,6 +3848,7 @@
31613848 init_waitqueue_head(&ic->copy_to_journal_wait);
31623849 init_completion(&ic->crypto_backoff);
31633850 atomic64_set(&ic->number_of_mismatches, 0);
3851
+ ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
31643852
31653853 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
31663854 if (r) {
....@@ -3183,10 +3871,11 @@
31833871 }
31843872 }
31853873
3186
- if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R"))
3874
+ if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") ||
3875
+ !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) {
31873876 ic->mode = argv[3][0];
3188
- else {
3189
- ti->error = "Invalid mode (expecting J, D, R)";
3877
+ } else {
3878
+ ti->error = "Invalid mode (expecting J, B, D, R)";
31903879 r = -EINVAL;
31913880 goto bad;
31923881 }
....@@ -3196,7 +3885,6 @@
31963885 buffer_sectors = DEFAULT_BUFFER_SECTORS;
31973886 journal_watermark = DEFAULT_JOURNAL_WATERMARK;
31983887 sync_msec = DEFAULT_SYNC_MSEC;
3199
- recalculate = false;
32003888 ic->sectors_per_block = 1;
32013889
32023890 as.argc = argc - DIRECT_ARGUMENTS;
....@@ -3208,6 +3896,7 @@
32083896 while (extra_args--) {
32093897 const char *opt_string;
32103898 unsigned val;
3899
+ unsigned long long llval;
32113900 opt_string = dm_shift_arg(&as);
32123901 if (!opt_string) {
32133902 r = -EINVAL;
....@@ -3229,7 +3918,8 @@
32293918 dm_put_device(ti, ic->meta_dev);
32303919 ic->meta_dev = NULL;
32313920 }
3232
- r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev);
3921
+ r = dm_get_device(ti, strchr(opt_string, ':') + 1,
3922
+ dm_table_get_mode(ti->table), &ic->meta_dev);
32333923 if (r) {
32343924 ti->error = "Device lookup failed";
32353925 goto bad;
....@@ -3243,6 +3933,15 @@
32433933 goto bad;
32443934 }
32453935 ic->sectors_per_block = val >> SECTOR_SHIFT;
3936
+ } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
3937
+ log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
3938
+ } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
3939
+ if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
3940
+ r = -EINVAL;
3941
+ ti->error = "Invalid bitmap_flush_interval argument";
3942
+ goto bad;
3943
+ }
3944
+ ic->bitmap_flush_interval = msecs_to_jiffies(val);
32463945 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
32473946 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
32483947 "Invalid internal_hash argument");
....@@ -3259,7 +3958,11 @@
32593958 if (r)
32603959 goto bad;
32613960 } else if (!strcmp(opt_string, "recalculate")) {
3262
- recalculate = true;
3961
+ ic->recalculate_flag = true;
3962
+ } else if (!strcmp(opt_string, "allow_discards")) {
3963
+ ic->discard = true;
3964
+ } else if (!strcmp(opt_string, "fix_padding")) {
3965
+ ic->fix_padding = true;
32633966 } else if (!strcmp(opt_string, "legacy_recalculate")) {
32643967 ic->legacy_recalculate = true;
32653968 } else {
....@@ -3277,7 +3980,7 @@
32773980
32783981 if (!journal_sectors) {
32793982 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
3280
- ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
3983
+ ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
32813984 }
32823985
32833986 if (!buffer_sectors)
....@@ -3311,6 +4014,18 @@
33114014 ic->log2_tag_size = __ffs(ic->tag_size);
33124015 else
33134016 ic->log2_tag_size = -1;
4017
+
4018
+ if (ic->mode == 'B' && !ic->internal_hash) {
4019
+ r = -EINVAL;
4020
+ ti->error = "Bitmap mode can be only used with internal hash";
4021
+ goto bad;
4022
+ }
4023
+
4024
+ if (ic->discard && !ic->internal_hash) {
4025
+ r = -EINVAL;
4026
+ ti->error = "Discard can be only used with internal hash";
4027
+ goto bad;
4028
+ }
33144029
33154030 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
33164031 ic->autocommit_msec = sync_msec;
....@@ -3365,7 +4080,7 @@
33654080 }
33664081 INIT_WORK(&ic->commit_work, integrity_commit);
33674082
3368
- if (ic->mode == 'J') {
4083
+ if (ic->mode == 'J' || ic->mode == 'B') {
33694084 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
33704085 if (!ic->writer_wq) {
33714086 ti->error = "Cannot allocate workqueue";
....@@ -3406,7 +4121,7 @@
34064121 should_write_sb = true;
34074122 }
34084123
3409
- if (!ic->sb->version || ic->sb->version > SB_VERSION_2) {
4124
+ if (!ic->sb->version || ic->sb->version > SB_VERSION_4) {
34104125 r = -EINVAL;
34114126 ti->error = "Unknown version";
34124127 goto bad;
....@@ -3441,16 +4156,16 @@
34414156 goto bad;
34424157 }
34434158 }
3444
- ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3445
- if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) {
3446
- /* test for overflow */
3447
- r = -EINVAL;
3448
- ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors";
3449
- goto bad;
3450
- }
34514159 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
34524160 r = -EINVAL;
34534161 ti->error = "Journal mac mismatch";
4162
+ goto bad;
4163
+ }
4164
+
4165
+ get_provided_data_sectors(ic);
4166
+ if (!ic->provided_data_sectors) {
4167
+ r = -EINVAL;
4168
+ ti->error = "The device is too small";
34544169 goto bad;
34554170 }
34564171
....@@ -3466,6 +4181,27 @@
34664181 ti->error = "The device is too small";
34674182 goto bad;
34684183 }
4184
+
4185
+ if (log2_sectors_per_bitmap_bit < 0)
4186
+ log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
4187
+ if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
4188
+ log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
4189
+
4190
+ bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
4191
+ if (bits_in_journal > UINT_MAX)
4192
+ bits_in_journal = UINT_MAX;
4193
+ while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
4194
+ log2_sectors_per_bitmap_bit++;
4195
+
4196
+ log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
4197
+ ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
4198
+ if (should_write_sb) {
4199
+ ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
4200
+ }
4201
+ n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
4202
+ + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
4203
+ ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
4204
+
34694205 if (!ic->meta_dev)
34704206 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
34714207
....@@ -3490,26 +4226,22 @@
34904226 DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
34914227 DEBUG_print(" journal_entries %u\n", ic->journal_entries);
34924228 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
3493
- DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors);
4229
+ DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT);
34944230 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
34954231 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
34964232 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
3497
- DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
3498
- (unsigned long long)ic->provided_data_sectors);
4233
+ DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors);
34994234 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
4235
+ DEBUG_print(" bits_in_journal %llu\n", bits_in_journal);
35004236
3501
- if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
4237
+ if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
35024238 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
35034239 ic->sb->recalc_sector = cpu_to_le64(0);
35044240 }
35054241
3506
- if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3507
- if (!ic->internal_hash) {
3508
- r = -EINVAL;
3509
- ti->error = "Recalculate is only valid with internal hash";
3510
- goto bad;
3511
- }
3512
- ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1);
4242
+ if (ic->internal_hash) {
4243
+ size_t recalc_tags_size;
4244
+ ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
35134245 if (!ic->recalc_wq ) {
35144246 ti->error = "Cannot allocate workqueue";
35154247 r = -ENOMEM;
....@@ -3522,8 +4254,10 @@
35224254 r = -ENOMEM;
35234255 goto bad;
35244256 }
3525
- ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
3526
- ic->tag_size, GFP_KERNEL);
4257
+ recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
4258
+ if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
4259
+ recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
4260
+ ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
35274261 if (!ic->recalc_tags) {
35284262 ti->error = "Cannot allocate tags for recalculating";
35294263 r = -ENOMEM;
....@@ -3559,11 +4293,48 @@
35594293 r = create_journal(ic, &ti->error);
35604294 if (r)
35614295 goto bad;
4296
+
4297
+ }
4298
+
4299
+ if (ic->mode == 'B') {
4300
+ unsigned i;
4301
+ unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
4302
+
4303
+ ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
4304
+ if (!ic->recalc_bitmap) {
4305
+ r = -ENOMEM;
4306
+ goto bad;
4307
+ }
4308
+ ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
4309
+ if (!ic->may_write_bitmap) {
4310
+ r = -ENOMEM;
4311
+ goto bad;
4312
+ }
4313
+ ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
4314
+ if (!ic->bbs) {
4315
+ r = -ENOMEM;
4316
+ goto bad;
4317
+ }
4318
+ INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
4319
+ for (i = 0; i < ic->n_bitmap_blocks; i++) {
4320
+ struct bitmap_block_status *bbs = &ic->bbs[i];
4321
+ unsigned sector, pl_index, pl_offset;
4322
+
4323
+ INIT_WORK(&bbs->work, bitmap_block_work);
4324
+ bbs->ic = ic;
4325
+ bbs->idx = i;
4326
+ bio_list_init(&bbs->bio_queue);
4327
+ spin_lock_init(&bbs->bio_queue_lock);
4328
+
4329
+ sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
4330
+ pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
4331
+ pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
4332
+
4333
+ bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
4334
+ }
35624335 }
35634336
35644337 if (should_write_sb) {
3565
- int r;
3566
-
35674338 init_journal(ic, 0, ic->journal_sections, 0);
35684339 r = dm_integrity_failed(ic);
35694340 if (unlikely(r)) {
....@@ -3583,14 +4354,28 @@
35834354 if (r)
35844355 goto bad;
35854356 }
4357
+ if (ic->mode == 'B') {
4358
+ unsigned max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
4359
+ if (!max_io_len)
4360
+ max_io_len = 1U << 31;
4361
+ DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
4362
+ if (!ti->max_io_len || ti->max_io_len > max_io_len) {
4363
+ r = dm_set_target_max_io_len(ti, max_io_len);
4364
+ if (r)
4365
+ goto bad;
4366
+ }
4367
+ }
35864368
35874369 if (!ic->internal_hash)
35884370 dm_integrity_set(ti, ic);
35894371
35904372 ti->num_flush_bios = 1;
35914373 ti->flush_supported = true;
4374
+ if (ic->discard)
4375
+ ti->num_discard_bios = 1;
35924376
35934377 return 0;
4378
+
35944379 bad:
35954380 dm_integrity_dtr(ti);
35964381 return r;
....@@ -3603,6 +4388,8 @@
36034388 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
36044389 BUG_ON(!list_empty(&ic->wait_list));
36054390
4391
+ if (ic->mode == 'B')
4392
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
36064393 if (ic->metadata_wq)
36074394 destroy_workqueue(ic->metadata_wq);
36084395 if (ic->wait_wq)
....@@ -3615,10 +4402,9 @@
36154402 destroy_workqueue(ic->writer_wq);
36164403 if (ic->recalc_wq)
36174404 destroy_workqueue(ic->recalc_wq);
3618
- if (ic->recalc_buffer)
3619
- vfree(ic->recalc_buffer);
3620
- if (ic->recalc_tags)
3621
- kvfree(ic->recalc_tags);
4405
+ vfree(ic->recalc_buffer);
4406
+ kvfree(ic->recalc_tags);
4407
+ kvfree(ic->bbs);
36224408 if (ic->bufio)
36234409 dm_bufio_client_destroy(ic->bufio);
36244410 mempool_exit(&ic->journal_io_mempool);
....@@ -3628,9 +4414,11 @@
36284414 dm_put_device(ti, ic->dev);
36294415 if (ic->meta_dev)
36304416 dm_put_device(ti, ic->meta_dev);
3631
- dm_integrity_free_page_list(ic, ic->journal);
3632
- dm_integrity_free_page_list(ic, ic->journal_io);
3633
- dm_integrity_free_page_list(ic, ic->journal_xor);
4417
+ dm_integrity_free_page_list(ic->journal);
4418
+ dm_integrity_free_page_list(ic->journal_io);
4419
+ dm_integrity_free_page_list(ic->journal_xor);
4420
+ dm_integrity_free_page_list(ic->recalc_bitmap);
4421
+ dm_integrity_free_page_list(ic->may_write_bitmap);
36344422 if (ic->journal_scatterlist)
36354423 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
36364424 if (ic->journal_io_scatterlist)
....@@ -3641,7 +4429,7 @@
36414429 for (i = 0; i < ic->journal_sections; i++) {
36424430 struct skcipher_request *req = ic->sk_requests[i];
36434431 if (req) {
3644
- kzfree(req->iv);
4432
+ kfree_sensitive(req->iv);
36454433 skcipher_request_free(req);
36464434 }
36474435 }
....@@ -3668,7 +4456,7 @@
36684456
36694457 static struct target_type integrity_target = {
36704458 .name = "integrity",
3671
- .version = {1, 2, 0},
4459
+ .version = {1, 6, 0},
36724460 .module = THIS_MODULE,
36734461 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
36744462 .ctr = dm_integrity_ctr,
....@@ -3681,7 +4469,7 @@
36814469 .io_hints = dm_integrity_io_hints,
36824470 };
36834471
3684
-int __init dm_integrity_init(void)
4472
+static int __init dm_integrity_init(void)
36854473 {
36864474 int r;
36874475
....@@ -3693,14 +4481,16 @@
36934481 }
36944482
36954483 r = dm_register_target(&integrity_target);
3696
-
3697
- if (r < 0)
4484
+ if (r < 0) {
36984485 DMERR("register failed %d", r);
4486
+ kmem_cache_destroy(journal_io_cache);
4487
+ return r;
4488
+ }
36994489
3700
- return r;
4490
+ return 0;
37014491 }
37024492
3703
-void dm_integrity_exit(void)
4493
+static void __exit dm_integrity_exit(void)
37044494 {
37054495 dm_unregister_target(&integrity_target);
37064496 kmem_cache_destroy(journal_io_cache);