.. | .. |
---|
17 | 17 | #include <linux/rbtree.h> |
---|
18 | 18 | #include <linux/delay.h> |
---|
19 | 19 | #include <linux/random.h> |
---|
| 20 | +#include <linux/reboot.h> |
---|
20 | 21 | #include <crypto/hash.h> |
---|
21 | 22 | #include <crypto/skcipher.h> |
---|
22 | 23 | #include <linux/async_tx.h> |
---|
.. | .. |
---|
26 | 27 | |
---|
27 | 28 | #define DEFAULT_INTERLEAVE_SECTORS 32768 |
---|
28 | 29 | #define DEFAULT_JOURNAL_SIZE_FACTOR 7 |
---|
| 30 | +#define DEFAULT_SECTORS_PER_BITMAP_BIT 32768 |
---|
29 | 31 | #define DEFAULT_BUFFER_SECTORS 128 |
---|
30 | 32 | #define DEFAULT_JOURNAL_WATERMARK 50 |
---|
31 | 33 | #define DEFAULT_SYNC_MSEC 10000 |
---|
32 | | -#define DEFAULT_MAX_JOURNAL_SECTORS 131072 |
---|
| 34 | +#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192) |
---|
33 | 35 | #define MIN_LOG2_INTERLEAVE_SECTORS 3 |
---|
34 | 36 | #define MAX_LOG2_INTERLEAVE_SECTORS 31 |
---|
35 | 37 | #define METADATA_WORKQUEUE_MAX_ACTIVE 16 |
---|
36 | | -#define RECALC_SECTORS 8192 |
---|
| 38 | +#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048) |
---|
37 | 39 | #define RECALC_WRITE_SUPER 16 |
---|
| 40 | +#define BITMAP_BLOCK_SIZE 4096 /* don't change it */ |
---|
| 41 | +#define BITMAP_FLUSH_INTERVAL (10 * HZ) |
---|
| 42 | +#define DISCARD_FILLER 0xf6 |
---|
38 | 43 | |
---|
39 | 44 | /* |
---|
40 | 45 | * Warning - DEBUG_PRINT prints security-sensitive data to the log, |
---|
.. | .. |
---|
50 | 55 | #define SB_MAGIC "integrt" |
---|
51 | 56 | #define SB_VERSION_1 1 |
---|
52 | 57 | #define SB_VERSION_2 2 |
---|
| 58 | +#define SB_VERSION_3 3 |
---|
| 59 | +#define SB_VERSION_4 4 |
---|
53 | 60 | #define SB_SECTORS 8 |
---|
54 | 61 | #define MAX_SECTORS_PER_BLOCK 8 |
---|
55 | 62 | |
---|
.. | .. |
---|
62 | 69 | __u64 provided_data_sectors; /* userspace uses this value */ |
---|
63 | 70 | __u32 flags; |
---|
64 | 71 | __u8 log2_sectors_per_block; |
---|
65 | | - __u8 pad[3]; |
---|
| 72 | + __u8 log2_blocks_per_bitmap_bit; |
---|
| 73 | + __u8 pad[2]; |
---|
66 | 74 | __u64 recalc_sector; |
---|
67 | 75 | }; |
---|
68 | 76 | |
---|
69 | 77 | #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 |
---|
70 | 78 | #define SB_FLAG_RECALCULATING 0x2 |
---|
| 79 | +#define SB_FLAG_DIRTY_BITMAP 0x4 |
---|
| 80 | +#define SB_FLAG_FIXED_PADDING 0x8 |
---|
71 | 81 | |
---|
72 | 82 | #define JOURNAL_ENTRY_ROUNDUP 8 |
---|
73 | 83 | |
---|
.. | .. |
---|
82 | 92 | } s; |
---|
83 | 93 | __u64 sector; |
---|
84 | 94 | } u; |
---|
85 | | - commit_id_t last_bytes[0]; |
---|
| 95 | + commit_id_t last_bytes[]; |
---|
86 | 96 | /* __u8 tag[0]; */ |
---|
87 | 97 | }; |
---|
88 | 98 | |
---|
.. | .. |
---|
90 | 100 | |
---|
91 | 101 | #if BITS_PER_LONG == 64 |
---|
92 | 102 | #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0) |
---|
93 | | -#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) |
---|
94 | | -#elif defined(CONFIG_LBDAF) |
---|
95 | | -#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0) |
---|
96 | | -#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) |
---|
97 | 103 | #else |
---|
98 | | -#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32(0)); } while (0) |
---|
99 | | -#define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo) |
---|
| 104 | +#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0) |
---|
100 | 105 | #endif |
---|
| 106 | +#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) |
---|
101 | 107 | #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1)) |
---|
102 | 108 | #define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0) |
---|
103 | 109 | #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2)) |
---|
.. | .. |
---|
157 | 163 | struct workqueue_struct *metadata_wq; |
---|
158 | 164 | struct superblock *sb; |
---|
159 | 165 | unsigned journal_pages; |
---|
| 166 | + unsigned n_bitmap_blocks; |
---|
| 167 | + |
---|
160 | 168 | struct page_list *journal; |
---|
161 | 169 | struct page_list *journal_io; |
---|
162 | 170 | struct page_list *journal_xor; |
---|
| 171 | + struct page_list *recalc_bitmap; |
---|
| 172 | + struct page_list *may_write_bitmap; |
---|
| 173 | + struct bitmap_block_status *bbs; |
---|
| 174 | + unsigned bitmap_flush_interval; |
---|
| 175 | + int synchronous_mode; |
---|
| 176 | + struct bio_list synchronous_bios; |
---|
| 177 | + struct delayed_work bitmap_flush_work; |
---|
163 | 178 | |
---|
164 | 179 | struct crypto_skcipher *journal_crypt; |
---|
165 | 180 | struct scatterlist **journal_scatterlist; |
---|
.. | .. |
---|
186 | 201 | __s8 log2_metadata_run; |
---|
187 | 202 | __u8 log2_buffer_sectors; |
---|
188 | 203 | __u8 sectors_per_block; |
---|
| 204 | + __u8 log2_blocks_per_bitmap_bit; |
---|
189 | 205 | |
---|
190 | 206 | unsigned char mode; |
---|
191 | 207 | |
---|
.. | .. |
---|
238 | 254 | |
---|
239 | 255 | struct completion crypto_backoff; |
---|
240 | 256 | |
---|
| 257 | + bool wrote_to_journal; |
---|
241 | 258 | bool journal_uptodate; |
---|
242 | 259 | bool just_formatted; |
---|
| 260 | + bool recalculate_flag; |
---|
| 261 | + bool discard; |
---|
| 262 | + bool fix_padding; |
---|
243 | 263 | bool legacy_recalculate; |
---|
244 | 264 | |
---|
245 | 265 | struct alg_spec internal_hash_alg; |
---|
.. | .. |
---|
247 | 267 | struct alg_spec journal_mac_alg; |
---|
248 | 268 | |
---|
249 | 269 | atomic64_t number_of_mismatches; |
---|
| 270 | + |
---|
| 271 | + struct notifier_block reboot_notifier; |
---|
250 | 272 | }; |
---|
251 | 273 | |
---|
252 | 274 | struct dm_integrity_range { |
---|
253 | 275 | sector_t logical_sector; |
---|
254 | | - unsigned n_sectors; |
---|
| 276 | + sector_t n_sectors; |
---|
255 | 277 | bool waiting; |
---|
256 | 278 | union { |
---|
257 | 279 | struct rb_node node; |
---|
.. | .. |
---|
266 | 288 | struct work_struct work; |
---|
267 | 289 | |
---|
268 | 290 | struct dm_integrity_c *ic; |
---|
269 | | - bool write; |
---|
| 291 | + enum req_opf op; |
---|
270 | 292 | bool fua; |
---|
271 | 293 | |
---|
272 | 294 | struct dm_integrity_range range; |
---|
.. | .. |
---|
291 | 313 | struct journal_io { |
---|
292 | 314 | struct dm_integrity_range range; |
---|
293 | 315 | struct journal_completion *comp; |
---|
| 316 | +}; |
---|
| 317 | + |
---|
| 318 | +struct bitmap_block_status { |
---|
| 319 | + struct work_struct work; |
---|
| 320 | + struct dm_integrity_c *ic; |
---|
| 321 | + unsigned idx; |
---|
| 322 | + unsigned long *bitmap; |
---|
| 323 | + struct bio_list bio_queue; |
---|
| 324 | + spinlock_t bio_queue_lock; |
---|
| 325 | + |
---|
294 | 326 | }; |
---|
295 | 327 | |
---|
296 | 328 | static struct kmem_cache *journal_io_cache; |
---|
.. | .. |
---|
320 | 352 | #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0) |
---|
321 | 353 | #endif |
---|
322 | 354 | |
---|
/*
 * prepare_fn hook for the DM integrity profile.  Deliberately empty: the
 * hook exists only so the profile has a callable prepare_fn.
 */
static void dm_integrity_prepare(struct request *rq)
{
	/* nothing to do */
}
---|
| 358 | + |
---|
/*
 * complete_fn hook for the DM integrity profile.  Deliberately empty: the
 * hook exists only so the profile has a callable complete_fn.
 */
static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
{
	/* nothing to do */
}
---|
| 362 | + |
---|
323 | 363 | /* |
---|
324 | 364 | * DM Integrity profile, protection is performed layer above (dm-crypt) |
---|
325 | 365 | */ |
---|
.. | .. |
---|
327 | 367 | .name = "DM-DIF-EXT-TAG", |
---|
328 | 368 | .generate_fn = NULL, |
---|
329 | 369 | .verify_fn = NULL, |
---|
| 370 | + .prepare_fn = dm_integrity_prepare, |
---|
| 371 | + .complete_fn = dm_integrity_complete, |
---|
330 | 372 | }; |
---|
331 | 373 | |
---|
332 | 374 | static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); |
---|
.. | .. |
---|
436 | 478 | |
---|
437 | 479 | static void sb_set_version(struct dm_integrity_c *ic) |
---|
438 | 480 | { |
---|
439 | | - if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) |
---|
| 481 | + if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) |
---|
| 482 | + ic->sb->version = SB_VERSION_4; |
---|
| 483 | + else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) |
---|
| 484 | + ic->sb->version = SB_VERSION_3; |
---|
| 485 | + else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) |
---|
440 | 486 | ic->sb->version = SB_VERSION_2; |
---|
441 | 487 | else |
---|
442 | 488 | ic->sb->version = SB_VERSION_1; |
---|
.. | .. |
---|
457 | 503 | io_loc.sector = ic->start; |
---|
458 | 504 | io_loc.count = SB_SECTORS; |
---|
459 | 505 | |
---|
| 506 | + if (op == REQ_OP_WRITE) |
---|
| 507 | + sb_set_version(ic); |
---|
| 508 | + |
---|
460 | 509 | return dm_io(&io_req, 1, &io_loc, NULL); |
---|
| 510 | +} |
---|
| 511 | + |
---|
| 512 | +#define BITMAP_OP_TEST_ALL_SET 0 |
---|
| 513 | +#define BITMAP_OP_TEST_ALL_CLEAR 1 |
---|
| 514 | +#define BITMAP_OP_SET 2 |
---|
| 515 | +#define BITMAP_OP_CLEAR 3 |
---|
| 516 | + |
---|
| 517 | +static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap, |
---|
| 518 | + sector_t sector, sector_t n_sectors, int mode) |
---|
| 519 | +{ |
---|
| 520 | + unsigned long bit, end_bit, this_end_bit, page, end_page; |
---|
| 521 | + unsigned long *data; |
---|
| 522 | + |
---|
| 523 | + if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) { |
---|
| 524 | + DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)", |
---|
| 525 | + sector, |
---|
| 526 | + n_sectors, |
---|
| 527 | + ic->sb->log2_sectors_per_block, |
---|
| 528 | + ic->log2_blocks_per_bitmap_bit, |
---|
| 529 | + mode); |
---|
| 530 | + BUG(); |
---|
| 531 | + } |
---|
| 532 | + |
---|
| 533 | + if (unlikely(!n_sectors)) |
---|
| 534 | + return true; |
---|
| 535 | + |
---|
| 536 | + bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); |
---|
| 537 | + end_bit = (sector + n_sectors - 1) >> |
---|
| 538 | + (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); |
---|
| 539 | + |
---|
| 540 | + page = bit / (PAGE_SIZE * 8); |
---|
| 541 | + bit %= PAGE_SIZE * 8; |
---|
| 542 | + |
---|
| 543 | + end_page = end_bit / (PAGE_SIZE * 8); |
---|
| 544 | + end_bit %= PAGE_SIZE * 8; |
---|
| 545 | + |
---|
| 546 | +repeat: |
---|
| 547 | + if (page < end_page) { |
---|
| 548 | + this_end_bit = PAGE_SIZE * 8 - 1; |
---|
| 549 | + } else { |
---|
| 550 | + this_end_bit = end_bit; |
---|
| 551 | + } |
---|
| 552 | + |
---|
| 553 | + data = lowmem_page_address(bitmap[page].page); |
---|
| 554 | + |
---|
| 555 | + if (mode == BITMAP_OP_TEST_ALL_SET) { |
---|
| 556 | + while (bit <= this_end_bit) { |
---|
| 557 | + if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { |
---|
| 558 | + do { |
---|
| 559 | + if (data[bit / BITS_PER_LONG] != -1) |
---|
| 560 | + return false; |
---|
| 561 | + bit += BITS_PER_LONG; |
---|
| 562 | + } while (this_end_bit >= bit + BITS_PER_LONG - 1); |
---|
| 563 | + continue; |
---|
| 564 | + } |
---|
| 565 | + if (!test_bit(bit, data)) |
---|
| 566 | + return false; |
---|
| 567 | + bit++; |
---|
| 568 | + } |
---|
| 569 | + } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) { |
---|
| 570 | + while (bit <= this_end_bit) { |
---|
| 571 | + if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { |
---|
| 572 | + do { |
---|
| 573 | + if (data[bit / BITS_PER_LONG] != 0) |
---|
| 574 | + return false; |
---|
| 575 | + bit += BITS_PER_LONG; |
---|
| 576 | + } while (this_end_bit >= bit + BITS_PER_LONG - 1); |
---|
| 577 | + continue; |
---|
| 578 | + } |
---|
| 579 | + if (test_bit(bit, data)) |
---|
| 580 | + return false; |
---|
| 581 | + bit++; |
---|
| 582 | + } |
---|
| 583 | + } else if (mode == BITMAP_OP_SET) { |
---|
| 584 | + while (bit <= this_end_bit) { |
---|
| 585 | + if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { |
---|
| 586 | + do { |
---|
| 587 | + data[bit / BITS_PER_LONG] = -1; |
---|
| 588 | + bit += BITS_PER_LONG; |
---|
| 589 | + } while (this_end_bit >= bit + BITS_PER_LONG - 1); |
---|
| 590 | + continue; |
---|
| 591 | + } |
---|
| 592 | + __set_bit(bit, data); |
---|
| 593 | + bit++; |
---|
| 594 | + } |
---|
| 595 | + } else if (mode == BITMAP_OP_CLEAR) { |
---|
| 596 | + if (!bit && this_end_bit == PAGE_SIZE * 8 - 1) |
---|
| 597 | + clear_page(data); |
---|
| 598 | + else while (bit <= this_end_bit) { |
---|
| 599 | + if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { |
---|
| 600 | + do { |
---|
| 601 | + data[bit / BITS_PER_LONG] = 0; |
---|
| 602 | + bit += BITS_PER_LONG; |
---|
| 603 | + } while (this_end_bit >= bit + BITS_PER_LONG - 1); |
---|
| 604 | + continue; |
---|
| 605 | + } |
---|
| 606 | + __clear_bit(bit, data); |
---|
| 607 | + bit++; |
---|
| 608 | + } |
---|
| 609 | + } else { |
---|
| 610 | + BUG(); |
---|
| 611 | + } |
---|
| 612 | + |
---|
| 613 | + if (unlikely(page < end_page)) { |
---|
| 614 | + bit = 0; |
---|
| 615 | + page++; |
---|
| 616 | + goto repeat; |
---|
| 617 | + } |
---|
| 618 | + |
---|
| 619 | + return true; |
---|
| 620 | +} |
---|
| 621 | + |
---|
| 622 | +static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src) |
---|
| 623 | +{ |
---|
| 624 | + unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); |
---|
| 625 | + unsigned i; |
---|
| 626 | + |
---|
| 627 | + for (i = 0; i < n_bitmap_pages; i++) { |
---|
| 628 | + unsigned long *dst_data = lowmem_page_address(dst[i].page); |
---|
| 629 | + unsigned long *src_data = lowmem_page_address(src[i].page); |
---|
| 630 | + copy_page(dst_data, src_data); |
---|
| 631 | + } |
---|
| 632 | +} |
---|
| 633 | + |
---|
| 634 | +static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector) |
---|
| 635 | +{ |
---|
| 636 | + unsigned bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); |
---|
| 637 | + unsigned bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8); |
---|
| 638 | + |
---|
| 639 | + BUG_ON(bitmap_block >= ic->n_bitmap_blocks); |
---|
| 640 | + return &ic->bbs[bitmap_block]; |
---|
461 | 641 | } |
---|
462 | 642 | |
---|
463 | 643 | static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset, |
---|
.. | .. |
---|
468 | 648 | |
---|
469 | 649 | if (unlikely(section >= ic->journal_sections) || |
---|
470 | 650 | unlikely(offset >= limit)) { |
---|
471 | | - printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n", |
---|
472 | | - function, section, offset, ic->journal_sections, limit); |
---|
| 651 | + DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)", |
---|
| 652 | + function, section, offset, ic->journal_sections, limit); |
---|
473 | 653 | BUG(); |
---|
474 | 654 | } |
---|
475 | 655 | #endif |
---|
.. | .. |
---|
541 | 721 | unsigned j, size; |
---|
542 | 722 | |
---|
543 | 723 | desc->tfm = ic->journal_mac; |
---|
544 | | - desc->flags = 0; |
---|
545 | 724 | |
---|
546 | 725 | r = crypto_shash_init(desc); |
---|
547 | 726 | if (unlikely(r)) { |
---|
.. | .. |
---|
568 | 747 | } |
---|
569 | 748 | memset(result + size, 0, JOURNAL_MAC_SIZE - size); |
---|
570 | 749 | } else { |
---|
571 | | - __u8 digest[size]; |
---|
| 750 | + __u8 digest[HASH_MAX_DIGESTSIZE]; |
---|
| 751 | + |
---|
| 752 | + if (WARN_ON(size > sizeof(digest))) { |
---|
| 753 | + dm_integrity_io_error(ic, "digest_size", -EINVAL); |
---|
| 754 | + goto err; |
---|
| 755 | + } |
---|
572 | 756 | r = crypto_shash_final(desc, digest); |
---|
573 | 757 | if (unlikely(r)) { |
---|
574 | 758 | dm_integrity_io_error(ic, "crypto_shash_final", r); |
---|
.. | .. |
---|
765 | 949 | complete_journal_op(comp); |
---|
766 | 950 | } |
---|
767 | 951 | |
---|
768 | | -static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section, |
---|
769 | | - unsigned n_sections, struct journal_completion *comp) |
---|
| 952 | +static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, |
---|
| 953 | + unsigned sector, unsigned n_sectors, struct journal_completion *comp) |
---|
770 | 954 | { |
---|
771 | 955 | struct dm_io_request io_req; |
---|
772 | 956 | struct dm_io_region io_loc; |
---|
773 | | - unsigned sector, n_sectors, pl_index, pl_offset; |
---|
| 957 | + unsigned pl_index, pl_offset; |
---|
774 | 958 | int r; |
---|
775 | 959 | |
---|
776 | 960 | if (unlikely(dm_integrity_failed(ic))) { |
---|
.. | .. |
---|
778 | 962 | complete_journal_io(-1UL, comp); |
---|
779 | 963 | return; |
---|
780 | 964 | } |
---|
781 | | - |
---|
782 | | - sector = section * ic->journal_section_sectors; |
---|
783 | | - n_sectors = n_sections * ic->journal_section_sectors; |
---|
784 | 965 | |
---|
785 | 966 | pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); |
---|
786 | 967 | pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); |
---|
.. | .. |
---|
812 | 993 | complete_journal_io(-1UL, comp); |
---|
813 | 994 | } |
---|
814 | 995 | } |
---|
| 996 | +} |
---|
| 997 | + |
---|
| 998 | +static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section, |
---|
| 999 | + unsigned n_sections, struct journal_completion *comp) |
---|
| 1000 | +{ |
---|
| 1001 | + unsigned sector, n_sectors; |
---|
| 1002 | + |
---|
| 1003 | + sector = section * ic->journal_section_sectors; |
---|
| 1004 | + n_sectors = n_sections * ic->journal_section_sectors; |
---|
| 1005 | + |
---|
| 1006 | + rw_journal_sectors(ic, op, op_flags, sector, n_sectors, comp); |
---|
815 | 1007 | } |
---|
816 | 1008 | |
---|
817 | 1009 | static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections) |
---|
.. | .. |
---|
997 | 1189 | } while (unlikely(new_range->waiting)); |
---|
998 | 1190 | } |
---|
999 | 1191 | |
---|
| 1192 | +static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) |
---|
| 1193 | +{ |
---|
| 1194 | + if (unlikely(!add_new_range(ic, new_range, true))) |
---|
| 1195 | + wait_and_add_new_range(ic, new_range); |
---|
| 1196 | +} |
---|
| 1197 | + |
---|
1000 | 1198 | static void init_journal_node(struct journal_node *node) |
---|
1001 | 1199 | { |
---|
1002 | 1200 | RB_CLEAR_NODE(&node->node); |
---|
.. | .. |
---|
1113 | 1311 | static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block, |
---|
1114 | 1312 | unsigned *metadata_offset, unsigned total_size, int op) |
---|
1115 | 1313 | { |
---|
| 1314 | +#define MAY_BE_FILLER 1 |
---|
| 1315 | +#define MAY_BE_HASH 2 |
---|
| 1316 | + unsigned hash_offset = 0; |
---|
| 1317 | + unsigned may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0); |
---|
| 1318 | + |
---|
1116 | 1319 | do { |
---|
1117 | 1320 | unsigned char *data, *dp; |
---|
1118 | 1321 | struct dm_buffer *b; |
---|
.. | .. |
---|
1124 | 1327 | return r; |
---|
1125 | 1328 | |
---|
1126 | 1329 | data = dm_bufio_read(ic->bufio, *metadata_block, &b); |
---|
1127 | | - if (unlikely(IS_ERR(data))) |
---|
| 1330 | + if (IS_ERR(data)) |
---|
1128 | 1331 | return PTR_ERR(data); |
---|
1129 | 1332 | |
---|
1130 | 1333 | to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); |
---|
.. | .. |
---|
1134 | 1337 | } else if (op == TAG_WRITE) { |
---|
1135 | 1338 | memcpy(dp, tag, to_copy); |
---|
1136 | 1339 | dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); |
---|
1137 | | - } else { |
---|
| 1340 | + } else { |
---|
1138 | 1341 | /* e.g.: op == TAG_CMP */ |
---|
1139 | | - if (unlikely(memcmp(dp, tag, to_copy))) { |
---|
1140 | | - unsigned i; |
---|
1141 | 1342 | |
---|
1142 | | - for (i = 0; i < to_copy; i++) { |
---|
1143 | | - if (dp[i] != tag[i]) |
---|
1144 | | - break; |
---|
1145 | | - total_size--; |
---|
| 1343 | + if (likely(is_power_of_2(ic->tag_size))) { |
---|
| 1344 | + if (unlikely(memcmp(dp, tag, to_copy))) |
---|
| 1345 | + if (unlikely(!ic->discard) || |
---|
| 1346 | + unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) { |
---|
| 1347 | + goto thorough_test; |
---|
1146 | 1348 | } |
---|
1147 | | - dm_bufio_release(b); |
---|
1148 | | - return total_size; |
---|
| 1349 | + } else { |
---|
| 1350 | + unsigned i, ts; |
---|
| 1351 | +thorough_test: |
---|
| 1352 | + ts = total_size; |
---|
| 1353 | + |
---|
| 1354 | + for (i = 0; i < to_copy; i++, ts--) { |
---|
| 1355 | + if (unlikely(dp[i] != tag[i])) |
---|
| 1356 | + may_be &= ~MAY_BE_HASH; |
---|
| 1357 | + if (likely(dp[i] != DISCARD_FILLER)) |
---|
| 1358 | + may_be &= ~MAY_BE_FILLER; |
---|
| 1359 | + hash_offset++; |
---|
| 1360 | + if (unlikely(hash_offset == ic->tag_size)) { |
---|
| 1361 | + if (unlikely(!may_be)) { |
---|
| 1362 | + dm_bufio_release(b); |
---|
| 1363 | + return ts; |
---|
| 1364 | + } |
---|
| 1365 | + hash_offset = 0; |
---|
| 1366 | + may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0); |
---|
| 1367 | + } |
---|
| 1368 | + } |
---|
1149 | 1369 | } |
---|
1150 | 1370 | } |
---|
1151 | 1371 | dm_bufio_release(b); |
---|
.. | .. |
---|
1156 | 1376 | (*metadata_block)++; |
---|
1157 | 1377 | *metadata_offset = 0; |
---|
1158 | 1378 | } |
---|
| 1379 | + |
---|
| 1380 | + if (unlikely(!is_power_of_2(ic->tag_size))) { |
---|
| 1381 | + hash_offset = (hash_offset + to_copy) % ic->tag_size; |
---|
| 1382 | + } |
---|
| 1383 | + |
---|
1159 | 1384 | total_size -= to_copy; |
---|
1160 | 1385 | } while (unlikely(total_size)); |
---|
1161 | 1386 | |
---|
1162 | 1387 | return 0; |
---|
| 1388 | +#undef MAY_BE_FILLER |
---|
| 1389 | +#undef MAY_BE_HASH |
---|
1163 | 1390 | } |
---|
1164 | 1391 | |
---|
1165 | 1392 | struct flush_request { |
---|
.. | .. |
---|
1253 | 1480 | int r = dm_integrity_failed(ic); |
---|
1254 | 1481 | if (unlikely(r) && !bio->bi_status) |
---|
1255 | 1482 | bio->bi_status = errno_to_blk_status(r); |
---|
| 1483 | + if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) { |
---|
| 1484 | + unsigned long flags; |
---|
| 1485 | + spin_lock_irqsave(&ic->endio_wait.lock, flags); |
---|
| 1486 | + bio_list_add(&ic->synchronous_bios, bio); |
---|
| 1487 | + queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); |
---|
| 1488 | + spin_unlock_irqrestore(&ic->endio_wait.lock, flags); |
---|
| 1489 | + return; |
---|
| 1490 | + } |
---|
1256 | 1491 | bio_endio(bio); |
---|
1257 | 1492 | } |
---|
1258 | 1493 | |
---|
.. | .. |
---|
1274 | 1509 | |
---|
1275 | 1510 | remove_range(ic, &dio->range); |
---|
1276 | 1511 | |
---|
1277 | | - if (unlikely(dio->write)) |
---|
| 1512 | + if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD)) |
---|
1278 | 1513 | schedule_autocommit(ic); |
---|
1279 | 1514 | |
---|
1280 | 1515 | bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); |
---|
.. | .. |
---|
1315 | 1550 | unsigned digest_size; |
---|
1316 | 1551 | |
---|
1317 | 1552 | req->tfm = ic->internal_hash; |
---|
1318 | | - req->flags = 0; |
---|
1319 | 1553 | |
---|
1320 | 1554 | r = crypto_shash_init(req); |
---|
1321 | 1555 | if (unlikely(r < 0)) { |
---|
.. | .. |
---|
1366 | 1600 | struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); |
---|
1367 | 1601 | char *checksums; |
---|
1368 | 1602 | unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; |
---|
1369 | | - char checksums_onstack[ic->tag_size + extra_space]; |
---|
1370 | | - unsigned sectors_to_process = dio->range.n_sectors; |
---|
1371 | | - sector_t sector = dio->range.logical_sector; |
---|
| 1603 | + char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; |
---|
| 1604 | + sector_t sector; |
---|
| 1605 | + unsigned sectors_to_process; |
---|
1372 | 1606 | |
---|
1373 | 1607 | if (unlikely(ic->mode == 'R')) |
---|
1374 | 1608 | goto skip_io; |
---|
1375 | 1609 | |
---|
1376 | | - checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, |
---|
1377 | | - GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); |
---|
1378 | | - if (!checksums) |
---|
| 1610 | + if (likely(dio->op != REQ_OP_DISCARD)) |
---|
| 1611 | + checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, |
---|
| 1612 | + GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); |
---|
| 1613 | + else |
---|
| 1614 | + checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); |
---|
| 1615 | + if (!checksums) { |
---|
1379 | 1616 | checksums = checksums_onstack; |
---|
| 1617 | + if (WARN_ON(extra_space && |
---|
| 1618 | + digest_size > sizeof(checksums_onstack))) { |
---|
| 1619 | + r = -EINVAL; |
---|
| 1620 | + goto error; |
---|
| 1621 | + } |
---|
| 1622 | + } |
---|
| 1623 | + |
---|
| 1624 | + if (unlikely(dio->op == REQ_OP_DISCARD)) { |
---|
| 1625 | + sector_t bi_sector = dio->bio_details.bi_iter.bi_sector; |
---|
| 1626 | + unsigned bi_size = dio->bio_details.bi_iter.bi_size; |
---|
| 1627 | + unsigned max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE; |
---|
| 1628 | + unsigned max_blocks = max_size / ic->tag_size; |
---|
| 1629 | + memset(checksums, DISCARD_FILLER, max_size); |
---|
| 1630 | + |
---|
| 1631 | + while (bi_size) { |
---|
| 1632 | + unsigned this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); |
---|
| 1633 | + this_step_blocks = min(this_step_blocks, max_blocks); |
---|
| 1634 | + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, |
---|
| 1635 | + this_step_blocks * ic->tag_size, TAG_WRITE); |
---|
| 1636 | + if (unlikely(r)) { |
---|
| 1637 | + if (likely(checksums != checksums_onstack)) |
---|
| 1638 | + kfree(checksums); |
---|
| 1639 | + goto error; |
---|
| 1640 | + } |
---|
| 1641 | + |
---|
| 1642 | + /*if (bi_size < this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block)) { |
---|
| 1643 | + printk("BUGG: bi_sector: %llx, bi_size: %u\n", bi_sector, bi_size); |
---|
| 1644 | + printk("BUGG: this_step_blocks: %u\n", this_step_blocks); |
---|
| 1645 | + BUG(); |
---|
| 1646 | + }*/ |
---|
| 1647 | + bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); |
---|
| 1648 | + bi_sector += this_step_blocks << ic->sb->log2_sectors_per_block; |
---|
| 1649 | + } |
---|
| 1650 | + |
---|
| 1651 | + if (likely(checksums != checksums_onstack)) |
---|
| 1652 | + kfree(checksums); |
---|
| 1653 | + goto skip_io; |
---|
| 1654 | + } |
---|
| 1655 | + |
---|
| 1656 | + sector = dio->range.logical_sector; |
---|
| 1657 | + sectors_to_process = dio->range.n_sectors; |
---|
1380 | 1658 | |
---|
1381 | 1659 | __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { |
---|
1382 | 1660 | unsigned pos; |
---|
.. | .. |
---|
1396 | 1674 | kunmap_atomic(mem); |
---|
1397 | 1675 | |
---|
1398 | 1676 | r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, |
---|
1399 | | - checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE); |
---|
| 1677 | + checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); |
---|
1400 | 1678 | if (unlikely(r)) { |
---|
1401 | 1679 | if (r > 0) { |
---|
1402 | | - DMERR_LIMIT("Checksum failed at sector 0x%llx", |
---|
1403 | | - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); |
---|
| 1680 | + char b[BDEVNAME_SIZE]; |
---|
| 1681 | + DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", bio_devname(bio, b), |
---|
| 1682 | + (sector - ((r + ic->tag_size - 1) / ic->tag_size))); |
---|
1404 | 1683 | r = -EILSEQ; |
---|
1405 | 1684 | atomic64_inc(&ic->number_of_mismatches); |
---|
1406 | 1685 | } |
---|
.. | .. |
---|
1439 | 1718 | tag = lowmem_page_address(biv.bv_page) + biv.bv_offset; |
---|
1440 | 1719 | this_len = min(biv.bv_len, data_to_process); |
---|
1441 | 1720 | r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset, |
---|
1442 | | - this_len, !dio->write ? TAG_READ : TAG_WRITE); |
---|
| 1721 | + this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE); |
---|
1443 | 1722 | if (unlikely(r)) |
---|
1444 | 1723 | goto error; |
---|
1445 | 1724 | data_to_process -= this_len; |
---|
.. | .. |
---|
1466 | 1745 | |
---|
1467 | 1746 | dio->ic = ic; |
---|
1468 | 1747 | dio->bi_status = 0; |
---|
| 1748 | + dio->op = bio_op(bio); |
---|
| 1749 | + |
---|
| 1750 | + if (unlikely(dio->op == REQ_OP_DISCARD)) { |
---|
| 1751 | + if (ti->max_io_len) { |
---|
| 1752 | + sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector); |
---|
| 1753 | + unsigned log2_max_io_len = __fls(ti->max_io_len); |
---|
| 1754 | + sector_t start_boundary = sec >> log2_max_io_len; |
---|
| 1755 | + sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len; |
---|
| 1756 | + if (start_boundary < end_boundary) { |
---|
| 1757 | + sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1)); |
---|
| 1758 | + dm_accept_partial_bio(bio, len); |
---|
| 1759 | + } |
---|
| 1760 | + } |
---|
| 1761 | + } |
---|
1469 | 1762 | |
---|
1470 | 1763 | if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { |
---|
1471 | 1764 | submit_flush_bio(ic, dio); |
---|
.. | .. |
---|
1473 | 1766 | } |
---|
1474 | 1767 | |
---|
1475 | 1768 | dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); |
---|
1476 | | - dio->write = bio_op(bio) == REQ_OP_WRITE; |
---|
1477 | | - dio->fua = dio->write && bio->bi_opf & REQ_FUA; |
---|
| 1769 | + dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA; |
---|
1478 | 1770 | if (unlikely(dio->fua)) { |
---|
1479 | 1771 | /* |
---|
1480 | 1772 | * Don't pass down the FUA flag because we have to flush |
---|
.. | .. |
---|
1484 | 1776 | } |
---|
1485 | 1777 | if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { |
---|
1486 | 1778 | DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", |
---|
1487 | | - (unsigned long long)dio->range.logical_sector, bio_sectors(bio), |
---|
1488 | | - (unsigned long long)ic->provided_data_sectors); |
---|
| 1779 | + dio->range.logical_sector, bio_sectors(bio), |
---|
| 1780 | + ic->provided_data_sectors); |
---|
1489 | 1781 | return DM_MAPIO_KILL; |
---|
1490 | 1782 | } |
---|
1491 | 1783 | if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) { |
---|
1492 | 1784 | DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", |
---|
1493 | 1785 | ic->sectors_per_block, |
---|
1494 | | - (unsigned long long)dio->range.logical_sector, bio_sectors(bio)); |
---|
| 1786 | + dio->range.logical_sector, bio_sectors(bio)); |
---|
1495 | 1787 | return DM_MAPIO_KILL; |
---|
1496 | 1788 | } |
---|
1497 | 1789 | |
---|
1498 | | - if (ic->sectors_per_block > 1) { |
---|
| 1790 | + if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) { |
---|
1499 | 1791 | struct bvec_iter iter; |
---|
1500 | 1792 | struct bio_vec bv; |
---|
1501 | 1793 | bio_for_each_segment(bv, bio, iter) { |
---|
.. | .. |
---|
1516 | 1808 | else |
---|
1517 | 1809 | wanted_tag_size *= ic->tag_size; |
---|
1518 | 1810 | if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { |
---|
1519 | | - DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size); |
---|
| 1811 | + DMERR("Invalid integrity data size %u, expected %u", |
---|
| 1812 | + bip->bip_iter.bi_size, wanted_tag_size); |
---|
1520 | 1813 | return DM_MAPIO_KILL; |
---|
1521 | 1814 | } |
---|
1522 | 1815 | } |
---|
.. | .. |
---|
1527 | 1820 | } |
---|
1528 | 1821 | } |
---|
1529 | 1822 | |
---|
1530 | | - if (unlikely(ic->mode == 'R') && unlikely(dio->write)) |
---|
| 1823 | + if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ)) |
---|
1531 | 1824 | return DM_MAPIO_KILL; |
---|
1532 | 1825 | |
---|
1533 | 1826 | get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); |
---|
.. | .. |
---|
1557 | 1850 | bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); |
---|
1558 | 1851 | retry_kmap: |
---|
1559 | 1852 | mem = kmap_atomic(bv.bv_page); |
---|
1560 | | - if (likely(dio->write)) |
---|
| 1853 | + if (likely(dio->op == REQ_OP_WRITE)) |
---|
1561 | 1854 | flush_dcache_page(bv.bv_page); |
---|
1562 | 1855 | |
---|
1563 | 1856 | do { |
---|
1564 | 1857 | struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry); |
---|
1565 | 1858 | |
---|
1566 | | - if (unlikely(!dio->write)) { |
---|
| 1859 | + if (unlikely(dio->op == REQ_OP_READ)) { |
---|
1567 | 1860 | struct journal_sector *js; |
---|
1568 | 1861 | char *mem_ptr; |
---|
1569 | 1862 | unsigned s; |
---|
.. | .. |
---|
1588 | 1881 | } while (++s < ic->sectors_per_block); |
---|
1589 | 1882 | #ifdef INTERNAL_VERIFY |
---|
1590 | 1883 | if (ic->internal_hash) { |
---|
1591 | | - char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; |
---|
| 1884 | + char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; |
---|
1592 | 1885 | |
---|
1593 | 1886 | integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); |
---|
1594 | 1887 | if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { |
---|
1595 | 1888 | DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", |
---|
1596 | | - (unsigned long long)logical_sector); |
---|
| 1889 | + logical_sector); |
---|
1597 | 1890 | } |
---|
1598 | 1891 | } |
---|
1599 | 1892 | #endif |
---|
.. | .. |
---|
1610 | 1903 | char *tag_addr; |
---|
1611 | 1904 | BUG_ON(PageHighMem(biv.bv_page)); |
---|
1612 | 1905 | tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset; |
---|
1613 | | - if (likely(dio->write)) |
---|
| 1906 | + if (likely(dio->op == REQ_OP_WRITE)) |
---|
1614 | 1907 | memcpy(tag_ptr, tag_addr, tag_now); |
---|
1615 | 1908 | else |
---|
1616 | 1909 | memcpy(tag_addr, tag_ptr, tag_now); |
---|
.. | .. |
---|
1618 | 1911 | tag_ptr += tag_now; |
---|
1619 | 1912 | tag_todo -= tag_now; |
---|
1620 | 1913 | } while (unlikely(tag_todo)); else { |
---|
1621 | | - if (likely(dio->write)) |
---|
| 1914 | + if (likely(dio->op == REQ_OP_WRITE)) |
---|
1622 | 1915 | memset(tag_ptr, 0, tag_todo); |
---|
1623 | 1916 | } |
---|
1624 | 1917 | } |
---|
1625 | 1918 | |
---|
1626 | | - if (likely(dio->write)) { |
---|
| 1919 | + if (likely(dio->op == REQ_OP_WRITE)) { |
---|
1627 | 1920 | struct journal_sector *js; |
---|
1628 | 1921 | unsigned s; |
---|
1629 | 1922 | |
---|
.. | .. |
---|
1638 | 1931 | if (ic->internal_hash) { |
---|
1639 | 1932 | unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); |
---|
1640 | 1933 | if (unlikely(digest_size > ic->tag_size)) { |
---|
1641 | | - char checksums_onstack[digest_size]; |
---|
| 1934 | + char checksums_onstack[HASH_MAX_DIGESTSIZE]; |
---|
1642 | 1935 | integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); |
---|
1643 | 1936 | memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); |
---|
1644 | 1937 | } else |
---|
.. | .. |
---|
1659 | 1952 | bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; |
---|
1660 | 1953 | } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); |
---|
1661 | 1954 | |
---|
1662 | | - if (unlikely(!dio->write)) |
---|
| 1955 | + if (unlikely(dio->op == REQ_OP_READ)) |
---|
1663 | 1956 | flush_dcache_page(bv.bv_page); |
---|
1664 | 1957 | kunmap_atomic(mem); |
---|
1665 | 1958 | } while (n_sectors); |
---|
1666 | 1959 | |
---|
1667 | | - if (likely(dio->write)) { |
---|
| 1960 | + if (likely(dio->op == REQ_OP_WRITE)) { |
---|
1668 | 1961 | smp_mb(); |
---|
1669 | 1962 | if (unlikely(waitqueue_active(&ic->copy_to_journal_wait))) |
---|
1670 | 1963 | wake_up(&ic->copy_to_journal_wait); |
---|
.. | .. |
---|
1696 | 1989 | unsigned journal_section, journal_entry; |
---|
1697 | 1990 | unsigned journal_read_pos; |
---|
1698 | 1991 | struct completion read_comp; |
---|
1699 | | - bool need_sync_io = ic->internal_hash && !dio->write; |
---|
| 1992 | + bool discard_retried = false; |
---|
| 1993 | + bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ; |
---|
| 1994 | + if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D') |
---|
| 1995 | + need_sync_io = true; |
---|
1700 | 1996 | |
---|
1701 | 1997 | if (need_sync_io && from_map) { |
---|
1702 | 1998 | INIT_WORK(&dio->work, integrity_bio_wait); |
---|
.. | .. |
---|
1714 | 2010 | } |
---|
1715 | 2011 | dio->range.n_sectors = bio_sectors(bio); |
---|
1716 | 2012 | journal_read_pos = NOT_FOUND; |
---|
1717 | | - if (likely(ic->mode == 'J')) { |
---|
1718 | | - if (dio->write) { |
---|
| 2013 | + if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) { |
---|
| 2014 | + if (dio->op == REQ_OP_WRITE) { |
---|
1719 | 2015 | unsigned next_entry, i, pos; |
---|
1720 | 2016 | unsigned ws, we, range_sectors; |
---|
1721 | 2017 | |
---|
1722 | 2018 | dio->range.n_sectors = min(dio->range.n_sectors, |
---|
1723 | | - ic->free_sectors << ic->sb->log2_sectors_per_block); |
---|
| 2019 | + (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block); |
---|
1724 | 2020 | if (unlikely(!dio->range.n_sectors)) { |
---|
1725 | 2021 | if (from_map) |
---|
1726 | 2022 | goto offload_to_thread; |
---|
.. | .. |
---|
1810 | 2106 | } |
---|
1811 | 2107 | } |
---|
1812 | 2108 | } |
---|
| 2109 | + if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) { |
---|
| 2110 | + sector_t next_sector; |
---|
| 2111 | + unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); |
---|
| 2112 | + if (unlikely(new_pos != NOT_FOUND) || |
---|
| 2113 | + unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) { |
---|
| 2114 | + remove_range_unlocked(ic, &dio->range); |
---|
| 2115 | + spin_unlock_irq(&ic->endio_wait.lock); |
---|
| 2116 | + queue_work(ic->commit_wq, &ic->commit_work); |
---|
| 2117 | + flush_workqueue(ic->commit_wq); |
---|
| 2118 | + queue_work(ic->writer_wq, &ic->writer_work); |
---|
| 2119 | + flush_workqueue(ic->writer_wq); |
---|
| 2120 | + discard_retried = true; |
---|
| 2121 | + goto lock_retry; |
---|
| 2122 | + } |
---|
| 2123 | + } |
---|
1813 | 2124 | spin_unlock_irq(&ic->endio_wait.lock); |
---|
1814 | 2125 | |
---|
1815 | 2126 | if (unlikely(journal_read_pos != NOT_FOUND)) { |
---|
1816 | 2127 | journal_section = journal_read_pos / ic->journal_section_entries; |
---|
1817 | 2128 | journal_entry = journal_read_pos % ic->journal_section_entries; |
---|
1818 | 2129 | goto journal_read_write; |
---|
| 2130 | + } |
---|
| 2131 | + |
---|
| 2132 | + if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) { |
---|
| 2133 | + if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, |
---|
| 2134 | + dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { |
---|
| 2135 | + struct bitmap_block_status *bbs; |
---|
| 2136 | + |
---|
| 2137 | + bbs = sector_to_bitmap_block(ic, dio->range.logical_sector); |
---|
| 2138 | + spin_lock(&bbs->bio_queue_lock); |
---|
| 2139 | + bio_list_add(&bbs->bio_queue, bio); |
---|
| 2140 | + spin_unlock(&bbs->bio_queue_lock); |
---|
| 2141 | + queue_work(ic->writer_wq, &bbs->work); |
---|
| 2142 | + return; |
---|
| 2143 | + } |
---|
1819 | 2144 | } |
---|
1820 | 2145 | |
---|
1821 | 2146 | dio->in_flight = (atomic_t)ATOMIC_INIT(2); |
---|
.. | .. |
---|
1833 | 2158 | bio->bi_end_io = integrity_end_io; |
---|
1834 | 2159 | bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; |
---|
1835 | 2160 | |
---|
1836 | | - generic_make_request(bio); |
---|
| 2161 | + if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { |
---|
| 2162 | + integrity_metadata(&dio->work); |
---|
| 2163 | + dm_integrity_flush_buffers(ic, false); |
---|
| 2164 | + |
---|
| 2165 | + dio->in_flight = (atomic_t)ATOMIC_INIT(1); |
---|
| 2166 | + dio->completion = NULL; |
---|
| 2167 | + |
---|
| 2168 | + submit_bio_noacct(bio); |
---|
| 2169 | + |
---|
| 2170 | + return; |
---|
| 2171 | + } |
---|
| 2172 | + |
---|
| 2173 | + submit_bio_noacct(bio); |
---|
1837 | 2174 | |
---|
1838 | 2175 | if (need_sync_io) { |
---|
1839 | 2176 | wait_for_completion_io(&read_comp); |
---|
1840 | | - if (unlikely(ic->recalc_wq != NULL) && |
---|
1841 | | - ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && |
---|
| 2177 | + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && |
---|
1842 | 2178 | dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector)) |
---|
1843 | 2179 | goto skip_check; |
---|
| 2180 | + if (ic->mode == 'B') { |
---|
| 2181 | + if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector, |
---|
| 2182 | + dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) |
---|
| 2183 | + goto skip_check; |
---|
| 2184 | + } |
---|
| 2185 | + |
---|
1844 | 2186 | if (likely(!bio->bi_status)) |
---|
1845 | 2187 | integrity_metadata(&dio->work); |
---|
1846 | 2188 | else |
---|
.. | .. |
---|
1878 | 2220 | wraparound_section(ic, &ic->free_section); |
---|
1879 | 2221 | ic->n_uncommitted_sections++; |
---|
1880 | 2222 | } |
---|
1881 | | - WARN_ON(ic->journal_sections * ic->journal_section_entries != |
---|
1882 | | - (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors); |
---|
| 2223 | + if (WARN_ON(ic->journal_sections * ic->journal_section_entries != |
---|
| 2224 | + (ic->n_uncommitted_sections + ic->n_committed_sections) * |
---|
| 2225 | + ic->journal_section_entries + ic->free_sectors)) { |
---|
| 2226 | + DMCRIT("journal_sections %u, journal_section_entries %u, " |
---|
| 2227 | + "n_uncommitted_sections %u, n_committed_sections %u, " |
---|
| 2228 | + "journal_section_entries %u, free_sectors %u", |
---|
| 2229 | + ic->journal_sections, ic->journal_section_entries, |
---|
| 2230 | + ic->n_uncommitted_sections, ic->n_committed_sections, |
---|
| 2231 | + ic->journal_section_entries, ic->free_sectors); |
---|
| 2232 | + } |
---|
1883 | 2233 | } |
---|
1884 | 2234 | |
---|
1885 | 2235 | static void integrity_commit(struct work_struct *w) |
---|
.. | .. |
---|
1906 | 2256 | |
---|
1907 | 2257 | if (!commit_sections) |
---|
1908 | 2258 | goto release_flush_bios; |
---|
| 2259 | + |
---|
| 2260 | + ic->wrote_to_journal = true; |
---|
1909 | 2261 | |
---|
1910 | 2262 | i = commit_start; |
---|
1911 | 2263 | for (n = 0; n < commit_sections; n++) { |
---|
.. | .. |
---|
2005 | 2357 | dm_integrity_io_error(ic, "invalid sector in journal", -EIO); |
---|
2006 | 2358 | sec &= ~(sector_t)(ic->sectors_per_block - 1); |
---|
2007 | 2359 | } |
---|
| 2360 | + if (unlikely(sec >= ic->provided_data_sectors)) { |
---|
| 2361 | + journal_entry_set_unused(je); |
---|
| 2362 | + continue; |
---|
| 2363 | + } |
---|
2008 | 2364 | } |
---|
2009 | 2365 | get_area_and_offset(ic, sec, &area, &offset); |
---|
2010 | 2366 | restore_last_bytes(ic, access_journal_data(ic, i, j), je); |
---|
.. | .. |
---|
2015 | 2371 | break; |
---|
2016 | 2372 | BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); |
---|
2017 | 2373 | sec2 = journal_entry_get_sector(je2); |
---|
| 2374 | + if (unlikely(sec2 >= ic->provided_data_sectors)) |
---|
| 2375 | + break; |
---|
2018 | 2376 | get_area_and_offset(ic, sec2, &area2, &offset2); |
---|
2019 | 2377 | if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block)) |
---|
2020 | 2378 | break; |
---|
.. | .. |
---|
2028 | 2386 | io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; |
---|
2029 | 2387 | |
---|
2030 | 2388 | spin_lock_irq(&ic->endio_wait.lock); |
---|
2031 | | - if (unlikely(!add_new_range(ic, &io->range, true))) |
---|
2032 | | - wait_and_add_new_range(ic, &io->range); |
---|
| 2389 | + add_new_range_and_wait(ic, &io->range); |
---|
2033 | 2390 | |
---|
2034 | 2391 | if (likely(!from_replay)) { |
---|
2035 | 2392 | struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; |
---|
.. | .. |
---|
2073 | 2430 | unlikely(from_replay) && |
---|
2074 | 2431 | #endif |
---|
2075 | 2432 | ic->internal_hash) { |
---|
2076 | | - char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; |
---|
| 2433 | + char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; |
---|
2077 | 2434 | |
---|
2078 | 2435 | integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), |
---|
2079 | 2436 | (char *)access_journal_data(ic, i, l), test_tag); |
---|
.. | .. |
---|
2116 | 2473 | |
---|
2117 | 2474 | unsigned prev_free_sectors; |
---|
2118 | 2475 | |
---|
2119 | | - /* the following test is not needed, but it tests the replay code */ |
---|
2120 | | - if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) |
---|
2121 | | - return; |
---|
2122 | | - |
---|
2123 | 2476 | spin_lock_irq(&ic->endio_wait.lock); |
---|
2124 | 2477 | write_start = ic->committed_section; |
---|
2125 | 2478 | write_sections = ic->n_committed_sections; |
---|
.. | .. |
---|
2152 | 2505 | if (dm_integrity_failed(ic)) |
---|
2153 | 2506 | return; |
---|
2154 | 2507 | |
---|
2155 | | - sb_set_version(ic); |
---|
2156 | 2508 | r = sync_rw_sb(ic, REQ_OP_WRITE, 0); |
---|
2157 | 2509 | if (unlikely(r)) |
---|
2158 | 2510 | dm_integrity_io_error(ic, "writing superblock", r); |
---|
.. | .. |
---|
2167 | 2519 | sector_t area, offset; |
---|
2168 | 2520 | sector_t metadata_block; |
---|
2169 | 2521 | unsigned metadata_offset; |
---|
| 2522 | + sector_t logical_sector, n_sectors; |
---|
2170 | 2523 | __u8 *t; |
---|
2171 | 2524 | unsigned i; |
---|
2172 | 2525 | int r; |
---|
2173 | 2526 | unsigned super_counter = 0; |
---|
| 2527 | + |
---|
| 2528 | + DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector)); |
---|
2174 | 2529 | |
---|
2175 | 2530 | spin_lock_irq(&ic->endio_wait.lock); |
---|
2176 | 2531 | |
---|
.. | .. |
---|
2180 | 2535 | goto unlock_ret; |
---|
2181 | 2536 | |
---|
2182 | 2537 | range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); |
---|
2183 | | - if (unlikely(range.logical_sector >= ic->provided_data_sectors)) |
---|
| 2538 | + if (unlikely(range.logical_sector >= ic->provided_data_sectors)) { |
---|
| 2539 | + if (ic->mode == 'B') { |
---|
| 2540 | + block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); |
---|
| 2541 | + DEBUG_print("queue_delayed_work: bitmap_flush_work\n"); |
---|
| 2542 | + queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); |
---|
| 2543 | + } |
---|
2184 | 2544 | goto unlock_ret; |
---|
| 2545 | + } |
---|
2185 | 2546 | |
---|
2186 | 2547 | get_area_and_offset(ic, range.logical_sector, &area, &offset); |
---|
2187 | 2548 | range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector); |
---|
2188 | 2549 | if (!ic->meta_dev) |
---|
2189 | | - range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset); |
---|
| 2550 | + range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset); |
---|
2190 | 2551 | |
---|
2191 | | - if (unlikely(!add_new_range(ic, &range, true))) |
---|
2192 | | - wait_and_add_new_range(ic, &range); |
---|
2193 | | - |
---|
| 2552 | + add_new_range_and_wait(ic, &range); |
---|
2194 | 2553 | spin_unlock_irq(&ic->endio_wait.lock); |
---|
| 2554 | + logical_sector = range.logical_sector; |
---|
| 2555 | + n_sectors = range.n_sectors; |
---|
| 2556 | + |
---|
| 2557 | + if (ic->mode == 'B') { |
---|
| 2558 | + if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) { |
---|
| 2559 | + goto advance_and_next; |
---|
| 2560 | + } |
---|
| 2561 | + while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, |
---|
| 2562 | + ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { |
---|
| 2563 | + logical_sector += ic->sectors_per_block; |
---|
| 2564 | + n_sectors -= ic->sectors_per_block; |
---|
| 2565 | + cond_resched(); |
---|
| 2566 | + } |
---|
| 2567 | + while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block, |
---|
| 2568 | + ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { |
---|
| 2569 | + n_sectors -= ic->sectors_per_block; |
---|
| 2570 | + cond_resched(); |
---|
| 2571 | + } |
---|
| 2572 | + get_area_and_offset(ic, logical_sector, &area, &offset); |
---|
| 2573 | + } |
---|
| 2574 | + |
---|
| 2575 | + DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors); |
---|
2195 | 2576 | |
---|
2196 | 2577 | if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { |
---|
2197 | 2578 | recalc_write_super(ic); |
---|
| 2579 | + if (ic->mode == 'B') { |
---|
| 2580 | + queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); |
---|
| 2581 | + } |
---|
2198 | 2582 | super_counter = 0; |
---|
2199 | 2583 | } |
---|
2200 | 2584 | |
---|
.. | .. |
---|
2209 | 2593 | io_req.client = ic->io; |
---|
2210 | 2594 | io_loc.bdev = ic->dev->bdev; |
---|
2211 | 2595 | io_loc.sector = get_data_sector(ic, area, offset); |
---|
2212 | | - io_loc.count = range.n_sectors; |
---|
| 2596 | + io_loc.count = n_sectors; |
---|
2213 | 2597 | |
---|
2214 | 2598 | r = dm_io(&io_req, 1, &io_loc, NULL); |
---|
2215 | 2599 | if (unlikely(r)) { |
---|
.. | .. |
---|
2218 | 2602 | } |
---|
2219 | 2603 | |
---|
2220 | 2604 | t = ic->recalc_tags; |
---|
2221 | | - for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { |
---|
2222 | | - integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); |
---|
| 2605 | + for (i = 0; i < n_sectors; i += ic->sectors_per_block) { |
---|
| 2606 | + integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); |
---|
2223 | 2607 | t += ic->tag_size; |
---|
2224 | 2608 | } |
---|
2225 | 2609 | |
---|
.. | .. |
---|
2230 | 2614 | dm_integrity_io_error(ic, "writing tags", r); |
---|
2231 | 2615 | goto err; |
---|
2232 | 2616 | } |
---|
| 2617 | + |
---|
| 2618 | + if (ic->mode == 'B') { |
---|
| 2619 | + sector_t start, end; |
---|
| 2620 | + start = (range.logical_sector >> |
---|
| 2621 | + (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << |
---|
| 2622 | + (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); |
---|
| 2623 | + end = ((range.logical_sector + range.n_sectors) >> |
---|
| 2624 | + (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << |
---|
| 2625 | + (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); |
---|
| 2626 | + block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR); |
---|
| 2627 | + } |
---|
| 2628 | + |
---|
| 2629 | +advance_and_next: |
---|
| 2630 | + cond_resched(); |
---|
2233 | 2631 | |
---|
2234 | 2632 | spin_lock_irq(&ic->endio_wait.lock); |
---|
2235 | 2633 | remove_range_unlocked(ic, &range); |
---|
.. | .. |
---|
2245 | 2643 | |
---|
2246 | 2644 | recalc_write_super(ic); |
---|
2247 | 2645 | } |
---|
| 2646 | + |
---|
 | 2647 | +static void bitmap_block_work(struct work_struct *w) /* Work handler for one bitmap block: requeues the bios parked on it to offload_wq, first writing out the ic->journal bitmap bits for those whose range is not yet fully set in may_write_bitmap. */
---|
 | 2648 | +{
---|
 | 2649 | +	struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
---|
 | 2650 | +	struct dm_integrity_c *ic = bbs->ic;
---|
 | 2651 | +	struct bio *bio;
---|
 | 2652 | +	struct bio_list bio_queue;
---|
 | 2653 | +	struct bio_list waiting;
---|
 | 2654 | +
---|
 | 2655 | +	bio_list_init(&waiting);
---|
 | 2656 | +	/* Move the pending bios to a private list under the queue lock and leave the shared queue empty. */
---|
 | 2657 | +	spin_lock(&bbs->bio_queue_lock);
---|
 | 2658 | +	bio_queue = bbs->bio_queue;
---|
 | 2659 | +	bio_list_init(&bbs->bio_queue);
---|
 | 2660 | +	spin_unlock(&bbs->bio_queue_lock);
---|
 | 2661 | +
---|
 | 2662 | +	while ((bio = bio_list_pop(&bio_queue))) {
---|
 | 2663 | +		struct dm_integrity_io *dio;
---|
 | 2664 | +
---|
 | 2665 | +		dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
---|
 | 2666 | +		/* Range already fully set in may_write_bitmap: no bitmap write is needed, requeue the bio at once. */
---|
 | 2667 | +		if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
---|
 | 2668 | +				    dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
---|
 | 2669 | +			remove_range(ic, &dio->range);
---|
 | 2670 | +			INIT_WORK(&dio->work, integrity_bio_wait);
---|
 | 2671 | +			queue_work(ic->offload_wq, &dio->work);
---|
 | 2672 | +		} else { /* otherwise set the bits in ic->journal and hold the bio until they have been written out */
---|
 | 2673 | +			block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
---|
 | 2674 | +					dio->range.n_sectors, BITMAP_OP_SET);
---|
 | 2675 | +			bio_list_add(&waiting, bio);
---|
 | 2676 | +		}
---|
 | 2677 | +	}
---|
 | 2678 | +
---|
 | 2679 | +	if (bio_list_empty(&waiting))
---|
 | 2680 | +		return;
---|
 | 2681 | +	/* Write this bitmap block (index bbs->idx) with FUA before any held bio is released. NOTE(review): assumes rw_journal_sectors with a NULL completion waits for the write - confirm. */
---|
 | 2682 | +	rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC,
---|
 | 2683 | +			   bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
---|
 | 2684 | +			   BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
---|
 | 2685 | +
---|
 | 2686 | +	while ((bio = bio_list_pop(&waiting))) {
---|
 | 2687 | +		struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
---|
 | 2688 | +		/* Mark the range in may_write_bitmap so later bios covering it take the fast path above. */
---|
 | 2689 | +		block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
---|
 | 2690 | +				dio->range.n_sectors, BITMAP_OP_SET);
---|
 | 2691 | +		/* NOTE(review): remove_range presumably drops the range taken before the bio was parked so the requeued work can take it again - confirm against the caller. */
---|
 | 2692 | +		remove_range(ic, &dio->range);
---|
 | 2693 | +		INIT_WORK(&dio->work, integrity_bio_wait);
---|
 | 2694 | +		queue_work(ic->offload_wq, &dio->work);
---|
 | 2695 | +	}
---|
 | 2696 | +	/* Queue the delayed bitmap flush on commit_wq, bitmap_flush_interval jiffies from now. */
---|
 | 2697 | +	queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
---|
 | 2698 | +}
---|
| 2699 | + |
---|
| 2700 | +static void bitmap_flush_work(struct work_struct *work) |
---|
| 2701 | +{ |
---|
| 2702 | + struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work); |
---|
| 2703 | + struct dm_integrity_range range; |
---|
| 2704 | + unsigned long limit; |
---|
| 2705 | + struct bio *bio; |
---|
| 2706 | + |
---|
| 2707 | + dm_integrity_flush_buffers(ic, false); |
---|
| 2708 | + |
---|
| 2709 | + range.logical_sector = 0; |
---|
| 2710 | + range.n_sectors = ic->provided_data_sectors; |
---|
| 2711 | + |
---|
| 2712 | + spin_lock_irq(&ic->endio_wait.lock); |
---|
| 2713 | + add_new_range_and_wait(ic, &range); |
---|
| 2714 | + spin_unlock_irq(&ic->endio_wait.lock); |
---|
| 2715 | + |
---|
| 2716 | + dm_integrity_flush_buffers(ic, true); |
---|
| 2717 | + |
---|
| 2718 | + limit = ic->provided_data_sectors; |
---|
| 2719 | + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { |
---|
| 2720 | + limit = le64_to_cpu(ic->sb->recalc_sector) |
---|
| 2721 | + >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit) |
---|
| 2722 | + << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); |
---|
| 2723 | + } |
---|
| 2724 | + /*DEBUG_print("zeroing journal\n");*/ |
---|
| 2725 | + block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR); |
---|
| 2726 | + block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR); |
---|
| 2727 | + |
---|
| 2728 | + rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, |
---|
| 2729 | + ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); |
---|
| 2730 | + |
---|
| 2731 | + spin_lock_irq(&ic->endio_wait.lock); |
---|
| 2732 | + remove_range_unlocked(ic, &range); |
---|
| 2733 | + while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) { |
---|
| 2734 | + bio_endio(bio); |
---|
| 2735 | + spin_unlock_irq(&ic->endio_wait.lock); |
---|
| 2736 | + spin_lock_irq(&ic->endio_wait.lock); |
---|
| 2737 | + } |
---|
| 2738 | + spin_unlock_irq(&ic->endio_wait.lock); |
---|
| 2739 | +} |
---|
| 2740 | + |
---|
2248 | 2741 | |
---|
2249 | 2742 | static void init_journal(struct dm_integrity_c *ic, unsigned start_section, |
---|
2250 | 2743 | unsigned n_sections, unsigned char commit_seq) |
---|
.. | .. |
---|
2442 | 2935 | init_journal_node(&ic->journal_tree[i]); |
---|
2443 | 2936 | } |
---|
2444 | 2937 | |
---|
 | 2938 | +static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic) /* In bitmap mode ('B'): shortens the bitmap flush interval, sets ic->synchronous_mode and runs one bitmap flush to completion. Only logs a debug message in other modes. */
---|
 | 2939 | +{
---|
 | 2940 | +	DEBUG_print("dm_integrity_enter_synchronous_mode\n");
---|
 | 2941 | +
---|
 | 2942 | +	if (ic->mode == 'B') {
---|
 | 2943 | +		ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1; /* later flushes are scheduled about 10 ms out instead of the previous interval */
---|
 | 2944 | +		ic->synchronous_mode = 1;
---|
 | 2945 | +		/* Replace any pending delayed flush with an immediate one and wait for commit_wq to finish it. */
---|
 | 2946 | +		cancel_delayed_work_sync(&ic->bitmap_flush_work);
---|
 | 2947 | +		queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
---|
 | 2948 | +		flush_workqueue(ic->commit_wq);
---|
 | 2949 | +	}
---|
 | 2950 | +}
---|
| 2951 | + |
---|
 | 2952 | +static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x) /* Reboot notifier callback: switches the owning dm_integrity_c to synchronous mode. 'code' and 'x' are unused; always returns NOTIFY_DONE. */
---|
 | 2953 | +{
---|
 | 2954 | +	struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier); /* n is the reboot_notifier member embedded in the dm_integrity_c */
---|
 | 2955 | +
---|
 | 2956 | +	DEBUG_print("dm_integrity_reboot\n");
---|
 | 2957 | +
---|
 | 2958 | +	dm_integrity_enter_synchronous_mode(ic);
---|
 | 2959 | +
---|
 | 2960 | +	return NOTIFY_DONE;
---|
 | 2961 | +}
---|
| 2962 | + |
---|
2445 | 2963 | static void dm_integrity_postsuspend(struct dm_target *ti) |
---|
2446 | 2964 | { |
---|
2447 | 2965 | struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; |
---|
| 2966 | + int r; |
---|
| 2967 | + |
---|
| 2968 | + WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier)); |
---|
2448 | 2969 | |
---|
2449 | 2970 | del_timer_sync(&ic->autocommit_timer); |
---|
2450 | 2971 | |
---|
2451 | 2972 | if (ic->recalc_wq) |
---|
2452 | 2973 | drain_workqueue(ic->recalc_wq); |
---|
2453 | 2974 | |
---|
| 2975 | + if (ic->mode == 'B') |
---|
| 2976 | + cancel_delayed_work_sync(&ic->bitmap_flush_work); |
---|
| 2977 | + |
---|
2454 | 2978 | queue_work(ic->commit_wq, &ic->commit_work); |
---|
2455 | 2979 | drain_workqueue(ic->commit_wq); |
---|
2456 | 2980 | |
---|
2457 | 2981 | if (ic->mode == 'J') { |
---|
2458 | | - if (ic->meta_dev) |
---|
2459 | | - queue_work(ic->writer_wq, &ic->writer_work); |
---|
| 2982 | + queue_work(ic->writer_wq, &ic->writer_work); |
---|
2460 | 2983 | drain_workqueue(ic->writer_wq); |
---|
2461 | 2984 | dm_integrity_flush_buffers(ic, true); |
---|
| 2985 | + if (ic->wrote_to_journal) { |
---|
| 2986 | + init_journal(ic, ic->free_section, |
---|
| 2987 | + ic->journal_sections - ic->free_section, ic->commit_seq); |
---|
| 2988 | + if (ic->free_section) { |
---|
| 2989 | + init_journal(ic, 0, ic->free_section, |
---|
| 2990 | + next_commit_seq(ic->commit_seq)); |
---|
| 2991 | + } |
---|
| 2992 | + } |
---|
| 2993 | + } |
---|
| 2994 | + |
---|
| 2995 | + if (ic->mode == 'B') { |
---|
| 2996 | + dm_integrity_flush_buffers(ic, true); |
---|
| 2997 | +#if 1 |
---|
| 2998 | + /* set to 0 to test bitmap replay code */ |
---|
| 2999 | + init_journal(ic, 0, ic->journal_sections, 0); |
---|
| 3000 | + ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); |
---|
| 3001 | + r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); |
---|
| 3002 | + if (unlikely(r)) |
---|
| 3003 | + dm_integrity_io_error(ic, "writing superblock", r); |
---|
| 3004 | +#endif |
---|
2462 | 3005 | } |
---|
2463 | 3006 | |
---|
2464 | 3007 | BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); |
---|
.. | .. |
---|
2469 | 3012 | static void dm_integrity_resume(struct dm_target *ti) |
---|
2470 | 3013 | { |
---|
2471 | 3014 | struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; |
---|
| 3015 | + __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); |
---|
| 3016 | + int r; |
---|
2472 | 3017 | |
---|
2473 | | - replay_journal(ic); |
---|
| 3018 | + DEBUG_print("resume\n"); |
---|
2474 | 3019 | |
---|
2475 | | - if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { |
---|
| 3020 | + ic->wrote_to_journal = false; |
---|
| 3021 | + |
---|
| 3022 | + if (ic->provided_data_sectors != old_provided_data_sectors) { |
---|
| 3023 | + if (ic->provided_data_sectors > old_provided_data_sectors && |
---|
| 3024 | + ic->mode == 'B' && |
---|
| 3025 | + ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { |
---|
| 3026 | + rw_journal_sectors(ic, REQ_OP_READ, 0, 0, |
---|
| 3027 | + ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); |
---|
| 3028 | + block_bitmap_op(ic, ic->journal, old_provided_data_sectors, |
---|
| 3029 | + ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET); |
---|
| 3030 | + rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, |
---|
| 3031 | + ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); |
---|
| 3032 | + } |
---|
| 3033 | + |
---|
| 3034 | + ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); |
---|
| 3035 | + r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); |
---|
| 3036 | + if (unlikely(r)) |
---|
| 3037 | + dm_integrity_io_error(ic, "writing superblock", r); |
---|
| 3038 | + } |
---|
| 3039 | + |
---|
| 3040 | + if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) { |
---|
| 3041 | + DEBUG_print("resume dirty_bitmap\n"); |
---|
| 3042 | + rw_journal_sectors(ic, REQ_OP_READ, 0, 0, |
---|
| 3043 | + ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); |
---|
| 3044 | + if (ic->mode == 'B') { |
---|
| 3045 | + if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { |
---|
| 3046 | + block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal); |
---|
| 3047 | + block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal); |
---|
| 3048 | + if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, |
---|
| 3049 | + BITMAP_OP_TEST_ALL_CLEAR)) { |
---|
| 3050 | + ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); |
---|
| 3051 | + ic->sb->recalc_sector = cpu_to_le64(0); |
---|
| 3052 | + } |
---|
| 3053 | + } else { |
---|
| 3054 | + DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n", |
---|
| 3055 | + ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit); |
---|
| 3056 | + ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; |
---|
| 3057 | + block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); |
---|
| 3058 | + block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); |
---|
| 3059 | + block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET); |
---|
| 3060 | + rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, |
---|
| 3061 | + ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); |
---|
| 3062 | + ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); |
---|
| 3063 | + ic->sb->recalc_sector = cpu_to_le64(0); |
---|
| 3064 | + } |
---|
| 3065 | + } else { |
---|
| 3066 | + if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && |
---|
| 3067 | + block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR))) { |
---|
| 3068 | + ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); |
---|
| 3069 | + ic->sb->recalc_sector = cpu_to_le64(0); |
---|
| 3070 | + } |
---|
| 3071 | + init_journal(ic, 0, ic->journal_sections, 0); |
---|
| 3072 | + replay_journal(ic); |
---|
| 3073 | + ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); |
---|
| 3074 | + } |
---|
| 3075 | + r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); |
---|
| 3076 | + if (unlikely(r)) |
---|
| 3077 | + dm_integrity_io_error(ic, "writing superblock", r); |
---|
| 3078 | + } else { |
---|
| 3079 | + replay_journal(ic); |
---|
| 3080 | + if (ic->mode == 'B') { |
---|
| 3081 | + ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); |
---|
| 3082 | + ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; |
---|
| 3083 | + r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); |
---|
| 3084 | + if (unlikely(r)) |
---|
| 3085 | + dm_integrity_io_error(ic, "writing superblock", r); |
---|
| 3086 | + |
---|
| 3087 | + block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); |
---|
| 3088 | + block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); |
---|
| 3089 | + block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); |
---|
| 3090 | + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && |
---|
| 3091 | + le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) { |
---|
| 3092 | + block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector), |
---|
| 3093 | + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); |
---|
| 3094 | + block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector), |
---|
| 3095 | + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); |
---|
| 3096 | + block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), |
---|
| 3097 | + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); |
---|
| 3098 | + } |
---|
| 3099 | + rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, |
---|
| 3100 | + ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); |
---|
| 3101 | + } |
---|
| 3102 | + } |
---|
| 3103 | + |
---|
| 3104 | + DEBUG_print("testing recalc: %x\n", ic->sb->flags); |
---|
| 3105 | + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { |
---|
2476 | 3106 | __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); |
---|
| 3107 | + DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors); |
---|
2477 | 3108 | if (recalc_pos < ic->provided_data_sectors) { |
---|
2478 | 3109 | queue_work(ic->recalc_wq, &ic->recalc_work); |
---|
2479 | 3110 | } else if (recalc_pos > ic->provided_data_sectors) { |
---|
.. | .. |
---|
2481 | 3112 | recalc_write_super(ic); |
---|
2482 | 3113 | } |
---|
2483 | 3114 | } |
---|
| 3115 | + |
---|
| 3116 | + ic->reboot_notifier.notifier_call = dm_integrity_reboot; |
---|
| 3117 | + ic->reboot_notifier.next = NULL; |
---|
| 3118 | + ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */ |
---|
| 3119 | + WARN_ON(register_reboot_notifier(&ic->reboot_notifier)); |
---|
| 3120 | + |
---|
| 3121 | +#if 0 |
---|
| 3122 | + /* set to 1 to stress test synchronous mode */ |
---|
| 3123 | + dm_integrity_enter_synchronous_mode(ic); |
---|
| 3124 | +#endif |
---|
2484 | 3125 | } |
---|
2485 | 3126 | |
---|
2486 | 3127 | static void dm_integrity_status(struct dm_target *ti, status_type_t type, |
---|
.. | .. |
---|
2494 | 3135 | case STATUSTYPE_INFO: |
---|
2495 | 3136 | DMEMIT("%llu %llu", |
---|
2496 | 3137 | (unsigned long long)atomic64_read(&ic->number_of_mismatches), |
---|
2497 | | - (unsigned long long)ic->provided_data_sectors); |
---|
| 3138 | + ic->provided_data_sectors); |
---|
2498 | 3139 | if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) |
---|
2499 | | - DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector)); |
---|
| 3140 | + DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector)); |
---|
2500 | 3141 | else |
---|
2501 | 3142 | DMEMIT(" -"); |
---|
2502 | 3143 | break; |
---|
.. | .. |
---|
2505 | 3146 | __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; |
---|
2506 | 3147 | watermark_percentage += ic->journal_entries / 2; |
---|
2507 | 3148 | do_div(watermark_percentage, ic->journal_entries); |
---|
2508 | | - arg_count = 5; |
---|
| 3149 | + arg_count = 3; |
---|
2509 | 3150 | arg_count += !!ic->meta_dev; |
---|
2510 | 3151 | arg_count += ic->sectors_per_block != 1; |
---|
2511 | 3152 | arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); |
---|
| 3153 | + arg_count += ic->discard; |
---|
| 3154 | + arg_count += ic->mode == 'J'; |
---|
| 3155 | + arg_count += ic->mode == 'J'; |
---|
| 3156 | + arg_count += ic->mode == 'B'; |
---|
| 3157 | + arg_count += ic->mode == 'B'; |
---|
2512 | 3158 | arg_count += !!ic->internal_hash_alg.alg_string; |
---|
2513 | 3159 | arg_count += !!ic->journal_crypt_alg.alg_string; |
---|
2514 | 3160 | arg_count += !!ic->journal_mac_alg.alg_string; |
---|
| 3161 | + arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; |
---|
2515 | 3162 | arg_count += ic->legacy_recalculate; |
---|
2516 | | - DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, |
---|
| 3163 | + DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start, |
---|
2517 | 3164 | ic->tag_size, ic->mode, arg_count); |
---|
2518 | 3165 | if (ic->meta_dev) |
---|
2519 | 3166 | DMEMIT(" meta_device:%s", ic->meta_dev->name); |
---|
.. | .. |
---|
2521 | 3168 | DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); |
---|
2522 | 3169 | if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) |
---|
2523 | 3170 | DMEMIT(" recalculate"); |
---|
| 3171 | + if (ic->discard) |
---|
| 3172 | + DMEMIT(" allow_discards"); |
---|
2524 | 3173 | DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); |
---|
2525 | 3174 | DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); |
---|
2526 | 3175 | DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); |
---|
2527 | | - DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); |
---|
2528 | | - DMEMIT(" commit_time:%u", ic->autocommit_msec); |
---|
| 3176 | + if (ic->mode == 'J') { |
---|
| 3177 | + DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); |
---|
| 3178 | + DMEMIT(" commit_time:%u", ic->autocommit_msec); |
---|
| 3179 | + } |
---|
| 3180 | + if (ic->mode == 'B') { |
---|
| 3181 | + DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit); |
---|
| 3182 | + DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval)); |
---|
| 3183 | + } |
---|
| 3184 | + if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) |
---|
| 3185 | + DMEMIT(" fix_padding"); |
---|
2529 | 3186 | if (ic->legacy_recalculate) |
---|
2530 | 3187 | DMEMIT(" legacy_recalculate"); |
---|
2531 | 3188 | |
---|
.. | .. |
---|
2596 | 3253 | if (!ic->meta_dev) { |
---|
2597 | 3254 | sector_t last_sector, last_area, last_offset; |
---|
2598 | 3255 | |
---|
2599 | | - ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), |
---|
2600 | | - (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; |
---|
| 3256 | + /* we have to maintain excessive padding for compatibility with existing volumes */ |
---|
| 3257 | + __u64 metadata_run_padding = |
---|
| 3258 | + ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ? |
---|
| 3259 | + (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) : |
---|
| 3260 | + (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS); |
---|
| 3261 | + |
---|
| 3262 | + ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), |
---|
| 3263 | + metadata_run_padding) >> SECTOR_SHIFT; |
---|
2601 | 3264 | if (!(ic->metadata_run & (ic->metadata_run - 1))) |
---|
2602 | 3265 | ic->log2_metadata_run = __ffs(ic->metadata_run); |
---|
2603 | 3266 | else |
---|
.. | .. |
---|
2622 | 3285 | return 0; |
---|
2623 | 3286 | } |
---|
2624 | 3287 | |
---|
| 3288 | +static void get_provided_data_sectors(struct dm_integrity_c *ic) /* Fills in ic->provided_data_sectors; callers treat a zero result as an error. */ |
---|
| 3289 | +{ |
---|
| 3290 | + if (!ic->meta_dev) { /* no separate metadata device: the size is found by a bit-by-bit search */ |
---|
| 3291 | + int test_bit; |
---|
| 3292 | + ic->provided_data_sectors = 0; |
---|
| 3293 | + for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { /* from the highest set bit of meta_device_sectors down to bit 3 */ |
---|
| 3294 | + __u64 prev_data_sectors = ic->provided_data_sectors; |
---|
| 3295 | + |
---|
| 3296 | + ic->provided_data_sectors |= (sector_t)1 << test_bit; /* tentatively add this bit to the candidate size */ |
---|
| 3297 | + if (calculate_device_limits(ic)) /* nonzero return: the candidate is rejected */ |
---|
| 3298 | + ic->provided_data_sectors = prev_data_sectors; /* roll the bit back */ |
---|
| 3299 | + } |
---|
| 3300 | + } else { |
---|
| 3301 | + ic->provided_data_sectors = ic->data_device_sectors; /* separate metadata device: start from the data device size */ |
---|
| 3302 | + ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); /* clears the low bits; rounds down to a whole block assuming sectors_per_block is a power of two */ |
---|
| 3303 | + } |
---|
| 3304 | +} |
---|
| 3305 | + |
---|
2625 | 3306 | static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors) |
---|
2626 | 3307 | { |
---|
2627 | 3308 | unsigned journal_sections; |
---|
.. | .. |
---|
2640 | 3321 | journal_sections = 1; |
---|
2641 | 3322 | |
---|
2642 | 3323 | if (!ic->meta_dev) { |
---|
| 3324 | + if (ic->fix_padding) |
---|
| 3325 | + ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING); |
---|
2643 | 3326 | ic->sb->journal_sections = cpu_to_le32(journal_sections); |
---|
2644 | 3327 | if (!interleave_sectors) |
---|
2645 | 3328 | interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; |
---|
.. | .. |
---|
2647 | 3330 | ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); |
---|
2648 | 3331 | ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); |
---|
2649 | 3332 | |
---|
2650 | | - ic->provided_data_sectors = 0; |
---|
2651 | | - for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { |
---|
2652 | | - __u64 prev_data_sectors = ic->provided_data_sectors; |
---|
2653 | | - |
---|
2654 | | - ic->provided_data_sectors |= (sector_t)1 << test_bit; |
---|
2655 | | - if (calculate_device_limits(ic)) |
---|
2656 | | - ic->provided_data_sectors = prev_data_sectors; |
---|
2657 | | - } |
---|
| 3333 | + get_provided_data_sectors(ic); |
---|
2658 | 3334 | if (!ic->provided_data_sectors) |
---|
2659 | 3335 | return -EINVAL; |
---|
2660 | 3336 | } else { |
---|
2661 | 3337 | ic->sb->log2_interleave_sectors = 0; |
---|
2662 | | - ic->provided_data_sectors = ic->data_device_sectors; |
---|
2663 | | - ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); |
---|
| 3338 | + |
---|
| 3339 | + get_provided_data_sectors(ic); |
---|
| 3340 | + if (!ic->provided_data_sectors) |
---|
| 3341 | + return -EINVAL; |
---|
2664 | 3342 | |
---|
2665 | 3343 | try_smaller_buffer: |
---|
2666 | 3344 | ic->sb->journal_sections = cpu_to_le32(0); |
---|
.. | .. |
---|
2705 | 3383 | blk_queue_max_integrity_segments(disk->queue, UINT_MAX); |
---|
2706 | 3384 | } |
---|
2707 | 3385 | |
---|
2708 | | -static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl) |
---|
| 3386 | +static void dm_integrity_free_page_list(struct page_list *pl) /* Frees every page referenced by pl, then the descriptor array itself; a NULL pl is accepted. */ |
---|
2709 | 3387 | { |
---|
2710 | 3388 | unsigned i; |
---|
2711 | 3389 | |
---|
2712 | 3390 | if (!pl) |
---|
2713 | 3391 | return; |
---|
2714 | | - for (i = 0; i < ic->journal_pages; i++) |
---|
2715 | | - if (pl[i].page) |
---|
2716 | | - __free_page(pl[i].page); |
---|
| 3392 | + for (i = 0; pl[i].page; i++) /* stops at the first entry whose page is NULL; dm_integrity_alloc_page_list() keeps one such entry at the end */ |
---|
| 3393 | + __free_page(pl[i].page); |
---|
2717 | 3394 | kvfree(pl); |
---|
2718 | 3395 | } |
---|
2719 | 3396 | |
---|
2720 | | -static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic) |
---|
| 3397 | +static struct page_list *dm_integrity_alloc_page_list(unsigned n_pages) /* Allocates n_pages pages plus an array of n_pages + 1 linked descriptors; returns NULL on any allocation failure. */ |
---|
2721 | 3398 | { |
---|
2722 | | - size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list); |
---|
2723 | 3399 | struct page_list *pl; |
---|
2724 | 3400 | unsigned i; |
---|
2725 | 3401 | |
---|
2726 | | - pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO); |
---|
| 3402 | + pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO); /* zeroed; the extra entry is the end marker */ |
---|
2727 | 3403 | if (!pl) |
---|
2728 | 3404 | return NULL; |
---|
2729 | 3405 | |
---|
2730 | | - for (i = 0; i < ic->journal_pages; i++) { |
---|
| 3406 | + for (i = 0; i < n_pages; i++) { |
---|
2731 | 3407 | pl[i].page = alloc_page(GFP_KERNEL); |
---|
2732 | 3408 | if (!pl[i].page) { |
---|
2733 | | - dm_integrity_free_page_list(ic, pl); |
---|
| 3409 | + dm_integrity_free_page_list(pl); /* releases the pages obtained so far and the array */ |
---|
2734 | 3410 | return NULL; |
---|
2735 | 3411 | } |
---|
2736 | 3412 | if (i) |
---|
2737 | 3413 | pl[i - 1].next = &pl[i]; /* chain the previous descriptor to this one */ |
---|
2738 | 3414 | } |
---|
| 3415 | + pl[i].page = NULL; /* here i == n_pages: explicitly terminate the list */ |
---|
| 3416 | + pl[i].next = NULL; |
---|
2739 | 3417 | |
---|
2740 | 3418 | return pl; |
---|
2741 | 3419 | } |
---|
.. | .. |
---|
2748 | 3426 | kvfree(sl); |
---|
2749 | 3427 | } |
---|
2750 | 3428 | |
---|
2751 | | -static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl) |
---|
| 3429 | +static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, |
---|
| 3430 | + struct page_list *pl) |
---|
2752 | 3431 | { |
---|
2753 | 3432 | struct scatterlist **sl; |
---|
2754 | 3433 | unsigned i; |
---|
.. | .. |
---|
2767 | 3446 | unsigned idx; |
---|
2768 | 3447 | |
---|
2769 | 3448 | page_list_location(ic, i, 0, &start_index, &start_offset); |
---|
2770 | | - page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset); |
---|
| 3449 | + page_list_location(ic, i, ic->journal_section_sectors - 1, |
---|
| 3450 | + &end_index, &end_offset); |
---|
2771 | 3451 | |
---|
2772 | 3452 | n_pages = (end_index - start_index + 1); |
---|
2773 | 3453 | |
---|
.. | .. |
---|
2797 | 3477 | |
---|
2798 | 3478 | static void free_alg(struct alg_spec *a) /* Frees the algorithm name and key held by *a, then zeroes the whole structure. */ |
---|
2799 | 3479 | { |
---|
2800 | | - kzfree(a->alg_string); |
---|
2801 | | - kzfree(a->key); |
---|
| 3480 | + kfree_sensitive(a->alg_string); /* kfree_sensitive() clears the buffer before freeing it */ |
---|
| 3481 | + kfree_sensitive(a->key); |
---|
2802 | 3482 | memset(a, 0, sizeof *a); /* leaves no stale pointers in *a */ |
---|
2803 | 3483 | } |
---|
2804 | 3484 | |
---|
.. | .. |
---|
2842 | 3522 | int r; |
---|
2843 | 3523 | |
---|
2844 | 3524 | if (a->alg_string) { |
---|
2845 | | - *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC); |
---|
| 3525 | + *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); |
---|
2846 | 3526 | if (IS_ERR(*hash)) { |
---|
2847 | 3527 | *error = error_alg; |
---|
2848 | 3528 | r = PTR_ERR(*hash); |
---|
.. | .. |
---|
2881 | 3561 | journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, |
---|
2882 | 3562 | PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); |
---|
2883 | 3563 | journal_desc_size = journal_pages * sizeof(struct page_list); |
---|
2884 | | - if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) { |
---|
| 3564 | + if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) { |
---|
2885 | 3565 | *error = "Journal doesn't fit into memory"; |
---|
2886 | 3566 | r = -ENOMEM; |
---|
2887 | 3567 | goto bad; |
---|
2888 | 3568 | } |
---|
2889 | 3569 | ic->journal_pages = journal_pages; |
---|
2890 | 3570 | |
---|
2891 | | - ic->journal = dm_integrity_alloc_page_list(ic); |
---|
| 3571 | + ic->journal = dm_integrity_alloc_page_list(ic->journal_pages); |
---|
2892 | 3572 | if (!ic->journal) { |
---|
2893 | 3573 | *error = "Could not allocate memory for journal"; |
---|
2894 | 3574 | r = -ENOMEM; |
---|
.. | .. |
---|
2899 | 3579 | struct journal_completion comp; |
---|
2900 | 3580 | |
---|
2901 | 3581 | comp.ic = ic; |
---|
2902 | | - ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0); |
---|
| 3582 | + ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); |
---|
2903 | 3583 | if (IS_ERR(ic->journal_crypt)) { |
---|
2904 | 3584 | *error = "Invalid journal cipher"; |
---|
2905 | 3585 | r = PTR_ERR(ic->journal_crypt); |
---|
.. | .. |
---|
2920 | 3600 | DEBUG_print("cipher %s, block size %u iv size %u\n", |
---|
2921 | 3601 | ic->journal_crypt_alg.alg_string, blocksize, ivsize); |
---|
2922 | 3602 | |
---|
2923 | | - ic->journal_io = dm_integrity_alloc_page_list(ic); |
---|
| 3603 | + ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages); |
---|
2924 | 3604 | if (!ic->journal_io) { |
---|
2925 | 3605 | *error = "Could not allocate memory for journal io"; |
---|
2926 | 3606 | r = -ENOMEM; |
---|
.. | .. |
---|
2937 | 3617 | goto bad; |
---|
2938 | 3618 | } |
---|
2939 | 3619 | |
---|
2940 | | - crypt_iv = kmalloc(ivsize, GFP_KERNEL); |
---|
| 3620 | + crypt_iv = kzalloc(ivsize, GFP_KERNEL); |
---|
2941 | 3621 | if (!crypt_iv) { |
---|
2942 | 3622 | *error = "Could not allocate iv"; |
---|
2943 | 3623 | r = -ENOMEM; |
---|
2944 | 3624 | goto bad; |
---|
2945 | 3625 | } |
---|
2946 | 3626 | |
---|
2947 | | - ic->journal_xor = dm_integrity_alloc_page_list(ic); |
---|
| 3627 | + ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages); |
---|
2948 | 3628 | if (!ic->journal_xor) { |
---|
2949 | 3629 | *error = "Could not allocate memory for journal xor"; |
---|
2950 | 3630 | r = -ENOMEM; |
---|
.. | .. |
---|
2966 | 3646 | sg_set_buf(&sg[i], va, PAGE_SIZE); |
---|
2967 | 3647 | } |
---|
2968 | 3648 | sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); |
---|
2969 | | - memset(crypt_iv, 0x00, ivsize); |
---|
2970 | 3649 | |
---|
2971 | | - skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv); |
---|
| 3650 | + skcipher_request_set_crypt(req, sg, sg, |
---|
| 3651 | + PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv); |
---|
2972 | 3652 | init_completion(&comp.comp); |
---|
2973 | 3653 | comp.in_flight = (atomic_t)ATOMIC_INIT(1); |
---|
2974 | 3654 | if (do_crypt(true, req, &comp)) |
---|
.. | .. |
---|
3109 | 3789 | * device |
---|
3110 | 3790 | * offset from the start of the device |
---|
3111 | 3791 | * tag size |
---|
3112 | | - * D - direct writes, J - journal writes, R - recovery mode |
---|
| 3792 | + * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode |
---|
3113 | 3793 | * number of optional arguments |
---|
3114 | 3794 | * optional arguments: |
---|
3115 | 3795 | * journal_sectors |
---|
.. | .. |
---|
3117 | 3797 | * buffer_sectors |
---|
3118 | 3798 | * journal_watermark |
---|
3119 | 3799 | * commit_time |
---|
| 3800 | + * meta_device |
---|
| 3801 | + * block_size |
---|
| 3802 | + * sectors_per_bit |
---|
| 3803 | + * bitmap_flush_interval |
---|
3120 | 3804 | * internal_hash |
---|
3121 | 3805 | * journal_crypt |
---|
3122 | 3806 | * journal_mac |
---|
3123 | | - * block_size |
---|
| 3807 | + * recalculate |
---|
3124 | 3808 | */ |
---|
3125 | 3809 | static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) |
---|
3126 | 3810 | { |
---|
.. | .. |
---|
3130 | 3814 | unsigned extra_args; |
---|
3131 | 3815 | struct dm_arg_set as; |
---|
3132 | 3816 | static const struct dm_arg _args[] = { |
---|
3133 | | - {0, 12, "Invalid number of feature args"}, |
---|
| 3817 | + {0, 16, "Invalid number of feature args"}, |
---|
3134 | 3818 | }; |
---|
3135 | 3819 | unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; |
---|
3136 | | - bool recalculate; |
---|
3137 | 3820 | bool should_write_sb; |
---|
3138 | 3821 | __u64 threshold; |
---|
3139 | 3822 | unsigned long long start; |
---|
| 3823 | + __s8 log2_sectors_per_bitmap_bit = -1; |
---|
| 3824 | + __s8 log2_blocks_per_bitmap_bit; |
---|
| 3825 | + __u64 bits_in_journal; |
---|
| 3826 | + __u64 n_bitmap_bits; |
---|
3140 | 3827 | |
---|
3141 | 3828 | #define DIRECT_ARGUMENTS 4 |
---|
3142 | 3829 | |
---|
.. | .. |
---|
3161 | 3848 | init_waitqueue_head(&ic->copy_to_journal_wait); |
---|
3162 | 3849 | init_completion(&ic->crypto_backoff); |
---|
3163 | 3850 | atomic64_set(&ic->number_of_mismatches, 0); |
---|
| 3851 | + ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL; |
---|
3164 | 3852 | |
---|
3165 | 3853 | r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); |
---|
3166 | 3854 | if (r) { |
---|
.. | .. |
---|
3183 | 3871 | } |
---|
3184 | 3872 | } |
---|
3185 | 3873 | |
---|
3186 | | - if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) |
---|
| 3874 | + if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || |
---|
| 3875 | + !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) { |
---|
3187 | 3876 | ic->mode = argv[3][0]; |
---|
3188 | | - else { |
---|
3189 | | - ti->error = "Invalid mode (expecting J, D, R)"; |
---|
| 3877 | + } else { |
---|
| 3878 | + ti->error = "Invalid mode (expecting J, B, D, R)"; |
---|
3190 | 3879 | r = -EINVAL; |
---|
3191 | 3880 | goto bad; |
---|
3192 | 3881 | } |
---|
.. | .. |
---|
3196 | 3885 | buffer_sectors = DEFAULT_BUFFER_SECTORS; |
---|
3197 | 3886 | journal_watermark = DEFAULT_JOURNAL_WATERMARK; |
---|
3198 | 3887 | sync_msec = DEFAULT_SYNC_MSEC; |
---|
3199 | | - recalculate = false; |
---|
3200 | 3888 | ic->sectors_per_block = 1; |
---|
3201 | 3889 | |
---|
3202 | 3890 | as.argc = argc - DIRECT_ARGUMENTS; |
---|
.. | .. |
---|
3208 | 3896 | while (extra_args--) { |
---|
3209 | 3897 | const char *opt_string; |
---|
3210 | 3898 | unsigned val; |
---|
| 3899 | + unsigned long long llval; |
---|
3211 | 3900 | opt_string = dm_shift_arg(&as); |
---|
3212 | 3901 | if (!opt_string) { |
---|
3213 | 3902 | r = -EINVAL; |
---|
.. | .. |
---|
3229 | 3918 | dm_put_device(ti, ic->meta_dev); |
---|
3230 | 3919 | ic->meta_dev = NULL; |
---|
3231 | 3920 | } |
---|
3232 | | - r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev); |
---|
| 3921 | + r = dm_get_device(ti, strchr(opt_string, ':') + 1, |
---|
| 3922 | + dm_table_get_mode(ti->table), &ic->meta_dev); |
---|
3233 | 3923 | if (r) { |
---|
3234 | 3924 | ti->error = "Device lookup failed"; |
---|
3235 | 3925 | goto bad; |
---|
.. | .. |
---|
3243 | 3933 | goto bad; |
---|
3244 | 3934 | } |
---|
3245 | 3935 | ic->sectors_per_block = val >> SECTOR_SHIFT; |
---|
| 3936 | + } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { |
---|
| 3937 | + log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); |
---|
| 3938 | + } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { |
---|
| 3939 | + if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { |
---|
| 3940 | + r = -EINVAL; |
---|
| 3941 | + ti->error = "Invalid bitmap_flush_interval argument"; |
---|
| 3942 | + goto bad; |
---|
| 3943 | + } |
---|
| 3944 | + ic->bitmap_flush_interval = msecs_to_jiffies(val); |
---|
3246 | 3945 | } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { |
---|
3247 | 3946 | r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, |
---|
3248 | 3947 | "Invalid internal_hash argument"); |
---|
.. | .. |
---|
3259 | 3958 | if (r) |
---|
3260 | 3959 | goto bad; |
---|
3261 | 3960 | } else if (!strcmp(opt_string, "recalculate")) { |
---|
3262 | | - recalculate = true; |
---|
| 3961 | + ic->recalculate_flag = true; |
---|
| 3962 | + } else if (!strcmp(opt_string, "allow_discards")) { |
---|
| 3963 | + ic->discard = true; |
---|
| 3964 | + } else if (!strcmp(opt_string, "fix_padding")) { |
---|
| 3965 | + ic->fix_padding = true; |
---|
3263 | 3966 | } else if (!strcmp(opt_string, "legacy_recalculate")) { |
---|
3264 | 3967 | ic->legacy_recalculate = true; |
---|
3265 | 3968 | } else { |
---|
.. | .. |
---|
3277 | 3980 | |
---|
3278 | 3981 | if (!journal_sectors) { |
---|
3279 | 3982 | journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, |
---|
3280 | | - ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); |
---|
| 3983 | + ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); |
---|
3281 | 3984 | } |
---|
3282 | 3985 | |
---|
3283 | 3986 | if (!buffer_sectors) |
---|
.. | .. |
---|
3311 | 4014 | ic->log2_tag_size = __ffs(ic->tag_size); |
---|
3312 | 4015 | else |
---|
3313 | 4016 | ic->log2_tag_size = -1; |
---|
| 4017 | + |
---|
| 4018 | + if (ic->mode == 'B' && !ic->internal_hash) { |
---|
| 4019 | + r = -EINVAL; |
---|
| 4020 | + ti->error = "Bitmap mode can be only used with internal hash"; |
---|
| 4021 | + goto bad; |
---|
| 4022 | + } |
---|
| 4023 | + |
---|
| 4024 | + if (ic->discard && !ic->internal_hash) { |
---|
| 4025 | + r = -EINVAL; |
---|
| 4026 | + ti->error = "Discard can be only used with internal hash"; |
---|
| 4027 | + goto bad; |
---|
| 4028 | + } |
---|
3314 | 4029 | |
---|
3315 | 4030 | ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); |
---|
3316 | 4031 | ic->autocommit_msec = sync_msec; |
---|
.. | .. |
---|
3365 | 4080 | } |
---|
3366 | 4081 | INIT_WORK(&ic->commit_work, integrity_commit); |
---|
3367 | 4082 | |
---|
3368 | | - if (ic->mode == 'J') { |
---|
| 4083 | + if (ic->mode == 'J' || ic->mode == 'B') { |
---|
3369 | 4084 | ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); |
---|
3370 | 4085 | if (!ic->writer_wq) { |
---|
3371 | 4086 | ti->error = "Cannot allocate workqueue"; |
---|
.. | .. |
---|
3406 | 4121 | should_write_sb = true; |
---|
3407 | 4122 | } |
---|
3408 | 4123 | |
---|
3409 | | - if (!ic->sb->version || ic->sb->version > SB_VERSION_2) { |
---|
| 4124 | + if (!ic->sb->version || ic->sb->version > SB_VERSION_4) { |
---|
3410 | 4125 | r = -EINVAL; |
---|
3411 | 4126 | ti->error = "Unknown version"; |
---|
3412 | 4127 | goto bad; |
---|
.. | .. |
---|
3441 | 4156 | goto bad; |
---|
3442 | 4157 | } |
---|
3443 | 4158 | } |
---|
3444 | | - ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); |
---|
3445 | | - if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) { |
---|
3446 | | - /* test for overflow */ |
---|
3447 | | - r = -EINVAL; |
---|
3448 | | - ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors"; |
---|
3449 | | - goto bad; |
---|
3450 | | - } |
---|
3451 | 4159 | if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) { |
---|
3452 | 4160 | r = -EINVAL; |
---|
3453 | 4161 | ti->error = "Journal mac mismatch"; |
---|
| 4162 | + goto bad; |
---|
| 4163 | + } |
---|
| 4164 | + |
---|
| 4165 | + get_provided_data_sectors(ic); |
---|
| 4166 | + if (!ic->provided_data_sectors) { |
---|
| 4167 | + r = -EINVAL; |
---|
| 4168 | + ti->error = "The device is too small"; |
---|
3454 | 4169 | goto bad; |
---|
3455 | 4170 | } |
---|
3456 | 4171 | |
---|
.. | .. |
---|
3466 | 4181 | ti->error = "The device is too small"; |
---|
3467 | 4182 | goto bad; |
---|
3468 | 4183 | } |
---|
| 4184 | + |
---|
| 4185 | + if (log2_sectors_per_bitmap_bit < 0) |
---|
| 4186 | + log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT); |
---|
| 4187 | + if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block) |
---|
| 4188 | + log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block; |
---|
| 4189 | + |
---|
| 4190 | + bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3); |
---|
| 4191 | + if (bits_in_journal > UINT_MAX) |
---|
| 4192 | + bits_in_journal = UINT_MAX; |
---|
| 4193 | + while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit) |
---|
| 4194 | + log2_sectors_per_bitmap_bit++; |
---|
| 4195 | + |
---|
| 4196 | + log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block; |
---|
| 4197 | + ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; |
---|
| 4198 | + if (should_write_sb) { |
---|
| 4199 | + ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; |
---|
| 4200 | + } |
---|
| 4201 | + n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) |
---|
| 4202 | + + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit; |
---|
| 4203 | + ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8); |
---|
| 4204 | + |
---|
3469 | 4205 | if (!ic->meta_dev) |
---|
3470 | 4206 | ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); |
---|
3471 | 4207 | |
---|
.. | .. |
---|
3490 | 4226 | DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); |
---|
3491 | 4227 | DEBUG_print(" journal_entries %u\n", ic->journal_entries); |
---|
3492 | 4228 | DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); |
---|
3493 | | - DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors); |
---|
| 4229 | + DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT); |
---|
3494 | 4230 | DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); |
---|
3495 | 4231 | DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); |
---|
3496 | 4232 | DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); |
---|
3497 | | - DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors, |
---|
3498 | | - (unsigned long long)ic->provided_data_sectors); |
---|
| 4233 | + DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors); |
---|
3499 | 4234 | DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); |
---|
| 4235 | + DEBUG_print(" bits_in_journal %llu\n", bits_in_journal); |
---|
3500 | 4236 | |
---|
3501 | | - if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { |
---|
| 4237 | + if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { |
---|
3502 | 4238 | ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); |
---|
3503 | 4239 | ic->sb->recalc_sector = cpu_to_le64(0); |
---|
3504 | 4240 | } |
---|
3505 | 4241 | |
---|
3506 | | - if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { |
---|
3507 | | - if (!ic->internal_hash) { |
---|
3508 | | - r = -EINVAL; |
---|
3509 | | - ti->error = "Recalculate is only valid with internal hash"; |
---|
3510 | | - goto bad; |
---|
3511 | | - } |
---|
3512 | | - ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1); |
---|
| 4242 | + if (ic->internal_hash) { |
---|
| 4243 | + size_t recalc_tags_size; |
---|
| 4244 | + ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); |
---|
3513 | 4245 | if (!ic->recalc_wq ) { |
---|
3514 | 4246 | ti->error = "Cannot allocate workqueue"; |
---|
3515 | 4247 | r = -ENOMEM; |
---|
.. | .. |
---|
3522 | 4254 | r = -ENOMEM; |
---|
3523 | 4255 | goto bad; |
---|
3524 | 4256 | } |
---|
3525 | | - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, |
---|
3526 | | - ic->tag_size, GFP_KERNEL); |
---|
| 4257 | + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; |
---|
| 4258 | + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) |
---|
| 4259 | + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; |
---|
| 4260 | + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); |
---|
3527 | 4261 | if (!ic->recalc_tags) { |
---|
3528 | 4262 | ti->error = "Cannot allocate tags for recalculating"; |
---|
3529 | 4263 | r = -ENOMEM; |
---|
.. | .. |
---|
3559 | 4293 | r = create_journal(ic, &ti->error); |
---|
3560 | 4294 | if (r) |
---|
3561 | 4295 | goto bad; |
---|
| 4296 | + |
---|
| 4297 | + } |
---|
| 4298 | + |
---|
| 4299 | + if (ic->mode == 'B') { |
---|
| 4300 | + unsigned i; |
---|
| 4301 | + unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); |
---|
| 4302 | + |
---|
| 4303 | + ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); |
---|
| 4304 | + if (!ic->recalc_bitmap) { |
---|
| 4305 | + r = -ENOMEM; |
---|
| 4306 | + goto bad; |
---|
| 4307 | + } |
---|
| 4308 | + ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); |
---|
| 4309 | + if (!ic->may_write_bitmap) { |
---|
| 4310 | + r = -ENOMEM; |
---|
| 4311 | + goto bad; |
---|
| 4312 | + } |
---|
| 4313 | + ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL); |
---|
| 4314 | + if (!ic->bbs) { |
---|
| 4315 | + r = -ENOMEM; |
---|
| 4316 | + goto bad; |
---|
| 4317 | + } |
---|
| 4318 | + INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work); |
---|
| 4319 | + for (i = 0; i < ic->n_bitmap_blocks; i++) { |
---|
| 4320 | + struct bitmap_block_status *bbs = &ic->bbs[i]; |
---|
| 4321 | + unsigned sector, pl_index, pl_offset; |
---|
| 4322 | + |
---|
| 4323 | + INIT_WORK(&bbs->work, bitmap_block_work); |
---|
| 4324 | + bbs->ic = ic; |
---|
| 4325 | + bbs->idx = i; |
---|
| 4326 | + bio_list_init(&bbs->bio_queue); |
---|
| 4327 | + spin_lock_init(&bbs->bio_queue_lock); |
---|
| 4328 | + |
---|
| 4329 | + sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT); |
---|
| 4330 | + pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); |
---|
| 4331 | + pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); |
---|
| 4332 | + |
---|
| 4333 | + bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset; |
---|
| 4334 | + } |
---|
3562 | 4335 | } |
---|
3563 | 4336 | |
---|
3564 | 4337 | if (should_write_sb) { |
---|
3565 | | - int r; |
---|
3566 | | - |
---|
3567 | 4338 | init_journal(ic, 0, ic->journal_sections, 0); |
---|
3568 | 4339 | r = dm_integrity_failed(ic); |
---|
3569 | 4340 | if (unlikely(r)) { |
---|
.. | .. |
---|
3583 | 4354 | if (r) |
---|
3584 | 4355 | goto bad; |
---|
3585 | 4356 | } |
---|
| 4357 | + if (ic->mode == 'B') { |
---|
| 4358 | + unsigned max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8); |
---|
| 4359 | + if (!max_io_len) |
---|
| 4360 | + max_io_len = 1U << 31; |
---|
| 4361 | + DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len); |
---|
| 4362 | + if (!ti->max_io_len || ti->max_io_len > max_io_len) { |
---|
| 4363 | + r = dm_set_target_max_io_len(ti, max_io_len); |
---|
| 4364 | + if (r) |
---|
| 4365 | + goto bad; |
---|
| 4366 | + } |
---|
| 4367 | + } |
---|
3586 | 4368 | |
---|
3587 | 4369 | if (!ic->internal_hash) |
---|
3588 | 4370 | dm_integrity_set(ti, ic); |
---|
3589 | 4371 | |
---|
3590 | 4372 | ti->num_flush_bios = 1; |
---|
3591 | 4373 | ti->flush_supported = true; |
---|
| 4374 | + if (ic->discard) |
---|
| 4375 | + ti->num_discard_bios = 1; |
---|
3592 | 4376 | |
---|
3593 | 4377 | return 0; |
---|
| 4378 | + |
---|
3594 | 4379 | bad: |
---|
3595 | 4380 | dm_integrity_dtr(ti); |
---|
3596 | 4381 | return r; |
---|
.. | .. |
---|
3603 | 4388 | BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); |
---|
3604 | 4389 | BUG_ON(!list_empty(&ic->wait_list)); |
---|
3605 | 4390 | |
---|
| 4391 | + if (ic->mode == 'B') |
---|
| 4392 | + cancel_delayed_work_sync(&ic->bitmap_flush_work); |
---|
3606 | 4393 | if (ic->metadata_wq) |
---|
3607 | 4394 | destroy_workqueue(ic->metadata_wq); |
---|
3608 | 4395 | if (ic->wait_wq) |
---|
.. | .. |
---|
3615 | 4402 | destroy_workqueue(ic->writer_wq); |
---|
3616 | 4403 | if (ic->recalc_wq) |
---|
3617 | 4404 | destroy_workqueue(ic->recalc_wq); |
---|
3618 | | - if (ic->recalc_buffer) |
---|
3619 | | - vfree(ic->recalc_buffer); |
---|
3620 | | - if (ic->recalc_tags) |
---|
3621 | | - kvfree(ic->recalc_tags); |
---|
| 4405 | + vfree(ic->recalc_buffer); |
---|
| 4406 | + kvfree(ic->recalc_tags); |
---|
| 4407 | + kvfree(ic->bbs); |
---|
3622 | 4408 | if (ic->bufio) |
---|
3623 | 4409 | dm_bufio_client_destroy(ic->bufio); |
---|
3624 | 4410 | mempool_exit(&ic->journal_io_mempool); |
---|
.. | .. |
---|
3628 | 4414 | dm_put_device(ti, ic->dev); |
---|
3629 | 4415 | if (ic->meta_dev) |
---|
3630 | 4416 | dm_put_device(ti, ic->meta_dev); |
---|
3631 | | - dm_integrity_free_page_list(ic, ic->journal); |
---|
3632 | | - dm_integrity_free_page_list(ic, ic->journal_io); |
---|
3633 | | - dm_integrity_free_page_list(ic, ic->journal_xor); |
---|
| 4417 | + dm_integrity_free_page_list(ic->journal); |
---|
| 4418 | + dm_integrity_free_page_list(ic->journal_io); |
---|
| 4419 | + dm_integrity_free_page_list(ic->journal_xor); |
---|
| 4420 | + dm_integrity_free_page_list(ic->recalc_bitmap); |
---|
| 4421 | + dm_integrity_free_page_list(ic->may_write_bitmap); |
---|
3634 | 4422 | if (ic->journal_scatterlist) |
---|
3635 | 4423 | dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist); |
---|
3636 | 4424 | if (ic->journal_io_scatterlist) |
---|
.. | .. |
---|
3641 | 4429 | for (i = 0; i < ic->journal_sections; i++) { |
---|
3642 | 4430 | struct skcipher_request *req = ic->sk_requests[i]; |
---|
3643 | 4431 | if (req) { |
---|
3644 | | - kzfree(req->iv); |
---|
| 4432 | + kfree_sensitive(req->iv); |
---|
3645 | 4433 | skcipher_request_free(req); |
---|
3646 | 4434 | } |
---|
3647 | 4435 | } |
---|
.. | .. |
---|
3668 | 4456 | |
---|
3669 | 4457 | static struct target_type integrity_target = { |
---|
3670 | 4458 | .name = "integrity", |
---|
3671 | | - .version = {1, 2, 0}, |
---|
| 4459 | + .version = {1, 6, 0}, |
---|
3672 | 4460 | .module = THIS_MODULE, |
---|
3673 | 4461 | .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, |
---|
3674 | 4462 | .ctr = dm_integrity_ctr, |
---|
.. | .. |
---|
3681 | 4469 | .io_hints = dm_integrity_io_hints, |
---|
3682 | 4470 | }; |
---|
3683 | 4471 | |
---|
3684 | | -int __init dm_integrity_init(void) |
---|
| 4472 | +static int __init dm_integrity_init(void) |
---|
3685 | 4473 | { |
---|
3686 | 4474 | int r; |
---|
3687 | 4475 | |
---|
.. | .. |
---|
3693 | 4481 | } |
---|
3694 | 4482 | |
---|
3695 | 4483 | r = dm_register_target(&integrity_target); |
---|
3696 | | - |
---|
3697 | | - if (r < 0) |
---|
| 4484 | + if (r < 0) { |
---|
3698 | 4485 | DMERR("register failed %d", r); |
---|
| 4486 | + kmem_cache_destroy(journal_io_cache); |
---|
| 4487 | + return r; |
---|
| 4488 | + } |
---|
3699 | 4489 | |
---|
3700 | | - return r; |
---|
| 4490 | + return 0; |
---|
3701 | 4491 | } |
---|
3702 | 4492 | |
---|
3703 | | -void dm_integrity_exit(void) |
---|
| 4493 | +static void __exit dm_integrity_exit(void) |
---|
3704 | 4494 | { |
---|
3705 | 4495 | dm_unregister_target(&integrity_target); |
---|
3706 | 4496 | kmem_cache_destroy(journal_io_cache); |
---|