hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/ext4/super.c
....@@ -43,7 +43,7 @@
4343 #include <linux/uaccess.h>
4444 #include <linux/iversion.h>
4545 #include <linux/unicode.h>
46
-
46
+#include <linux/part_stat.h>
4747 #include <linux/kthread.h>
4848 #include <linux/freezer.h>
4949
....@@ -93,11 +93,11 @@
9393 * i_mmap_rwsem (inode->i_mmap_rwsem)!
9494 *
9595 * page fault path:
96
- * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
96
+ * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
9797 * page lock -> i_data_sem (rw)
9898 *
9999 * buffered write path:
100
- * sb_start_write -> i_mutex -> mmap_sem
100
+ * sb_start_write -> i_mutex -> mmap_lock
101101 * sb_start_write -> i_mutex -> transaction start -> page lock ->
102102 * i_data_sem (rw)
103103 *
....@@ -107,7 +107,7 @@
107107 * i_data_sem (rw)
108108 *
109109 * direct IO:
110
- * sb_start_write -> i_mutex -> mmap_sem
110
+ * sb_start_write -> i_mutex -> mmap_lock
111111 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
112112 *
113113 * writepages:
....@@ -141,27 +141,109 @@
141141 MODULE_ALIAS("ext3");
142142 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
143143
144
+
145
+static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags,
146
+ bh_end_io_t *end_io)
147
+{
148
+ /*
149
+ * buffer's verified bit is no longer valid after reading from
150
+ * disk again due to write out error, clear it to make sure we
151
+ * recheck the buffer contents.
152
+ */
153
+ clear_buffer_verified(bh);
154
+
155
+ bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
156
+ get_bh(bh);
157
+ submit_bh(REQ_OP_READ, op_flags, bh);
158
+}
159
+
160
+void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags,
161
+ bh_end_io_t *end_io)
162
+{
163
+ BUG_ON(!buffer_locked(bh));
164
+
165
+ if (ext4_buffer_uptodate(bh)) {
166
+ unlock_buffer(bh);
167
+ return;
168
+ }
169
+ __ext4_read_bh(bh, op_flags, end_io);
170
+}
171
+
172
+int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io)
173
+{
174
+ BUG_ON(!buffer_locked(bh));
175
+
176
+ if (ext4_buffer_uptodate(bh)) {
177
+ unlock_buffer(bh);
178
+ return 0;
179
+ }
180
+
181
+ __ext4_read_bh(bh, op_flags, end_io);
182
+
183
+ wait_on_buffer(bh);
184
+ if (buffer_uptodate(bh))
185
+ return 0;
186
+ return -EIO;
187
+}
188
+
189
+int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait)
190
+{
191
+ lock_buffer(bh);
192
+ if (!wait) {
193
+ ext4_read_bh_nowait(bh, op_flags, NULL);
194
+ return 0;
195
+ }
196
+ return ext4_read_bh(bh, op_flags, NULL);
197
+}
198
+
144199 /*
145
- * This works like sb_bread() except it uses ERR_PTR for error
200
+ * This works like __bread_gfp() except it uses ERR_PTR for error
146201 * returns. Currently with sb_bread it's impossible to distinguish
147202 * between ENOMEM and EIO situations (since both result in a NULL
148203 * return.
149204 */
150
-struct buffer_head *
151
-ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
205
+static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
206
+ sector_t block, int op_flags,
207
+ gfp_t gfp)
152208 {
153
- struct buffer_head *bh = sb_getblk(sb, block);
209
+ struct buffer_head *bh;
210
+ int ret;
154211
212
+ bh = sb_getblk_gfp(sb, block, gfp);
155213 if (bh == NULL)
156214 return ERR_PTR(-ENOMEM);
157
- if (buffer_uptodate(bh))
215
+ if (ext4_buffer_uptodate(bh))
158216 return bh;
159
- ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
160
- wait_on_buffer(bh);
161
- if (buffer_uptodate(bh))
162
- return bh;
163
- put_bh(bh);
164
- return ERR_PTR(-EIO);
217
+
218
+ ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
219
+ if (ret) {
220
+ put_bh(bh);
221
+ return ERR_PTR(ret);
222
+ }
223
+ return bh;
224
+}
225
+
226
+struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
227
+ int op_flags)
228
+{
229
+ return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
230
+}
231
+
232
+struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
233
+ sector_t block)
234
+{
235
+ return __ext4_sb_bread_gfp(sb, block, 0, 0);
236
+}
237
+
238
+void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
239
+{
240
+ struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
241
+
242
+ if (likely(bh)) {
243
+ if (trylock_buffer(bh))
244
+ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
245
+ brelse(bh);
246
+ }
165247 }
166248
167249 static int ext4_verify_csum_type(struct super_block *sb,
....@@ -202,26 +284,6 @@
202284 return;
203285
204286 es->s_checksum = ext4_superblock_csum(sb, es);
205
-}
206
-
207
-void *ext4_kvmalloc(size_t size, gfp_t flags)
208
-{
209
- void *ret;
210
-
211
- ret = kmalloc(size, flags | __GFP_NOWARN);
212
- if (!ret)
213
- ret = __vmalloc(size, flags, PAGE_KERNEL);
214
- return ret;
215
-}
216
-
217
-void *ext4_kvzalloc(size_t size, gfp_t flags)
218
-{
219
- void *ret;
220
-
221
- ret = kzalloc(size, flags | __GFP_NOWARN);
222
- if (!ret)
223
- ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
224
- return ret;
225287 }
226288
227289 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
....@@ -355,44 +417,6 @@
355417 #define ext4_get_tstamp(es, tstamp) \
356418 __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
357419
358
-static void __save_error_info(struct super_block *sb, const char *func,
359
- unsigned int line)
360
-{
361
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
362
-
363
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
364
- if (bdev_read_only(sb->s_bdev))
365
- return;
366
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
367
- ext4_update_tstamp(es, s_last_error_time);
368
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
369
- es->s_last_error_line = cpu_to_le32(line);
370
- if (!es->s_first_error_time) {
371
- es->s_first_error_time = es->s_last_error_time;
372
- es->s_first_error_time_hi = es->s_last_error_time_hi;
373
- strncpy(es->s_first_error_func, func,
374
- sizeof(es->s_first_error_func));
375
- es->s_first_error_line = cpu_to_le32(line);
376
- es->s_first_error_ino = es->s_last_error_ino;
377
- es->s_first_error_block = es->s_last_error_block;
378
- }
379
- /*
380
- * Start the daily error reporting function if it hasn't been
381
- * started already
382
- */
383
- if (!es->s_error_count)
384
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
385
- le32_add_cpu(&es->s_error_count, 1);
386
-}
387
-
388
-static void save_error_info(struct super_block *sb, const char *func,
389
- unsigned int line)
390
-{
391
- __save_error_info(sb, func, line);
392
- if (!bdev_read_only(sb->s_bdev))
393
- ext4_commit_super(sb, 1);
394
-}
395
-
396420 /*
397421 * The del_gendisk() function uninitializes the disk-specific data
398422 * structures, including the bdi structure, without telling anyone
....@@ -432,10 +456,176 @@
432456 spin_unlock(&sbi->s_md_lock);
433457 }
434458
459
+/*
460
+ * This writepage callback for write_cache_pages()
461
+ * takes care of a few cases after page cleaning.
462
+ *
463
+ * write_cache_pages() already checks for dirty pages
464
+ * and calls clear_page_dirty_for_io(), which we want,
465
+ * to write protect the pages.
466
+ *
467
+ * However, we may have to redirty a page (see below.)
468
+ */
469
+static int ext4_journalled_writepage_callback(struct page *page,
470
+ struct writeback_control *wbc,
471
+ void *data)
472
+{
473
+ transaction_t *transaction = (transaction_t *) data;
474
+ struct buffer_head *bh, *head;
475
+ struct journal_head *jh;
476
+
477
+ bh = head = page_buffers(page);
478
+ do {
479
+ /*
480
+ * We have to redirty a page in these cases:
481
+ * 1) If buffer is dirty, it means the page was dirty because it
482
+ * contains a buffer that needs checkpointing. So the dirty bit
483
+ * needs to be preserved so that checkpointing writes the buffer
484
+ * properly.
485
+ * 2) If buffer is not part of the committing transaction
486
+ * (we may have just accidentally come across this buffer because
487
+ * inode range tracking is not exact) or if the currently running
488
+ * transaction already contains this buffer as well, dirty bit
489
+ * needs to be preserved so that the buffer gets writeprotected
490
+ * properly on running transaction's commit.
491
+ */
492
+ jh = bh2jh(bh);
493
+ if (buffer_dirty(bh) ||
494
+ (jh && (jh->b_transaction != transaction ||
495
+ jh->b_next_transaction))) {
496
+ redirty_page_for_writepage(wbc, page);
497
+ goto out;
498
+ }
499
+ } while ((bh = bh->b_this_page) != head);
500
+
501
+out:
502
+ return AOP_WRITEPAGE_ACTIVATE;
503
+}
504
+
505
+static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
506
+{
507
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
508
+ struct writeback_control wbc = {
509
+ .sync_mode = WB_SYNC_ALL,
510
+ .nr_to_write = LONG_MAX,
511
+ .range_start = jinode->i_dirty_start,
512
+ .range_end = jinode->i_dirty_end,
513
+ };
514
+
515
+ return write_cache_pages(mapping, &wbc,
516
+ ext4_journalled_writepage_callback,
517
+ jinode->i_transaction);
518
+}
519
+
520
+static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
521
+{
522
+ int ret;
523
+
524
+ if (ext4_should_journal_data(jinode->i_vfs_inode))
525
+ ret = ext4_journalled_submit_inode_data_buffers(jinode);
526
+ else
527
+ ret = jbd2_journal_submit_inode_data_buffers(jinode);
528
+
529
+ return ret;
530
+}
531
+
532
+static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
533
+{
534
+ int ret = 0;
535
+
536
+ if (!ext4_should_journal_data(jinode->i_vfs_inode))
537
+ ret = jbd2_journal_finish_inode_data_buffers(jinode);
538
+
539
+ return ret;
540
+}
541
+
435542 static bool system_going_down(void)
436543 {
437544 return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
438545 || system_state == SYSTEM_RESTART;
546
+}
547
+
548
+struct ext4_err_translation {
549
+ int code;
550
+ int errno;
551
+};
552
+
553
+#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
554
+
555
+static struct ext4_err_translation err_translation[] = {
556
+ EXT4_ERR_TRANSLATE(EIO),
557
+ EXT4_ERR_TRANSLATE(ENOMEM),
558
+ EXT4_ERR_TRANSLATE(EFSBADCRC),
559
+ EXT4_ERR_TRANSLATE(EFSCORRUPTED),
560
+ EXT4_ERR_TRANSLATE(ENOSPC),
561
+ EXT4_ERR_TRANSLATE(ENOKEY),
562
+ EXT4_ERR_TRANSLATE(EROFS),
563
+ EXT4_ERR_TRANSLATE(EFBIG),
564
+ EXT4_ERR_TRANSLATE(EEXIST),
565
+ EXT4_ERR_TRANSLATE(ERANGE),
566
+ EXT4_ERR_TRANSLATE(EOVERFLOW),
567
+ EXT4_ERR_TRANSLATE(EBUSY),
568
+ EXT4_ERR_TRANSLATE(ENOTDIR),
569
+ EXT4_ERR_TRANSLATE(ENOTEMPTY),
570
+ EXT4_ERR_TRANSLATE(ESHUTDOWN),
571
+ EXT4_ERR_TRANSLATE(EFAULT),
572
+};
573
+
574
+static int ext4_errno_to_code(int errno)
575
+{
576
+ int i;
577
+
578
+ for (i = 0; i < ARRAY_SIZE(err_translation); i++)
579
+ if (err_translation[i].errno == errno)
580
+ return err_translation[i].code;
581
+ return EXT4_ERR_UNKNOWN;
582
+}
583
+
584
+static void __save_error_info(struct super_block *sb, int error,
585
+ __u32 ino, __u64 block,
586
+ const char *func, unsigned int line)
587
+{
588
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
589
+
590
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
591
+ if (bdev_read_only(sb->s_bdev))
592
+ return;
593
+ /* We default to EFSCORRUPTED error... */
594
+ if (error == 0)
595
+ error = EFSCORRUPTED;
596
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
597
+ ext4_update_tstamp(es, s_last_error_time);
598
+ strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
599
+ es->s_last_error_line = cpu_to_le32(line);
600
+ es->s_last_error_ino = cpu_to_le32(ino);
601
+ es->s_last_error_block = cpu_to_le64(block);
602
+ es->s_last_error_errcode = ext4_errno_to_code(error);
603
+ if (!es->s_first_error_time) {
604
+ es->s_first_error_time = es->s_last_error_time;
605
+ es->s_first_error_time_hi = es->s_last_error_time_hi;
606
+ strncpy(es->s_first_error_func, func,
607
+ sizeof(es->s_first_error_func));
608
+ es->s_first_error_line = cpu_to_le32(line);
609
+ es->s_first_error_ino = es->s_last_error_ino;
610
+ es->s_first_error_block = es->s_last_error_block;
611
+ es->s_first_error_errcode = es->s_last_error_errcode;
612
+ }
613
+ /*
614
+ * Start the daily error reporting function if it hasn't been
615
+ * started already
616
+ */
617
+ if (!es->s_error_count)
618
+ mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
619
+ le32_add_cpu(&es->s_error_count, 1);
620
+}
621
+
622
+static void save_error_info(struct super_block *sb, int error,
623
+ __u32 ino, __u64 block,
624
+ const char *func, unsigned int line)
625
+{
626
+ __save_error_info(sb, error, ino, block, func, line);
627
+ if (!bdev_read_only(sb->s_bdev))
628
+ ext4_commit_super(sb, 1);
439629 }
440630
441631 /* Deal with the reporting of failure conditions on a filesystem such as
....@@ -463,7 +653,7 @@
463653 if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT))
464654 return;
465655
466
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
656
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
467657 if (journal)
468658 jbd2_journal_abort(journal, -EIO);
469659 /*
....@@ -480,9 +670,6 @@
480670 smp_wmb();
481671 sb->s_flags |= SB_RDONLY;
482672 } else if (test_opt(sb, ERRORS_PANIC)) {
483
- if (EXT4_SB(sb)->s_journal &&
484
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
485
- return;
486673 panic("EXT4-fs (device %s): panic forced after error\n",
487674 sb->s_id);
488675 }
....@@ -493,7 +680,8 @@
493680 "EXT4-fs error")
494681
495682 void __ext4_error(struct super_block *sb, const char *function,
496
- unsigned int line, const char *fmt, ...)
683
+ unsigned int line, int error, __u64 block,
684
+ const char *fmt, ...)
497685 {
498686 struct va_format vaf;
499687 va_list args;
....@@ -511,24 +699,21 @@
511699 sb->s_id, function, line, current->comm, &vaf);
512700 va_end(args);
513701 }
514
- save_error_info(sb, function, line);
702
+ save_error_info(sb, error, 0, block, function, line);
515703 ext4_handle_error(sb);
516704 }
517705
518706 void __ext4_error_inode(struct inode *inode, const char *function,
519
- unsigned int line, ext4_fsblk_t block,
707
+ unsigned int line, ext4_fsblk_t block, int error,
520708 const char *fmt, ...)
521709 {
522710 va_list args;
523711 struct va_format vaf;
524
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
525712
526713 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
527714 return;
528715
529716 trace_ext4_error(inode->i_sb, function, line);
530
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
531
- es->s_last_error_block = cpu_to_le64(block);
532717 if (ext4_error_ratelimit(inode->i_sb)) {
533718 va_start(args, fmt);
534719 vaf.fmt = fmt;
....@@ -545,7 +730,8 @@
545730 current->comm, &vaf);
546731 va_end(args);
547732 }
548
- save_error_info(inode->i_sb, function, line);
733
+ save_error_info(inode->i_sb, error, inode->i_ino, block,
734
+ function, line);
549735 ext4_handle_error(inode->i_sb);
550736 }
551737
....@@ -555,7 +741,6 @@
555741 {
556742 va_list args;
557743 struct va_format vaf;
558
- struct ext4_super_block *es;
559744 struct inode *inode = file_inode(file);
560745 char pathname[80], *path;
561746
....@@ -563,8 +748,6 @@
563748 return;
564749
565750 trace_ext4_error(inode->i_sb, function, line);
566
- es = EXT4_SB(inode->i_sb)->s_es;
567
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
568751 if (ext4_error_ratelimit(inode->i_sb)) {
569752 path = file_path(file, pathname, sizeof(pathname));
570753 if (IS_ERR(path))
....@@ -586,7 +769,8 @@
586769 current->comm, path, &vaf);
587770 va_end(args);
588771 }
589
- save_error_info(inode->i_sb, function, line);
772
+ save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
773
+ function, line);
590774 ext4_handle_error(inode->i_sb);
591775 }
592776
....@@ -654,7 +838,7 @@
654838 sb->s_id, function, line, errstr);
655839 }
656840
657
- save_error_info(sb, function, line);
841
+ save_error_info(sb, -errno, 0, 0, function, line);
658842 ext4_handle_error(sb);
659843 }
660844
....@@ -669,7 +853,7 @@
669853 */
670854
671855 void __ext4_abort(struct super_block *sb, const char *function,
672
- unsigned int line, const char *fmt, ...)
856
+ unsigned int line, int error, const char *fmt, ...)
673857 {
674858 struct va_format vaf;
675859 va_list args;
....@@ -677,7 +861,7 @@
677861 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
678862 return;
679863
680
- save_error_info(sb, function, line);
864
+ save_error_info(sb, error, 0, 0, function, line);
681865 va_start(args, fmt);
682866 vaf.fmt = fmt;
683867 vaf.va = &args;
....@@ -686,24 +870,20 @@
686870 va_end(args);
687871
688872 if (sb_rdonly(sb) == 0) {
873
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
874
+ if (EXT4_SB(sb)->s_journal)
875
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
876
+
689877 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
690
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
691878 /*
692879 * Make sure updated value of ->s_mount_flags will be visible
693880 * before ->s_flags update
694881 */
695882 smp_wmb();
696883 sb->s_flags |= SB_RDONLY;
697
- if (EXT4_SB(sb)->s_journal)
698
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
699
- save_error_info(sb, function, line);
700884 }
701
- if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
702
- if (EXT4_SB(sb)->s_journal &&
703
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
704
- return;
885
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
705886 panic("EXT4-fs panic from previous error\n");
706
- }
707887 }
708888
709889 void __ext4_msg(struct super_block *sb,
....@@ -712,6 +892,7 @@
712892 struct va_format vaf;
713893 va_list args;
714894
895
+ atomic_inc(&EXT4_SB(sb)->s_msg_count);
715896 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
716897 return;
717898
....@@ -722,9 +903,12 @@
722903 va_end(args);
723904 }
724905
725
-#define ext4_warning_ratelimit(sb) \
726
- ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
727
- "EXT4-fs warning")
906
+static int ext4_warning_ratelimit(struct super_block *sb)
907
+{
908
+ atomic_inc(&EXT4_SB(sb)->s_warning_count);
909
+ return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
910
+ "EXT4-fs warning");
911
+}
728912
729913 void __ext4_warning(struct super_block *sb, const char *function,
730914 unsigned int line, const char *fmt, ...)
....@@ -770,15 +954,12 @@
770954 {
771955 struct va_format vaf;
772956 va_list args;
773
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
774957
775958 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
776959 return;
777960
778961 trace_ext4_error(sb, function, line);
779
- es->s_last_error_ino = cpu_to_le32(ino);
780
- es->s_last_error_block = cpu_to_le64(block);
781
- __save_error_info(sb, function, line);
962
+ __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
782963
783964 if (ext4_error_ratelimit(sb)) {
784965 va_start(args, fmt);
....@@ -830,6 +1011,8 @@
8301011 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
8311012 int ret;
8321013
1014
+ if (!grp || !gdp)
1015
+ return;
8331016 if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
8341017 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
8351018 &grp->bb_state);
....@@ -882,7 +1065,6 @@
8821065 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
8831066 {
8841067 struct block_device *bdev;
885
- char b[BDEVNAME_SIZE];
8861068
8871069 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
8881070 if (IS_ERR(bdev))
....@@ -890,8 +1072,9 @@
8901072 return bdev;
8911073
8921074 fail:
893
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
894
- __bdevname(dev, b), PTR_ERR(bdev));
1075
+ ext4_msg(sb, KERN_ERR,
1076
+ "failed to open journal device unknown-block(%u,%u) %ld",
1077
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
8951078 return NULL;
8961079 }
8971080
....@@ -906,10 +1089,16 @@
9061089 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
9071090 {
9081091 struct block_device *bdev;
909
- bdev = sbi->journal_bdev;
1092
+ bdev = sbi->s_journal_bdev;
9101093 if (bdev) {
1094
+ /*
1095
+ * Invalidate the journal device's buffers. We don't want them
1096
+ * floating about in memory - the physical journal device may
1097
+ * hotswapped, and it breaks the `ro-after' testing code.
1098
+ */
1099
+ invalidate_bdev(bdev);
9111100 ext4_blkdev_put(bdev);
912
- sbi->journal_bdev = NULL;
1101
+ sbi->s_journal_bdev = NULL;
9131102 }
9141103 }
9151104
....@@ -974,6 +1163,18 @@
9741163 int aborted = 0;
9751164 int i, err;
9761165
1166
+ /*
1167
+ * Unregister sysfs before destroying jbd2 journal.
1168
+ * Since we could still access attr_journal_task attribute via sysfs
1169
+ * path which could have sbi->s_journal->j_task as NULL.
1170
+ * Unregister sysfs before flushing sbi->s_error_work.
1171
+ * Since user may read /proc/fs/ext4/xx/mb_groups during umount, if
1172
+ * a metadata read fails verification, error work will be queued.
1173
+ * flush_stashed_error_work will call start_this_handle, which may trigger
1174
+ * BUG_ON.
1175
+ */
1176
+ ext4_unregister_sysfs(sb);
1177
+
9771178 ext4_unregister_li_request(sb);
9781179 ext4_quota_off_umount(sb);
9791180
....@@ -983,11 +1184,11 @@
9831184 aborted = is_journal_aborted(sbi->s_journal);
9841185 err = jbd2_journal_destroy(sbi->s_journal);
9851186 sbi->s_journal = NULL;
986
- if ((err < 0) && !aborted)
987
- ext4_abort(sb, "Couldn't clean up the journal");
1187
+ if ((err < 0) && !aborted) {
1188
+ ext4_abort(sb, -err, "Couldn't clean up the journal");
1189
+ }
9881190 }
9891191
990
- ext4_unregister_sysfs(sb);
9911192 ext4_es_unregister_shrinker(sbi);
9921193 del_timer_sync(&sbi->s_err_report);
9931194 ext4_release_system_zone(sb);
....@@ -1017,6 +1218,7 @@
10171218 percpu_counter_destroy(&sbi->s_freeinodes_counter);
10181219 percpu_counter_destroy(&sbi->s_dirs_counter);
10191220 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
1221
+ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
10201222 percpu_free_rwsem(&sbi->s_writepages_rwsem);
10211223 #ifdef CONFIG_QUOTA
10221224 for (i = 0; i < EXT4_MAXQUOTAS; i++)
....@@ -1033,26 +1235,19 @@
10331235
10341236 sync_blockdev(sb->s_bdev);
10351237 invalidate_bdev(sb->s_bdev);
1036
- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
1037
- /*
1038
- * Invalidate the journal device's buffers. We don't want them
1039
- * floating about in memory - the physical journal device may
1040
- * hotswapped, and it breaks the `ro-after' testing code.
1041
- */
1042
- sync_blockdev(sbi->journal_bdev);
1043
- invalidate_bdev(sbi->journal_bdev);
1238
+ if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
1239
+ sync_blockdev(sbi->s_journal_bdev);
10441240 ext4_blkdev_remove(sbi);
10451241 }
1046
- if (sbi->s_ea_inode_cache) {
1047
- ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1048
- sbi->s_ea_inode_cache = NULL;
1049
- }
1050
- if (sbi->s_ea_block_cache) {
1051
- ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1052
- sbi->s_ea_block_cache = NULL;
1053
- }
1054
- if (sbi->s_mmp_tsk)
1055
- kthread_stop(sbi->s_mmp_tsk);
1242
+
1243
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1244
+ sbi->s_ea_inode_cache = NULL;
1245
+
1246
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1247
+ sbi->s_ea_block_cache = NULL;
1248
+
1249
+ ext4_stop_mmpd(sbi);
1250
+
10561251 brelse(sbi->s_sbh);
10571252 sb->s_fs_info = NULL;
10581253 /*
....@@ -1065,7 +1260,7 @@
10651260 crypto_free_shash(sbi->s_chksum_driver);
10661261 kfree(sbi->s_blockgroup_lock);
10671262 fs_put_dax(sbi->s_daxdev);
1068
- fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
1263
+ fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
10691264 #ifdef CONFIG_UNICODE
10701265 utf8_unload(sb->s_encoding);
10711266 #endif
....@@ -1086,8 +1281,10 @@
10861281 return NULL;
10871282
10881283 inode_set_iversion(&ei->vfs_inode, 1);
1284
+ ei->i_flags = 0;
10891285 spin_lock_init(&ei->i_raw_lock);
10901286 INIT_LIST_HEAD(&ei->i_prealloc_list);
1287
+ atomic_set(&ei->i_prealloc_active, 0);
10911288 spin_lock_init(&ei->i_prealloc_lock);
10921289 ext4_es_init_tree(&ei->i_es_tree);
10931290 rwlock_init(&ei->i_es_lock);
....@@ -1096,9 +1293,8 @@
10961293 ei->i_es_shk_nr = 0;
10971294 ei->i_es_shrink_lblk = 0;
10981295 ei->i_reserved_data_blocks = 0;
1099
- ei->i_da_metadata_calc_len = 0;
1100
- ei->i_da_metadata_calc_last_lblock = 0;
11011296 spin_lock_init(&(ei->i_block_reservation_lock));
1297
+ ext4_init_pending_tree(&ei->i_pending_tree);
11021298 #ifdef CONFIG_QUOTA
11031299 ei->i_reserved_quota = 0;
11041300 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
....@@ -1110,6 +1306,8 @@
11101306 ei->i_datasync_tid = 0;
11111307 atomic_set(&ei->i_unwritten, 0);
11121308 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1309
+ ext4_fc_init_inode(&ei->vfs_inode);
1310
+ mutex_init(&ei->i_fc_lock);
11131311 return &ei->vfs_inode;
11141312 }
11151313
....@@ -1124,12 +1322,13 @@
11241322 return drop;
11251323 }
11261324
1127
-static void ext4_i_callback(struct rcu_head *head)
1325
+static void ext4_free_in_core_inode(struct inode *inode)
11281326 {
1129
- struct inode *inode = container_of(head, struct inode, i_rcu);
1130
-
11311327 fscrypt_free_inode(inode);
1132
-
1328
+ if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1329
+ pr_warn("%s: inode %ld still in fc list",
1330
+ __func__, inode->i_ino);
1331
+ }
11331332 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
11341333 }
11351334
....@@ -1144,7 +1343,12 @@
11441343 true);
11451344 dump_stack();
11461345 }
1147
- call_rcu(&inode->i_rcu, ext4_i_callback);
1346
+
1347
+ if (EXT4_I(inode)->i_reserved_data_blocks)
1348
+ ext4_msg(inode->i_sb, KERN_ERR,
1349
+ "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1350
+ inode->i_ino, EXT4_I(inode),
1351
+ EXT4_I(inode)->i_reserved_data_blocks);
11481352 }
11491353
11501354 static void init_once(void *foo)
....@@ -1156,6 +1360,7 @@
11561360 init_rwsem(&ei->i_data_sem);
11571361 init_rwsem(&ei->i_mmap_sem);
11581362 inode_init_once(&ei->vfs_inode);
1363
+ ext4_fc_init_inode(&ei->vfs_inode);
11591364 }
11601365
11611366 static int __init init_inodecache(void)
....@@ -1184,11 +1389,12 @@
11841389
11851390 void ext4_clear_inode(struct inode *inode)
11861391 {
1392
+ ext4_fc_del(inode);
11871393 invalidate_inode_buffers(inode);
11881394 clear_inode(inode);
1189
- dquot_drop(inode);
1190
- ext4_discard_preallocations(inode);
1395
+ ext4_discard_preallocations(inode, 0);
11911396 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1397
+ dquot_drop(inode);
11921398 if (EXT4_I(inode)->jinode) {
11931399 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
11941400 EXT4_I(inode)->jinode);
....@@ -1258,8 +1464,8 @@
12581464 if (!page_has_buffers(page))
12591465 return 0;
12601466 if (journal)
1261
- return jbd2_journal_try_to_free_buffers(journal, page,
1262
- wait & ~__GFP_DIRECT_RECLAIM);
1467
+ return jbd2_journal_try_to_free_buffers(journal, page);
1468
+
12631469 return try_to_free_buffers(page);
12641470 }
12651471
....@@ -1288,6 +1494,9 @@
12881494 if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
12891495 return -EINVAL;
12901496
1497
+ if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
1498
+ return -EOPNOTSUPP;
1499
+
12911500 res = ext4_convert_inline_data(inode);
12921501 if (res)
12931502 return res;
....@@ -1313,7 +1522,7 @@
13131522 * Update inode->i_flags - S_ENCRYPTED will be enabled,
13141523 * S_DAX may be disabled
13151524 */
1316
- ext4_set_inode_flags(inode);
1525
+ ext4_set_inode_flags(inode, false);
13171526 }
13181527 return res;
13191528 }
....@@ -1340,7 +1549,7 @@
13401549 * Update inode->i_flags - S_ENCRYPTED will be enabled,
13411550 * S_DAX may be disabled
13421551 */
1343
- ext4_set_inode_flags(inode);
1552
+ ext4_set_inode_flags(inode, false);
13441553 res = ext4_mark_inode_dirty(handle, inode);
13451554 if (res)
13461555 EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
....@@ -1354,10 +1563,9 @@
13541563 return res;
13551564 }
13561565
1357
-static const union fscrypt_context *
1358
-ext4_get_dummy_context(struct super_block *sb)
1566
+static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb)
13591567 {
1360
- return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
1568
+ return EXT4_SB(sb)->s_dummy_enc_policy.policy;
13611569 }
13621570
13631571 static bool ext4_has_stable_inodes(struct super_block *sb)
....@@ -1372,21 +1580,15 @@
13721580 *lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
13731581 }
13741582
1375
-static bool ext4_inline_crypt_enabled(struct super_block *sb)
1376
-{
1377
- return test_opt(sb, INLINECRYPT);
1378
-}
1379
-
13801583 static const struct fscrypt_operations ext4_cryptops = {
13811584 .key_prefix = "ext4:",
13821585 .get_context = ext4_get_context,
13831586 .set_context = ext4_set_context,
1384
- .get_dummy_context = ext4_get_dummy_context,
1587
+ .get_dummy_policy = ext4_get_dummy_policy,
13851588 .empty_dir = ext4_empty_dir,
13861589 .max_namelen = EXT4_NAME_LEN,
13871590 .has_stable_inodes = ext4_has_stable_inodes,
13881591 .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
1389
- .inline_crypt_enabled = ext4_inline_crypt_enabled,
13901592 };
13911593 #endif
13921594
....@@ -1409,7 +1611,6 @@
14091611 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
14101612 unsigned int flags);
14111613 static int ext4_enable_quotas(struct super_block *sb);
1412
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
14131614
14141615 static struct dquot **ext4_get_dquots(struct inode *inode)
14151616 {
....@@ -1427,7 +1628,7 @@
14271628 .destroy_dquot = dquot_destroy,
14281629 .get_projid = ext4_get_projid,
14291630 .get_inode_usage = ext4_get_inode_usage,
1430
- .get_next_id = ext4_get_next_id,
1631
+ .get_next_id = dquot_get_next_id,
14311632 };
14321633
14331634 static const struct quotactl_ops ext4_qctl_operations = {
....@@ -1444,6 +1645,7 @@
14441645
14451646 static const struct super_operations ext4_sops = {
14461647 .alloc_inode = ext4_alloc_inode,
1648
+ .free_inode = ext4_free_in_core_inode,
14471649 .destroy_inode = ext4_destroy_inode,
14481650 .write_inode = ext4_write_inode,
14491651 .dirty_inode = ext4_dirty_inode,
....@@ -1485,7 +1687,8 @@
14851687 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
14861688 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
14871689 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1488
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
1690
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
1691
+ Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
14891692 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
14901693 Opt_nowarn_on_error, Opt_mblk_io_submit,
14911694 Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
....@@ -1494,6 +1697,10 @@
14941697 Opt_dioread_nolock, Opt_dioread_lock,
14951698 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
14961699 Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
1700
+ Opt_prefetch_block_bitmaps,
1701
+#ifdef CONFIG_EXT4_DEBUG
1702
+ Opt_fc_debug_max_replay, Opt_fc_debug_force
1703
+#endif
14971704 };
14981705
14991706 static const match_table_t tokens = {
....@@ -1552,6 +1759,9 @@
15521759 {Opt_nobarrier, "nobarrier"},
15531760 {Opt_i_version, "i_version"},
15541761 {Opt_dax, "dax"},
1762
+ {Opt_dax_always, "dax=always"},
1763
+ {Opt_dax_inode, "dax=inode"},
1764
+ {Opt_dax_never, "dax=never"},
15551765 {Opt_stripe, "stripe=%u"},
15561766 {Opt_delalloc, "delalloc"},
15571767 {Opt_warn_on_error, "warn_on_error"},
....@@ -1570,18 +1780,24 @@
15701780 {Opt_auto_da_alloc, "auto_da_alloc"},
15711781 {Opt_noauto_da_alloc, "noauto_da_alloc"},
15721782 {Opt_dioread_nolock, "dioread_nolock"},
1783
+ {Opt_dioread_lock, "nodioread_nolock"},
15731784 {Opt_dioread_lock, "dioread_lock"},
15741785 {Opt_discard, "discard"},
15751786 {Opt_nodiscard, "nodiscard"},
15761787 {Opt_init_itable, "init_itable=%u"},
15771788 {Opt_init_itable, "init_itable"},
15781789 {Opt_noinit_itable, "noinit_itable"},
1790
+#ifdef CONFIG_EXT4_DEBUG
1791
+ {Opt_fc_debug_force, "fc_debug_force"},
1792
+ {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
1793
+#endif
15791794 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
15801795 {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
15811796 {Opt_test_dummy_encryption, "test_dummy_encryption"},
15821797 {Opt_inlinecrypt, "inlinecrypt"},
15831798 {Opt_nombcache, "nombcache"},
15841799 {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
1800
+ {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
15851801 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
15861802 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
15871803 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
....@@ -1700,6 +1916,8 @@
17001916 #define MOPT_NO_EXT3 0x0200
17011917 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
17021918 #define MOPT_STRING 0x0400
1919
+#define MOPT_SKIP 0x0800
1920
+#define MOPT_2 0x1000
17031921
17041922 static const struct mount_opts {
17051923 int token;
....@@ -1724,6 +1942,7 @@
17241942 MOPT_EXT4_ONLY | MOPT_CLEAR},
17251943 {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
17261944 {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1945
+ {Opt_commit, 0, MOPT_NO_EXT2},
17271946 {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
17281947 MOPT_EXT4_ONLY | MOPT_CLEAR},
17291948 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
....@@ -1749,7 +1968,13 @@
17491968 {Opt_min_batch_time, 0, MOPT_GTE0},
17501969 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
17511970 {Opt_init_itable, 0, MOPT_GTE0},
1752
- {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
1971
+ {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
1972
+ {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
1973
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
1974
+ {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
1975
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
1976
+ {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
1977
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
17531978 {Opt_stripe, 0, MOPT_GTE0},
17541979 {Opt_resuid, 0, MOPT_GTE0},
17551980 {Opt_resgid, 0, MOPT_GTE0},
....@@ -1791,12 +2016,14 @@
17912016 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
17922017 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
17932018 {Opt_test_dummy_encryption, 0, MOPT_STRING},
1794
-#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
1795
- {Opt_inlinecrypt, EXT4_MOUNT_INLINECRYPT, MOPT_SET},
1796
-#else
1797
- {Opt_inlinecrypt, EXT4_MOUNT_INLINECRYPT, MOPT_NOSUPPORT},
1798
-#endif
17992019 {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
2020
+ {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
2021
+ MOPT_SET},
2022
+#ifdef CONFIG_EXT4_DEBUG
2023
+ {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
2024
+ MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
2025
+ {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
2026
+#endif
18002027 {Opt_err, 0, 0}
18012028 };
18022029
....@@ -1839,18 +2066,25 @@
18392066 struct ext4_sb_info *sbi = EXT4_SB(sb);
18402067 int err;
18412068
2069
+ if (!ext4_has_feature_encrypt(sb)) {
2070
+ ext4_msg(sb, KERN_WARNING,
2071
+ "test_dummy_encryption requires encrypt feature");
2072
+ return -1;
2073
+ }
2074
+
18422075 /*
18432076 * This mount option is just for testing, and it's not worthwhile to
18442077 * implement the extra complexity (e.g. RCU protection) that would be
18452078 * needed to allow it to be set or changed during remount. We do allow
18462079 * it to be specified during remount, but only if there is no change.
18472080 */
1848
- if (is_remount && !sbi->s_dummy_enc_ctx.ctx) {
2081
+ if (is_remount && !sbi->s_dummy_enc_policy.policy) {
18492082 ext4_msg(sb, KERN_WARNING,
18502083 "Can't set test_dummy_encryption on remount");
18512084 return -1;
18522085 }
1853
- err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_ctx);
2086
+ err = fscrypt_set_test_dummy_encryption(sb, arg->from,
2087
+ &sbi->s_dummy_enc_policy);
18542088 if (err) {
18552089 if (err == -EEXIST)
18562090 ext4_msg(sb, KERN_WARNING,
....@@ -1865,11 +2099,13 @@
18652099 return -1;
18662100 }
18672101 ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2102
+ return 1;
18682103 #else
18692104 ext4_msg(sb, KERN_WARNING,
1870
- "Test dummy encryption mount option ignored");
2105
+ "test_dummy_encryption option not supported");
2106
+ return -1;
2107
+
18712108 #endif
1872
- return 1;
18732109 }
18742110
18752111 static int handle_mount_opt(struct super_block *sb, char *opt, int token,
....@@ -1903,7 +2139,7 @@
19032139 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
19042140 return 1;
19052141 case Opt_abort:
1906
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
2142
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
19072143 return 1;
19082144 case Opt_i_version:
19092145 sb->s_flags |= SB_I_VERSION;
....@@ -1913,6 +2149,13 @@
19132149 return 1;
19142150 case Opt_nolazytime:
19152151 sb->s_flags &= ~SB_LAZYTIME;
2152
+ return 1;
2153
+ case Opt_inlinecrypt:
2154
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2155
+ sb->s_flags |= SB_INLINECRYPT;
2156
+#else
2157
+ ext4_msg(sb, KERN_ERR, "inline encryption not supported");
2158
+#endif
19162159 return 1;
19172160 }
19182161
....@@ -1962,6 +2205,13 @@
19622205 } else if (token == Opt_commit) {
19632206 if (arg == 0)
19642207 arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
2208
+ else if (arg > INT_MAX / HZ) {
2209
+ ext4_msg(sb, KERN_ERR,
2210
+ "Invalid commit interval %d, "
2211
+ "must be smaller than %d",
2212
+ arg, INT_MAX / HZ);
2213
+ return -1;
2214
+ }
19652215 sbi->s_commit_interval = HZ * arg;
19662216 } else if (token == Opt_debug_want_extra_isize) {
19672217 if ((arg & 1) ||
....@@ -1991,6 +2241,10 @@
19912241 sbi->s_li_wait_mult = arg;
19922242 } else if (token == Opt_max_dir_size_kb) {
19932243 sbi->s_max_dir_size_kb = arg;
2244
+#ifdef CONFIG_EXT4_DEBUG
2245
+ } else if (token == Opt_fc_debug_max_replay) {
2246
+ sbi->s_fc_debug_max_replay = arg;
2247
+#endif
19942248 } else if (token == Opt_stripe) {
19952249 sbi->s_stripe = arg;
19962250 } else if (token == Opt_resuid) {
....@@ -2092,23 +2346,56 @@
20922346 }
20932347 sbi->s_jquota_fmt = m->mount_opt;
20942348 #endif
2095
- } else if (token == Opt_dax) {
2349
+ } else if (token == Opt_dax || token == Opt_dax_always ||
2350
+ token == Opt_dax_inode || token == Opt_dax_never) {
20962351 #ifdef CONFIG_FS_DAX
2097
- if (is_remount && test_opt(sb, DAX)) {
2098
- ext4_msg(sb, KERN_ERR, "can't mount with "
2099
- "both data=journal and dax");
2100
- return -1;
2352
+ switch (token) {
2353
+ case Opt_dax:
2354
+ case Opt_dax_always:
2355
+ if (is_remount &&
2356
+ (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2357
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2358
+ fail_dax_change_remount:
2359
+ ext4_msg(sb, KERN_ERR, "can't change "
2360
+ "dax mount option while remounting");
2361
+ return -1;
2362
+ }
2363
+ if (is_remount &&
2364
+ (test_opt(sb, DATA_FLAGS) ==
2365
+ EXT4_MOUNT_JOURNAL_DATA)) {
2366
+ ext4_msg(sb, KERN_ERR, "can't mount with "
2367
+ "both data=journal and dax");
2368
+ return -1;
2369
+ }
2370
+ ext4_msg(sb, KERN_WARNING,
2371
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
2372
+ sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
2373
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
2374
+ break;
2375
+ case Opt_dax_never:
2376
+ if (is_remount &&
2377
+ (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2378
+ (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
2379
+ goto fail_dax_change_remount;
2380
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
2381
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
2382
+ break;
2383
+ case Opt_dax_inode:
2384
+ if (is_remount &&
2385
+ ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2386
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2387
+ !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
2388
+ goto fail_dax_change_remount;
2389
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
2390
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
2391
+ /* Strictly for printing options */
2392
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
2393
+ break;
21012394 }
2102
- if (is_remount && !(sbi->s_mount_opt & EXT4_MOUNT_DAX)) {
2103
- ext4_msg(sb, KERN_ERR, "can't change "
2104
- "dax mount option while remounting");
2105
- return -1;
2106
- }
2107
- ext4_msg(sb, KERN_WARNING,
2108
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
2109
- sbi->s_mount_opt |= m->mount_opt;
21102395 #else
21112396 ext4_msg(sb, KERN_INFO, "dax option not supported");
2397
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
2398
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
21122399 return -1;
21132400 #endif
21142401 } else if (token == Opt_data_err_abort) {
....@@ -2126,10 +2413,17 @@
21262413 WARN_ON(1);
21272414 return -1;
21282415 }
2129
- if (arg != 0)
2130
- sbi->s_mount_opt |= m->mount_opt;
2131
- else
2132
- sbi->s_mount_opt &= ~m->mount_opt;
2416
+ if (m->flags & MOPT_2) {
2417
+ if (arg != 0)
2418
+ sbi->s_mount_opt2 |= m->mount_opt;
2419
+ else
2420
+ sbi->s_mount_opt2 &= ~m->mount_opt;
2421
+ } else {
2422
+ if (arg != 0)
2423
+ sbi->s_mount_opt |= m->mount_opt;
2424
+ else
2425
+ sbi->s_mount_opt &= ~m->mount_opt;
2426
+ }
21332427 }
21342428 return 1;
21352429 }
....@@ -2139,7 +2433,7 @@
21392433 unsigned int *journal_ioprio,
21402434 int is_remount)
21412435 {
2142
- struct ext4_sb_info *sbi = EXT4_SB(sb);
2436
+ struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
21432437 char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
21442438 substring_t args[MAX_OPT_ARGS];
21452439 int token;
....@@ -2196,12 +2490,10 @@
21962490 if (test_opt(sb, DIOREAD_NOLOCK)) {
21972491 int blocksize =
21982492 BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2199
-
2200
- if (blocksize < PAGE_SIZE) {
2201
- ext4_msg(sb, KERN_ERR, "can't mount with "
2202
- "dioread_nolock if block size != PAGE_SIZE");
2203
- return 0;
2204
- }
2493
+ if (blocksize < PAGE_SIZE)
2494
+ ext4_msg(sb, KERN_WARNING, "Warning: mounting with an "
2495
+ "experimental mount option 'dioread_nolock' "
2496
+ "for blocksize < PAGE_SIZE");
22052497 }
22062498 return 1;
22072499 }
....@@ -2274,7 +2566,7 @@
22742566 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
22752567 int want_set = m->flags & MOPT_SET;
22762568 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2277
- (m->flags & MOPT_CLEAR_ERR))
2569
+ (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
22782570 continue;
22792571 if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
22802572 continue; /* skip if same as the default */
....@@ -2334,6 +2626,19 @@
23342626
23352627 fscrypt_show_test_dummy_encryption(seq, sep, sb);
23362628
2629
+ if (sb->s_flags & SB_INLINECRYPT)
2630
+ SEQ_OPTS_PUTS("inlinecrypt");
2631
+
2632
+ if (test_opt(sb, DAX_ALWAYS)) {
2633
+ if (IS_EXT2_SB(sb))
2634
+ SEQ_OPTS_PUTS("dax");
2635
+ else
2636
+ SEQ_OPTS_PUTS("dax=always");
2637
+ } else if (test_opt2(sb, DAX_NEVER)) {
2638
+ SEQ_OPTS_PUTS("dax=never");
2639
+ } else if (test_opt2(sb, DAX_INODE)) {
2640
+ SEQ_OPTS_PUTS("dax=inode");
2641
+ }
23372642 ext4_show_quota_options(seq, sb);
23382643 return 0;
23392644 }
....@@ -2528,11 +2833,9 @@
25282833 crc = crc16(crc, (__u8 *)gdp, offset);
25292834 offset += sizeof(gdp->bg_checksum); /* skip checksum */
25302835 /* for checksum of struct ext4_group_desc do the rest...*/
2531
- if (ext4_has_feature_64bit(sb) &&
2532
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
2836
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
25332837 crc = crc16(crc, (__u8 *)gdp + offset,
2534
- le16_to_cpu(sbi->s_es->s_desc_size) -
2535
- offset);
2838
+ sbi->s_desc_size - offset);
25362839
25372840 out:
25382841 return cpu_to_le16(crc);
....@@ -2859,13 +3162,9 @@
28593162 loff_t res;
28603163 loff_t upper_limit = MAX_LFS_FILESIZE;
28613164
2862
- /* small i_blocks in vfs inode? */
2863
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2864
- /*
2865
- * CONFIG_LBDAF is not enabled implies the inode
2866
- * i_block represent total blocks in 512 bytes
2867
- * 32 == size of vfs inode i_blocks * 8
2868
- */
3165
+ BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3166
+
3167
+ if (!has_huge_files) {
28693168 upper_limit = (1LL << 32) - 1;
28703169
28713170 /* total blocks in file system block size */
....@@ -2895,22 +3194,22 @@
28953194 */
28963195 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
28973196 {
2898
- loff_t res = EXT4_NDIR_BLOCKS;
3197
+ unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
28993198 int meta_blocks;
2900
- loff_t upper_limit;
2901
- /* This is calculated to be the largest file size for a dense, block
3199
+
3200
+ /*
3201
+ * This is calculated to be the largest file size for a dense, block
29023202 * mapped file such that the file's total number of 512-byte sectors,
29033203 * including data and all indirect blocks, does not exceed (2^48 - 1).
29043204 *
29053205 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
29063206 * number of 512-byte sectors of the file.
29073207 */
2908
-
2909
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
3208
+ if (!has_huge_files) {
29103209 /*
2911
- * !has_huge_files or CONFIG_LBDAF not enabled implies that
2912
- * the inode i_block field represents total file blocks in
2913
- * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
3210
+ * !has_huge_files implies that the inode i_block field
3211
+ * represents total file blocks in 2^32 512-byte sectors ==
3212
+ * size of vfs inode i_blocks * 8
29143213 */
29153214 upper_limit = (1LL << 32) - 1;
29163215
....@@ -2948,7 +3247,7 @@
29483247 if (res > MAX_LFS_FILESIZE)
29493248 res = MAX_LFS_FILESIZE;
29503249
2951
- return res;
3250
+ return (loff_t)res;
29523251 }
29533252
29543253 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
....@@ -3059,18 +3358,6 @@
30593358 ~EXT4_FEATURE_RO_COMPAT_SUPP));
30603359 return 0;
30613360 }
3062
- /*
3063
- * Large file size enabled file system can only be mounted
3064
- * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
3065
- */
3066
- if (ext4_has_feature_huge_file(sb)) {
3067
- if (sizeof(blkcnt_t) < sizeof(u64)) {
3068
- ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
3069
- "cannot be mounted RDWR without "
3070
- "CONFIG_LBDAF");
3071
- return 0;
3072
- }
3073
- }
30743361 if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
30753362 ext4_msg(sb, KERN_ERR,
30763363 "Can't support bigalloc feature without "
....@@ -3140,15 +3427,34 @@
31403427 static int ext4_run_li_request(struct ext4_li_request *elr)
31413428 {
31423429 struct ext4_group_desc *gdp = NULL;
3143
- ext4_group_t group, ngroups;
3144
- struct super_block *sb;
3430
+ struct super_block *sb = elr->lr_super;
3431
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3432
+ ext4_group_t group = elr->lr_next_group;
3433
+ unsigned int prefetch_ios = 0;
31453434 int ret = 0;
31463435 u64 start_time;
31473436
3148
- sb = elr->lr_super;
3149
- ngroups = EXT4_SB(sb)->s_groups_count;
3437
+ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3438
+ elr->lr_next_group = ext4_mb_prefetch(sb, group,
3439
+ EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
3440
+ if (prefetch_ios)
3441
+ ext4_mb_prefetch_fini(sb, elr->lr_next_group,
3442
+ prefetch_ios);
3443
+ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
3444
+ prefetch_ios);
3445
+ if (group >= elr->lr_next_group) {
3446
+ ret = 1;
3447
+ if (elr->lr_first_not_zeroed != ngroups &&
3448
+ !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3449
+ elr->lr_next_group = elr->lr_first_not_zeroed;
3450
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
3451
+ ret = 0;
3452
+ }
3453
+ }
3454
+ return ret;
3455
+ }
31503456
3151
- for (group = elr->lr_next_group; group < ngroups; group++) {
3457
+ for (; group < ngroups; group++) {
31523458 gdp = ext4_get_group_desc(sb, group, NULL);
31533459 if (!gdp) {
31543460 ret = 1;
....@@ -3166,9 +3472,10 @@
31663472 start_time = ktime_get_real_ns();
31673473 ret = ext4_init_inode_table(sb, group,
31683474 elr->lr_timeout ? 0 : 1);
3475
+ trace_ext4_lazy_itable_init(sb, group);
31693476 if (elr->lr_timeout == 0) {
31703477 elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3171
- elr->lr_sbi->s_li_wait_mult);
3478
+ EXT4_SB(elr->lr_super)->s_li_wait_mult);
31723479 }
31733480 elr->lr_next_sched = jiffies + elr->lr_timeout;
31743481 elr->lr_next_group = group + 1;
....@@ -3182,15 +3489,11 @@
31823489 */
31833490 static void ext4_remove_li_request(struct ext4_li_request *elr)
31843491 {
3185
- struct ext4_sb_info *sbi;
3186
-
31873492 if (!elr)
31883493 return;
31893494
3190
- sbi = elr->lr_sbi;
3191
-
31923495 list_del(&elr->lr_request);
3193
- sbi->s_li_request = NULL;
3496
+ EXT4_SB(elr->lr_super)->s_li_request = NULL;
31943497 kfree(elr);
31953498 }
31963499
....@@ -3227,6 +3530,7 @@
32273530 unsigned long next_wakeup, cur;
32283531
32293532 BUG_ON(NULL == eli);
3533
+ set_freezable();
32303534
32313535 cont_thread:
32323536 while (true) {
....@@ -3399,7 +3703,6 @@
33993703 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
34003704 ext4_group_t start)
34013705 {
3402
- struct ext4_sb_info *sbi = EXT4_SB(sb);
34033706 struct ext4_li_request *elr;
34043707
34053708 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
....@@ -3407,8 +3710,13 @@
34073710 return NULL;
34083711
34093712 elr->lr_super = sb;
3410
- elr->lr_sbi = sbi;
3411
- elr->lr_next_group = start;
3713
+ elr->lr_first_not_zeroed = start;
3714
+ if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
3715
+ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3716
+ else {
3717
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
3718
+ elr->lr_next_group = start;
3719
+ }
34123720
34133721 /*
34143722 * Randomize first schedule time of the request to
....@@ -3438,8 +3746,9 @@
34383746 goto out;
34393747 }
34403748
3441
- if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
3442
- !test_opt(sb, INIT_INODE_TABLE))
3749
+ if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) &&
3750
+ (first_not_zeroed == ngroups || sb_rdonly(sb) ||
3751
+ !test_opt(sb, INIT_INODE_TABLE)))
34433752 goto out;
34443753
34453754 elr = ext4_li_request_new(sb, first_not_zeroed);
....@@ -3556,9 +3865,11 @@
35563865 ext4_fsblk_t first_block, last_block, b;
35573866 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
35583867 int s, j, count = 0;
3868
+ int has_super = ext4_bg_has_super(sb, grp);
35593869
35603870 if (!ext4_has_feature_bigalloc(sb))
3561
- return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3871
+ return (has_super + ext4_bg_num_gdb(sb, grp) +
3872
+ (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
35623873 sbi->s_itb_per_group + 2);
35633874
35643875 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
....@@ -3650,8 +3961,8 @@
36503961 * Add the internal journal blocks whether the journal has been
36513962 * loaded or not
36523963 */
3653
- if (sbi->s_journal && !sbi->journal_bdev)
3654
- overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3964
+ if (sbi->s_journal && !sbi->s_journal_bdev)
3965
+ overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
36553966 else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
36563967 /* j_inum for internal journal is non-zero */
36573968 j_inode = ext4_get_journal_inode(sb, j_inum);
....@@ -3719,7 +4030,7 @@
37194030 int blocksize, clustersize;
37204031 unsigned int db_count;
37214032 unsigned int i;
3722
- int needs_recovery, has_huge_files, has_bigalloc;
4033
+ int needs_recovery, has_huge_files;
37234034 __u64 blocks_count;
37244035 int err = 0;
37254036 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
....@@ -3764,8 +4075,11 @@
37644075 logical_sb_block = sb_block;
37654076 }
37664077
3767
- if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
4078
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4079
+ if (IS_ERR(bh)) {
37684080 ext4_msg(sb, KERN_ERR, "unable to read superblock");
4081
+ ret = PTR_ERR(bh);
4082
+ bh = NULL;
37694083 goto out_fail;
37704084 }
37714085 /*
....@@ -3832,6 +4146,8 @@
38324146 #ifdef CONFIG_EXT4_FS_POSIX_ACL
38334147 set_opt(sb, POSIX_ACL);
38344148 #endif
4149
+ if (ext4_has_feature_fast_commit(sb))
4150
+ set_opt2(sb, JOURNAL_FAST_COMMIT);
38354151 /* don't forget to enable journal_csum when metadata_csum is enabled. */
38364152 if (ext4_has_metadata_csum(sb))
38374153 set_opt(sb, JOURNAL_CHECKSUM);
....@@ -3877,14 +4193,25 @@
38774193 */
38784194 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
38794195
3880
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3881
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3882
- blocksize > EXT4_MAX_BLOCK_SIZE) {
4196
+ if (le32_to_cpu(es->s_log_block_size) >
4197
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
38834198 ext4_msg(sb, KERN_ERR,
3884
- "Unsupported filesystem blocksize %d (%d log_block_size)",
3885
- blocksize, le32_to_cpu(es->s_log_block_size));
4199
+ "Invalid log block size: %u",
4200
+ le32_to_cpu(es->s_log_block_size));
38864201 goto failed_mount;
38874202 }
4203
+ if (le32_to_cpu(es->s_log_cluster_size) >
4204
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4205
+ ext4_msg(sb, KERN_ERR,
4206
+ "Invalid log cluster size: %u",
4207
+ le32_to_cpu(es->s_log_cluster_size));
4208
+ goto failed_mount;
4209
+ }
4210
+
4211
+ blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
4212
+
4213
+ if (blocksize == PAGE_SIZE)
4214
+ set_opt(sb, DIOREAD_NOLOCK);
38884215
38894216 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
38904217 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
....@@ -3915,9 +4242,12 @@
39154242 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
39164243 sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
39174244 sb->s_time_gran = 1;
4245
+ sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
39184246 } else {
39194247 sb->s_time_gran = NSEC_PER_SEC;
4248
+ sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
39204249 }
4250
+ sb->s_time_min = EXT4_TIMESTAMP_MIN;
39214251 }
39224252 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
39234253 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
....@@ -3997,20 +4327,16 @@
39974327 #endif
39984328
39994329 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4000
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
4001
- "with data=journal disables delayed "
4002
- "allocation and O_DIRECT support!\n");
4330
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
4331
+ /* can't mount with both data=journal and dioread_nolock. */
4332
+ clear_opt(sb, DIOREAD_NOLOCK);
4333
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
40034334 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
40044335 ext4_msg(sb, KERN_ERR, "can't mount with "
40054336 "both data=journal and delalloc");
40064337 goto failed_mount;
40074338 }
4008
- if (test_opt(sb, DIOREAD_NOLOCK)) {
4009
- ext4_msg(sb, KERN_ERR, "can't mount with "
4010
- "both data=journal and dioread_nolock");
4011
- goto failed_mount;
4012
- }
4013
- if (test_opt(sb, DAX)) {
4339
+ if (test_opt(sb, DAX_ALWAYS)) {
40144340 ext4_msg(sb, KERN_ERR, "can't mount with "
40154341 "both data=journal and dax");
40164342 goto failed_mount;
....@@ -4098,21 +4424,6 @@
40984424 if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
40994425 goto failed_mount;
41004426
4101
- if (le32_to_cpu(es->s_log_block_size) >
4102
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4103
- ext4_msg(sb, KERN_ERR,
4104
- "Invalid log block size: %u",
4105
- le32_to_cpu(es->s_log_block_size));
4106
- goto failed_mount;
4107
- }
4108
- if (le32_to_cpu(es->s_log_cluster_size) >
4109
- (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4110
- ext4_msg(sb, KERN_ERR,
4111
- "Invalid log cluster size: %u",
4112
- le32_to_cpu(es->s_log_cluster_size));
4113
- goto failed_mount;
4114
- }
4115
-
41164427 if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
41174428 ext4_msg(sb, KERN_ERR,
41184429 "Number of reserved GDT blocks insanely large: %d",
....@@ -4120,16 +4431,19 @@
41204431 goto failed_mount;
41214432 }
41224433
4123
- if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
4434
+ if (bdev_dax_supported(sb->s_bdev, blocksize))
4435
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4436
+
4437
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
41244438 if (ext4_has_feature_inline_data(sb)) {
41254439 ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
41264440 " that may contain inline data");
4127
- sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
4441
+ goto failed_mount;
41284442 }
4129
- if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
4443
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
41304444 ext4_msg(sb, KERN_ERR,
4131
- "DAX unsupported by block device. Turning off DAX.");
4132
- sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
4445
+ "DAX unsupported by block device.");
4446
+ goto failed_mount;
41334447 }
41344448 }
41354449
....@@ -4140,20 +4454,28 @@
41404454 }
41414455
41424456 if (sb->s_blocksize != blocksize) {
4457
+ /*
4458
+ * bh must be released before kill_bdev(), otherwise
4459
+ * neither it nor its page will be freed. kill_bdev()
4460
+ * is called by sb_set_blocksize().
4461
+ */
4462
+ brelse(bh);
41434463 /* Validate the filesystem blocksize */
41444464 if (!sb_set_blocksize(sb, blocksize)) {
41454465 ext4_msg(sb, KERN_ERR, "bad block size %d",
41464466 blocksize);
4467
+ bh = NULL;
41474468 goto failed_mount;
41484469 }
41494470
4150
- brelse(bh);
41514471 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
41524472 offset = do_div(logical_sb_block, blocksize);
4153
- bh = sb_bread_unmovable(sb, logical_sb_block);
4154
- if (!bh) {
4473
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4474
+ if (IS_ERR(bh)) {
41554475 ext4_msg(sb, KERN_ERR,
41564476 "Can't read superblock on 2nd try");
4477
+ ret = PTR_ERR(bh);
4478
+ bh = NULL;
41574479 goto failed_mount;
41584480 }
41594481 es = (struct ext4_super_block *)(bh->b_data + offset);
....@@ -4199,7 +4521,7 @@
41994521 sbi->s_inodes_per_block;
42004522 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
42014523 sbi->s_sbh = bh;
4202
- sbi->s_mount_state = le16_to_cpu(es->s_state);
4524
+ sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
42034525 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
42044526 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
42054527
....@@ -4226,8 +4548,7 @@
42264548
42274549 /* Handle clustersize */
42284550 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4229
- has_bigalloc = ext4_has_feature_bigalloc(sb);
4230
- if (has_bigalloc) {
4551
+ if (ext4_has_feature_bigalloc(sb)) {
42314552 if (clustersize < blocksize) {
42324553 ext4_msg(sb, KERN_ERR,
42334554 "cluster size (%d) smaller than "
....@@ -4283,8 +4604,6 @@
42834604 if (err) {
42844605 ext4_msg(sb, KERN_ERR, "filesystem"
42854606 " too large to mount safely on this system");
4286
- if (sizeof(sector_t) < 8)
4287
- ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
42884607 goto failed_mount;
42894608 }
42904609
....@@ -4368,18 +4687,20 @@
43684687 /* Pre-read the descriptors into the buffer cache */
43694688 for (i = 0; i < db_count; i++) {
43704689 block = descriptor_loc(sb, logical_sb_block, i);
4371
- sb_breadahead_unmovable(sb, block);
4690
+ ext4_sb_breadahead_unmovable(sb, block);
43724691 }
43734692
43744693 for (i = 0; i < db_count; i++) {
43754694 struct buffer_head *bh;
43764695
43774696 block = descriptor_loc(sb, logical_sb_block, i);
4378
- bh = sb_bread_unmovable(sb, block);
4379
- if (!bh) {
4697
+ bh = ext4_sb_bread_unmovable(sb, block);
4698
+ if (IS_ERR(bh)) {
43804699 ext4_msg(sb, KERN_ERR,
43814700 "can't read group descriptor %d", i);
43824701 db_count = i;
4702
+ ret = PTR_ERR(bh);
4703
+ bh = NULL;
43834704 goto failed_mount2;
43844705 }
43854706 rcu_read_lock();
....@@ -4427,14 +4748,36 @@
44274748 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
44284749 mutex_init(&sbi->s_orphan_lock);
44294750
4751
+ /* Initialize fast commit stuff */
4752
+ atomic_set(&sbi->s_fc_subtid, 0);
4753
+ atomic_set(&sbi->s_fc_ineligible_updates, 0);
4754
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4755
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4756
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4757
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4758
+ sbi->s_fc_bytes = 0;
4759
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4760
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
4761
+ spin_lock_init(&sbi->s_fc_lock);
4762
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4763
+ sbi->s_fc_replay_state.fc_regions = NULL;
4764
+ sbi->s_fc_replay_state.fc_regions_size = 0;
4765
+ sbi->s_fc_replay_state.fc_regions_used = 0;
4766
+ sbi->s_fc_replay_state.fc_regions_valid = 0;
4767
+ sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4768
+ sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4769
+ sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4770
+
44304771 sb->s_root = NULL;
44314772
44324773 needs_recovery = (es->s_last_orphan != 0 ||
44334774 ext4_has_feature_journal_needs_recovery(sb));
44344775
4435
- if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
4436
- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
4776
+ if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
4777
+ err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
4778
+ if (err)
44374779 goto failed_mount3a;
4780
+ }
44384781
44394782 /*
44404783 * The first inode we look at is the journal inode. Don't try
....@@ -4448,34 +4791,36 @@
44484791 ext4_has_feature_journal_needs_recovery(sb)) {
44494792 ext4_msg(sb, KERN_ERR, "required journal recovery "
44504793 "suppressed and not mounted read-only");
4451
- goto failed_mount_wq;
4794
+ goto failed_mount3a;
44524795 } else {
44534796 /* Nojournal mode, all journal mount options are illegal */
4454
- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
4455
- ext4_msg(sb, KERN_ERR, "can't mount with "
4456
- "journal_checksum, fs mounted w/o journal");
4457
- goto failed_mount_wq;
4458
- }
44594797 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
44604798 ext4_msg(sb, KERN_ERR, "can't mount with "
44614799 "journal_async_commit, fs mounted w/o journal");
4462
- goto failed_mount_wq;
4800
+ goto failed_mount3a;
4801
+ }
4802
+
4803
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
4804
+ ext4_msg(sb, KERN_ERR, "can't mount with "
4805
+ "journal_checksum, fs mounted w/o journal");
4806
+ goto failed_mount3a;
44634807 }
44644808 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
44654809 ext4_msg(sb, KERN_ERR, "can't mount with "
44664810 "commit=%lu, fs mounted w/o journal",
44674811 sbi->s_commit_interval / HZ);
4468
- goto failed_mount_wq;
4812
+ goto failed_mount3a;
44694813 }
44704814 if (EXT4_MOUNT_DATA_FLAGS &
44714815 (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
44724816 ext4_msg(sb, KERN_ERR, "can't mount with "
44734817 "data=, fs mounted w/o journal");
4474
- goto failed_mount_wq;
4818
+ goto failed_mount3a;
44754819 }
44764820 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
44774821 clear_opt(sb, JOURNAL_CHECKSUM);
44784822 clear_opt(sb, DATA_FLAGS);
4823
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
44794824 sbi->s_journal = NULL;
44804825 needs_recovery = 0;
44814826 goto no_journal;
....@@ -4491,6 +4836,14 @@
44914836 if (!set_journal_csum_feature_set(sb)) {
44924837 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
44934838 "feature set");
4839
+ goto failed_mount_wq;
4840
+ }
4841
+
4842
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4843
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4844
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4845
+ ext4_msg(sb, KERN_ERR,
4846
+ "Failed to set fast commit journal feature");
44944847 goto failed_mount_wq;
44954848 }
44964849
....@@ -4533,7 +4886,10 @@
45334886
45344887 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
45354888
4536
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4889
+ sbi->s_journal->j_submit_inode_data_buffers =
4890
+ ext4_journal_submit_inode_data_buffers;
4891
+ sbi->s_journal->j_finish_inode_data_buffers =
4892
+ ext4_journal_finish_inode_data_buffers;
45374893
45384894 no_journal:
45394895 if (!test_opt(sb, NO_MBCACHE)) {
....@@ -4554,31 +4910,27 @@
45544910 }
45554911 }
45564912
4557
- if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
4558
- (blocksize != PAGE_SIZE)) {
4559
- ext4_msg(sb, KERN_ERR,
4560
- "Unsupported blocksize for fs encryption");
4561
- goto failed_mount_wq;
4562
- }
4563
-
45644913 if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
45654914 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
45664915 goto failed_mount_wq;
4567
- }
4568
-
4569
- if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
4570
- !ext4_has_feature_encrypt(sb)) {
4571
- ext4_set_feature_encrypt(sb);
4572
- ext4_commit_super(sb, 1);
45734916 }
45744917
45754918 /*
45764919 * Get the # of file system overhead blocks from the
45774920 * superblock if present.
45784921 */
4579
- if (es->s_overhead_clusters)
4580
- sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4581
- else {
4922
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4923
+ /* ignore the precalculated value if it is ridiculous */
4924
+ if (sbi->s_overhead > ext4_blocks_count(es))
4925
+ sbi->s_overhead = 0;
4926
+ /*
4927
+ * If the bigalloc feature is not enabled recalculating the
4928
+ * overhead doesn't take long, so we might as well just redo
4929
+ * it to make sure we are using the correct value.
4930
+ */
4931
+ if (!ext4_has_feature_bigalloc(sb))
4932
+ sbi->s_overhead = 0;
4933
+ if (sbi->s_overhead == 0) {
45824934 err = ext4_calculate_overhead(sb);
45834935 if (err)
45844936 goto failed_mount_wq;
....@@ -4638,6 +4990,7 @@
46384990 goto failed_mount4a;
46394991 }
46404992 }
4993
+ ext4_fc_replay_cleanup(sb);
46414994
46424995 ext4_ext_init(sb);
46434996 err = ext4_mb_init(sb);
....@@ -4646,6 +4999,14 @@
46464999 err);
46475000 goto failed_mount5;
46485001 }
5002
+
5003
+ /*
5004
+ * We can only set up the journal commit callback once
5005
+ * mballoc is initialized
5006
+ */
5007
+ if (sbi->s_journal)
5008
+ sbi->s_journal->j_commit_callback =
5009
+ ext4_journal_commit_callback;
46495010
46505011 block = ext4_count_free_clusters(sb);
46515012 ext4_free_blocks_count_set(sbi->s_es,
....@@ -4665,6 +5026,9 @@
46655026 ext4_count_dirs(sb), GFP_KERNEL);
46665027 if (!err)
46675028 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
5029
+ GFP_KERNEL);
5030
+ if (!err)
5031
+ err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
46685032 GFP_KERNEL);
46695033 if (!err)
46705034 err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
....@@ -4700,6 +5064,14 @@
47005064 }
47015065 #endif /* CONFIG_QUOTA */
47025066
5067
+ /*
5068
+ * Save the original bdev mapping's wb_err value which could be
5069
+ * used to detect the metadata async write error.
5070
+ */
5071
+ spin_lock_init(&sbi->s_bdev_wb_lock);
5072
+ errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
5073
+ &sbi->s_bdev_wb_err);
5074
+ sb->s_bdev->bd_super = sb;
47035075 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
47045076 ext4_orphan_cleanup(sb, es);
47055077 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
....@@ -4741,6 +5113,8 @@
47415113 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
47425114 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
47435115 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5116
+ atomic_set(&sbi->s_warning_count, 0);
5117
+ atomic_set(&sbi->s_msg_count, 0);
47445118
47455119 kfree(orig_data);
47465120 return 0;
....@@ -4769,6 +5143,7 @@
47695143 percpu_counter_destroy(&sbi->s_freeinodes_counter);
47705144 percpu_counter_destroy(&sbi->s_dirs_counter);
47715145 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
5146
+ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
47725147 percpu_free_rwsem(&sbi->s_writepages_rwsem);
47735148 failed_mount5:
47745149 ext4_ext_release(sb);
....@@ -4781,14 +5156,12 @@
47815156 if (EXT4_SB(sb)->rsv_conversion_wq)
47825157 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
47835158 failed_mount_wq:
4784
- if (sbi->s_ea_inode_cache) {
4785
- ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
4786
- sbi->s_ea_inode_cache = NULL;
4787
- }
4788
- if (sbi->s_ea_block_cache) {
4789
- ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
4790
- sbi->s_ea_block_cache = NULL;
4791
- }
5159
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5160
+ sbi->s_ea_inode_cache = NULL;
5161
+
5162
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5163
+ sbi->s_ea_block_cache = NULL;
5164
+
47925165 if (sbi->s_journal) {
47935166 jbd2_journal_destroy(sbi->s_journal);
47945167 sbi->s_journal = NULL;
....@@ -4797,8 +5170,7 @@
47975170 ext4_es_unregister_shrinker(sbi);
47985171 failed_mount3:
47995172 del_timer_sync(&sbi->s_err_report);
4800
- if (sbi->s_mmp_tsk)
4801
- kthread_stop(sbi->s_mmp_tsk);
5173
+ ext4_stop_mmpd(sbi);
48025174 failed_mount2:
48035175 rcu_read_lock();
48045176 group_desc = rcu_dereference(sbi->s_group_desc);
....@@ -4816,12 +5188,14 @@
48165188
48175189 #ifdef CONFIG_QUOTA
48185190 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4819
- kfree(sbi->s_qf_names[i]);
5191
+ kfree(get_qf_name(sb, sbi, i));
48205192 #endif
4821
- fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
4822
- ext4_blkdev_remove(sbi);
5193
+ fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5194
+ /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
48235195 brelse(bh);
5196
+ ext4_blkdev_remove(sbi);
48245197 out_fail:
5198
+ invalidate_bdev(sb->s_bdev);
48255199 sb->s_fs_info = NULL;
48265200 kfree(sbi->s_blockgroup_lock);
48275201 out_free_base:
....@@ -4843,6 +5217,7 @@
48435217 journal->j_commit_interval = sbi->s_commit_interval;
48445218 journal->j_min_batch_time = sbi->s_min_batch_time;
48455219 journal->j_max_batch_time = sbi->s_max_batch_time;
5220
+ ext4_fc_init(sb, journal);
48465221
48475222 write_lock(&journal->j_state_lock);
48485223 if (test_opt(sb, BARRIER))
....@@ -4880,7 +5255,7 @@
48805255
48815256 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
48825257 journal_inode, journal_inode->i_size);
4883
- if (!S_ISREG(journal_inode->i_mode)) {
5258
+ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
48845259 ext4_msg(sb, KERN_ERR, "invalid journal inode");
48855260 iput(journal_inode);
48865261 return NULL;
....@@ -4985,9 +5360,7 @@
49855360 goto out_bdev;
49865361 }
49875362 journal->j_private = sb;
4988
- ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4989
- wait_on_buffer(journal->j_sb_buffer);
4990
- if (!buffer_uptodate(journal->j_sb_buffer)) {
5363
+ if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
49915364 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
49925365 goto out_journal;
49935366 }
....@@ -4997,7 +5370,7 @@
49975370 be32_to_cpu(journal->j_superblock->s_nr_users));
49985371 goto out_journal;
49995372 }
5000
- EXT4_SB(sb)->journal_bdev = bdev;
5373
+ EXT4_SB(sb)->s_journal_bdev = bdev;
50015374 ext4_init_journal_params(sb, journal);
50025375 return journal;
50035376
....@@ -5323,7 +5696,7 @@
53235696 needs_barrier = true;
53245697 if (needs_barrier) {
53255698 int err;
5326
- err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
5699
+ err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
53275700 if (!ret)
53285701 ret = err;
53295702 }
....@@ -5413,11 +5786,11 @@
54135786 struct ext4_sb_info *sbi = EXT4_SB(sb);
54145787 unsigned long old_sb_flags, vfs_flags;
54155788 struct ext4_mount_options old_opts;
5416
- int enable_quota = 0;
54175789 ext4_group_t g;
54185790 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
54195791 int err = 0;
54205792 #ifdef CONFIG_QUOTA
5793
+ int enable_quota = 0;
54215794 int i, j;
54225795 char *to_free[EXT4_MAXQUOTAS];
54235796 #endif
....@@ -5502,8 +5875,8 @@
55025875 goto restore_opts;
55035876 }
55045877
5505
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
5506
- ext4_abort(sb, "Abort forced by user");
5878
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5879
+ ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
55075880
55085881 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
55095882 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
....@@ -5516,7 +5889,7 @@
55165889 }
55175890
55185891 if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
5519
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
5892
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
55205893 err = -EROFS;
55215894 goto restore_opts;
55225895 }
....@@ -5551,8 +5924,6 @@
55515924 */
55525925 ext4_mark_recovery_complete(sb, es);
55535926 }
5554
- if (sbi->s_mmp_tsk)
5555
- kthread_stop(sbi->s_mmp_tsk);
55565927 } else {
55575928 /* Make sure we can mount this feature set readwrite */
55585929 if (ext4_has_feature_readonly(sb) ||
....@@ -5603,22 +5974,59 @@
56035974 if (err)
56045975 goto restore_opts;
56055976 }
5606
- sbi->s_mount_state = le16_to_cpu(es->s_state);
5977
+ sbi->s_mount_state = (le16_to_cpu(es->s_state) &
5978
+ ~EXT4_FC_REPLAY);
56075979
56085980 err = ext4_setup_super(sb, es, 0);
56095981 if (err)
56105982 goto restore_opts;
56115983
56125984 sb->s_flags &= ~SB_RDONLY;
5613
- if (ext4_has_feature_mmp(sb))
5614
- if (ext4_multi_mount_protect(sb,
5615
- le64_to_cpu(es->s_mmp_block))) {
5616
- err = -EROFS;
5985
+ if (ext4_has_feature_mmp(sb)) {
5986
+ err = ext4_multi_mount_protect(sb,
5987
+ le64_to_cpu(es->s_mmp_block));
5988
+ if (err)
56175989 goto restore_opts;
5618
- }
5990
+ }
5991
+#ifdef CONFIG_QUOTA
56195992 enable_quota = 1;
5993
+#endif
56205994 }
56215995 }
5996
+
5997
+ /*
5998
+ * Handle creation of system zone data early because it can fail.
5999
+ * Releasing of existing data is done when we are sure remount will
6000
+ * succeed.
6001
+ */
6002
+ if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6003
+ err = ext4_setup_system_zone(sb);
6004
+ if (err)
6005
+ goto restore_opts;
6006
+ }
6007
+
6008
+ if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6009
+ err = ext4_commit_super(sb, 1);
6010
+ if (err)
6011
+ goto restore_opts;
6012
+ }
6013
+
6014
+#ifdef CONFIG_QUOTA
6015
+ if (enable_quota) {
6016
+ if (sb_any_quota_suspended(sb))
6017
+ dquot_resume(sb, -1);
6018
+ else if (ext4_has_feature_quota(sb)) {
6019
+ err = ext4_enable_quotas(sb);
6020
+ if (err)
6021
+ goto restore_opts;
6022
+ }
6023
+ }
6024
+ /* Release old quota file names */
6025
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
6026
+ kfree(old_opts.s_qf_names[i]);
6027
+#endif
6028
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6029
+ ext4_release_system_zone(sb);
56226030
56236031 /*
56246032 * Reinitialize lazy itable initialization thread based on
....@@ -5632,39 +6040,8 @@
56326040 ext4_register_li_request(sb, first_not_zeroed);
56336041 }
56346042
5635
- /*
5636
- * Handle creation of system zone data early because it can fail.
5637
- * Releasing of existing data is done when we are sure remount will
5638
- * succeed.
5639
- */
5640
- if (test_opt(sb, BLOCK_VALIDITY) && !sbi->system_blks) {
5641
- err = ext4_setup_system_zone(sb);
5642
- if (err)
5643
- goto restore_opts;
5644
- }
5645
-
5646
- if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
5647
- err = ext4_commit_super(sb, 1);
5648
- if (err)
5649
- goto restore_opts;
5650
- }
5651
-
5652
-#ifdef CONFIG_QUOTA
5653
- /* Release old quota file names */
5654
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
5655
- kfree(old_opts.s_qf_names[i]);
5656
- if (enable_quota) {
5657
- if (sb_any_quota_suspended(sb))
5658
- dquot_resume(sb, -1);
5659
- else if (ext4_has_feature_quota(sb)) {
5660
- err = ext4_enable_quotas(sb);
5661
- if (err)
5662
- goto restore_opts;
5663
- }
5664
- }
5665
-#endif
5666
- if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
5667
- ext4_release_system_zone(sb);
6043
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6044
+ ext4_stop_mmpd(sbi);
56686045
56696046 /*
56706047 * Some options can be enabled by ext4 and/or by VFS mount flag
....@@ -5678,6 +6055,13 @@
56786055 return 0;
56796056
56806057 restore_opts:
6058
+ /*
6059
+ * If there was a failing r/w to ro transition, we may need to
6060
+ * re-enable quota
6061
+ */
6062
+ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
6063
+ sb_any_quota_suspended(sb))
6064
+ dquot_resume(sb, -1);
56816065 sb->s_flags = old_sb_flags;
56826066 sbi->s_mount_opt = old_opts.s_mount_opt;
56836067 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
....@@ -5686,7 +6070,7 @@
56866070 sbi->s_commit_interval = old_opts.s_commit_interval;
56876071 sbi->s_min_batch_time = old_opts.s_min_batch_time;
56886072 sbi->s_max_batch_time = old_opts.s_max_batch_time;
5689
- if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
6073
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
56906074 ext4_release_system_zone(sb);
56916075 #ifdef CONFIG_QUOTA
56926076 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
....@@ -5698,6 +6082,8 @@
56986082 for (i = 0; i < EXT4_MAXQUOTAS; i++)
56996083 kfree(to_free[i]);
57006084 #endif
6085
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6086
+ ext4_stop_mmpd(sbi);
57016087 kfree(orig_data);
57026088 return err;
57036089 }
....@@ -5717,9 +6103,10 @@
57176103 return PTR_ERR(dquot);
57186104 spin_lock(&dquot->dq_dqb_lock);
57196105
5720
- limit = (dquot->dq_dqb.dqb_bsoftlimit ?
5721
- dquot->dq_dqb.dqb_bsoftlimit :
5722
- dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
6106
+ limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6107
+ dquot->dq_dqb.dqb_bhardlimit);
6108
+ limit >>= sb->s_blocksize_bits;
6109
+
57236110 if (limit && buf->f_blocks > limit) {
57246111 curblock = (dquot->dq_dqb.dqb_curspace +
57256112 dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
....@@ -5729,9 +6116,8 @@
57296116 (buf->f_blocks - curblock) : 0;
57306117 }
57316118
5732
- limit = dquot->dq_dqb.dqb_isoftlimit ?
5733
- dquot->dq_dqb.dqb_isoftlimit :
5734
- dquot->dq_dqb.dqb_ihardlimit;
6119
+ limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6120
+ dquot->dq_dqb.dqb_ihardlimit);
57356121 if (limit && buf->f_files > limit) {
57366122 buf->f_files = limit;
57376123 buf->f_ffree =
....@@ -5774,8 +6160,7 @@
57746160 buf->f_namelen = EXT4_NAME_LEN;
57756161 fsid = le64_to_cpup((void *)es->s_uuid) ^
57766162 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5777
- buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5778
- buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
6163
+ buf->f_fsid = u64_to_fsid(fsid);
57796164
57806165 #ifdef CONFIG_QUOTA
57816166 if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
....@@ -5871,7 +6256,7 @@
58716256 handle_t *handle;
58726257
58736258 /* Data block + inode block */
5874
- handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
6259
+ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
58756260 if (IS_ERR(handle))
58766261 return PTR_ERR(handle);
58776262 ret = dquot_commit_info(sb, type);
....@@ -5975,7 +6360,7 @@
59756360 EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
59766361 inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
59776362 S_NOATIME | S_IMMUTABLE);
5978
- ext4_mark_inode_dirty(handle, inode);
6363
+ err = ext4_mark_inode_dirty(handle, inode);
59796364 ext4_journal_stop(handle);
59806365 unlock_inode:
59816366 inode_unlock(inode);
....@@ -5986,6 +6371,20 @@
59866371 lockdep_set_quota_inode(path->dentry->d_inode,
59876372 I_DATA_SEM_NORMAL);
59886373 return err;
6374
+}
6375
+
6376
+static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
6377
+{
6378
+ switch (type) {
6379
+ case USRQUOTA:
6380
+ return qf_inum == EXT4_USR_QUOTA_INO;
6381
+ case GRPQUOTA:
6382
+ return qf_inum == EXT4_GRP_QUOTA_INO;
6383
+ case PRJQUOTA:
6384
+ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
6385
+ default:
6386
+ BUG();
6387
+ }
59896388 }
59906389
59916390 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
....@@ -6004,16 +6403,23 @@
60046403 if (!qf_inums[type])
60056404 return -EPERM;
60066405
6406
+ if (!ext4_check_quota_inum(type, qf_inums[type])) {
6407
+ ext4_error(sb, "Bad quota inum: %lu, type: %d",
6408
+ qf_inums[type], type);
6409
+ return -EUCLEAN;
6410
+ }
6411
+
60076412 qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
60086413 if (IS_ERR(qf_inode)) {
6009
- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
6414
+ ext4_error(sb, "Bad quota inode: %lu, type: %d",
6415
+ qf_inums[type], type);
60106416 return PTR_ERR(qf_inode);
60116417 }
60126418
60136419 /* Don't account quota for quota files to avoid recursion */
60146420 qf_inode->i_flags |= S_NOQUOTA;
60156421 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
6016
- err = dquot_enable(qf_inode, type, format_id, flags);
6422
+ err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
60176423 if (err)
60186424 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
60196425 iput(qf_inode);
....@@ -6045,8 +6451,9 @@
60456451 if (err) {
60466452 ext4_warning(sb,
60476453 "Failed to enable quota tracking "
6048
- "(type=%d, err=%d). Please run "
6049
- "e2fsck to fix.", type, err);
6454
+ "(type=%d, err=%d, ino=%lu). "
6455
+ "Please run e2fsck to fix.", type,
6456
+ err, qf_inums[type]);
60506457 for (type--; type >= 0; type--) {
60516458 struct inode *inode;
60526459
....@@ -6093,12 +6500,14 @@
60936500 * this is not a hard failure and quotas are already disabled.
60946501 */
60956502 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6096
- if (IS_ERR(handle))
6503
+ if (IS_ERR(handle)) {
6504
+ err = PTR_ERR(handle);
60976505 goto out_unlock;
6506
+ }
60986507 EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
60996508 inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
61006509 inode->i_mtime = inode->i_ctime = current_time(inode);
6101
- ext4_mark_inode_dirty(handle, inode);
6510
+ err = ext4_mark_inode_dirty(handle, inode);
61026511 ext4_journal_stop(handle);
61036512 out_unlock:
61046513 inode_unlock(inode);
....@@ -6156,7 +6565,7 @@
61566565 {
61576566 struct inode *inode = sb_dqopt(sb)->files[type];
61586567 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6159
- int err, offset = off & (sb->s_blocksize - 1);
6568
+ int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
61606569 int retries = 0;
61616570 struct buffer_head *bh;
61626571 handle_t *handle = journal_current_handle();
....@@ -6182,7 +6591,7 @@
61826591 bh = ext4_bread(handle, inode, blk,
61836592 EXT4_GET_BLOCKS_CREATE |
61846593 EXT4_GET_BLOCKS_METADATA_NOFAIL);
6185
- } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
6594
+ } while (PTR_ERR(bh) == -ENOSPC &&
61866595 ext4_should_retry_alloc(inode->i_sb, &retries));
61876596 if (IS_ERR(bh))
61886597 return PTR_ERR(bh);
....@@ -6204,21 +6613,11 @@
62046613 if (inode->i_size < off + len) {
62056614 i_size_write(inode, off + len);
62066615 EXT4_I(inode)->i_disksize = inode->i_size;
6207
- ext4_mark_inode_dirty(handle, inode);
6616
+ err2 = ext4_mark_inode_dirty(handle, inode);
6617
+ if (unlikely(err2 && !err))
6618
+ err = err2;
62086619 }
6209
- return len;
6210
-}
6211
-
6212
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
6213
-{
6214
- const struct quota_format_ops *ops;
6215
-
6216
- if (!sb_has_quota_loaded(sb, qid->type))
6217
- return -ESRCH;
6218
- ops = sb_dqopt(sb)->ops[qid->type];
6219
- if (!ops || !ops->get_next_id)
6220
- return -ENOSYS;
6221
- return dquot_get_next_id(sb, qid);
6620
+ return err ? err : len;
62226621 }
62236622 #endif
62246623
....@@ -6314,6 +6713,10 @@
63146713 if (err)
63156714 return err;
63166715
6716
+ err = ext4_init_pending();
6717
+ if (err)
6718
+ goto out7;
6719
+
63176720 err = ext4_init_post_read_processing();
63186721 if (err)
63196722 goto out6;
....@@ -6336,6 +6739,11 @@
63366739 err = init_inodecache();
63376740 if (err)
63386741 goto out1;
6742
+
6743
+ err = ext4_fc_init_dentry_cache();
6744
+ if (err)
6745
+ goto out05;
6746
+
63396747 register_as_ext3();
63406748 register_as_ext2();
63416749 err = register_filesystem(&ext4_fs_type);
....@@ -6346,6 +6754,8 @@
63466754 out:
63476755 unregister_as_ext2();
63486756 unregister_as_ext3();
6757
+ ext4_fc_destroy_dentry_cache();
6758
+out05:
63496759 destroy_inodecache();
63506760 out1:
63516761 ext4_exit_mballoc();
....@@ -6358,6 +6768,8 @@
63586768 out5:
63596769 ext4_exit_post_read_processing();
63606770 out6:
6771
+ ext4_exit_pending();
6772
+out7:
63616773 ext4_exit_es();
63626774
63636775 return err;
....@@ -6369,6 +6781,7 @@
63696781 unregister_as_ext2();
63706782 unregister_as_ext3();
63716783 unregister_filesystem(&ext4_fs_type);
6784
+ ext4_fc_destroy_dentry_cache();
63726785 destroy_inodecache();
63736786 ext4_exit_mballoc();
63746787 ext4_exit_sysfs();
....@@ -6376,11 +6789,13 @@
63766789 ext4_exit_pageio();
63776790 ext4_exit_post_read_processing();
63786791 ext4_exit_es();
6792
+ ext4_exit_pending();
63796793 }
63806794
63816795 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
63826796 MODULE_DESCRIPTION("Fourth Extended Filesystem");
63836797 MODULE_LICENSE("GPL");
6798
+MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);
63846799 MODULE_SOFTDEP("pre: crc32c");
63856800 module_init(ext4_init_fs)
63866801 module_exit(ext4_exit_fs)