hc
2024-01-31 f70575805708cabdedea7498aaa3f710fde4d920
kernel/fs/nilfs2/inode.c
....@@ -14,6 +14,7 @@
1414 #include <linux/pagemap.h>
1515 #include <linux/writeback.h>
1616 #include <linux/uio.h>
17
+#include <linux/fiemap.h>
1718 #include "nilfs.h"
1819 #include "btnode.h"
1920 #include "segment.h"
....@@ -28,12 +29,16 @@
2829 * @cno: checkpoint number
2930 * @root: pointer on NILFS root object (mounted checkpoint)
3031 * @for_gc: inode for GC flag
32
+ * @for_btnc: inode for B-tree node cache flag
33
+ * @for_shadow: inode for shadowed page cache flag
3134 */
3235 struct nilfs_iget_args {
3336 u64 ino;
3437 __u64 cno;
3538 struct nilfs_root *root;
36
- int for_gc;
39
+ bool for_gc;
40
+ bool for_btnc;
41
+ bool for_shadow;
3742 };
3843
3944 static int nilfs_iget_test(struct inode *inode, void *opaque);
....@@ -103,10 +108,10 @@
103108 * However, the page having this block must
104109 * be locked in this case.
105110 */
106
- nilfs_msg(inode->i_sb, KERN_WARNING,
107
- "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
108
- __func__, inode->i_ino,
109
- (unsigned long long)blkoff);
111
+ nilfs_warn(inode->i_sb,
112
+ "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
113
+ __func__, inode->i_ino,
114
+ (unsigned long long)blkoff);
110115 err = 0;
111116 }
112117 nilfs_transaction_abort(inode->i_sb);
....@@ -145,18 +150,9 @@
145150 return mpage_readpage(page, nilfs_get_block);
146151 }
147152
148
-/**
149
- * nilfs_readpages() - implement readpages() method of nilfs_aops {}
150
- * address_space_operations.
151
- * @file - file struct of the file to be read
152
- * @mapping - address_space struct used for reading multiple pages
153
- * @pages - the pages to be read
154
- * @nr_pages - number of pages to be read
155
- */
156
-static int nilfs_readpages(struct file *file, struct address_space *mapping,
157
- struct list_head *pages, unsigned int nr_pages)
153
+static void nilfs_readahead(struct readahead_control *rac)
158154 {
159
- return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
155
+ mpage_readahead(rac, nilfs_get_block);
160156 }
161157
162158 static int nilfs_writepages(struct address_space *mapping,
....@@ -308,7 +304,7 @@
308304 .readpage = nilfs_readpage,
309305 .writepages = nilfs_writepages,
310306 .set_page_dirty = nilfs_set_page_dirty,
311
- .readpages = nilfs_readpages,
307
+ .readahead = nilfs_readahead,
312308 .write_begin = nilfs_write_begin,
313309 .write_end = nilfs_write_end,
314310 /* .releasepage = nilfs_releasepage, */
....@@ -322,7 +318,8 @@
322318 unsigned long ino)
323319 {
324320 struct nilfs_iget_args args = {
325
- .ino = ino, .root = root, .cno = 0, .for_gc = 0
321
+ .ino = ino, .root = root, .cno = 0, .for_gc = false,
322
+ .for_btnc = false, .for_shadow = false
326323 };
327324
328325 return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
....@@ -335,6 +332,7 @@
335332 struct inode *inode;
336333 struct nilfs_inode_info *ii;
337334 struct nilfs_root *root;
335
+ struct buffer_head *bh;
338336 int err = -ENOMEM;
339337 ino_t ino;
340338
....@@ -350,10 +348,24 @@
350348 ii->i_state = BIT(NILFS_I_NEW);
351349 ii->i_root = root;
352350
353
- err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh);
351
+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
354352 if (unlikely(err))
355353 goto failed_ifile_create_inode;
356354 /* reference count of i_bh inherits from nilfs_mdt_read_block() */
355
+
356
+ if (unlikely(ino < NILFS_USER_INO)) {
357
+ nilfs_warn(sb,
358
+ "inode bitmap is inconsistent for reserved inodes");
359
+ do {
360
+ brelse(bh);
361
+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
362
+ if (unlikely(err))
363
+ goto failed_ifile_create_inode;
364
+ } while (ino < NILFS_USER_INO);
365
+
366
+ nilfs_info(sb, "repaired inode bitmap for reserved inodes");
367
+ }
368
+ ii->i_bh = bh;
357369
358370 atomic64_inc(&root->inodes_count);
359371 inode_init_owner(inode, dir, mode);
....@@ -396,7 +408,8 @@
396408
397409 failed_after_creation:
398410 clear_nlink(inode);
399
- unlock_new_inode(inode);
411
+ if (inode->i_state & I_NEW)
412
+ unlock_new_inode(inode);
400413 iput(inode); /*
401414 * raw_inode will be deleted through
402415 * nilfs_evict_inode().
....@@ -446,6 +459,8 @@
446459 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
447460 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
448461 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
462
+ if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
463
+ return -EIO; /* this inode is for metadata and corrupted */
449464 if (inode->i_nlink == 0)
450465 return -ESTALE; /* this inode is deleted */
451466
....@@ -534,6 +549,19 @@
534549 return 0;
535550
536551 ii = NILFS_I(inode);
552
+ if (test_bit(NILFS_I_BTNC, &ii->i_state)) {
553
+ if (!args->for_btnc)
554
+ return 0;
555
+ } else if (args->for_btnc) {
556
+ return 0;
557
+ }
558
+ if (test_bit(NILFS_I_SHADOW, &ii->i_state)) {
559
+ if (!args->for_shadow)
560
+ return 0;
561
+ } else if (args->for_shadow) {
562
+ return 0;
563
+ }
564
+
537565 if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
538566 return !args->for_gc;
539567
....@@ -545,15 +573,17 @@
545573 struct nilfs_iget_args *args = opaque;
546574
547575 inode->i_ino = args->ino;
548
- if (args->for_gc) {
576
+ NILFS_I(inode)->i_cno = args->cno;
577
+ NILFS_I(inode)->i_root = args->root;
578
+ if (args->root && args->ino == NILFS_ROOT_INO)
579
+ nilfs_get_root(args->root);
580
+
581
+ if (args->for_gc)
549582 NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
550
- NILFS_I(inode)->i_cno = args->cno;
551
- NILFS_I(inode)->i_root = NULL;
552
- } else {
553
- if (args->root && args->ino == NILFS_ROOT_INO)
554
- nilfs_get_root(args->root);
555
- NILFS_I(inode)->i_root = args->root;
556
- }
583
+ if (args->for_btnc)
584
+ NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC);
585
+ if (args->for_shadow)
586
+ NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW);
557587 return 0;
558588 }
559589
....@@ -561,7 +591,8 @@
561591 unsigned long ino)
562592 {
563593 struct nilfs_iget_args args = {
564
- .ino = ino, .root = root, .cno = 0, .for_gc = 0
594
+ .ino = ino, .root = root, .cno = 0, .for_gc = false,
595
+ .for_btnc = false, .for_shadow = false
565596 };
566597
567598 return ilookup5(sb, ino, nilfs_iget_test, &args);
....@@ -571,7 +602,8 @@
571602 unsigned long ino)
572603 {
573604 struct nilfs_iget_args args = {
574
- .ino = ino, .root = root, .cno = 0, .for_gc = 0
605
+ .ino = ino, .root = root, .cno = 0, .for_gc = false,
606
+ .for_btnc = false, .for_shadow = false
575607 };
576608
577609 return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
....@@ -602,7 +634,8 @@
602634 __u64 cno)
603635 {
604636 struct nilfs_iget_args args = {
605
- .ino = ino, .root = NULL, .cno = cno, .for_gc = 1
637
+ .ino = ino, .root = NULL, .cno = cno, .for_gc = true,
638
+ .for_btnc = false, .for_shadow = false
606639 };
607640 struct inode *inode;
608641 int err;
....@@ -620,6 +653,113 @@
620653 }
621654 unlock_new_inode(inode);
622655 return inode;
656
+}
657
+
658
+/**
659
+ * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
660
+ * @inode: inode object
661
+ *
662
+ * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
663
+ * or does nothing if the inode already has it. This function allocates
664
+ * an additional inode to maintain page cache of B-tree nodes one-on-one.
665
+ *
666
+ * Return Value: On success, 0 is returned. On errors, one of the following
667
+ * negative error code is returned.
668
+ *
669
+ * %-ENOMEM - Insufficient memory available.
670
+ */
671
+int nilfs_attach_btree_node_cache(struct inode *inode)
672
+{
673
+ struct nilfs_inode_info *ii = NILFS_I(inode);
674
+ struct inode *btnc_inode;
675
+ struct nilfs_iget_args args;
676
+
677
+ if (ii->i_assoc_inode)
678
+ return 0;
679
+
680
+ args.ino = inode->i_ino;
681
+ args.root = ii->i_root;
682
+ args.cno = ii->i_cno;
683
+ args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0;
684
+ args.for_btnc = true;
685
+ args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0;
686
+
687
+ btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
688
+ nilfs_iget_set, &args);
689
+ if (unlikely(!btnc_inode))
690
+ return -ENOMEM;
691
+ if (btnc_inode->i_state & I_NEW) {
692
+ nilfs_init_btnc_inode(btnc_inode);
693
+ unlock_new_inode(btnc_inode);
694
+ }
695
+ NILFS_I(btnc_inode)->i_assoc_inode = inode;
696
+ NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
697
+ ii->i_assoc_inode = btnc_inode;
698
+
699
+ return 0;
700
+}
701
+
702
+/**
703
+ * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
704
+ * @inode: inode object
705
+ *
706
+ * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
707
+ * holder inode bound to @inode, or does nothing if @inode doesn't have it.
708
+ */
709
+void nilfs_detach_btree_node_cache(struct inode *inode)
710
+{
711
+ struct nilfs_inode_info *ii = NILFS_I(inode);
712
+ struct inode *btnc_inode = ii->i_assoc_inode;
713
+
714
+ if (btnc_inode) {
715
+ NILFS_I(btnc_inode)->i_assoc_inode = NULL;
716
+ ii->i_assoc_inode = NULL;
717
+ iput(btnc_inode);
718
+ }
719
+}
720
+
721
+/**
722
+ * nilfs_iget_for_shadow - obtain inode for shadow mapping
723
+ * @inode: inode object that uses shadow mapping
724
+ *
725
+ * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
726
+ * caches for shadow mapping. The page cache for data pages is set up
727
+ * in one inode and the one for b-tree node pages is set up in the
728
+ * other inode, which is attached to the former inode.
729
+ *
730
+ * Return Value: On success, a pointer to the inode for data pages is
731
+ * returned. On errors, one of the following negative error code is returned
732
+ * in a pointer type.
733
+ *
734
+ * %-ENOMEM - Insufficient memory available.
735
+ */
736
+struct inode *nilfs_iget_for_shadow(struct inode *inode)
737
+{
738
+ struct nilfs_iget_args args = {
739
+ .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false,
740
+ .for_btnc = false, .for_shadow = true
741
+ };
742
+ struct inode *s_inode;
743
+ int err;
744
+
745
+ s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
746
+ nilfs_iget_set, &args);
747
+ if (unlikely(!s_inode))
748
+ return ERR_PTR(-ENOMEM);
749
+ if (!(s_inode->i_state & I_NEW))
750
+ return inode;
751
+
752
+ NILFS_I(s_inode)->i_flags = 0;
753
+ memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
754
+ mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
755
+
756
+ err = nilfs_attach_btree_node_cache(s_inode);
757
+ if (unlikely(err)) {
758
+ iget_failed(s_inode);
759
+ return ERR_PTR(err);
760
+ }
761
+ unlock_new_inode(s_inode);
762
+ return s_inode;
623763 }
624764
625765 void nilfs_write_inode_common(struct inode *inode,
....@@ -714,9 +854,8 @@
714854 goto repeat;
715855
716856 failed:
717
- nilfs_msg(ii->vfs_inode.i_sb, KERN_WARNING,
718
- "error %d truncating bmap (ino=%lu)", ret,
719
- ii->vfs_inode.i_ino);
857
+ nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
858
+ ret, ii->vfs_inode.i_ino);
720859 }
721860
722861 void nilfs_truncate(struct inode *inode)
....@@ -770,7 +909,8 @@
770909 if (test_bit(NILFS_I_BMAP, &ii->i_state))
771910 nilfs_bmap_clear(ii->i_bmap);
772911
773
- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
912
+ if (!test_bit(NILFS_I_BTNC, &ii->i_state))
913
+ nilfs_detach_btree_node_cache(inode);
774914
775915 if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
776916 nilfs_put_root(ii->i_root);
....@@ -781,6 +921,7 @@
781921 struct nilfs_transaction_info ti;
782922 struct super_block *sb = inode->i_sb;
783923 struct nilfs_inode_info *ii = NILFS_I(inode);
924
+ struct the_nilfs *nilfs;
784925 int ret;
785926
786927 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
....@@ -792,6 +933,23 @@
792933 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
793934
794935 truncate_inode_pages_final(&inode->i_data);
936
+
937
+ nilfs = sb->s_fs_info;
938
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
939
+ /*
940
+ * If this inode is about to be disposed after the file system
941
+ * has been degraded to read-only due to file system corruption
942
+ * or after the writer has been detached, do not make any
943
+ * changes that cause writes, just clear it.
944
+ * Do this check after read-locking ns_segctor_sem by
945
+ * nilfs_transaction_begin() in order to avoid a race with
946
+ * the writer detach operation.
947
+ */
948
+ clear_inode(inode);
949
+ nilfs_clear_inode(inode);
950
+ nilfs_transaction_abort(sb);
951
+ return;
952
+ }
795953
796954 /* TODO: some of the following operations may fail. */
797955 nilfs_truncate_bmap(ii, 0);
....@@ -869,7 +1027,7 @@
8691027 int err;
8701028
8711029 spin_lock(&nilfs->ns_inode_lock);
872
- if (ii->i_bh == NULL) {
1030
+ if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
8731031 spin_unlock(&nilfs->ns_inode_lock);
8741032 err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
8751033 inode->i_ino, pbh);
....@@ -878,7 +1036,10 @@
8781036 spin_lock(&nilfs->ns_inode_lock);
8791037 if (ii->i_bh == NULL)
8801038 ii->i_bh = *pbh;
881
- else {
1039
+ else if (unlikely(!buffer_uptodate(ii->i_bh))) {
1040
+ __brelse(ii->i_bh);
1041
+ ii->i_bh = *pbh;
1042
+ } else {
8821043 brelse(*pbh);
8831044 *pbh = ii->i_bh;
8841045 }
....@@ -927,9 +1088,9 @@
9271088 * This will happen when somebody is freeing
9281089 * this inode.
9291090 */
930
- nilfs_msg(inode->i_sb, KERN_WARNING,
931
- "cannot set file dirty (ino=%lu): the file is being freed",
932
- inode->i_ino);
1091
+ nilfs_warn(inode->i_sb,
1092
+ "cannot set file dirty (ino=%lu): the file is being freed",
1093
+ inode->i_ino);
9331094 spin_unlock(&nilfs->ns_inode_lock);
9341095 return -EINVAL; /*
9351096 * NILFS_I_DIRTY may remain for
....@@ -945,14 +1106,22 @@
9451106
9461107 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
9471108 {
1109
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
9481110 struct buffer_head *ibh;
9491111 int err;
9501112
1113
+ /*
1114
+ * Do not dirty inodes after the log writer has been detached
1115
+ * and its nilfs_root struct has been freed.
1116
+ */
1117
+ if (unlikely(nilfs_purging(nilfs)))
1118
+ return 0;
1119
+
9511120 err = nilfs_load_inode_block(inode, &ibh);
9521121 if (unlikely(err)) {
953
- nilfs_msg(inode->i_sb, KERN_WARNING,
954
- "cannot mark inode dirty (ino=%lu): error %d loading inode block",
955
- inode->i_ino, err);
1122
+ nilfs_warn(inode->i_sb,
1123
+ "cannot mark inode dirty (ino=%lu): error %d loading inode block",
1124
+ inode->i_ino, err);
9561125 return err;
9571126 }
9581127 nilfs_update_inode(inode, ibh, flags);
....@@ -978,8 +1147,8 @@
9781147 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
9791148
9801149 if (is_bad_inode(inode)) {
981
- nilfs_msg(inode->i_sb, KERN_WARNING,
982
- "tried to mark bad_inode dirty. ignored.");
1150
+ nilfs_warn(inode->i_sb,
1151
+ "tried to mark bad_inode dirty. ignored.");
9831152 dump_stack();
9841153 return;
9851154 }
....@@ -1005,7 +1174,7 @@
10051174 unsigned int blkbits = inode->i_blkbits;
10061175 int ret, n;
10071176
1008
- ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1177
+ ret = fiemap_prep(inode, fieinfo, start, &len, 0);
10091178 if (ret)
10101179 return ret;
10111180