hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/btrfs/btrfs_inode.h
....@@ -7,6 +7,7 @@
77 #define BTRFS_INODE_H
88
99 #include <linux/hash.h>
10
+#include <linux/refcount.h>
1011 #include "extent_map.h"
1112 #include "extent_io.h"
1213 #include "ordered-data.h"
....@@ -20,16 +21,36 @@
2021 * new data the application may have written before commit.
2122 */
2223 enum {
23
- BTRFS_INODE_ORDERED_DATA_CLOSE = 0,
24
+ BTRFS_INODE_FLUSH_ON_CLOSE,
2425 BTRFS_INODE_DUMMY,
2526 BTRFS_INODE_IN_DEFRAG,
2627 BTRFS_INODE_HAS_ASYNC_EXTENT,
28
+ /*
29
+ * Always set under the VFS' inode lock, otherwise it can cause races
30
+ * during fsync (we start as a fast fsync and then end up in a full
31
+ * fsync racing with ordered extent completion).
32
+ */
2733 BTRFS_INODE_NEEDS_FULL_SYNC,
2834 BTRFS_INODE_COPY_EVERYTHING,
2935 BTRFS_INODE_IN_DELALLOC_LIST,
30
- BTRFS_INODE_READDIO_NEED_LOCK,
3136 BTRFS_INODE_HAS_PROPS,
3237 BTRFS_INODE_SNAPSHOT_FLUSH,
38
+ /*
39
+ * Set and used when logging an inode and it serves to signal that an
40
+ * inode does not have xattrs, so subsequent fsyncs can avoid searching
41
+ * for xattrs to log. This bit must be cleared whenever an xattr is added
42
+ * to an inode.
43
+ */
44
+ BTRFS_INODE_NO_XATTRS,
45
+ /*
46
+ * Set when we are in a context where we need to start a transaction and
47
+ * have dirty pages with the respective file range locked. This is to
48
+ * ensure that when reserving space for the transaction, if we are low
49
+ * on available space and need to flush delalloc, we will not flush
50
+ * delalloc for this inode, because that could result in a deadlock (on
51
+ * the file range, inode's io_tree).
52
+ */
53
+ BTRFS_INODE_NO_DELALLOC_FLUSH,
3354 };
3455
3556 /* in memory btrfs inode */
....@@ -60,11 +81,14 @@
6081 */
6182 struct extent_io_tree io_failure_tree;
6283
84
+ /*
85
+ * Keep track of where the inode has extent items mapped in order to
86
+ * make sure the i_size adjustments are accurate.
87
+ */
88
+ struct extent_io_tree file_extent_tree;
89
+
6390 /* held while logging the inode in tree-log.c */
6491 struct mutex log_mutex;
65
-
66
- /* held while doing delalloc reservations */
67
- struct mutex delalloc_mutex;
6892
6993 /* used to order data wrt metadata */
7094 struct btrfs_ordered_inode_tree ordered_tree;
....@@ -148,10 +172,15 @@
148172 u64 last_unlink_trans;
149173
150174 /*
151
- * Track the transaction id of the last transaction used to create a
152
- * hard link for the inode. This is used by the log tree (fsync).
175
+ * The id/generation of the last transaction where this inode was
176
+ * either the source or the destination of a clone/dedupe operation.
177
+ * Used when logging an inode to know if there are shared extents that
178
+ * need special care when logging checksum items, to avoid duplicate
179
+ * checksum items in a log (which can lead to a corruption where we end
180
+ * up with missing checksum ranges after log replay).
181
+ * Protected by the vfs inode lock.
153182 */
154
- u64 last_link_trans;
183
+ u64 last_reflink_trans;
155184
156185 /*
157186 * Number of bytes outstanding that are going to need csums. This is
....@@ -203,7 +232,10 @@
203232 struct inode vfs_inode;
204233 };
205234
206
-extern unsigned char btrfs_filetype_table[];
235
+static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode)
236
+{
237
+ return inode->root->fs_info->sectorsize;
238
+}
207239
208240 static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
209241 {
....@@ -213,7 +245,7 @@
213245 static inline unsigned long btrfs_inode_hash(u64 objectid,
214246 const struct btrfs_root *root)
215247 {
216
- u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
248
+ u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME);
217249
218250 #if BITS_PER_LONG == 32
219251 h = (h >> 32) ^ (h & 0xffffffff);
....@@ -260,6 +292,11 @@
260292 return false;
261293 }
262294
295
+static inline bool is_data_inode(struct inode *inode)
296
+{
297
+ return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID;
298
+}
299
+
263300 static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
264301 int mod)
265302 {
....@@ -269,6 +306,21 @@
269306 return;
270307 trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
271308 mod);
309
+}
310
+
311
+/*
312
+ * Called every time after doing a buffered, direct IO or memory mapped write.
313
+ *
314
+ * This is to ensure that if we write to a file that was previously fsynced in
315
+ * the current transaction, then try to fsync it again in the same transaction,
316
+ * we will know that there were changes in the file and that it needs to be
317
+ * logged.
318
+ */
319
+static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
320
+{
321
+ spin_lock(&inode->lock);
322
+ inode->last_sub_trans = inode->root->log_transid;
323
+ spin_unlock(&inode->lock);
272324 }
273325
274326 static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
....@@ -293,69 +345,53 @@
293345 return ret;
294346 }
295347
296
-#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
297
-
298348 struct btrfs_dio_private {
299349 struct inode *inode;
300
- unsigned long flags;
301350 u64 logical_offset;
302351 u64 disk_bytenr;
303352 u64 bytes;
304
- void *private;
305353
306
- /* number of bios pending for this dio */
307
- atomic_t pending_bios;
308
-
309
- /* IO errors */
310
- int errors;
311
-
312
- /* orig_bio is our btrfs_io_bio */
313
- struct bio *orig_bio;
354
+ /*
355
+ * References to this structure. There is one reference per in-flight
356
+ * bio plus one while we're still setting up.
357
+ */
358
+ refcount_t refs;
314359
315360 /* dio_bio came from fs/direct-io.c */
316361 struct bio *dio_bio;
317362
318
- /*
319
- * The original bio may be split to several sub-bios, this is
320
- * done during endio of sub-bios
321
- */
322
- blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
323
- blk_status_t);
363
+ /* Array of checksums */
364
+ u8 csums[];
324365 };
325366
326
-/*
327
- * Disable DIO read nolock optimization, so new dio readers will be forced
328
- * to grab i_mutex. It is used to avoid the endless truncate due to
329
- * nonlocked dio read.
330
- */
331
-static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
332
-{
333
- set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
334
- smp_mb();
335
-}
336
-
337
-static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
338
-{
339
- smp_mb__before_atomic();
340
- clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
341
-}
367
+/* Array of bytes with variable length, hexadecimal format 0x1234 */
368
+#define CSUM_FMT "0x%*phN"
369
+#define CSUM_FMT_VALUE(size, bytes) size, bytes
342370
343371 static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
344
- u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
372
+ u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
345373 {
346374 struct btrfs_root *root = inode->root;
375
+ struct btrfs_super_block *sb = root->fs_info->super_copy;
376
+ const u16 csum_size = btrfs_super_csum_size(sb);
347377
348378 /* Print large objectids as signed (negative) values, which is more meaningful */
349
- if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
379
+ if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
350380 btrfs_warn_rl(root->fs_info,
351
- "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
352
- root->objectid, btrfs_ino(inode),
353
- logical_start, csum, csum_expected, mirror_num);
381
+"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
382
+ root->root_key.objectid, btrfs_ino(inode),
383
+ logical_start,
384
+ CSUM_FMT_VALUE(csum_size, csum),
385
+ CSUM_FMT_VALUE(csum_size, csum_expected),
386
+ mirror_num);
354387 else
355388 btrfs_warn_rl(root->fs_info,
356
- "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
357
- root->objectid, btrfs_ino(inode),
358
- logical_start, csum, csum_expected, mirror_num);
389
+"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
390
+ root->root_key.objectid, btrfs_ino(inode),
391
+ logical_start,
392
+ CSUM_FMT_VALUE(csum_size, csum),
393
+ CSUM_FMT_VALUE(csum_size, csum_expected),
394
+ mirror_num);
359395 }
360396
361397 #endif