.. | .. |
---|
7 | 7 | #define BTRFS_INODE_H |
---|
8 | 8 | |
---|
9 | 9 | #include <linux/hash.h> |
---|
| 10 | +#include <linux/refcount.h> |
---|
10 | 11 | #include "extent_map.h" |
---|
11 | 12 | #include "extent_io.h" |
---|
12 | 13 | #include "ordered-data.h" |
---|
.. | .. |
---|
20 | 21 | * new data the application may have written before commit. |
---|
21 | 22 | */ |
---|
22 | 23 | enum { |
---|
23 | | - BTRFS_INODE_ORDERED_DATA_CLOSE = 0, |
---|
| 24 | + BTRFS_INODE_FLUSH_ON_CLOSE, |
---|
24 | 25 | BTRFS_INODE_DUMMY, |
---|
25 | 26 | BTRFS_INODE_IN_DEFRAG, |
---|
26 | 27 | BTRFS_INODE_HAS_ASYNC_EXTENT, |
---|
| 28 | + /* |
---|
| 29 | + * Always set under the VFS' inode lock, otherwise it can cause races |
---|
| 30 | + * during fsync (we start as a fast fsync and then end up in a full |
---|
| 31 | + * fsync racing with ordered extent completion). |
---|
| 32 | + */ |
---|
27 | 33 | BTRFS_INODE_NEEDS_FULL_SYNC, |
---|
28 | 34 | BTRFS_INODE_COPY_EVERYTHING, |
---|
29 | 35 | BTRFS_INODE_IN_DELALLOC_LIST, |
---|
30 | | - BTRFS_INODE_READDIO_NEED_LOCK, |
---|
31 | 36 | BTRFS_INODE_HAS_PROPS, |
---|
32 | 37 | BTRFS_INODE_SNAPSHOT_FLUSH, |
---|
| 38 | + /* |
---|
| 39 | + * Set and used when logging an inode and it serves to signal that an |
---|
| 40 | + * inode does not have xattrs, so subsequent fsyncs can avoid searching |
---|
| 41 | + * for xattrs to log. This bit must be cleared whenever a xattr is added |
---|
| 42 | + * to an inode. |
---|
| 43 | + */ |
---|
| 44 | + BTRFS_INODE_NO_XATTRS, |
---|
| 45 | + /* |
---|
| 46 | + * Set when we are in a context where we need to start a transaction and |
---|
| 47 | + * have dirty pages with the respective file range locked. This is to |
---|
| 48 | + * ensure that when reserving space for the transaction, if we are low |
---|
| 49 | + * on available space and need to flush delalloc, we will not flush |
---|
| 50 | + * delalloc for this inode, because that could result in a deadlock (on |
---|
| 51 | + * the file range, inode's io_tree). |
---|
| 52 | + */ |
---|
| 53 | + BTRFS_INODE_NO_DELALLOC_FLUSH, |
---|
33 | 54 | }; |
---|
34 | 55 | |
---|
35 | 56 | /* in memory btrfs inode */ |
---|
.. | .. |
---|
60 | 81 | */ |
---|
61 | 82 | struct extent_io_tree io_failure_tree; |
---|
62 | 83 | |
---|
| 84 | + /* |
---|
| 85 | + * Keep track of where the inode has extent items mapped in order to |
---|
| 86 | + * make sure the i_size adjustments are accurate |
---|
| 87 | + */ |
---|
| 88 | + struct extent_io_tree file_extent_tree; |
---|
| 89 | + |
---|
63 | 90 | /* held while logging the inode in tree-log.c */ |
---|
64 | 91 | struct mutex log_mutex; |
---|
65 | | - |
---|
66 | | - /* held while doing delalloc reservations */ |
---|
67 | | - struct mutex delalloc_mutex; |
---|
68 | 92 | |
---|
69 | 93 | /* used to order data wrt metadata */ |
---|
70 | 94 | struct btrfs_ordered_inode_tree ordered_tree; |
---|
.. | .. |
---|
148 | 172 | u64 last_unlink_trans; |
---|
149 | 173 | |
---|
150 | 174 | /* |
---|
151 | | - * Track the transaction id of the last transaction used to create a |
---|
152 | | - * hard link for the inode. This is used by the log tree (fsync). |
---|
| 175 | + * The id/generation of the last transaction where this inode was |
---|
| 176 | + * either the source or the destination of a clone/dedupe operation. |
---|
| 177 | + * Used when logging an inode to know if there are shared extents that |
---|
| 178 | + * need special care when logging checksum items, to avoid duplicate |
---|
| 179 | + * checksum items in a log (which can lead to a corruption where we end |
---|
| 180 | + * up with missing checksum ranges after log replay). |
---|
| 181 | + * Protected by the vfs inode lock. |
---|
153 | 182 | */ |
---|
154 | | - u64 last_link_trans; |
---|
| 183 | + u64 last_reflink_trans; |
---|
155 | 184 | |
---|
156 | 185 | /* |
---|
157 | 186 | * Number of bytes outstanding that are going to need csums. This is |
---|
.. | .. |
---|
203 | 232 | struct inode vfs_inode; |
---|
204 | 233 | }; |
---|
205 | 234 | |
---|
206 | | -extern unsigned char btrfs_filetype_table[]; |
---|
| 235 | +static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode) |
---|
| 236 | +{ |
---|
| 237 | + return inode->root->fs_info->sectorsize; |
---|
| 238 | +} |
---|
207 | 239 | |
---|
208 | 240 | static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) |
---|
209 | 241 | { |
---|
.. | .. |
---|
213 | 245 | static inline unsigned long btrfs_inode_hash(u64 objectid, |
---|
214 | 246 | const struct btrfs_root *root) |
---|
215 | 247 | { |
---|
216 | | - u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME); |
---|
| 248 | + u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME); |
---|
217 | 249 | |
---|
218 | 250 | #if BITS_PER_LONG == 32 |
---|
219 | 251 | h = (h >> 32) ^ (h & 0xffffffff); |
---|
.. | .. |
---|
260 | 292 | return false; |
---|
261 | 293 | } |
---|
262 | 294 | |
---|
| 295 | +static inline bool is_data_inode(struct inode *inode) |
---|
| 296 | +{ |
---|
| 297 | + return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID; |
---|
| 298 | +} |
---|
| 299 | + |
---|
263 | 300 | static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, |
---|
264 | 301 | int mod) |
---|
265 | 302 | { |
---|
.. | .. |
---|
269 | 306 | return; |
---|
270 | 307 | trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode), |
---|
271 | 308 | mod); |
---|
| 309 | +} |
---|
| 310 | + |
---|
| 311 | +/* |
---|
| 312 | + * Called every time after doing a buffered, direct IO or memory mapped write. |
---|
| 313 | + * |
---|
| 314 | + * This is to ensure that if we write to a file that was previously fsynced in |
---|
| 315 | + * the current transaction, then try to fsync it again in the same transaction, |
---|
| 316 | + * we will know that there were changes in the file and that it needs to be |
---|
| 317 | + * logged. |
---|
| 318 | + */ |
---|
| 319 | +static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) |
---|
| 320 | +{ |
---|
| 321 | + spin_lock(&inode->lock); |
---|
| 322 | + inode->last_sub_trans = inode->root->log_transid; |
---|
| 323 | + spin_unlock(&inode->lock); |
---|
272 | 324 | } |
---|
273 | 325 | |
---|
274 | 326 | static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) |
---|
.. | .. |
---|
293 | 345 | return ret; |
---|
294 | 346 | } |
---|
295 | 347 | |
---|
296 | | -#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1 |
---|
297 | | - |
---|
298 | 348 | struct btrfs_dio_private { |
---|
299 | 349 | struct inode *inode; |
---|
300 | | - unsigned long flags; |
---|
301 | 350 | u64 logical_offset; |
---|
302 | 351 | u64 disk_bytenr; |
---|
303 | 352 | u64 bytes; |
---|
304 | | - void *private; |
---|
305 | 353 | |
---|
306 | | - /* number of bios pending for this dio */ |
---|
307 | | - atomic_t pending_bios; |
---|
308 | | - |
---|
309 | | - /* IO errors */ |
---|
310 | | - int errors; |
---|
311 | | - |
---|
312 | | - /* orig_bio is our btrfs_io_bio */ |
---|
313 | | - struct bio *orig_bio; |
---|
| 354 | + /* |
---|
| 355 | + * References to this structure. There is one reference per in-flight |
---|
| 356 | + * bio plus one while we're still setting up. |
---|
| 357 | + */ |
---|
| 358 | + refcount_t refs; |
---|
314 | 359 | |
---|
315 | 360 | /* dio_bio came from fs/direct-io.c */ |
---|
316 | 361 | struct bio *dio_bio; |
---|
317 | 362 | |
---|
318 | | - /* |
---|
319 | | - * The original bio may be split to several sub-bios, this is |
---|
320 | | - * done during endio of sub-bios |
---|
321 | | - */ |
---|
322 | | - blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *, |
---|
323 | | - blk_status_t); |
---|
| 363 | + /* Array of checksums */ |
---|
| 364 | + u8 csums[]; |
---|
324 | 365 | }; |
---|
325 | 366 | |
---|
326 | | -/* |
---|
327 | | - * Disable DIO read nolock optimization, so new dio readers will be forced |
---|
328 | | - * to grab i_mutex. It is used to avoid the endless truncate due to |
---|
329 | | - * nonlocked dio read. |
---|
330 | | - */ |
---|
331 | | -static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) |
---|
332 | | -{ |
---|
333 | | - set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); |
---|
334 | | - smp_mb(); |
---|
335 | | -} |
---|
336 | | - |
---|
337 | | -static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode) |
---|
338 | | -{ |
---|
339 | | - smp_mb__before_atomic(); |
---|
340 | | - clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); |
---|
341 | | -} |
---|
| 367 | +/* Array of bytes with variable length, hexadecimal format 0x1234 */ |
---|
| 368 | +#define CSUM_FMT "0x%*phN" |
---|
| 369 | +#define CSUM_FMT_VALUE(size, bytes) size, bytes |
---|
342 | 370 | |
---|
343 | 371 | static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, |
---|
344 | | - u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) |
---|
| 372 | + u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) |
---|
345 | 373 | { |
---|
346 | 374 | struct btrfs_root *root = inode->root; |
---|
| 375 | + struct btrfs_super_block *sb = root->fs_info->super_copy; |
---|
| 376 | + const u16 csum_size = btrfs_super_csum_size(sb); |
---|
347 | 377 | |
---|
348 | 378 | /* Output minus objectid, which is more meaningful */ |
---|
349 | | - if (root->objectid >= BTRFS_LAST_FREE_OBJECTID) |
---|
| 379 | + if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) |
---|
350 | 380 | btrfs_warn_rl(root->fs_info, |
---|
351 | | - "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", |
---|
352 | | - root->objectid, btrfs_ino(inode), |
---|
353 | | - logical_start, csum, csum_expected, mirror_num); |
---|
| 381 | +"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", |
---|
| 382 | + root->root_key.objectid, btrfs_ino(inode), |
---|
| 383 | + logical_start, |
---|
| 384 | + CSUM_FMT_VALUE(csum_size, csum), |
---|
| 385 | + CSUM_FMT_VALUE(csum_size, csum_expected), |
---|
| 386 | + mirror_num); |
---|
354 | 387 | else |
---|
355 | 388 | btrfs_warn_rl(root->fs_info, |
---|
356 | | - "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", |
---|
357 | | - root->objectid, btrfs_ino(inode), |
---|
358 | | - logical_start, csum, csum_expected, mirror_num); |
---|
| 389 | +"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", |
---|
| 390 | + root->root_key.objectid, btrfs_ino(inode), |
---|
| 391 | + logical_start, |
---|
| 392 | + CSUM_FMT_VALUE(csum_size, csum), |
---|
| 393 | + CSUM_FMT_VALUE(csum_size, csum_expected), |
---|
| 394 | + mirror_num); |
---|
359 | 395 | } |
---|
360 | 396 | |
---|
361 | 397 | #endif |
---|