| .. | .. |
|---|
| 7 | 7 | #define BTRFS_INODE_H |
|---|
| 8 | 8 | |
|---|
| 9 | 9 | #include <linux/hash.h> |
|---|
| 10 | +#include <linux/refcount.h> |
|---|
| 10 | 11 | #include "extent_map.h" |
|---|
| 11 | 12 | #include "extent_io.h" |
|---|
| 12 | 13 | #include "ordered-data.h" |
|---|
| .. | .. |
|---|
| 20 | 21 | * new data the application may have written before commit. |
|---|
| 21 | 22 | */ |
|---|
| 22 | 23 | enum { |
|---|
| 23 | | - BTRFS_INODE_ORDERED_DATA_CLOSE = 0, |
|---|
| 24 | + BTRFS_INODE_FLUSH_ON_CLOSE, |
|---|
| 24 | 25 | BTRFS_INODE_DUMMY, |
|---|
| 25 | 26 | BTRFS_INODE_IN_DEFRAG, |
|---|
| 26 | 27 | BTRFS_INODE_HAS_ASYNC_EXTENT, |
|---|
| 28 | + /* |
|---|
| 29 | + * Always set under the VFS' inode lock, otherwise it can cause races |
|---|
| 30 | + * during fsync (we start as a fast fsync and then end up in a full |
|---|
| 31 | + * fsync racing with ordered extent completion). |
|---|
| 32 | + */ |
|---|
| 27 | 33 | BTRFS_INODE_NEEDS_FULL_SYNC, |
|---|
| 28 | 34 | BTRFS_INODE_COPY_EVERYTHING, |
|---|
| 29 | 35 | BTRFS_INODE_IN_DELALLOC_LIST, |
|---|
| 30 | | - BTRFS_INODE_READDIO_NEED_LOCK, |
|---|
| 31 | 36 | BTRFS_INODE_HAS_PROPS, |
|---|
| 32 | 37 | BTRFS_INODE_SNAPSHOT_FLUSH, |
|---|
| 38 | + /* |
|---|
| 39 | + * Set and used when logging an inode and it serves to signal that an |
|---|
| 40 | + * inode does not have xattrs, so subsequent fsyncs can avoid searching |
|---|
| 41 | + * for xattrs to log. This bit must be cleared whenever an xattr is added |
|---|
| 42 | + * to an inode. |
|---|
| 43 | + */ |
|---|
| 44 | + BTRFS_INODE_NO_XATTRS, |
|---|
| 45 | + /* |
|---|
| 46 | + * Set when we are in a context where we need to start a transaction and |
|---|
| 47 | + * have dirty pages with the respective file range locked. This is to |
|---|
| 48 | + * ensure that when reserving space for the transaction, if we are low |
|---|
| 49 | + * on available space and need to flush delalloc, we will not flush |
|---|
| 50 | + * delalloc for this inode, because that could result in a deadlock (on |
|---|
| 51 | + * the file range, inode's io_tree). |
|---|
| 52 | + */ |
|---|
| 53 | + BTRFS_INODE_NO_DELALLOC_FLUSH, |
|---|
| 33 | 54 | }; |
|---|
| 34 | 55 | |
|---|
| 35 | 56 | /* in memory btrfs inode */ |
|---|
| .. | .. |
|---|
| 60 | 81 | */ |
|---|
| 61 | 82 | struct extent_io_tree io_failure_tree; |
|---|
| 62 | 83 | |
|---|
| 84 | + /* |
|---|
| 85 | + * Keep track of where the inode has extent items mapped in order to |
|---|
| 86 | + * make sure the i_size adjustments are accurate |
|---|
| 87 | + */ |
|---|
| 88 | + struct extent_io_tree file_extent_tree; |
|---|
| 89 | + |
|---|
| 63 | 90 | /* held while logging the inode in tree-log.c */ |
|---|
| 64 | 91 | struct mutex log_mutex; |
|---|
| 65 | | - |
|---|
| 66 | | - /* held while doing delalloc reservations */ |
|---|
| 67 | | - struct mutex delalloc_mutex; |
|---|
| 68 | 92 | |
|---|
| 69 | 93 | /* used to order data wrt metadata */ |
|---|
| 70 | 94 | struct btrfs_ordered_inode_tree ordered_tree; |
|---|
| .. | .. |
|---|
| 148 | 172 | u64 last_unlink_trans; |
|---|
| 149 | 173 | |
|---|
| 150 | 174 | /* |
|---|
| 151 | | - * Track the transaction id of the last transaction used to create a |
|---|
| 152 | | - * hard link for the inode. This is used by the log tree (fsync). |
|---|
| 175 | + * The id/generation of the last transaction where this inode was |
|---|
| 176 | + * either the source or the destination of a clone/dedupe operation. |
|---|
| 177 | + * Used when logging an inode to know if there are shared extents that |
|---|
| 178 | + * need special care when logging checksum items, to avoid duplicate |
|---|
| 179 | + * checksum items in a log (which can lead to a corruption where we end |
|---|
| 180 | + * up with missing checksum ranges after log replay). |
|---|
| 181 | + * Protected by the vfs inode lock. |
|---|
| 153 | 182 | */ |
|---|
| 154 | | - u64 last_link_trans; |
|---|
| 183 | + u64 last_reflink_trans; |
|---|
| 155 | 184 | |
|---|
| 156 | 185 | /* |
|---|
| 157 | 186 | * Number of bytes outstanding that are going to need csums. This is |
|---|
| .. | .. |
|---|
| 203 | 232 | struct inode vfs_inode; |
|---|
| 204 | 233 | }; |
|---|
| 205 | 234 | |
|---|
| 206 | | -extern unsigned char btrfs_filetype_table[]; |
|---|
| 235 | +static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode) |
|---|
| 236 | +{ |
|---|
| 237 | + return inode->root->fs_info->sectorsize; |
|---|
| 238 | +} |
|---|
| 207 | 239 | |
|---|
| 208 | 240 | static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) |
|---|
| 209 | 241 | { |
|---|
| .. | .. |
|---|
| 213 | 245 | static inline unsigned long btrfs_inode_hash(u64 objectid, |
|---|
| 214 | 246 | const struct btrfs_root *root) |
|---|
| 215 | 247 | { |
|---|
| 216 | | - u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME); |
|---|
| 248 | + u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME); |
|---|
| 217 | 249 | |
|---|
| 218 | 250 | #if BITS_PER_LONG == 32 |
|---|
| 219 | 251 | h = (h >> 32) ^ (h & 0xffffffff); |
|---|
| .. | .. |
|---|
| 260 | 292 | return false; |
|---|
| 261 | 293 | } |
|---|
| 262 | 294 | |
|---|
| 295 | +static inline bool is_data_inode(struct inode *inode) |
|---|
| 296 | +{ |
|---|
| 297 | + return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID; |
|---|
| 298 | +} |
|---|
| 299 | + |
|---|
| 263 | 300 | static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, |
|---|
| 264 | 301 | int mod) |
|---|
| 265 | 302 | { |
|---|
| .. | .. |
|---|
| 269 | 306 | return; |
|---|
| 270 | 307 | trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode), |
|---|
| 271 | 308 | mod); |
|---|
| 309 | +} |
|---|
| 310 | + |
|---|
| 311 | +/* |
|---|
| 312 | + * Called after every buffered, direct IO or memory mapped write. |
|---|
| 313 | + * |
|---|
| 314 | + * This is to ensure that if we write to a file that was previously fsynced in |
|---|
| 315 | + * the current transaction, then try to fsync it again in the same transaction, |
|---|
| 316 | + * we will know that there were changes in the file and that it needs to be |
|---|
| 317 | + * logged. |
|---|
| 318 | + */ |
|---|
| 319 | +static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) |
|---|
| 320 | +{ |
|---|
| 321 | + spin_lock(&inode->lock); |
|---|
| 322 | + inode->last_sub_trans = inode->root->log_transid; |
|---|
| 323 | + spin_unlock(&inode->lock); |
|---|
| 272 | 324 | } |
|---|
| 273 | 325 | |
|---|
| 274 | 326 | static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) |
|---|
| .. | .. |
|---|
| 293 | 345 | return ret; |
|---|
| 294 | 346 | } |
|---|
| 295 | 347 | |
|---|
| 296 | | -#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1 |
|---|
| 297 | | - |
|---|
| 298 | 348 | struct btrfs_dio_private { |
|---|
| 299 | 349 | struct inode *inode; |
|---|
| 300 | | - unsigned long flags; |
|---|
| 301 | 350 | u64 logical_offset; |
|---|
| 302 | 351 | u64 disk_bytenr; |
|---|
| 303 | 352 | u64 bytes; |
|---|
| 304 | | - void *private; |
|---|
| 305 | 353 | |
|---|
| 306 | | - /* number of bios pending for this dio */ |
|---|
| 307 | | - atomic_t pending_bios; |
|---|
| 308 | | - |
|---|
| 309 | | - /* IO errors */ |
|---|
| 310 | | - int errors; |
|---|
| 311 | | - |
|---|
| 312 | | - /* orig_bio is our btrfs_io_bio */ |
|---|
| 313 | | - struct bio *orig_bio; |
|---|
| 354 | + /* |
|---|
| 355 | + * References to this structure. There is one reference per in-flight |
|---|
| 356 | + * bio plus one while we're still setting up. |
|---|
| 357 | + */ |
|---|
| 358 | + refcount_t refs; |
|---|
| 314 | 359 | |
|---|
| 315 | 360 | /* dio_bio came from fs/direct-io.c */ |
|---|
| 316 | 361 | struct bio *dio_bio; |
|---|
| 317 | 362 | |
|---|
| 318 | | - /* |
|---|
| 319 | | - * The original bio may be split to several sub-bios, this is |
|---|
| 320 | | - * done during endio of sub-bios |
|---|
| 321 | | - */ |
|---|
| 322 | | - blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *, |
|---|
| 323 | | - blk_status_t); |
|---|
| 363 | + /* Array of checksums */ |
|---|
| 364 | + u8 csums[]; |
|---|
| 324 | 365 | }; |
|---|
| 325 | 366 | |
|---|
| 326 | | -/* |
|---|
| 327 | | - * Disable DIO read nolock optimization, so new dio readers will be forced |
|---|
| 328 | | - * to grab i_mutex. It is used to avoid the endless truncate due to |
|---|
| 329 | | - * nonlocked dio read. |
|---|
| 330 | | - */ |
|---|
| 331 | | -static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) |
|---|
| 332 | | -{ |
|---|
| 333 | | - set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); |
|---|
| 334 | | - smp_mb(); |
|---|
| 335 | | -} |
|---|
| 336 | | - |
|---|
| 337 | | -static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode) |
|---|
| 338 | | -{ |
|---|
| 339 | | - smp_mb__before_atomic(); |
|---|
| 340 | | - clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); |
|---|
| 341 | | -} |
|---|
| 367 | +/* Array of bytes with variable length, hexadecimal format 0x1234 */ |
|---|
| 368 | +#define CSUM_FMT "0x%*phN" |
|---|
| 369 | +#define CSUM_FMT_VALUE(size, bytes) size, bytes |
|---|
| 342 | 370 | |
|---|
| 343 | 371 | static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, |
|---|
| 344 | | - u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) |
|---|
| 372 | + u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) |
|---|
| 345 | 373 | { |
|---|
| 346 | 374 | struct btrfs_root *root = inode->root; |
|---|
| 375 | + struct btrfs_super_block *sb = root->fs_info->super_copy; |
|---|
| 376 | + const u16 csum_size = btrfs_super_csum_size(sb); |
|---|
| 347 | 377 | |
|---|
| 348 | 378 | /* Print such objectids as signed (negative) values, which is more meaningful */ |
|---|
| 349 | | - if (root->objectid >= BTRFS_LAST_FREE_OBJECTID) |
|---|
| 379 | + if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) |
|---|
| 350 | 380 | btrfs_warn_rl(root->fs_info, |
|---|
| 351 | | - "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", |
|---|
| 352 | | - root->objectid, btrfs_ino(inode), |
|---|
| 353 | | - logical_start, csum, csum_expected, mirror_num); |
|---|
| 381 | +"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", |
|---|
| 382 | + root->root_key.objectid, btrfs_ino(inode), |
|---|
| 383 | + logical_start, |
|---|
| 384 | + CSUM_FMT_VALUE(csum_size, csum), |
|---|
| 385 | + CSUM_FMT_VALUE(csum_size, csum_expected), |
|---|
| 386 | + mirror_num); |
|---|
| 354 | 387 | else |
|---|
| 355 | 388 | btrfs_warn_rl(root->fs_info, |
|---|
| 356 | | - "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", |
|---|
| 357 | | - root->objectid, btrfs_ino(inode), |
|---|
| 358 | | - logical_start, csum, csum_expected, mirror_num); |
|---|
| 389 | +"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", |
|---|
| 390 | + root->root_key.objectid, btrfs_ino(inode), |
|---|
| 391 | + logical_start, |
|---|
| 392 | + CSUM_FMT_VALUE(csum_size, csum), |
|---|
| 393 | + CSUM_FMT_VALUE(csum_size, csum_expected), |
|---|
| 394 | + mirror_num); |
|---|
| 359 | 395 | } |
|---|
| 360 | 396 | |
|---|
| 361 | 397 | #endif |
|---|