| .. | .. |
|---|
| 28 | 28 | #include <linux/iversion.h> |
|---|
| 29 | 29 | #include "ctree.h" |
|---|
| 30 | 30 | #include "disk-io.h" |
|---|
| 31 | +#include "export.h" |
|---|
| 31 | 32 | #include "transaction.h" |
|---|
| 32 | 33 | #include "btrfs_inode.h" |
|---|
| 33 | 34 | #include "print-tree.h" |
|---|
| .. | .. |
|---|
| 43 | 44 | #include "qgroup.h" |
|---|
| 44 | 45 | #include "tree-log.h" |
|---|
| 45 | 46 | #include "compression.h" |
|---|
| 47 | +#include "space-info.h" |
|---|
| 48 | +#include "delalloc-space.h" |
|---|
| 49 | +#include "block-group.h" |
|---|
| 46 | 50 | |
|---|
| 47 | 51 | #ifdef CONFIG_64BIT |
|---|
| 48 | 52 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI |
|---|
| .. | .. |
|---|
| 82 | 86 | #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \ |
|---|
| 83 | 87 | struct btrfs_ioctl_send_args_32) |
|---|
| 84 | 88 | #endif |
|---|
| 85 | | - |
|---|
| 86 | | -static int btrfs_clone(struct inode *src, struct inode *inode, |
|---|
| 87 | | - u64 off, u64 olen, u64 olen_aligned, u64 destoff, |
|---|
| 88 | | - int no_time_update); |
|---|
| 89 | 89 | |
|---|
| 90 | 90 | /* Mask out flags that are inappropriate for the given type of inode. */ |
|---|
| 91 | 91 | static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode, |
|---|
| .. | .. |
|---|
| 164 | 164 | return 0; |
|---|
| 165 | 165 | } |
|---|
| 166 | 166 | |
|---|
| 167 | | -/* Check if @flags are a supported and valid set of FS_*_FL flags */ |
|---|
| 168 | | -static int check_fsflags(unsigned int flags) |
|---|
| 167 | +/* |
|---|
| 168 | + * Check if @flags are a supported and valid set of FS_*_FL flags and that |
|---|
| 169 | + * the old and new flags are not conflicting |
|---|
| 170 | + */ |
|---|
| 171 | +static int check_fsflags(unsigned int old_flags, unsigned int flags) |
|---|
| 169 | 172 | { |
|---|
| 170 | 173 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ |
|---|
| 171 | 174 | FS_NOATIME_FL | FS_NODUMP_FL | \ |
|---|
| .. | .. |
|---|
| 174 | 177 | FS_NOCOW_FL)) |
|---|
| 175 | 178 | return -EOPNOTSUPP; |
|---|
| 176 | 179 | |
|---|
| 180 | + /* COMPR and NOCOMP on new/old are valid */ |
|---|
| 177 | 181 | if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) |
|---|
| 182 | + return -EINVAL; |
|---|
| 183 | + |
|---|
| 184 | + if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL)) |
|---|
| 185 | + return -EINVAL; |
|---|
| 186 | + |
|---|
| 187 | + /* NOCOW and compression options are mutually exclusive */ |
|---|
| 188 | + if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL))) |
|---|
| 189 | + return -EINVAL; |
|---|
| 190 | + if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL))) |
|---|
| 178 | 191 | return -EINVAL; |
|---|
| 179 | 192 | |
|---|
| 180 | 193 | return 0; |
|---|
| .. | .. |
|---|
| 189 | 202 | struct btrfs_trans_handle *trans; |
|---|
| 190 | 203 | unsigned int fsflags, old_fsflags; |
|---|
| 191 | 204 | int ret; |
|---|
| 192 | | - u64 old_flags; |
|---|
| 193 | | - unsigned int old_i_flags; |
|---|
| 194 | | - umode_t mode; |
|---|
| 205 | + const char *comp = NULL; |
|---|
| 206 | + u32 binode_flags; |
|---|
| 195 | 207 | |
|---|
| 196 | 208 | if (!inode_owner_or_capable(inode)) |
|---|
| 197 | 209 | return -EPERM; |
|---|
| .. | .. |
|---|
| 202 | 214 | if (copy_from_user(&fsflags, arg, sizeof(fsflags))) |
|---|
| 203 | 215 | return -EFAULT; |
|---|
| 204 | 216 | |
|---|
| 205 | | - ret = check_fsflags(fsflags); |
|---|
| 206 | | - if (ret) |
|---|
| 207 | | - return ret; |
|---|
| 208 | | - |
|---|
| 209 | 217 | ret = mnt_want_write_file(file); |
|---|
| 210 | 218 | if (ret) |
|---|
| 211 | 219 | return ret; |
|---|
| 212 | 220 | |
|---|
| 213 | 221 | inode_lock(inode); |
|---|
| 214 | | - |
|---|
| 215 | | - old_flags = binode->flags; |
|---|
| 216 | | - old_i_flags = inode->i_flags; |
|---|
| 217 | | - mode = inode->i_mode; |
|---|
| 218 | | - |
|---|
| 219 | 222 | fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); |
|---|
| 220 | 223 | old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); |
|---|
| 221 | | - if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { |
|---|
| 222 | | - if (!capable(CAP_LINUX_IMMUTABLE)) { |
|---|
| 223 | | - ret = -EPERM; |
|---|
| 224 | | - goto out_unlock; |
|---|
| 225 | | - } |
|---|
| 226 | | - } |
|---|
| 227 | 224 | |
|---|
| 225 | + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); |
|---|
| 226 | + if (ret) |
|---|
| 227 | + goto out_unlock; |
|---|
| 228 | + |
|---|
| 229 | + ret = check_fsflags(old_fsflags, fsflags); |
|---|
| 230 | + if (ret) |
|---|
| 231 | + goto out_unlock; |
|---|
| 232 | + |
|---|
| 233 | + binode_flags = binode->flags; |
|---|
| 228 | 234 | if (fsflags & FS_SYNC_FL) |
|---|
| 229 | | - binode->flags |= BTRFS_INODE_SYNC; |
|---|
| 235 | + binode_flags |= BTRFS_INODE_SYNC; |
|---|
| 230 | 236 | else |
|---|
| 231 | | - binode->flags &= ~BTRFS_INODE_SYNC; |
|---|
| 237 | + binode_flags &= ~BTRFS_INODE_SYNC; |
|---|
| 232 | 238 | if (fsflags & FS_IMMUTABLE_FL) |
|---|
| 233 | | - binode->flags |= BTRFS_INODE_IMMUTABLE; |
|---|
| 239 | + binode_flags |= BTRFS_INODE_IMMUTABLE; |
|---|
| 234 | 240 | else |
|---|
| 235 | | - binode->flags &= ~BTRFS_INODE_IMMUTABLE; |
|---|
| 241 | + binode_flags &= ~BTRFS_INODE_IMMUTABLE; |
|---|
| 236 | 242 | if (fsflags & FS_APPEND_FL) |
|---|
| 237 | | - binode->flags |= BTRFS_INODE_APPEND; |
|---|
| 243 | + binode_flags |= BTRFS_INODE_APPEND; |
|---|
| 238 | 244 | else |
|---|
| 239 | | - binode->flags &= ~BTRFS_INODE_APPEND; |
|---|
| 245 | + binode_flags &= ~BTRFS_INODE_APPEND; |
|---|
| 240 | 246 | if (fsflags & FS_NODUMP_FL) |
|---|
| 241 | | - binode->flags |= BTRFS_INODE_NODUMP; |
|---|
| 247 | + binode_flags |= BTRFS_INODE_NODUMP; |
|---|
| 242 | 248 | else |
|---|
| 243 | | - binode->flags &= ~BTRFS_INODE_NODUMP; |
|---|
| 249 | + binode_flags &= ~BTRFS_INODE_NODUMP; |
|---|
| 244 | 250 | if (fsflags & FS_NOATIME_FL) |
|---|
| 245 | | - binode->flags |= BTRFS_INODE_NOATIME; |
|---|
| 251 | + binode_flags |= BTRFS_INODE_NOATIME; |
|---|
| 246 | 252 | else |
|---|
| 247 | | - binode->flags &= ~BTRFS_INODE_NOATIME; |
|---|
| 253 | + binode_flags &= ~BTRFS_INODE_NOATIME; |
|---|
| 248 | 254 | if (fsflags & FS_DIRSYNC_FL) |
|---|
| 249 | | - binode->flags |= BTRFS_INODE_DIRSYNC; |
|---|
| 255 | + binode_flags |= BTRFS_INODE_DIRSYNC; |
|---|
| 250 | 256 | else |
|---|
| 251 | | - binode->flags &= ~BTRFS_INODE_DIRSYNC; |
|---|
| 257 | + binode_flags &= ~BTRFS_INODE_DIRSYNC; |
|---|
| 252 | 258 | if (fsflags & FS_NOCOW_FL) { |
|---|
| 253 | | - if (S_ISREG(mode)) { |
|---|
| 259 | + if (S_ISREG(inode->i_mode)) { |
|---|
| 254 | 260 | /* |
|---|
| 255 | 261 | * It's safe to turn csums off here, no extents exist. |
|---|
| 256 | 262 | * Otherwise we want the flag to reflect the real COW |
|---|
| 257 | 263 | * status of the file and will not set it. |
|---|
| 258 | 264 | */ |
|---|
| 259 | 265 | if (inode->i_size == 0) |
|---|
| 260 | | - binode->flags |= BTRFS_INODE_NODATACOW |
|---|
| 261 | | - | BTRFS_INODE_NODATASUM; |
|---|
| 266 | + binode_flags |= BTRFS_INODE_NODATACOW | |
|---|
| 267 | + BTRFS_INODE_NODATASUM; |
|---|
| 262 | 268 | } else { |
|---|
| 263 | | - binode->flags |= BTRFS_INODE_NODATACOW; |
|---|
| 269 | + binode_flags |= BTRFS_INODE_NODATACOW; |
|---|
| 264 | 270 | } |
|---|
| 265 | 271 | } else { |
|---|
| 266 | 272 | /* |
|---|
| 267 | 273 | * Revert back under same assumptions as above |
|---|
| 268 | 274 | */ |
|---|
| 269 | | - if (S_ISREG(mode)) { |
|---|
| 275 | + if (S_ISREG(inode->i_mode)) { |
|---|
| 270 | 276 | if (inode->i_size == 0) |
|---|
| 271 | | - binode->flags &= ~(BTRFS_INODE_NODATACOW |
|---|
| 272 | | - | BTRFS_INODE_NODATASUM); |
|---|
| 277 | + binode_flags &= ~(BTRFS_INODE_NODATACOW | |
|---|
| 278 | + BTRFS_INODE_NODATASUM); |
|---|
| 273 | 279 | } else { |
|---|
| 274 | | - binode->flags &= ~BTRFS_INODE_NODATACOW; |
|---|
| 280 | + binode_flags &= ~BTRFS_INODE_NODATACOW; |
|---|
| 275 | 281 | } |
|---|
| 276 | 282 | } |
|---|
| 277 | 283 | |
|---|
| .. | .. |
|---|
| 281 | 287 | * things smaller. |
|---|
| 282 | 288 | */ |
|---|
| 283 | 289 | if (fsflags & FS_NOCOMP_FL) { |
|---|
| 284 | | - binode->flags &= ~BTRFS_INODE_COMPRESS; |
|---|
| 285 | | - binode->flags |= BTRFS_INODE_NOCOMPRESS; |
|---|
| 286 | | - |
|---|
| 287 | | - ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); |
|---|
| 288 | | - if (ret && ret != -ENODATA) |
|---|
| 289 | | - goto out_drop; |
|---|
| 290 | + binode_flags &= ~BTRFS_INODE_COMPRESS; |
|---|
| 291 | + binode_flags |= BTRFS_INODE_NOCOMPRESS; |
|---|
| 290 | 292 | } else if (fsflags & FS_COMPR_FL) { |
|---|
| 291 | | - const char *comp; |
|---|
| 292 | 293 | |
|---|
| 293 | | - binode->flags |= BTRFS_INODE_COMPRESS; |
|---|
| 294 | | - binode->flags &= ~BTRFS_INODE_NOCOMPRESS; |
|---|
| 294 | + if (IS_SWAPFILE(inode)) { |
|---|
| 295 | + ret = -ETXTBSY; |
|---|
| 296 | + goto out_unlock; |
|---|
| 297 | + } |
|---|
| 298 | + |
|---|
| 299 | + binode_flags |= BTRFS_INODE_COMPRESS; |
|---|
| 300 | + binode_flags &= ~BTRFS_INODE_NOCOMPRESS; |
|---|
| 295 | 301 | |
|---|
| 296 | 302 | comp = btrfs_compress_type2str(fs_info->compress_type); |
|---|
| 297 | 303 | if (!comp || comp[0] == 0) |
|---|
| 298 | 304 | comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); |
|---|
| 299 | | - |
|---|
| 300 | | - ret = btrfs_set_prop(inode, "btrfs.compression", |
|---|
| 301 | | - comp, strlen(comp), 0); |
|---|
| 302 | | - if (ret) |
|---|
| 303 | | - goto out_drop; |
|---|
| 304 | | - |
|---|
| 305 | 305 | } else { |
|---|
| 306 | | - ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); |
|---|
| 307 | | - if (ret && ret != -ENODATA) |
|---|
| 308 | | - goto out_drop; |
|---|
| 309 | | - binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
|---|
| 306 | + binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
|---|
| 310 | 307 | } |
|---|
| 311 | 308 | |
|---|
| 312 | | - trans = btrfs_start_transaction(root, 1); |
|---|
| 309 | + /* |
|---|
| 310 | + * 1 for inode item |
|---|
| 311 | + * 2 for properties |
|---|
| 312 | + */ |
|---|
| 313 | + trans = btrfs_start_transaction(root, 3); |
|---|
| 313 | 314 | if (IS_ERR(trans)) { |
|---|
| 314 | 315 | ret = PTR_ERR(trans); |
|---|
| 315 | | - goto out_drop; |
|---|
| 316 | + goto out_unlock; |
|---|
| 316 | 317 | } |
|---|
| 317 | 318 | |
|---|
| 319 | + if (comp) { |
|---|
| 320 | + ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp, |
|---|
| 321 | + strlen(comp), 0); |
|---|
| 322 | + if (ret) { |
|---|
| 323 | + btrfs_abort_transaction(trans, ret); |
|---|
| 324 | + goto out_end_trans; |
|---|
| 325 | + } |
|---|
| 326 | + } else { |
|---|
| 327 | + ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, |
|---|
| 328 | + 0, 0); |
|---|
| 329 | + if (ret && ret != -ENODATA) { |
|---|
| 330 | + btrfs_abort_transaction(trans, ret); |
|---|
| 331 | + goto out_end_trans; |
|---|
| 332 | + } |
|---|
| 333 | + } |
|---|
| 334 | + |
|---|
| 335 | + binode->flags = binode_flags; |
|---|
| 318 | 336 | btrfs_sync_inode_flags_to_i_flags(inode); |
|---|
| 319 | 337 | inode_inc_iversion(inode); |
|---|
| 320 | 338 | inode->i_ctime = current_time(inode); |
|---|
| 321 | 339 | ret = btrfs_update_inode(trans, root, inode); |
|---|
| 322 | 340 | |
|---|
| 341 | + out_end_trans: |
|---|
| 323 | 342 | btrfs_end_transaction(trans); |
|---|
| 324 | | - out_drop: |
|---|
| 325 | | - if (ret) { |
|---|
| 326 | | - binode->flags = old_flags; |
|---|
| 327 | | - inode->i_flags = old_i_flags; |
|---|
| 328 | | - } |
|---|
| 329 | | - |
|---|
| 330 | 343 | out_unlock: |
|---|
| 331 | 344 | inode_unlock(inode); |
|---|
| 332 | 345 | mnt_drop_write_file(file); |
|---|
| .. | .. |
|---|
| 365 | 378 | return 0; |
|---|
| 366 | 379 | } |
|---|
| 367 | 380 | |
|---|
| 381 | +bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, |
|---|
| 382 | + enum btrfs_exclusive_operation type) |
|---|
| 383 | +{ |
|---|
| 384 | + return !cmpxchg(&fs_info->exclusive_operation, BTRFS_EXCLOP_NONE, type); |
|---|
| 385 | +} |
|---|
| 386 | + |
|---|
| 387 | +void btrfs_exclop_finish(struct btrfs_fs_info *fs_info) |
|---|
| 388 | +{ |
|---|
| 389 | + WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE); |
|---|
| 390 | + sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation"); |
|---|
| 391 | +} |
|---|
| 392 | + |
|---|
| 368 | 393 | /* |
|---|
| 369 | 394 | * Set the xflags from the internal inode flags. The remaining items of fsxattr |
|---|
| 370 | 395 | * are zeroed. |
|---|
| .. | .. |
|---|
| 374 | 399 | struct btrfs_inode *binode = BTRFS_I(file_inode(file)); |
|---|
| 375 | 400 | struct fsxattr fa; |
|---|
| 376 | 401 | |
|---|
| 377 | | - memset(&fa, 0, sizeof(fa)); |
|---|
| 378 | | - fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags); |
|---|
| 379 | | - |
|---|
| 402 | + simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags)); |
|---|
| 380 | 403 | if (copy_to_user(arg, &fa, sizeof(fa))) |
|---|
| 381 | 404 | return -EFAULT; |
|---|
| 382 | 405 | |
|---|
| .. | .. |
|---|
| 389 | 412 | struct btrfs_inode *binode = BTRFS_I(inode); |
|---|
| 390 | 413 | struct btrfs_root *root = binode->root; |
|---|
| 391 | 414 | struct btrfs_trans_handle *trans; |
|---|
| 392 | | - struct fsxattr fa; |
|---|
| 415 | + struct fsxattr fa, old_fa; |
|---|
| 393 | 416 | unsigned old_flags; |
|---|
| 394 | 417 | unsigned old_i_flags; |
|---|
| 395 | 418 | int ret = 0; |
|---|
| .. | .. |
|---|
| 400 | 423 | if (btrfs_root_readonly(root)) |
|---|
| 401 | 424 | return -EROFS; |
|---|
| 402 | 425 | |
|---|
| 403 | | - memset(&fa, 0, sizeof(fa)); |
|---|
| 404 | 426 | if (copy_from_user(&fa, arg, sizeof(fa))) |
|---|
| 405 | 427 | return -EFAULT; |
|---|
| 406 | 428 | |
|---|
| .. | .. |
|---|
| 420 | 442 | old_flags = binode->flags; |
|---|
| 421 | 443 | old_i_flags = inode->i_flags; |
|---|
| 422 | 444 | |
|---|
| 423 | | - /* We need the capabilities to change append-only or immutable inode */ |
|---|
| 424 | | - if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) || |
|---|
| 425 | | - (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) && |
|---|
| 426 | | - !capable(CAP_LINUX_IMMUTABLE)) { |
|---|
| 427 | | - ret = -EPERM; |
|---|
| 445 | + simple_fill_fsxattr(&old_fa, |
|---|
| 446 | + btrfs_inode_flags_to_xflags(binode->flags)); |
|---|
| 447 | + ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); |
|---|
| 448 | + if (ret) |
|---|
| 428 | 449 | goto out_unlock; |
|---|
| 429 | | - } |
|---|
| 430 | 450 | |
|---|
| 431 | 451 | if (fa.fsx_xflags & FS_XFLAG_SYNC) |
|---|
| 432 | 452 | binode->flags |= BTRFS_INODE_SYNC; |
|---|
| .. | .. |
|---|
| 482 | 502 | return put_user(inode->i_generation, arg); |
|---|
| 483 | 503 | } |
|---|
| 484 | 504 | |
|---|
| 485 | | -static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) |
|---|
| 505 | +static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, |
|---|
| 506 | + void __user *arg) |
|---|
| 486 | 507 | { |
|---|
| 487 | | - struct inode *inode = file_inode(file); |
|---|
| 488 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 489 | 508 | struct btrfs_device *device; |
|---|
| 490 | 509 | struct request_queue *q; |
|---|
| 491 | 510 | struct fstrim_range range; |
|---|
| .. | .. |
|---|
| 544 | 563 | return 0; |
|---|
| 545 | 564 | } |
|---|
| 546 | 565 | |
|---|
| 547 | | -int btrfs_is_empty_uuid(u8 *uuid) |
|---|
| 566 | +int __pure btrfs_is_empty_uuid(u8 *uuid) |
|---|
| 548 | 567 | { |
|---|
| 549 | 568 | int i; |
|---|
| 550 | 569 | |
|---|
| .. | .. |
|---|
| 558 | 577 | static noinline int create_subvol(struct inode *dir, |
|---|
| 559 | 578 | struct dentry *dentry, |
|---|
| 560 | 579 | const char *name, int namelen, |
|---|
| 561 | | - u64 *async_transid, |
|---|
| 562 | 580 | struct btrfs_qgroup_inherit *inherit) |
|---|
| 563 | 581 | { |
|---|
| 564 | 582 | struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); |
|---|
| .. | .. |
|---|
| 574 | 592 | struct inode *inode; |
|---|
| 575 | 593 | int ret; |
|---|
| 576 | 594 | int err; |
|---|
| 595 | + dev_t anon_dev = 0; |
|---|
| 577 | 596 | u64 objectid; |
|---|
| 578 | 597 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
|---|
| 579 | 598 | u64 index = 0; |
|---|
| 580 | | - uuid_le new_uuid; |
|---|
| 581 | 599 | |
|---|
| 582 | 600 | root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); |
|---|
| 583 | 601 | if (!root_item) |
|---|
| .. | .. |
|---|
| 585 | 603 | |
|---|
| 586 | 604 | ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid); |
|---|
| 587 | 605 | if (ret) |
|---|
| 606 | + goto fail_free; |
|---|
| 607 | + |
|---|
| 608 | + ret = get_anon_bdev(&anon_dev); |
|---|
| 609 | + if (ret < 0) |
|---|
| 588 | 610 | goto fail_free; |
|---|
| 589 | 611 | |
|---|
| 590 | 612 | /* |
|---|
| .. | .. |
|---|
| 608 | 630 | trans = btrfs_start_transaction(root, 0); |
|---|
| 609 | 631 | if (IS_ERR(trans)) { |
|---|
| 610 | 632 | ret = PTR_ERR(trans); |
|---|
| 611 | | - btrfs_subvolume_release_metadata(fs_info, &block_rsv); |
|---|
| 633 | + btrfs_subvolume_release_metadata(root, &block_rsv); |
|---|
| 612 | 634 | goto fail_free; |
|---|
| 613 | 635 | } |
|---|
| 614 | 636 | trans->block_rsv = &block_rsv; |
|---|
| .. | .. |
|---|
| 618 | 640 | if (ret) |
|---|
| 619 | 641 | goto fail; |
|---|
| 620 | 642 | |
|---|
| 621 | | - leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); |
|---|
| 643 | + leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0, |
|---|
| 644 | + BTRFS_NESTING_NORMAL); |
|---|
| 622 | 645 | if (IS_ERR(leaf)) { |
|---|
| 623 | 646 | ret = PTR_ERR(leaf); |
|---|
| 624 | 647 | goto fail; |
|---|
| .. | .. |
|---|
| 647 | 670 | |
|---|
| 648 | 671 | btrfs_set_root_generation_v2(root_item, |
|---|
| 649 | 672 | btrfs_root_generation(root_item)); |
|---|
| 650 | | - uuid_le_gen(&new_uuid); |
|---|
| 651 | | - memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); |
|---|
| 673 | + generate_random_guid(root_item->uuid); |
|---|
| 652 | 674 | btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec); |
|---|
| 653 | 675 | btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec); |
|---|
| 654 | 676 | root_item->ctime = root_item->otime; |
|---|
| .. | .. |
|---|
| 682 | 704 | leaf = NULL; |
|---|
| 683 | 705 | |
|---|
| 684 | 706 | key.offset = (u64)-1; |
|---|
| 685 | | - new_root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 707 | + new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); |
|---|
| 686 | 708 | if (IS_ERR(new_root)) { |
|---|
| 709 | + free_anon_bdev(anon_dev); |
|---|
| 687 | 710 | ret = PTR_ERR(new_root); |
|---|
| 688 | 711 | btrfs_abort_transaction(trans, ret); |
|---|
| 689 | 712 | goto fail; |
|---|
| 690 | 713 | } |
|---|
| 714 | + /* Freeing will be done in btrfs_put_root() of new_root */ |
|---|
| 715 | + anon_dev = 0; |
|---|
| 691 | 716 | |
|---|
| 692 | 717 | btrfs_record_root_in_trans(trans, new_root); |
|---|
| 693 | 718 | |
|---|
| 694 | 719 | ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid); |
|---|
| 720 | + btrfs_put_root(new_root); |
|---|
| 695 | 721 | if (ret) { |
|---|
| 696 | 722 | /* We potentially lose an unused inode item here */ |
|---|
| 697 | 723 | btrfs_abort_transaction(trans, ret); |
|---|
| .. | .. |
|---|
| 711 | 737 | goto fail; |
|---|
| 712 | 738 | } |
|---|
| 713 | 739 | |
|---|
| 714 | | - ret = btrfs_insert_dir_item(trans, root, |
|---|
| 715 | | - name, namelen, BTRFS_I(dir), &key, |
|---|
| 740 | + ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key, |
|---|
| 716 | 741 | BTRFS_FT_DIR, index); |
|---|
| 717 | 742 | if (ret) { |
|---|
| 718 | 743 | btrfs_abort_transaction(trans, ret); |
|---|
| .. | .. |
|---|
| 742 | 767 | kfree(root_item); |
|---|
| 743 | 768 | trans->block_rsv = NULL; |
|---|
| 744 | 769 | trans->bytes_reserved = 0; |
|---|
| 745 | | - btrfs_subvolume_release_metadata(fs_info, &block_rsv); |
|---|
| 770 | + btrfs_subvolume_release_metadata(root, &block_rsv); |
|---|
| 746 | 771 | |
|---|
| 747 | | - if (async_transid) { |
|---|
| 748 | | - *async_transid = trans->transid; |
|---|
| 749 | | - err = btrfs_commit_transaction_async(trans, 1); |
|---|
| 750 | | - if (err) |
|---|
| 751 | | - err = btrfs_commit_transaction(trans); |
|---|
| 752 | | - } else { |
|---|
| 753 | | - err = btrfs_commit_transaction(trans); |
|---|
| 754 | | - } |
|---|
| 772 | + err = btrfs_commit_transaction(trans); |
|---|
| 755 | 773 | if (err && !ret) |
|---|
| 756 | 774 | ret = err; |
|---|
| 757 | 775 | |
|---|
| .. | .. |
|---|
| 764 | 782 | return ret; |
|---|
| 765 | 783 | |
|---|
| 766 | 784 | fail_free: |
|---|
| 785 | + if (anon_dev) |
|---|
| 786 | + free_anon_bdev(anon_dev); |
|---|
| 767 | 787 | kfree(root_item); |
|---|
| 768 | 788 | return ret; |
|---|
| 769 | 789 | } |
|---|
| 770 | 790 | |
|---|
| 771 | 791 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
|---|
| 772 | | - struct dentry *dentry, |
|---|
| 773 | | - u64 *async_transid, bool readonly, |
|---|
| 792 | + struct dentry *dentry, bool readonly, |
|---|
| 774 | 793 | struct btrfs_qgroup_inherit *inherit) |
|---|
| 775 | 794 | { |
|---|
| 776 | 795 | struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); |
|---|
| .. | .. |
|---|
| 778 | 797 | struct btrfs_pending_snapshot *pending_snapshot; |
|---|
| 779 | 798 | struct btrfs_trans_handle *trans; |
|---|
| 780 | 799 | int ret; |
|---|
| 781 | | - bool snapshot_force_cow = false; |
|---|
| 782 | 800 | |
|---|
| 783 | | - if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
|---|
| 801 | + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) |
|---|
| 784 | 802 | return -EINVAL; |
|---|
| 803 | + |
|---|
| 804 | + if (atomic_read(&root->nr_swapfiles)) { |
|---|
| 805 | + btrfs_warn(fs_info, |
|---|
| 806 | + "cannot snapshot subvolume with active swapfile"); |
|---|
| 807 | + return -ETXTBSY; |
|---|
| 808 | + } |
|---|
| 785 | 809 | |
|---|
| 786 | 810 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL); |
|---|
| 787 | 811 | if (!pending_snapshot) |
|---|
| 788 | 812 | return -ENOMEM; |
|---|
| 789 | 813 | |
|---|
| 814 | + ret = get_anon_bdev(&pending_snapshot->anon_dev); |
|---|
| 815 | + if (ret < 0) |
|---|
| 816 | + goto free_pending; |
|---|
| 790 | 817 | pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), |
|---|
| 791 | 818 | GFP_KERNEL); |
|---|
| 792 | 819 | pending_snapshot->path = btrfs_alloc_path(); |
|---|
| .. | .. |
|---|
| 794 | 821 | ret = -ENOMEM; |
|---|
| 795 | 822 | goto free_pending; |
|---|
| 796 | 823 | } |
|---|
| 797 | | - |
|---|
| 798 | | - /* |
|---|
| 799 | | - * Force new buffered writes to reserve space even when NOCOW is |
|---|
| 800 | | - * possible. This is to avoid later writeback (running dealloc) to |
|---|
| 801 | | - * fallback to COW mode and unexpectedly fail with ENOSPC. |
|---|
| 802 | | - */ |
|---|
| 803 | | - atomic_inc(&root->will_be_snapshotted); |
|---|
| 804 | | - smp_mb__after_atomic(); |
|---|
| 805 | | - /* wait for no snapshot writes */ |
|---|
| 806 | | - wait_event(root->subv_writers->wait, |
|---|
| 807 | | - percpu_counter_sum(&root->subv_writers->counter) == 0); |
|---|
| 808 | | - |
|---|
| 809 | | - ret = btrfs_start_delalloc_snapshot(root); |
|---|
| 810 | | - if (ret) |
|---|
| 811 | | - goto dec_and_free; |
|---|
| 812 | | - |
|---|
| 813 | | - /* |
|---|
| 814 | | - * All previous writes have started writeback in NOCOW mode, so now |
|---|
| 815 | | - * we force future writes to fallback to COW mode during snapshot |
|---|
| 816 | | - * creation. |
|---|
| 817 | | - */ |
|---|
| 818 | | - atomic_inc(&root->snapshot_force_cow); |
|---|
| 819 | | - snapshot_force_cow = true; |
|---|
| 820 | | - |
|---|
| 821 | | - btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); |
|---|
| 822 | 824 | |
|---|
| 823 | 825 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
|---|
| 824 | 826 | BTRFS_BLOCK_RSV_TEMP); |
|---|
| .. | .. |
|---|
| 834 | 836 | &pending_snapshot->block_rsv, 8, |
|---|
| 835 | 837 | false); |
|---|
| 836 | 838 | if (ret) |
|---|
| 837 | | - goto dec_and_free; |
|---|
| 839 | + goto free_pending; |
|---|
| 838 | 840 | |
|---|
| 839 | 841 | pending_snapshot->dentry = dentry; |
|---|
| 840 | 842 | pending_snapshot->root = root; |
|---|
| .. | .. |
|---|
| 852 | 854 | list_add(&pending_snapshot->list, |
|---|
| 853 | 855 | &trans->transaction->pending_snapshots); |
|---|
| 854 | 856 | spin_unlock(&fs_info->trans_lock); |
|---|
| 855 | | - if (async_transid) { |
|---|
| 856 | | - *async_transid = trans->transid; |
|---|
| 857 | | - ret = btrfs_commit_transaction_async(trans, 1); |
|---|
| 858 | | - if (ret) |
|---|
| 859 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 860 | | - } else { |
|---|
| 861 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 862 | | - } |
|---|
| 857 | + |
|---|
| 858 | + ret = btrfs_commit_transaction(trans); |
|---|
| 863 | 859 | if (ret) |
|---|
| 864 | 860 | goto fail; |
|---|
| 865 | 861 | |
|---|
| .. | .. |
|---|
| 879 | 875 | |
|---|
| 880 | 876 | d_instantiate(dentry, inode); |
|---|
| 881 | 877 | ret = 0; |
|---|
| 878 | + pending_snapshot->anon_dev = 0; |
|---|
| 882 | 879 | fail: |
|---|
| 883 | | - btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv); |
|---|
| 884 | | -dec_and_free: |
|---|
| 885 | | - if (snapshot_force_cow) |
|---|
| 886 | | - atomic_dec(&root->snapshot_force_cow); |
|---|
| 887 | | - if (atomic_dec_and_test(&root->will_be_snapshotted)) |
|---|
| 888 | | - wake_up_var(&root->will_be_snapshotted); |
|---|
| 880 | + /* Prevent double freeing of anon_dev */ |
|---|
| 881 | + if (ret && pending_snapshot->snap) |
|---|
| 882 | + pending_snapshot->snap->anon_dev = 0; |
|---|
| 883 | + btrfs_put_root(pending_snapshot->snap); |
|---|
| 884 | + btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv); |
|---|
| 889 | 885 | free_pending: |
|---|
| 886 | + if (pending_snapshot->anon_dev) |
|---|
| 887 | + free_anon_bdev(pending_snapshot->anon_dev); |
|---|
| 890 | 888 | kfree(pending_snapshot->root_item); |
|---|
| 891 | 889 | btrfs_free_path(pending_snapshot->path); |
|---|
| 892 | 890 | kfree(pending_snapshot); |
|---|
| .. | .. |
|---|
| 964 | 962 | static noinline int btrfs_mksubvol(const struct path *parent, |
|---|
| 965 | 963 | const char *name, int namelen, |
|---|
| 966 | 964 | struct btrfs_root *snap_src, |
|---|
| 967 | | - u64 *async_transid, bool readonly, |
|---|
| 965 | + bool readonly, |
|---|
| 968 | 966 | struct btrfs_qgroup_inherit *inherit) |
|---|
| 969 | 967 | { |
|---|
| 970 | 968 | struct inode *dir = d_inode(parent->dentry); |
|---|
| .. | .. |
|---|
| 1000 | 998 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
|---|
| 1001 | 999 | goto out_up_read; |
|---|
| 1002 | 1000 | |
|---|
| 1003 | | - if (snap_src) { |
|---|
| 1004 | | - error = create_snapshot(snap_src, dir, dentry, |
|---|
| 1005 | | - async_transid, readonly, inherit); |
|---|
| 1006 | | - } else { |
|---|
| 1007 | | - error = create_subvol(dir, dentry, name, namelen, |
|---|
| 1008 | | - async_transid, inherit); |
|---|
| 1009 | | - } |
|---|
| 1001 | + if (snap_src) |
|---|
| 1002 | + error = create_snapshot(snap_src, dir, dentry, readonly, inherit); |
|---|
| 1003 | + else |
|---|
| 1004 | + error = create_subvol(dir, dentry, name, namelen, inherit); |
|---|
| 1005 | + |
|---|
| 1010 | 1006 | if (!error) |
|---|
| 1011 | 1007 | fsnotify_mkdir(dir, dentry); |
|---|
| 1012 | 1008 | out_up_read: |
|---|
| .. | .. |
|---|
| 1016 | 1012 | out_unlock: |
|---|
| 1017 | 1013 | inode_unlock(dir); |
|---|
| 1018 | 1014 | return error; |
|---|
| 1015 | +} |
|---|
| 1016 | + |
|---|
| 1017 | +static noinline int btrfs_mksnapshot(const struct path *parent, |
|---|
| 1018 | + const char *name, int namelen, |
|---|
| 1019 | + struct btrfs_root *root, |
|---|
| 1020 | + bool readonly, |
|---|
| 1021 | + struct btrfs_qgroup_inherit *inherit) |
|---|
| 1022 | +{ |
|---|
| 1023 | + int ret; |
|---|
| 1024 | + bool snapshot_force_cow = false; |
|---|
| 1025 | + |
|---|
| 1026 | + /* |
|---|
| 1027 | + * Force new buffered writes to reserve space even when NOCOW is |
|---|
| 1028 | + * possible. This is to avoid later writeback (running dealloc) to |
|---|
| 1029 | + * fallback to COW mode and unexpectedly fail with ENOSPC. |
|---|
| 1030 | + */ |
|---|
| 1031 | + btrfs_drew_read_lock(&root->snapshot_lock); |
|---|
| 1032 | + |
|---|
| 1033 | + ret = btrfs_start_delalloc_snapshot(root); |
|---|
| 1034 | + if (ret) |
|---|
| 1035 | + goto out; |
|---|
| 1036 | + |
|---|
| 1037 | + /* |
|---|
| 1038 | + * All previous writes have started writeback in NOCOW mode, so now |
|---|
| 1039 | + * we force future writes to fallback to COW mode during snapshot |
|---|
| 1040 | + * creation. |
|---|
| 1041 | + */ |
|---|
| 1042 | + atomic_inc(&root->snapshot_force_cow); |
|---|
| 1043 | + snapshot_force_cow = true; |
|---|
| 1044 | + |
|---|
| 1045 | + btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); |
|---|
| 1046 | + |
|---|
| 1047 | + ret = btrfs_mksubvol(parent, name, namelen, |
|---|
| 1048 | + root, readonly, inherit); |
|---|
| 1049 | +out: |
|---|
| 1050 | + if (snapshot_force_cow) |
|---|
| 1051 | + atomic_dec(&root->snapshot_force_cow); |
|---|
| 1052 | + btrfs_drew_read_unlock(&root->snapshot_lock); |
|---|
| 1053 | + return ret; |
|---|
| 1019 | 1054 | } |
|---|
| 1020 | 1055 | |
|---|
| 1021 | 1056 | /* |
|---|
| .. | .. |
|---|
| 1139 | 1174 | |
|---|
| 1140 | 1175 | /* get the big lock and read metadata off disk */ |
|---|
| 1141 | 1176 | lock_extent_bits(io_tree, start, end, &cached); |
|---|
| 1142 | | - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); |
|---|
| 1177 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); |
|---|
| 1143 | 1178 | unlock_extent_cached(io_tree, start, end, &cached); |
|---|
| 1144 | 1179 | |
|---|
| 1145 | 1180 | if (IS_ERR(em)) |
|---|
| .. | .. |
|---|
| 1252 | 1287 | u64 page_end; |
|---|
| 1253 | 1288 | u64 page_cnt; |
|---|
| 1254 | 1289 | u64 start = (u64)start_index << PAGE_SHIFT; |
|---|
| 1290 | + u64 search_start; |
|---|
| 1255 | 1291 | int ret; |
|---|
| 1256 | 1292 | int i; |
|---|
| 1257 | 1293 | int i_done; |
|---|
| .. | .. |
|---|
| 1267 | 1303 | |
|---|
| 1268 | 1304 | page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); |
|---|
| 1269 | 1305 | |
|---|
| 1270 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
|---|
| 1306 | + ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, |
|---|
| 1271 | 1307 | start, page_cnt << PAGE_SHIFT); |
|---|
| 1272 | 1308 | if (ret) |
|---|
| 1273 | 1309 | return ret; |
|---|
| .. | .. |
|---|
| 1288 | 1324 | while (1) { |
|---|
| 1289 | 1325 | lock_extent_bits(tree, page_start, page_end, |
|---|
| 1290 | 1326 | &cached_state); |
|---|
| 1291 | | - ordered = btrfs_lookup_ordered_extent(inode, |
|---|
| 1327 | + ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), |
|---|
| 1292 | 1328 | page_start); |
|---|
| 1293 | 1329 | unlock_extent_cached(tree, page_start, page_end, |
|---|
| 1294 | 1330 | &cached_state); |
|---|
| .. | .. |
|---|
| 1296 | 1332 | break; |
|---|
| 1297 | 1333 | |
|---|
| 1298 | 1334 | unlock_page(page); |
|---|
| 1299 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
|---|
| 1335 | + btrfs_start_ordered_extent(ordered, 1); |
|---|
| 1300 | 1336 | btrfs_put_ordered_extent(ordered); |
|---|
| 1301 | 1337 | lock_page(page); |
|---|
| 1302 | 1338 | /* |
|---|
| .. | .. |
|---|
| 1348 | 1384 | |
|---|
| 1349 | 1385 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
|---|
| 1350 | 1386 | page_start, page_end - 1, &cached_state); |
|---|
| 1387 | + |
|---|
| 1388 | + /* |
|---|
| 1389 | + * When defragmenting we skip ranges that have holes or inline extents, |
|---|
| 1390 | + * (check should_defrag_range()), to avoid unnecessary IO and wasting |
|---|
| 1391 | + * space. At btrfs_defrag_file(), we check if a range should be defragged |
|---|
| 1392 | + * before locking the inode and then, if it should, we trigger a sync |
|---|
| 1393 | + * page cache readahead - we lock the inode only after that to avoid |
|---|
| 1394 | + * blocking for too long other tasks that possibly want to operate on |
|---|
| 1395 | + * other file ranges. But before we were able to get the inode lock, |
|---|
| 1396 | + * some other task may have punched a hole in the range, or we may have |
|---|
| 1397 | + * now an inline extent, in which case we should not defrag. So check |
|---|
| 1398 | + * for that here, where we have the inode and the range locked, and bail |
|---|
| 1399 | + * out if that happened. |
|---|
| 1400 | + */ |
|---|
| 1401 | + search_start = page_start; |
|---|
| 1402 | + while (search_start < page_end) { |
|---|
| 1403 | + struct extent_map *em; |
|---|
| 1404 | + |
|---|
| 1405 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start, |
|---|
| 1406 | + page_end - search_start); |
|---|
| 1407 | + if (IS_ERR(em)) { |
|---|
| 1408 | + ret = PTR_ERR(em); |
|---|
| 1409 | + goto out_unlock_range; |
|---|
| 1410 | + } |
|---|
| 1411 | + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { |
|---|
| 1412 | + free_extent_map(em); |
|---|
| 1413 | + /* Ok, 0 means we did not defrag anything */ |
|---|
| 1414 | + ret = 0; |
|---|
| 1415 | + goto out_unlock_range; |
|---|
| 1416 | + } |
|---|
| 1417 | + search_start = extent_map_end(em); |
|---|
| 1418 | + free_extent_map(em); |
|---|
| 1419 | + } |
|---|
| 1420 | + |
|---|
| 1351 | 1421 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, |
|---|
| 1352 | | - page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
|---|
| 1353 | | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, |
|---|
| 1354 | | - &cached_state); |
|---|
| 1422 | + page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | |
|---|
| 1423 | + EXTENT_DEFRAG, 0, 0, &cached_state); |
|---|
| 1355 | 1424 | |
|---|
| 1356 | 1425 | if (i_done != page_cnt) { |
|---|
| 1357 | 1426 | spin_lock(&BTRFS_I(inode)->lock); |
|---|
| 1358 | 1427 | btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); |
|---|
| 1359 | 1428 | spin_unlock(&BTRFS_I(inode)->lock); |
|---|
| 1360 | | - btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 1429 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, |
|---|
| 1361 | 1430 | start, (page_cnt - i_done) << PAGE_SHIFT, true); |
|---|
| 1362 | 1431 | } |
|---|
| 1363 | 1432 | |
|---|
| .. | .. |
|---|
| 1379 | 1448 | btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); |
|---|
| 1380 | 1449 | extent_changeset_free(data_reserved); |
|---|
| 1381 | 1450 | return i_done; |
|---|
| 1451 | + |
|---|
| 1452 | +out_unlock_range: |
|---|
| 1453 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
|---|
| 1454 | + page_start, page_end - 1, &cached_state); |
|---|
| 1382 | 1455 | out: |
|---|
| 1383 | 1456 | for (i = 0; i < i_done; i++) { |
|---|
| 1384 | 1457 | unlock_page(pages[i]); |
|---|
| 1385 | 1458 | put_page(pages[i]); |
|---|
| 1386 | 1459 | } |
|---|
| 1387 | | - btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 1460 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, |
|---|
| 1388 | 1461 | start, page_cnt << PAGE_SHIFT, true); |
|---|
| 1389 | 1462 | btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); |
|---|
| 1390 | 1463 | extent_changeset_free(data_reserved); |
|---|
| .. | .. |
|---|
| 1424 | 1497 | return -EINVAL; |
|---|
| 1425 | 1498 | |
|---|
| 1426 | 1499 | if (do_compress) { |
|---|
| 1427 | | - if (range->compress_type > BTRFS_COMPRESS_TYPES) |
|---|
| 1500 | + if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES) |
|---|
| 1428 | 1501 | return -EINVAL; |
|---|
| 1429 | 1502 | if (range->compress_type) |
|---|
| 1430 | 1503 | compress_type = range->compress_type; |
|---|
| .. | .. |
|---|
| 1530 | 1603 | } |
|---|
| 1531 | 1604 | |
|---|
| 1532 | 1605 | inode_lock(inode); |
|---|
| 1533 | | - if (do_compress) |
|---|
| 1534 | | - BTRFS_I(inode)->defrag_compress = compress_type; |
|---|
| 1535 | | - ret = cluster_pages_for_defrag(inode, pages, i, cluster); |
|---|
| 1606 | + if (IS_SWAPFILE(inode)) { |
|---|
| 1607 | + ret = -ETXTBSY; |
|---|
| 1608 | + } else { |
|---|
| 1609 | + if (do_compress) |
|---|
| 1610 | + BTRFS_I(inode)->defrag_compress = compress_type; |
|---|
| 1611 | + ret = cluster_pages_for_defrag(inode, pages, i, cluster); |
|---|
| 1612 | + } |
|---|
| 1536 | 1613 | if (ret < 0) { |
|---|
| 1537 | 1614 | inode_unlock(inode); |
|---|
| 1538 | 1615 | goto out_ra; |
|---|
| .. | .. |
|---|
| 1623 | 1700 | if (ret) |
|---|
| 1624 | 1701 | return ret; |
|---|
| 1625 | 1702 | |
|---|
| 1626 | | - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { |
|---|
| 1703 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_RESIZE)) { |
|---|
| 1627 | 1704 | mnt_drop_write_file(file); |
|---|
| 1628 | 1705 | return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; |
|---|
| 1629 | 1706 | } |
|---|
| .. | .. |
|---|
| 1717 | 1794 | |
|---|
| 1718 | 1795 | new_size = round_down(new_size, fs_info->sectorsize); |
|---|
| 1719 | 1796 | |
|---|
| 1720 | | - btrfs_info_in_rcu(fs_info, "new size for %s is %llu", |
|---|
| 1721 | | - rcu_str_deref(device->name), new_size); |
|---|
| 1722 | | - |
|---|
| 1723 | 1797 | if (new_size > old_size) { |
|---|
| 1724 | 1798 | trans = btrfs_start_transaction(root, 0); |
|---|
| 1725 | 1799 | if (IS_ERR(trans)) { |
|---|
| .. | .. |
|---|
| 1732 | 1806 | ret = btrfs_shrink_device(device, new_size); |
|---|
| 1733 | 1807 | } /* equal, nothing need to do */ |
|---|
| 1734 | 1808 | |
|---|
| 1809 | + if (ret == 0 && new_size != old_size) |
|---|
| 1810 | + btrfs_info_in_rcu(fs_info, |
|---|
| 1811 | + "resize device %s (devid %llu) from %llu to %llu", |
|---|
| 1812 | + rcu_str_deref(device->name), device->devid, |
|---|
| 1813 | + old_size, new_size); |
|---|
| 1735 | 1814 | out_free: |
|---|
| 1736 | 1815 | kfree(vol_args); |
|---|
| 1737 | 1816 | out: |
|---|
| 1738 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 1817 | + btrfs_exclop_finish(fs_info); |
|---|
| 1739 | 1818 | mnt_drop_write_file(file); |
|---|
| 1740 | 1819 | return ret; |
|---|
| 1741 | 1820 | } |
|---|
| 1742 | 1821 | |
|---|
| 1743 | | -static noinline int btrfs_ioctl_snap_create_transid(struct file *file, |
|---|
| 1822 | +static noinline int __btrfs_ioctl_snap_create(struct file *file, |
|---|
| 1744 | 1823 | const char *name, unsigned long fd, int subvol, |
|---|
| 1745 | | - u64 *transid, bool readonly, |
|---|
| 1824 | + bool readonly, |
|---|
| 1746 | 1825 | struct btrfs_qgroup_inherit *inherit) |
|---|
| 1747 | 1826 | { |
|---|
| 1748 | 1827 | int namelen; |
|---|
| .. | .. |
|---|
| 1769 | 1848 | |
|---|
| 1770 | 1849 | if (subvol) { |
|---|
| 1771 | 1850 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
|---|
| 1772 | | - NULL, transid, readonly, inherit); |
|---|
| 1851 | + NULL, readonly, inherit); |
|---|
| 1773 | 1852 | } else { |
|---|
| 1774 | 1853 | struct fd src = fdget(fd); |
|---|
| 1775 | 1854 | struct inode *src_inode; |
|---|
| .. | .. |
|---|
| 1790 | 1869 | */ |
|---|
| 1791 | 1870 | ret = -EPERM; |
|---|
| 1792 | 1871 | } else { |
|---|
| 1793 | | - ret = btrfs_mksubvol(&file->f_path, name, namelen, |
|---|
| 1872 | + ret = btrfs_mksnapshot(&file->f_path, name, namelen, |
|---|
| 1794 | 1873 | BTRFS_I(src_inode)->root, |
|---|
| 1795 | | - transid, readonly, inherit); |
|---|
| 1874 | + readonly, inherit); |
|---|
| 1796 | 1875 | } |
|---|
| 1797 | 1876 | fdput(src); |
|---|
| 1798 | 1877 | } |
|---|
| .. | .. |
|---|
| 1816 | 1895 | return PTR_ERR(vol_args); |
|---|
| 1817 | 1896 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
|---|
| 1818 | 1897 | |
|---|
| 1819 | | - ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
|---|
| 1820 | | - vol_args->fd, subvol, |
|---|
| 1821 | | - NULL, false, NULL); |
|---|
| 1898 | + ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, |
|---|
| 1899 | + subvol, false, NULL); |
|---|
| 1822 | 1900 | |
|---|
| 1823 | 1901 | kfree(vol_args); |
|---|
| 1824 | 1902 | return ret; |
|---|
| .. | .. |
|---|
| 1829 | 1907 | { |
|---|
| 1830 | 1908 | struct btrfs_ioctl_vol_args_v2 *vol_args; |
|---|
| 1831 | 1909 | int ret; |
|---|
| 1832 | | - u64 transid = 0; |
|---|
| 1833 | | - u64 *ptr = NULL; |
|---|
| 1834 | 1910 | bool readonly = false; |
|---|
| 1835 | 1911 | struct btrfs_qgroup_inherit *inherit = NULL; |
|---|
| 1836 | 1912 | |
|---|
| .. | .. |
|---|
| 1842 | 1918 | return PTR_ERR(vol_args); |
|---|
| 1843 | 1919 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; |
|---|
| 1844 | 1920 | |
|---|
| 1845 | | - if (vol_args->flags & |
|---|
| 1846 | | - ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | |
|---|
| 1847 | | - BTRFS_SUBVOL_QGROUP_INHERIT)) { |
|---|
| 1921 | + if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) { |
|---|
| 1848 | 1922 | ret = -EOPNOTSUPP; |
|---|
| 1849 | 1923 | goto free_args; |
|---|
| 1850 | 1924 | } |
|---|
| 1851 | 1925 | |
|---|
| 1852 | | - if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) |
|---|
| 1853 | | - ptr = &transid; |
|---|
| 1854 | 1926 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) |
|---|
| 1855 | 1927 | readonly = true; |
|---|
| 1856 | 1928 | if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { |
|---|
| .. | .. |
|---|
| 1882 | 1954 | } |
|---|
| 1883 | 1955 | } |
|---|
| 1884 | 1956 | |
|---|
| 1885 | | - ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
|---|
| 1886 | | - vol_args->fd, subvol, ptr, |
|---|
| 1887 | | - readonly, inherit); |
|---|
| 1957 | + ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, |
|---|
| 1958 | + subvol, readonly, inherit); |
|---|
| 1888 | 1959 | if (ret) |
|---|
| 1889 | 1960 | goto free_inherit; |
|---|
| 1890 | | - |
|---|
| 1891 | | - if (ptr && copy_to_user(arg + |
|---|
| 1892 | | - offsetof(struct btrfs_ioctl_vol_args_v2, |
|---|
| 1893 | | - transid), |
|---|
| 1894 | | - ptr, sizeof(*ptr))) |
|---|
| 1895 | | - ret = -EFAULT; |
|---|
| 1896 | | - |
|---|
| 1897 | 1961 | free_inherit: |
|---|
| 1898 | 1962 | kfree(inherit); |
|---|
| 1899 | 1963 | free_args: |
|---|
| .. | .. |
|---|
| 1949 | 2013 | |
|---|
| 1950 | 2014 | if (copy_from_user(&flags, arg, sizeof(flags))) { |
|---|
| 1951 | 2015 | ret = -EFAULT; |
|---|
| 1952 | | - goto out_drop_write; |
|---|
| 1953 | | - } |
|---|
| 1954 | | - |
|---|
| 1955 | | - if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { |
|---|
| 1956 | | - ret = -EINVAL; |
|---|
| 1957 | 2016 | goto out_drop_write; |
|---|
| 1958 | 2017 | } |
|---|
| 1959 | 2018 | |
|---|
| .. | .. |
|---|
| 2112 | 2171 | * problem. Otherwise we'll fault and then copy the buffer in |
|---|
| 2113 | 2172 | * properly this next time through |
|---|
| 2114 | 2173 | */ |
|---|
| 2115 | | - if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) { |
|---|
| 2174 | + if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) { |
|---|
| 2116 | 2175 | ret = 0; |
|---|
| 2117 | 2176 | goto out; |
|---|
| 2118 | 2177 | } |
|---|
| .. | .. |
|---|
| 2199 | 2258 | |
|---|
| 2200 | 2259 | if (sk->tree_id == 0) { |
|---|
| 2201 | 2260 | /* search the root of the inode that was passed */ |
|---|
| 2202 | | - root = BTRFS_I(inode)->root; |
|---|
| 2261 | + root = btrfs_grab_root(BTRFS_I(inode)->root); |
|---|
| 2203 | 2262 | } else { |
|---|
| 2204 | | - key.objectid = sk->tree_id; |
|---|
| 2205 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 2206 | | - key.offset = (u64)-1; |
|---|
| 2207 | | - root = btrfs_read_fs_root_no_name(info, &key); |
|---|
| 2263 | + root = btrfs_get_fs_root(info, sk->tree_id, true); |
|---|
| 2208 | 2264 | if (IS_ERR(root)) { |
|---|
| 2209 | 2265 | btrfs_free_path(path); |
|---|
| 2210 | 2266 | return PTR_ERR(root); |
|---|
| .. | .. |
|---|
| 2238 | 2294 | ret = 0; |
|---|
| 2239 | 2295 | err: |
|---|
| 2240 | 2296 | sk->nr_items = num_found; |
|---|
| 2297 | + btrfs_put_root(root); |
|---|
| 2241 | 2298 | btrfs_free_path(path); |
|---|
| 2242 | 2299 | return ret; |
|---|
| 2243 | 2300 | } |
|---|
| .. | .. |
|---|
| 2341 | 2398 | |
|---|
| 2342 | 2399 | ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1]; |
|---|
| 2343 | 2400 | |
|---|
| 2344 | | - key.objectid = tree_id; |
|---|
| 2345 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 2346 | | - key.offset = (u64)-1; |
|---|
| 2347 | | - root = btrfs_read_fs_root_no_name(info, &key); |
|---|
| 2401 | + root = btrfs_get_fs_root(info, tree_id, true); |
|---|
| 2348 | 2402 | if (IS_ERR(root)) { |
|---|
| 2349 | 2403 | ret = PTR_ERR(root); |
|---|
| 2404 | + root = NULL; |
|---|
| 2350 | 2405 | goto out; |
|---|
| 2351 | 2406 | } |
|---|
| 2352 | 2407 | |
|---|
| .. | .. |
|---|
| 2397 | 2452 | name[total_len] = '\0'; |
|---|
| 2398 | 2453 | ret = 0; |
|---|
| 2399 | 2454 | out: |
|---|
| 2455 | + btrfs_put_root(root); |
|---|
| 2400 | 2456 | btrfs_free_path(path); |
|---|
| 2401 | 2457 | return ret; |
|---|
| 2402 | 2458 | } |
|---|
| .. | .. |
|---|
| 2413 | 2469 | unsigned long item_len; |
|---|
| 2414 | 2470 | struct btrfs_inode_ref *iref; |
|---|
| 2415 | 2471 | struct btrfs_root_ref *rref; |
|---|
| 2416 | | - struct btrfs_root *root; |
|---|
| 2472 | + struct btrfs_root *root = NULL; |
|---|
| 2417 | 2473 | struct btrfs_path *path; |
|---|
| 2418 | 2474 | struct btrfs_key key, key2; |
|---|
| 2419 | 2475 | struct extent_buffer *leaf; |
|---|
| .. | .. |
|---|
| 2435 | 2491 | if (dirid != upper_limit.objectid) { |
|---|
| 2436 | 2492 | ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; |
|---|
| 2437 | 2493 | |
|---|
| 2438 | | - key.objectid = treeid; |
|---|
| 2439 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 2440 | | - key.offset = (u64)-1; |
|---|
| 2441 | | - root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 2494 | + root = btrfs_get_fs_root(fs_info, treeid, true); |
|---|
| 2442 | 2495 | if (IS_ERR(root)) { |
|---|
| 2443 | 2496 | ret = PTR_ERR(root); |
|---|
| 2444 | 2497 | goto out; |
|---|
| .. | .. |
|---|
| 2450 | 2503 | while (1) { |
|---|
| 2451 | 2504 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 2452 | 2505 | if (ret < 0) { |
|---|
| 2453 | | - goto out; |
|---|
| 2506 | + goto out_put; |
|---|
| 2454 | 2507 | } else if (ret > 0) { |
|---|
| 2455 | 2508 | ret = btrfs_previous_item(root, path, dirid, |
|---|
| 2456 | 2509 | BTRFS_INODE_REF_KEY); |
|---|
| 2457 | 2510 | if (ret < 0) { |
|---|
| 2458 | | - goto out; |
|---|
| 2511 | + goto out_put; |
|---|
| 2459 | 2512 | } else if (ret > 0) { |
|---|
| 2460 | 2513 | ret = -ENOENT; |
|---|
| 2461 | | - goto out; |
|---|
| 2514 | + goto out_put; |
|---|
| 2462 | 2515 | } |
|---|
| 2463 | 2516 | } |
|---|
| 2464 | 2517 | |
|---|
| .. | .. |
|---|
| 2472 | 2525 | total_len += len + 1; |
|---|
| 2473 | 2526 | if (ptr < args->path) { |
|---|
| 2474 | 2527 | ret = -ENAMETOOLONG; |
|---|
| 2475 | | - goto out; |
|---|
| 2528 | + goto out_put; |
|---|
| 2476 | 2529 | } |
|---|
| 2477 | 2530 | |
|---|
| 2478 | 2531 | *(ptr + len) = '/'; |
|---|
| .. | .. |
|---|
| 2483 | 2536 | ret = btrfs_previous_item(root, path, dirid, |
|---|
| 2484 | 2537 | BTRFS_INODE_ITEM_KEY); |
|---|
| 2485 | 2538 | if (ret < 0) { |
|---|
| 2486 | | - goto out; |
|---|
| 2539 | + goto out_put; |
|---|
| 2487 | 2540 | } else if (ret > 0) { |
|---|
| 2488 | 2541 | ret = -ENOENT; |
|---|
| 2489 | | - goto out; |
|---|
| 2542 | + goto out_put; |
|---|
| 2490 | 2543 | } |
|---|
| 2491 | 2544 | |
|---|
| 2492 | 2545 | leaf = path->nodes[0]; |
|---|
| .. | .. |
|---|
| 2494 | 2547 | btrfs_item_key_to_cpu(leaf, &key2, slot); |
|---|
| 2495 | 2548 | if (key2.objectid != dirid) { |
|---|
| 2496 | 2549 | ret = -ENOENT; |
|---|
| 2497 | | - goto out; |
|---|
| 2550 | + goto out_put; |
|---|
| 2498 | 2551 | } |
|---|
| 2499 | 2552 | |
|---|
| 2500 | | - temp_inode = btrfs_iget(sb, &key2, root, NULL); |
|---|
| 2553 | + temp_inode = btrfs_iget(sb, key2.objectid, root); |
|---|
| 2501 | 2554 | if (IS_ERR(temp_inode)) { |
|---|
| 2502 | 2555 | ret = PTR_ERR(temp_inode); |
|---|
| 2503 | | - goto out; |
|---|
| 2556 | + goto out_put; |
|---|
| 2504 | 2557 | } |
|---|
| 2505 | 2558 | ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC); |
|---|
| 2506 | 2559 | iput(temp_inode); |
|---|
| 2507 | 2560 | if (ret) { |
|---|
| 2508 | 2561 | ret = -EACCES; |
|---|
| 2509 | | - goto out; |
|---|
| 2562 | + goto out_put; |
|---|
| 2510 | 2563 | } |
|---|
| 2511 | 2564 | |
|---|
| 2512 | 2565 | if (key.offset == upper_limit.objectid) |
|---|
| 2513 | 2566 | break; |
|---|
| 2514 | 2567 | if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) { |
|---|
| 2515 | 2568 | ret = -EACCES; |
|---|
| 2516 | | - goto out; |
|---|
| 2569 | + goto out_put; |
|---|
| 2517 | 2570 | } |
|---|
| 2518 | 2571 | |
|---|
| 2519 | 2572 | btrfs_release_path(path); |
|---|
| .. | .. |
|---|
| 2524 | 2577 | |
|---|
| 2525 | 2578 | memmove(args->path, ptr, total_len); |
|---|
| 2526 | 2579 | args->path[total_len] = '\0'; |
|---|
| 2580 | + btrfs_put_root(root); |
|---|
| 2581 | + root = NULL; |
|---|
| 2527 | 2582 | btrfs_release_path(path); |
|---|
| 2528 | 2583 | } |
|---|
| 2529 | 2584 | |
|---|
| 2530 | 2585 | /* Get the bottom subvolume's name from ROOT_REF */ |
|---|
| 2531 | | - root = fs_info->tree_root; |
|---|
| 2532 | 2586 | key.objectid = treeid; |
|---|
| 2533 | 2587 | key.type = BTRFS_ROOT_REF_KEY; |
|---|
| 2534 | 2588 | key.offset = args->treeid; |
|---|
| 2535 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 2589 | + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); |
|---|
| 2536 | 2590 | if (ret < 0) { |
|---|
| 2537 | 2591 | goto out; |
|---|
| 2538 | 2592 | } else if (ret > 0) { |
|---|
| .. | .. |
|---|
| 2559 | 2613 | read_extent_buffer(leaf, args->name, item_off, item_len); |
|---|
| 2560 | 2614 | args->name[item_len] = 0; |
|---|
| 2561 | 2615 | |
|---|
| 2616 | +out_put: |
|---|
| 2617 | + btrfs_put_root(root); |
|---|
| 2562 | 2618 | out: |
|---|
| 2563 | 2619 | btrfs_free_path(path); |
|---|
| 2564 | 2620 | return ret; |
|---|
| .. | .. |
|---|
| 2681 | 2737 | |
|---|
| 2682 | 2738 | /* Get root_item of inode's subvolume */ |
|---|
| 2683 | 2739 | key.objectid = BTRFS_I(inode)->root->root_key.objectid; |
|---|
| 2684 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 2685 | | - key.offset = (u64)-1; |
|---|
| 2686 | | - root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 2740 | + root = btrfs_get_fs_root(fs_info, key.objectid, true); |
|---|
| 2687 | 2741 | if (IS_ERR(root)) { |
|---|
| 2688 | 2742 | ret = PTR_ERR(root); |
|---|
| 2689 | | - goto out; |
|---|
| 2743 | + goto out_free; |
|---|
| 2690 | 2744 | } |
|---|
| 2691 | 2745 | root_item = &root->root_item; |
|---|
| 2692 | 2746 | |
|---|
| .. | .. |
|---|
| 2719 | 2773 | |
|---|
| 2720 | 2774 | if (key.objectid != BTRFS_FS_TREE_OBJECTID) { |
|---|
| 2721 | 2775 | /* Search root tree for ROOT_BACKREF of this subvolume */ |
|---|
| 2722 | | - root = fs_info->tree_root; |
|---|
| 2723 | | - |
|---|
| 2724 | 2776 | key.type = BTRFS_ROOT_BACKREF_KEY; |
|---|
| 2725 | 2777 | key.offset = 0; |
|---|
| 2726 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 2778 | + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); |
|---|
| 2727 | 2779 | if (ret < 0) { |
|---|
| 2728 | 2780 | goto out; |
|---|
| 2729 | 2781 | } else if (path->slots[0] >= |
|---|
| 2730 | 2782 | btrfs_header_nritems(path->nodes[0])) { |
|---|
| 2731 | | - ret = btrfs_next_leaf(root, path); |
|---|
| 2783 | + ret = btrfs_next_leaf(fs_info->tree_root, path); |
|---|
| 2732 | 2784 | if (ret < 0) { |
|---|
| 2733 | 2785 | goto out; |
|---|
| 2734 | 2786 | } else if (ret > 0) { |
|---|
| .. | .. |
|---|
| 2759 | 2811 | } |
|---|
| 2760 | 2812 | } |
|---|
| 2761 | 2813 | |
|---|
| 2814 | + btrfs_free_path(path); |
|---|
| 2815 | + path = NULL; |
|---|
| 2762 | 2816 | if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) |
|---|
| 2763 | 2817 | ret = -EFAULT; |
|---|
| 2764 | 2818 | |
|---|
| 2765 | 2819 | out: |
|---|
| 2820 | + btrfs_put_root(root); |
|---|
| 2821 | +out_free: |
|---|
| 2766 | 2822 | btrfs_free_path(path); |
|---|
| 2767 | | - kzfree(subvol_info); |
|---|
| 2823 | + kfree(subvol_info); |
|---|
| 2768 | 2824 | return ret; |
|---|
| 2769 | 2825 | } |
|---|
| 2770 | 2826 | |
|---|
| .. | .. |
|---|
| 2849 | 2905 | } |
|---|
| 2850 | 2906 | |
|---|
| 2851 | 2907 | out: |
|---|
| 2908 | + btrfs_free_path(path); |
|---|
| 2909 | + |
|---|
| 2852 | 2910 | if (!ret || ret == -EOVERFLOW) { |
|---|
| 2853 | 2911 | rootrefs->num_items = found; |
|---|
| 2854 | 2912 | /* update min_treeid for next search */ |
|---|
| .. | .. |
|---|
| 2860 | 2918 | } |
|---|
| 2861 | 2919 | |
|---|
| 2862 | 2920 | kfree(rootrefs); |
|---|
| 2863 | | - btrfs_free_path(path); |
|---|
| 2864 | 2921 | |
|---|
| 2865 | 2922 | return ret; |
|---|
| 2866 | 2923 | } |
|---|
| 2867 | 2924 | |
|---|
| 2868 | 2925 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, |
|---|
| 2869 | | - void __user *arg) |
|---|
| 2926 | + void __user *arg, |
|---|
| 2927 | + bool destroy_v2) |
|---|
| 2870 | 2928 | { |
|---|
| 2871 | 2929 | struct dentry *parent = file->f_path.dentry; |
|---|
| 2872 | 2930 | struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb); |
|---|
| .. | .. |
|---|
| 2875 | 2933 | struct inode *inode; |
|---|
| 2876 | 2934 | struct btrfs_root *root = BTRFS_I(dir)->root; |
|---|
| 2877 | 2935 | struct btrfs_root *dest = NULL; |
|---|
| 2878 | | - struct btrfs_ioctl_vol_args *vol_args; |
|---|
| 2879 | | - int namelen; |
|---|
| 2936 | + struct btrfs_ioctl_vol_args *vol_args = NULL; |
|---|
| 2937 | + struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL; |
|---|
| 2938 | + char *subvol_name, *subvol_name_ptr = NULL; |
|---|
| 2939 | + int subvol_namelen; |
|---|
| 2880 | 2940 | int err = 0; |
|---|
| 2941 | + bool destroy_parent = false; |
|---|
| 2881 | 2942 | |
|---|
| 2882 | | - if (!S_ISDIR(dir->i_mode)) |
|---|
| 2883 | | - return -ENOTDIR; |
|---|
| 2943 | + if (destroy_v2) { |
|---|
| 2944 | + vol_args2 = memdup_user(arg, sizeof(*vol_args2)); |
|---|
| 2945 | + if (IS_ERR(vol_args2)) |
|---|
| 2946 | + return PTR_ERR(vol_args2); |
|---|
| 2884 | 2947 | |
|---|
| 2885 | | - vol_args = memdup_user(arg, sizeof(*vol_args)); |
|---|
| 2886 | | - if (IS_ERR(vol_args)) |
|---|
| 2887 | | - return PTR_ERR(vol_args); |
|---|
| 2948 | + if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) { |
|---|
| 2949 | + err = -EOPNOTSUPP; |
|---|
| 2950 | + goto out; |
|---|
| 2951 | + } |
|---|
| 2888 | 2952 | |
|---|
| 2889 | | - vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
|---|
| 2890 | | - namelen = strlen(vol_args->name); |
|---|
| 2891 | | - if (strchr(vol_args->name, '/') || |
|---|
| 2892 | | - strncmp(vol_args->name, "..", namelen) == 0) { |
|---|
| 2893 | | - err = -EINVAL; |
|---|
| 2894 | | - goto out; |
|---|
| 2953 | + /* |
|---|
| 2954 | + * If SPEC_BY_ID is not set, we are looking for the subvolume by |
|---|
| 2955 | + * name, same as v1 currently does. |
|---|
| 2956 | + */ |
|---|
| 2957 | + if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) { |
|---|
| 2958 | + vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0; |
|---|
| 2959 | + subvol_name = vol_args2->name; |
|---|
| 2960 | + |
|---|
| 2961 | + err = mnt_want_write_file(file); |
|---|
| 2962 | + if (err) |
|---|
| 2963 | + goto out; |
|---|
| 2964 | + } else { |
|---|
| 2965 | + if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) { |
|---|
| 2966 | + err = -EINVAL; |
|---|
| 2967 | + goto out; |
|---|
| 2968 | + } |
|---|
| 2969 | + |
|---|
| 2970 | + err = mnt_want_write_file(file); |
|---|
| 2971 | + if (err) |
|---|
| 2972 | + goto out; |
|---|
| 2973 | + |
|---|
| 2974 | + dentry = btrfs_get_dentry(fs_info->sb, |
|---|
| 2975 | + BTRFS_FIRST_FREE_OBJECTID, |
|---|
| 2976 | + vol_args2->subvolid, 0, 0); |
|---|
| 2977 | + if (IS_ERR(dentry)) { |
|---|
| 2978 | + err = PTR_ERR(dentry); |
|---|
| 2979 | + goto out_drop_write; |
|---|
| 2980 | + } |
|---|
| 2981 | + |
|---|
| 2982 | + /* |
|---|
| 2983 | + * Change the default parent since the subvolume being |
|---|
| 2984 | + * deleted can be outside of the current mount point. |
|---|
| 2985 | + */ |
|---|
| 2986 | + parent = btrfs_get_parent(dentry); |
|---|
| 2987 | + |
|---|
| 2988 | + /* |
|---|
| 2989 | + * At this point dentry->d_name can point to '/' if the |
|---|
| 2990 | + * subvolume we want to destroy is outside of the |
|---|
| 2991 | + * current mount point, so we need to release the |
|---|
| 2992 | + * current dentry and execute the lookup to return a new |
|---|
| 2993 | + * one with ->d_name pointing to the |
|---|
| 2994 | + * <mount point>/subvol_name. |
|---|
| 2995 | + */ |
|---|
| 2996 | + dput(dentry); |
|---|
| 2997 | + if (IS_ERR(parent)) { |
|---|
| 2998 | + err = PTR_ERR(parent); |
|---|
| 2999 | + goto out_drop_write; |
|---|
| 3000 | + } |
|---|
| 3001 | + dir = d_inode(parent); |
|---|
| 3002 | + |
|---|
| 3003 | + /* |
|---|
| 3004 | + * If v2 was used with SPEC_BY_ID, a new parent was |
|---|
| 3005 | + * allocated since the subvolume can be outside of the |
|---|
| 3006 | + * current mount point. Later on we need to release this |
|---|
| 3007 | + * new parent dentry. |
|---|
| 3008 | + */ |
|---|
| 3009 | + destroy_parent = true; |
|---|
| 3010 | + |
|---|
| 3011 | + subvol_name_ptr = btrfs_get_subvol_name_from_objectid( |
|---|
| 3012 | + fs_info, vol_args2->subvolid); |
|---|
| 3013 | + if (IS_ERR(subvol_name_ptr)) { |
|---|
| 3014 | + err = PTR_ERR(subvol_name_ptr); |
|---|
| 3015 | + goto free_parent; |
|---|
| 3016 | + } |
|---|
| 3017 | + /* subvol_name_ptr is already NUL terminated */ |
|---|
| 3018 | + subvol_name = (char *)kbasename(subvol_name_ptr); |
|---|
| 3019 | + } |
|---|
| 3020 | + } else { |
|---|
| 3021 | + vol_args = memdup_user(arg, sizeof(*vol_args)); |
|---|
| 3022 | + if (IS_ERR(vol_args)) |
|---|
| 3023 | + return PTR_ERR(vol_args); |
|---|
| 3024 | + |
|---|
| 3025 | + vol_args->name[BTRFS_PATH_NAME_MAX] = 0; |
|---|
| 3026 | + subvol_name = vol_args->name; |
|---|
| 3027 | + |
|---|
| 3028 | + err = mnt_want_write_file(file); |
|---|
| 3029 | + if (err) |
|---|
| 3030 | + goto out; |
|---|
| 2895 | 3031 | } |
|---|
| 2896 | 3032 | |
|---|
| 2897 | | - err = mnt_want_write_file(file); |
|---|
| 2898 | | - if (err) |
|---|
| 2899 | | - goto out; |
|---|
| 3033 | + subvol_namelen = strlen(subvol_name); |
|---|
| 2900 | 3034 | |
|---|
| 3035 | + if (strchr(subvol_name, '/') || |
|---|
| 3036 | + strncmp(subvol_name, "..", subvol_namelen) == 0) { |
|---|
| 3037 | + err = -EINVAL; |
|---|
| 3038 | + goto free_subvol_name; |
|---|
| 3039 | + } |
|---|
| 3040 | + |
|---|
| 3041 | + if (!S_ISDIR(dir->i_mode)) { |
|---|
| 3042 | + err = -ENOTDIR; |
|---|
| 3043 | + goto free_subvol_name; |
|---|
| 3044 | + } |
|---|
| 2901 | 3045 | |
|---|
| 2902 | 3046 | err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); |
|---|
| 2903 | 3047 | if (err == -EINTR) |
|---|
| 2904 | | - goto out_drop_write; |
|---|
| 2905 | | - dentry = lookup_one_len(vol_args->name, parent, namelen); |
|---|
| 3048 | + goto free_subvol_name; |
|---|
| 3049 | + dentry = lookup_one_len(subvol_name, parent, subvol_namelen); |
|---|
| 2906 | 3050 | if (IS_ERR(dentry)) { |
|---|
| 2907 | 3051 | err = PTR_ERR(dentry); |
|---|
| 2908 | 3052 | goto out_unlock_dir; |
|---|
| .. | .. |
|---|
| 2963 | 3107 | err = btrfs_delete_subvolume(dir, dentry); |
|---|
| 2964 | 3108 | inode_unlock(inode); |
|---|
| 2965 | 3109 | if (!err) |
|---|
| 2966 | | - d_delete(dentry); |
|---|
| 3110 | + d_delete_notify(dir, dentry); |
|---|
| 2967 | 3111 | |
|---|
| 2968 | 3112 | out_dput: |
|---|
| 2969 | 3113 | dput(dentry); |
|---|
| 2970 | 3114 | out_unlock_dir: |
|---|
| 2971 | 3115 | inode_unlock(dir); |
|---|
| 3116 | +free_subvol_name: |
|---|
| 3117 | + kfree(subvol_name_ptr); |
|---|
| 3118 | +free_parent: |
|---|
| 3119 | + if (destroy_parent) |
|---|
| 3120 | + dput(parent); |
|---|
| 2972 | 3121 | out_drop_write: |
|---|
| 2973 | 3122 | mnt_drop_write_file(file); |
|---|
| 2974 | 3123 | out: |
|---|
| 3124 | + kfree(vol_args2); |
|---|
| 2975 | 3125 | kfree(vol_args); |
|---|
| 2976 | 3126 | return err; |
|---|
| 2977 | 3127 | } |
|---|
| .. | .. |
|---|
| 3056 | 3206 | if (!capable(CAP_SYS_ADMIN)) |
|---|
| 3057 | 3207 | return -EPERM; |
|---|
| 3058 | 3208 | |
|---|
| 3059 | | - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) |
|---|
| 3209 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) |
|---|
| 3060 | 3210 | return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; |
|---|
| 3061 | 3211 | |
|---|
| 3062 | 3212 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
|---|
| .. | .. |
|---|
| 3073 | 3223 | |
|---|
| 3074 | 3224 | kfree(vol_args); |
|---|
| 3075 | 3225 | out: |
|---|
| 3076 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 3226 | + btrfs_exclop_finish(fs_info); |
|---|
| 3077 | 3227 | return ret; |
|---|
| 3078 | 3228 | } |
|---|
| 3079 | 3229 | |
|---|
| .. | .. |
|---|
| 3097 | 3247 | goto err_drop; |
|---|
| 3098 | 3248 | } |
|---|
| 3099 | 3249 | |
|---|
| 3100 | | - /* Check for compatibility reject unknown flags */ |
|---|
| 3101 | | - if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) { |
|---|
| 3250 | + if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { |
|---|
| 3102 | 3251 | ret = -EOPNOTSUPP; |
|---|
| 3103 | 3252 | goto out; |
|---|
| 3104 | 3253 | } |
|---|
| 3105 | 3254 | |
|---|
| 3106 | | - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { |
|---|
| 3255 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) { |
|---|
| 3107 | 3256 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; |
|---|
| 3108 | 3257 | goto out; |
|---|
| 3109 | 3258 | } |
|---|
| .. | .. |
|---|
| 3114 | 3263 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; |
|---|
| 3115 | 3264 | ret = btrfs_rm_device(fs_info, vol_args->name, 0); |
|---|
| 3116 | 3265 | } |
|---|
| 3117 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 3266 | + btrfs_exclop_finish(fs_info); |
|---|
| 3118 | 3267 | |
|---|
| 3119 | 3268 | if (!ret) { |
|---|
| 3120 | 3269 | if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) |
|---|
| .. | .. |
|---|
| 3145 | 3294 | if (ret) |
|---|
| 3146 | 3295 | return ret; |
|---|
| 3147 | 3296 | |
|---|
| 3148 | | - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { |
|---|
| 3297 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) { |
|---|
| 3149 | 3298 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; |
|---|
| 3150 | 3299 | goto out_drop_write; |
|---|
| 3151 | 3300 | } |
|---|
| .. | .. |
|---|
| 3163 | 3312 | btrfs_info(fs_info, "disk deleted %s", vol_args->name); |
|---|
| 3164 | 3313 | kfree(vol_args); |
|---|
| 3165 | 3314 | out: |
|---|
| 3166 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 3315 | + btrfs_exclop_finish(fs_info); |
|---|
| 3167 | 3316 | out_drop_write: |
|---|
| 3168 | 3317 | mnt_drop_write_file(file); |
|---|
| 3169 | 3318 | |
|---|
| .. | .. |
|---|
| 3176 | 3325 | struct btrfs_ioctl_fs_info_args *fi_args; |
|---|
| 3177 | 3326 | struct btrfs_device *device; |
|---|
| 3178 | 3327 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 3328 | + u64 flags_in; |
|---|
| 3179 | 3329 | int ret = 0; |
|---|
| 3180 | 3330 | |
|---|
| 3181 | | - fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); |
|---|
| 3182 | | - if (!fi_args) |
|---|
| 3183 | | - return -ENOMEM; |
|---|
| 3331 | + fi_args = memdup_user(arg, sizeof(*fi_args)); |
|---|
| 3332 | + if (IS_ERR(fi_args)) |
|---|
| 3333 | + return PTR_ERR(fi_args); |
|---|
| 3334 | + |
|---|
| 3335 | + flags_in = fi_args->flags; |
|---|
| 3336 | + memset(fi_args, 0, sizeof(*fi_args)); |
|---|
| 3184 | 3337 | |
|---|
| 3185 | 3338 | rcu_read_lock(); |
|---|
| 3186 | 3339 | fi_args->num_devices = fs_devices->num_devices; |
|---|
| .. | .. |
|---|
| 3191 | 3344 | } |
|---|
| 3192 | 3345 | rcu_read_unlock(); |
|---|
| 3193 | 3346 | |
|---|
| 3194 | | - memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid)); |
|---|
| 3347 | + memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid)); |
|---|
| 3195 | 3348 | fi_args->nodesize = fs_info->nodesize; |
|---|
| 3196 | 3349 | fi_args->sectorsize = fs_info->sectorsize; |
|---|
| 3197 | 3350 | fi_args->clone_alignment = fs_info->sectorsize; |
|---|
| 3351 | + |
|---|
| 3352 | + if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) { |
|---|
| 3353 | + fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy); |
|---|
| 3354 | + fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy); |
|---|
| 3355 | + fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO; |
|---|
| 3356 | + } |
|---|
| 3357 | + |
|---|
| 3358 | + if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) { |
|---|
| 3359 | + fi_args->generation = fs_info->generation; |
|---|
| 3360 | + fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION; |
|---|
| 3361 | + } |
|---|
| 3362 | + |
|---|
| 3363 | + if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) { |
|---|
| 3364 | + memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid, |
|---|
| 3365 | + sizeof(fi_args->metadata_uuid)); |
|---|
| 3366 | + fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID; |
|---|
| 3367 | + } |
|---|
| 3198 | 3368 | |
|---|
| 3199 | 3369 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) |
|---|
| 3200 | 3370 | ret = -EFAULT; |
|---|
| .. | .. |
|---|
| 3248 | 3418 | return ret; |
|---|
| 3249 | 3419 | } |
|---|
| 3250 | 3420 | |
|---|
| 3251 | | -static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) |
|---|
| 3252 | | -{ |
|---|
| 3253 | | - struct page *page; |
|---|
| 3254 | | - |
|---|
| 3255 | | - page = grab_cache_page(inode->i_mapping, index); |
|---|
| 3256 | | - if (!page) |
|---|
| 3257 | | - return ERR_PTR(-ENOMEM); |
|---|
| 3258 | | - |
|---|
| 3259 | | - if (!PageUptodate(page)) { |
|---|
| 3260 | | - int ret; |
|---|
| 3261 | | - |
|---|
| 3262 | | - ret = btrfs_readpage(NULL, page); |
|---|
| 3263 | | - if (ret) |
|---|
| 3264 | | - return ERR_PTR(ret); |
|---|
| 3265 | | - lock_page(page); |
|---|
| 3266 | | - if (!PageUptodate(page)) { |
|---|
| 3267 | | - unlock_page(page); |
|---|
| 3268 | | - put_page(page); |
|---|
| 3269 | | - return ERR_PTR(-EIO); |
|---|
| 3270 | | - } |
|---|
| 3271 | | - if (page->mapping != inode->i_mapping) { |
|---|
| 3272 | | - unlock_page(page); |
|---|
| 3273 | | - put_page(page); |
|---|
| 3274 | | - return ERR_PTR(-EAGAIN); |
|---|
| 3275 | | - } |
|---|
| 3276 | | - } |
|---|
| 3277 | | - |
|---|
| 3278 | | - return page; |
|---|
| 3279 | | -} |
|---|
| 3280 | | - |
|---|
| 3281 | | -static int gather_extent_pages(struct inode *inode, struct page **pages, |
|---|
| 3282 | | - int num_pages, u64 off) |
|---|
| 3283 | | -{ |
|---|
| 3284 | | - int i; |
|---|
| 3285 | | - pgoff_t index = off >> PAGE_SHIFT; |
|---|
| 3286 | | - |
|---|
| 3287 | | - for (i = 0; i < num_pages; i++) { |
|---|
| 3288 | | -again: |
|---|
| 3289 | | - pages[i] = extent_same_get_page(inode, index + i); |
|---|
| 3290 | | - if (IS_ERR(pages[i])) { |
|---|
| 3291 | | - int err = PTR_ERR(pages[i]); |
|---|
| 3292 | | - |
|---|
| 3293 | | - if (err == -EAGAIN) |
|---|
| 3294 | | - goto again; |
|---|
| 3295 | | - pages[i] = NULL; |
|---|
| 3296 | | - return err; |
|---|
| 3297 | | - } |
|---|
| 3298 | | - } |
|---|
| 3299 | | - return 0; |
|---|
| 3300 | | -} |
|---|
| 3301 | | - |
|---|
| 3302 | | -static int lock_extent_range(struct inode *inode, u64 off, u64 len, |
|---|
| 3303 | | - bool retry_range_locking) |
|---|
| 3304 | | -{ |
|---|
| 3305 | | - /* |
|---|
| 3306 | | - * Do any pending delalloc/csum calculations on inode, one way or |
|---|
| 3307 | | - * another, and lock file content. |
|---|
| 3308 | | - * The locking order is: |
|---|
| 3309 | | - * |
|---|
| 3310 | | - * 1) pages |
|---|
| 3311 | | - * 2) range in the inode's io tree |
|---|
| 3312 | | - */ |
|---|
| 3313 | | - while (1) { |
|---|
| 3314 | | - struct btrfs_ordered_extent *ordered; |
|---|
| 3315 | | - lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
|---|
| 3316 | | - ordered = btrfs_lookup_first_ordered_extent(inode, |
|---|
| 3317 | | - off + len - 1); |
|---|
| 3318 | | - if ((!ordered || |
|---|
| 3319 | | - ordered->file_offset + ordered->len <= off || |
|---|
| 3320 | | - ordered->file_offset >= off + len) && |
|---|
| 3321 | | - !test_range_bit(&BTRFS_I(inode)->io_tree, off, |
|---|
| 3322 | | - off + len - 1, EXTENT_DELALLOC, 0, NULL)) { |
|---|
| 3323 | | - if (ordered) |
|---|
| 3324 | | - btrfs_put_ordered_extent(ordered); |
|---|
| 3325 | | - break; |
|---|
| 3326 | | - } |
|---|
| 3327 | | - unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
|---|
| 3328 | | - if (ordered) |
|---|
| 3329 | | - btrfs_put_ordered_extent(ordered); |
|---|
| 3330 | | - if (!retry_range_locking) |
|---|
| 3331 | | - return -EAGAIN; |
|---|
| 3332 | | - btrfs_wait_ordered_range(inode, off, len); |
|---|
| 3333 | | - } |
|---|
| 3334 | | - return 0; |
|---|
| 3335 | | -} |
|---|
| 3336 | | - |
|---|
| 3337 | | -static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) |
|---|
| 3338 | | -{ |
|---|
| 3339 | | - inode_unlock(inode1); |
|---|
| 3340 | | - inode_unlock(inode2); |
|---|
| 3341 | | -} |
|---|
| 3342 | | - |
|---|
| 3343 | | -static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) |
|---|
| 3344 | | -{ |
|---|
| 3345 | | - if (inode1 < inode2) |
|---|
| 3346 | | - swap(inode1, inode2); |
|---|
| 3347 | | - |
|---|
| 3348 | | - inode_lock_nested(inode1, I_MUTEX_PARENT); |
|---|
| 3349 | | - inode_lock_nested(inode2, I_MUTEX_CHILD); |
|---|
| 3350 | | -} |
|---|
| 3351 | | - |
|---|
| 3352 | | -static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, |
|---|
| 3353 | | - struct inode *inode2, u64 loff2, u64 len) |
|---|
| 3354 | | -{ |
|---|
| 3355 | | - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); |
|---|
| 3356 | | - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); |
|---|
| 3357 | | -} |
|---|
| 3358 | | - |
|---|
| 3359 | | -static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, |
|---|
| 3360 | | - struct inode *inode2, u64 loff2, u64 len, |
|---|
| 3361 | | - bool retry_range_locking) |
|---|
| 3362 | | -{ |
|---|
| 3363 | | - int ret; |
|---|
| 3364 | | - |
|---|
| 3365 | | - if (inode1 < inode2) { |
|---|
| 3366 | | - swap(inode1, inode2); |
|---|
| 3367 | | - swap(loff1, loff2); |
|---|
| 3368 | | - } |
|---|
| 3369 | | - ret = lock_extent_range(inode1, loff1, len, retry_range_locking); |
|---|
| 3370 | | - if (ret) |
|---|
| 3371 | | - return ret; |
|---|
| 3372 | | - ret = lock_extent_range(inode2, loff2, len, retry_range_locking); |
|---|
| 3373 | | - if (ret) |
|---|
| 3374 | | - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, |
|---|
| 3375 | | - loff1 + len - 1); |
|---|
| 3376 | | - return ret; |
|---|
| 3377 | | -} |
|---|
| 3378 | | - |
|---|
| 3379 | | -struct cmp_pages { |
|---|
| 3380 | | - int num_pages; |
|---|
| 3381 | | - struct page **src_pages; |
|---|
| 3382 | | - struct page **dst_pages; |
|---|
| 3383 | | -}; |
|---|
| 3384 | | - |
|---|
| 3385 | | -static void btrfs_cmp_data_free(struct cmp_pages *cmp) |
|---|
| 3386 | | -{ |
|---|
| 3387 | | - int i; |
|---|
| 3388 | | - struct page *pg; |
|---|
| 3389 | | - |
|---|
| 3390 | | - for (i = 0; i < cmp->num_pages; i++) { |
|---|
| 3391 | | - pg = cmp->src_pages[i]; |
|---|
| 3392 | | - if (pg) { |
|---|
| 3393 | | - unlock_page(pg); |
|---|
| 3394 | | - put_page(pg); |
|---|
| 3395 | | - cmp->src_pages[i] = NULL; |
|---|
| 3396 | | - } |
|---|
| 3397 | | - pg = cmp->dst_pages[i]; |
|---|
| 3398 | | - if (pg) { |
|---|
| 3399 | | - unlock_page(pg); |
|---|
| 3400 | | - put_page(pg); |
|---|
| 3401 | | - cmp->dst_pages[i] = NULL; |
|---|
| 3402 | | - } |
|---|
| 3403 | | - } |
|---|
| 3404 | | -} |
|---|
| 3405 | | - |
|---|
| 3406 | | -static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, |
|---|
| 3407 | | - struct inode *dst, u64 dst_loff, |
|---|
| 3408 | | - u64 len, struct cmp_pages *cmp) |
|---|
| 3409 | | -{ |
|---|
| 3410 | | - int ret; |
|---|
| 3411 | | - int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT; |
|---|
| 3412 | | - |
|---|
| 3413 | | - cmp->num_pages = num_pages; |
|---|
| 3414 | | - |
|---|
| 3415 | | - ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff); |
|---|
| 3416 | | - if (ret) |
|---|
| 3417 | | - goto out; |
|---|
| 3418 | | - |
|---|
| 3419 | | - ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff); |
|---|
| 3420 | | - |
|---|
| 3421 | | -out: |
|---|
| 3422 | | - if (ret) |
|---|
| 3423 | | - btrfs_cmp_data_free(cmp); |
|---|
| 3424 | | - return ret; |
|---|
| 3425 | | -} |
|---|
| 3426 | | - |
|---|
| 3427 | | -static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) |
|---|
| 3428 | | -{ |
|---|
| 3429 | | - int ret = 0; |
|---|
| 3430 | | - int i; |
|---|
| 3431 | | - struct page *src_page, *dst_page; |
|---|
| 3432 | | - unsigned int cmp_len = PAGE_SIZE; |
|---|
| 3433 | | - void *addr, *dst_addr; |
|---|
| 3434 | | - |
|---|
| 3435 | | - i = 0; |
|---|
| 3436 | | - while (len) { |
|---|
| 3437 | | - if (len < PAGE_SIZE) |
|---|
| 3438 | | - cmp_len = len; |
|---|
| 3439 | | - |
|---|
| 3440 | | - BUG_ON(i >= cmp->num_pages); |
|---|
| 3441 | | - |
|---|
| 3442 | | - src_page = cmp->src_pages[i]; |
|---|
| 3443 | | - dst_page = cmp->dst_pages[i]; |
|---|
| 3444 | | - ASSERT(PageLocked(src_page)); |
|---|
| 3445 | | - ASSERT(PageLocked(dst_page)); |
|---|
| 3446 | | - |
|---|
| 3447 | | - addr = kmap_atomic(src_page); |
|---|
| 3448 | | - dst_addr = kmap_atomic(dst_page); |
|---|
| 3449 | | - |
|---|
| 3450 | | - flush_dcache_page(src_page); |
|---|
| 3451 | | - flush_dcache_page(dst_page); |
|---|
| 3452 | | - |
|---|
| 3453 | | - if (memcmp(addr, dst_addr, cmp_len)) |
|---|
| 3454 | | - ret = -EBADE; |
|---|
| 3455 | | - |
|---|
| 3456 | | - kunmap_atomic(addr); |
|---|
| 3457 | | - kunmap_atomic(dst_addr); |
|---|
| 3458 | | - |
|---|
| 3459 | | - if (ret) |
|---|
| 3460 | | - break; |
|---|
| 3461 | | - |
|---|
| 3462 | | - len -= cmp_len; |
|---|
| 3463 | | - i++; |
|---|
| 3464 | | - } |
|---|
| 3465 | | - |
|---|
| 3466 | | - return ret; |
|---|
| 3467 | | -} |
|---|
| 3468 | | - |
|---|
| 3469 | | -static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen, |
|---|
| 3470 | | - u64 olen) |
|---|
| 3471 | | -{ |
|---|
| 3472 | | - u64 len = *plen; |
|---|
| 3473 | | - u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; |
|---|
| 3474 | | - |
|---|
| 3475 | | - if (off + olen > inode->i_size || off + olen < off) |
|---|
| 3476 | | - return -EINVAL; |
|---|
| 3477 | | - |
|---|
| 3478 | | - /* if we extend to eof, continue to block boundary */ |
|---|
| 3479 | | - if (off + len == inode->i_size) |
|---|
| 3480 | | - *plen = len = ALIGN(inode->i_size, bs) - off; |
|---|
| 3481 | | - |
|---|
| 3482 | | - /* Check that we are block aligned - btrfs_clone() requires this */ |
|---|
| 3483 | | - if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) |
|---|
| 3484 | | - return -EINVAL; |
|---|
| 3485 | | - |
|---|
| 3486 | | - return 0; |
|---|
| 3487 | | -} |
|---|
| 3488 | | - |
|---|
| 3489 | | -static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, |
|---|
| 3490 | | - struct inode *dst, u64 dst_loff, |
|---|
| 3491 | | - struct cmp_pages *cmp) |
|---|
| 3492 | | -{ |
|---|
| 3493 | | - int ret; |
|---|
| 3494 | | - u64 len = olen; |
|---|
| 3495 | | - bool same_inode = (src == dst); |
|---|
| 3496 | | - u64 same_lock_start = 0; |
|---|
| 3497 | | - u64 same_lock_len = 0; |
|---|
| 3498 | | - |
|---|
| 3499 | | - ret = extent_same_check_offsets(src, loff, &len, olen); |
|---|
| 3500 | | - if (ret) |
|---|
| 3501 | | - return ret; |
|---|
| 3502 | | - |
|---|
| 3503 | | - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); |
|---|
| 3504 | | - if (ret) |
|---|
| 3505 | | - return ret; |
|---|
| 3506 | | - |
|---|
| 3507 | | - if (same_inode) { |
|---|
| 3508 | | - /* |
|---|
| 3509 | | - * Single inode case wants the same checks, except we |
|---|
| 3510 | | - * don't want our length pushed out past i_size as |
|---|
| 3511 | | - * comparing that data range makes no sense. |
|---|
| 3512 | | - * |
|---|
| 3513 | | - * extent_same_check_offsets() will do this for an |
|---|
| 3514 | | - * unaligned length at i_size, so catch it here and |
|---|
| 3515 | | - * reject the request. |
|---|
| 3516 | | - * |
|---|
| 3517 | | - * This effectively means we require aligned extents |
|---|
| 3518 | | - * for the single-inode case, whereas the other cases |
|---|
| 3519 | | - * allow an unaligned length so long as it ends at |
|---|
| 3520 | | - * i_size. |
|---|
| 3521 | | - */ |
|---|
| 3522 | | - if (len != olen) |
|---|
| 3523 | | - return -EINVAL; |
|---|
| 3524 | | - |
|---|
| 3525 | | - /* Check for overlapping ranges */ |
|---|
| 3526 | | - if (dst_loff + len > loff && dst_loff < loff + len) |
|---|
| 3527 | | - return -EINVAL; |
|---|
| 3528 | | - |
|---|
| 3529 | | - same_lock_start = min_t(u64, loff, dst_loff); |
|---|
| 3530 | | - same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; |
|---|
| 3531 | | - } else { |
|---|
| 3532 | | - /* |
|---|
| 3533 | | - * If the source and destination inodes are different, the |
|---|
| 3534 | | - * source's range end offset matches the source's i_size, that |
|---|
| 3535 | | - * i_size is not a multiple of the sector size, and the |
|---|
| 3536 | | - * destination range does not go past the destination's i_size, |
|---|
| 3537 | | - * we must round down the length to the nearest sector size |
|---|
| 3538 | | - * multiple. If we don't do this adjustment we end replacing |
|---|
| 3539 | | - * with zeroes the bytes in the range that starts at the |
|---|
| 3540 | | - * deduplication range's end offset and ends at the next sector |
|---|
| 3541 | | - * size multiple. |
|---|
| 3542 | | - */ |
|---|
| 3543 | | - if (loff + olen == i_size_read(src) && |
|---|
| 3544 | | - dst_loff + len < i_size_read(dst)) { |
|---|
| 3545 | | - const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; |
|---|
| 3546 | | - |
|---|
| 3547 | | - len = round_down(i_size_read(src), sz) - loff; |
|---|
| 3548 | | - if (len == 0) |
|---|
| 3549 | | - return 0; |
|---|
| 3550 | | - olen = len; |
|---|
| 3551 | | - } |
|---|
| 3552 | | - } |
|---|
| 3553 | | - |
|---|
| 3554 | | -again: |
|---|
| 3555 | | - ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp); |
|---|
| 3556 | | - if (ret) |
|---|
| 3557 | | - return ret; |
|---|
| 3558 | | - |
|---|
| 3559 | | - if (same_inode) |
|---|
| 3560 | | - ret = lock_extent_range(src, same_lock_start, same_lock_len, |
|---|
| 3561 | | - false); |
|---|
| 3562 | | - else |
|---|
| 3563 | | - ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, |
|---|
| 3564 | | - false); |
|---|
| 3565 | | - /* |
|---|
| 3566 | | - * If one of the inodes has dirty pages in the respective range or |
|---|
| 3567 | | - * ordered extents, we need to flush dellaloc and wait for all ordered |
|---|
| 3568 | | - * extents in the range. We must unlock the pages and the ranges in the |
|---|
| 3569 | | - * io trees to avoid deadlocks when flushing delalloc (requires locking |
|---|
| 3570 | | - * pages) and when waiting for ordered extents to complete (they require |
|---|
| 3571 | | - * range locking). |
|---|
| 3572 | | - */ |
|---|
| 3573 | | - if (ret == -EAGAIN) { |
|---|
| 3574 | | - /* |
|---|
| 3575 | | - * Ranges in the io trees already unlocked. Now unlock all |
|---|
| 3576 | | - * pages before waiting for all IO to complete. |
|---|
| 3577 | | - */ |
|---|
| 3578 | | - btrfs_cmp_data_free(cmp); |
|---|
| 3579 | | - if (same_inode) { |
|---|
| 3580 | | - btrfs_wait_ordered_range(src, same_lock_start, |
|---|
| 3581 | | - same_lock_len); |
|---|
| 3582 | | - } else { |
|---|
| 3583 | | - btrfs_wait_ordered_range(src, loff, len); |
|---|
| 3584 | | - btrfs_wait_ordered_range(dst, dst_loff, len); |
|---|
| 3585 | | - } |
|---|
| 3586 | | - goto again; |
|---|
| 3587 | | - } |
|---|
| 3588 | | - ASSERT(ret == 0); |
|---|
| 3589 | | - if (WARN_ON(ret)) { |
|---|
| 3590 | | - /* ranges in the io trees already unlocked */ |
|---|
| 3591 | | - btrfs_cmp_data_free(cmp); |
|---|
| 3592 | | - return ret; |
|---|
| 3593 | | - } |
|---|
| 3594 | | - |
|---|
| 3595 | | - /* pass original length for comparison so we stay within i_size */ |
|---|
| 3596 | | - ret = btrfs_cmp_data(olen, cmp); |
|---|
| 3597 | | - if (ret == 0) |
|---|
| 3598 | | - ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); |
|---|
| 3599 | | - |
|---|
| 3600 | | - if (same_inode) |
|---|
| 3601 | | - unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, |
|---|
| 3602 | | - same_lock_start + same_lock_len - 1); |
|---|
| 3603 | | - else |
|---|
| 3604 | | - btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); |
|---|
| 3605 | | - |
|---|
| 3606 | | - btrfs_cmp_data_free(cmp); |
|---|
| 3607 | | - |
|---|
| 3608 | | - return ret; |
|---|
| 3609 | | -} |
|---|
| 3610 | | - |
|---|
| 3611 | | -#define BTRFS_MAX_DEDUPE_LEN SZ_16M |
|---|
| 3612 | | - |
|---|
| 3613 | | -static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, |
|---|
| 3614 | | - struct inode *dst, u64 dst_loff) |
|---|
| 3615 | | -{ |
|---|
| 3616 | | - int ret; |
|---|
| 3617 | | - struct cmp_pages cmp; |
|---|
| 3618 | | - int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT; |
|---|
| 3619 | | - bool same_inode = (src == dst); |
|---|
| 3620 | | - u64 i, tail_len, chunk_count; |
|---|
| 3621 | | - |
|---|
| 3622 | | - if (olen == 0) |
|---|
| 3623 | | - return 0; |
|---|
| 3624 | | - |
|---|
| 3625 | | - if (same_inode) |
|---|
| 3626 | | - inode_lock(src); |
|---|
| 3627 | | - else |
|---|
| 3628 | | - btrfs_double_inode_lock(src, dst); |
|---|
| 3629 | | - |
|---|
| 3630 | | - /* don't make the dst file partly checksummed */ |
|---|
| 3631 | | - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != |
|---|
| 3632 | | - (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { |
|---|
| 3633 | | - ret = -EINVAL; |
|---|
| 3634 | | - goto out_unlock; |
|---|
| 3635 | | - } |
|---|
| 3636 | | - |
|---|
| 3637 | | - tail_len = olen % BTRFS_MAX_DEDUPE_LEN; |
|---|
| 3638 | | - chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); |
|---|
| 3639 | | - if (chunk_count == 0) |
|---|
| 3640 | | - num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT; |
|---|
| 3641 | | - |
|---|
| 3642 | | - /* |
|---|
| 3643 | | - * If deduping ranges in the same inode, locking rules make it |
|---|
| 3644 | | - * mandatory to always lock pages in ascending order to avoid deadlocks |
|---|
| 3645 | | - * with concurrent tasks (such as starting writeback/delalloc). |
|---|
| 3646 | | - */ |
|---|
| 3647 | | - if (same_inode && dst_loff < loff) |
|---|
| 3648 | | - swap(loff, dst_loff); |
|---|
| 3649 | | - |
|---|
| 3650 | | - /* |
|---|
| 3651 | | - * We must gather up all the pages before we initiate our extent |
|---|
| 3652 | | - * locking. We use an array for the page pointers. Size of the array is |
|---|
| 3653 | | - * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN. |
|---|
| 3654 | | - */ |
|---|
| 3655 | | - cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *), |
|---|
| 3656 | | - GFP_KERNEL | __GFP_ZERO); |
|---|
| 3657 | | - cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *), |
|---|
| 3658 | | - GFP_KERNEL | __GFP_ZERO); |
|---|
| 3659 | | - if (!cmp.src_pages || !cmp.dst_pages) { |
|---|
| 3660 | | - ret = -ENOMEM; |
|---|
| 3661 | | - goto out_free; |
|---|
| 3662 | | - } |
|---|
| 3663 | | - |
|---|
| 3664 | | - for (i = 0; i < chunk_count; i++) { |
|---|
| 3665 | | - ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, |
|---|
| 3666 | | - dst, dst_loff, &cmp); |
|---|
| 3667 | | - if (ret) |
|---|
| 3668 | | - goto out_free; |
|---|
| 3669 | | - |
|---|
| 3670 | | - loff += BTRFS_MAX_DEDUPE_LEN; |
|---|
| 3671 | | - dst_loff += BTRFS_MAX_DEDUPE_LEN; |
|---|
| 3672 | | - } |
|---|
| 3673 | | - |
|---|
| 3674 | | - if (tail_len > 0) |
|---|
| 3675 | | - ret = btrfs_extent_same_range(src, loff, tail_len, dst, |
|---|
| 3676 | | - dst_loff, &cmp); |
|---|
| 3677 | | - |
|---|
| 3678 | | -out_free: |
|---|
| 3679 | | - kvfree(cmp.src_pages); |
|---|
| 3680 | | - kvfree(cmp.dst_pages); |
|---|
| 3681 | | - |
|---|
| 3682 | | -out_unlock: |
|---|
| 3683 | | - if (same_inode) |
|---|
| 3684 | | - inode_unlock(src); |
|---|
| 3685 | | - else |
|---|
| 3686 | | - btrfs_double_inode_unlock(src, dst); |
|---|
| 3687 | | - |
|---|
| 3688 | | - return ret; |
|---|
| 3689 | | -} |
|---|
| 3690 | | - |
|---|
| 3691 | | -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, |
|---|
| 3692 | | - struct file *dst_file, loff_t dst_loff, |
|---|
| 3693 | | - u64 olen) |
|---|
| 3694 | | -{ |
|---|
| 3695 | | - struct inode *src = file_inode(src_file); |
|---|
| 3696 | | - struct inode *dst = file_inode(dst_file); |
|---|
| 3697 | | - u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; |
|---|
| 3698 | | - |
|---|
| 3699 | | - if (WARN_ON_ONCE(bs < PAGE_SIZE)) { |
|---|
| 3700 | | - /* |
|---|
| 3701 | | - * Btrfs does not support blocksize < page_size. As a |
|---|
| 3702 | | - * result, btrfs_cmp_data() won't correctly handle |
|---|
| 3703 | | - * this situation without an update. |
|---|
| 3704 | | - */ |
|---|
| 3705 | | - return -EINVAL; |
|---|
| 3706 | | - } |
|---|
| 3707 | | - |
|---|
| 3708 | | - return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); |
|---|
| 3709 | | -} |
|---|
| 3710 | | - |
|---|
| 3711 | | -static int clone_finish_inode_update(struct btrfs_trans_handle *trans, |
|---|
| 3712 | | - struct inode *inode, |
|---|
| 3713 | | - u64 endoff, |
|---|
| 3714 | | - const u64 destoff, |
|---|
| 3715 | | - const u64 olen, |
|---|
| 3716 | | - int no_time_update) |
|---|
| 3717 | | -{ |
|---|
| 3718 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 3719 | | - int ret; |
|---|
| 3720 | | - |
|---|
| 3721 | | - inode_inc_iversion(inode); |
|---|
| 3722 | | - if (!no_time_update) |
|---|
| 3723 | | - inode->i_mtime = inode->i_ctime = current_time(inode); |
|---|
| 3724 | | - /* |
|---|
| 3725 | | - * We round up to the block size at eof when determining which |
|---|
| 3726 | | - * extents to clone above, but shouldn't round up the file size. |
|---|
| 3727 | | - */ |
|---|
| 3728 | | - if (endoff > destoff + olen) |
|---|
| 3729 | | - endoff = destoff + olen; |
|---|
| 3730 | | - if (endoff > inode->i_size) |
|---|
| 3731 | | - btrfs_i_size_write(BTRFS_I(inode), endoff); |
|---|
| 3732 | | - |
|---|
| 3733 | | - ret = btrfs_update_inode(trans, root, inode); |
|---|
| 3734 | | - if (ret) { |
|---|
| 3735 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 3736 | | - btrfs_end_transaction(trans); |
|---|
| 3737 | | - goto out; |
|---|
| 3738 | | - } |
|---|
| 3739 | | - ret = btrfs_end_transaction(trans); |
|---|
| 3740 | | -out: |
|---|
| 3741 | | - return ret; |
|---|
| 3742 | | -} |
|---|
| 3743 | | - |
|---|
| 3744 | | -static void clone_update_extent_map(struct btrfs_inode *inode, |
|---|
| 3745 | | - const struct btrfs_trans_handle *trans, |
|---|
| 3746 | | - const struct btrfs_path *path, |
|---|
| 3747 | | - const u64 hole_offset, |
|---|
| 3748 | | - const u64 hole_len) |
|---|
| 3749 | | -{ |
|---|
| 3750 | | - struct extent_map_tree *em_tree = &inode->extent_tree; |
|---|
| 3751 | | - struct extent_map *em; |
|---|
| 3752 | | - int ret; |
|---|
| 3753 | | - |
|---|
| 3754 | | - em = alloc_extent_map(); |
|---|
| 3755 | | - if (!em) { |
|---|
| 3756 | | - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); |
|---|
| 3757 | | - return; |
|---|
| 3758 | | - } |
|---|
| 3759 | | - |
|---|
| 3760 | | - if (path) { |
|---|
| 3761 | | - struct btrfs_file_extent_item *fi; |
|---|
| 3762 | | - |
|---|
| 3763 | | - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 3764 | | - struct btrfs_file_extent_item); |
|---|
| 3765 | | - btrfs_extent_item_to_extent_map(inode, path, fi, false, em); |
|---|
| 3766 | | - em->generation = -1; |
|---|
| 3767 | | - if (btrfs_file_extent_type(path->nodes[0], fi) == |
|---|
| 3768 | | - BTRFS_FILE_EXTENT_INLINE) |
|---|
| 3769 | | - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
|---|
| 3770 | | - &inode->runtime_flags); |
|---|
| 3771 | | - } else { |
|---|
| 3772 | | - em->start = hole_offset; |
|---|
| 3773 | | - em->len = hole_len; |
|---|
| 3774 | | - em->ram_bytes = em->len; |
|---|
| 3775 | | - em->orig_start = hole_offset; |
|---|
| 3776 | | - em->block_start = EXTENT_MAP_HOLE; |
|---|
| 3777 | | - em->block_len = 0; |
|---|
| 3778 | | - em->orig_block_len = 0; |
|---|
| 3779 | | - em->compress_type = BTRFS_COMPRESS_NONE; |
|---|
| 3780 | | - em->generation = trans->transid; |
|---|
| 3781 | | - } |
|---|
| 3782 | | - |
|---|
| 3783 | | - while (1) { |
|---|
| 3784 | | - write_lock(&em_tree->lock); |
|---|
| 3785 | | - ret = add_extent_mapping(em_tree, em, 1); |
|---|
| 3786 | | - write_unlock(&em_tree->lock); |
|---|
| 3787 | | - if (ret != -EEXIST) { |
|---|
| 3788 | | - free_extent_map(em); |
|---|
| 3789 | | - break; |
|---|
| 3790 | | - } |
|---|
| 3791 | | - btrfs_drop_extent_cache(inode, em->start, |
|---|
| 3792 | | - em->start + em->len - 1, 0); |
|---|
| 3793 | | - } |
|---|
| 3794 | | - |
|---|
| 3795 | | - if (ret) |
|---|
| 3796 | | - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); |
|---|
| 3797 | | -} |
|---|
| 3798 | | - |
|---|
| 3799 | | -/* |
|---|
| 3800 | | - * Make sure we do not end up inserting an inline extent into a file that has |
|---|
| 3801 | | - * already other (non-inline) extents. If a file has an inline extent it can |
|---|
| 3802 | | - * not have any other extents and the (single) inline extent must start at the |
|---|
| 3803 | | - * file offset 0. Failing to respect these rules will lead to file corruption, |
|---|
| 3804 | | - * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc |
|---|
| 3805 | | - * |
|---|
| 3806 | | - * We can have extents that have been already written to disk or we can have |
|---|
| 3807 | | - * dirty ranges still in delalloc, in which case the extent maps and items are |
|---|
| 3808 | | - * created only when we run delalloc, and the delalloc ranges might fall outside |
|---|
| 3809 | | - * the range we are currently locking in the inode's io tree. So we check the |
|---|
| 3810 | | - * inode's i_size because of that (i_size updates are done while holding the |
|---|
| 3811 | | - * i_mutex, which we are holding here). |
|---|
| 3812 | | - * We also check to see if the inode has a size not greater than "datal" but has |
|---|
| 3813 | | - * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are |
|---|
| 3814 | | - * protected against such concurrent fallocate calls by the i_mutex). |
|---|
| 3815 | | - * |
|---|
| 3816 | | - * If the file has no extents but a size greater than datal, do not allow the |
|---|
| 3817 | | - * copy because we would need turn the inline extent into a non-inline one (even |
|---|
| 3818 | | - * with NO_HOLES enabled). If we find our destination inode only has one inline |
|---|
| 3819 | | - * extent, just overwrite it with the source inline extent if its size is less |
|---|
| 3820 | | - * than the source extent's size, or we could copy the source inline extent's |
|---|
| 3821 | | - * data into the destination inode's inline extent if the later is greater then |
|---|
| 3822 | | - * the former. |
|---|
| 3823 | | - */ |
|---|
| 3824 | | -static int clone_copy_inline_extent(struct inode *dst, |
|---|
| 3825 | | - struct btrfs_trans_handle *trans, |
|---|
| 3826 | | - struct btrfs_path *path, |
|---|
| 3827 | | - struct btrfs_key *new_key, |
|---|
| 3828 | | - const u64 drop_start, |
|---|
| 3829 | | - const u64 datal, |
|---|
| 3830 | | - const u64 skip, |
|---|
| 3831 | | - const u64 size, |
|---|
| 3832 | | - char *inline_data) |
|---|
| 3833 | | -{ |
|---|
| 3834 | | - struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb); |
|---|
| 3835 | | - struct btrfs_root *root = BTRFS_I(dst)->root; |
|---|
| 3836 | | - const u64 aligned_end = ALIGN(new_key->offset + datal, |
|---|
| 3837 | | - fs_info->sectorsize); |
|---|
| 3838 | | - int ret; |
|---|
| 3839 | | - struct btrfs_key key; |
|---|
| 3840 | | - |
|---|
| 3841 | | - if (new_key->offset > 0) |
|---|
| 3842 | | - return -EOPNOTSUPP; |
|---|
| 3843 | | - |
|---|
| 3844 | | - key.objectid = btrfs_ino(BTRFS_I(dst)); |
|---|
| 3845 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 3846 | | - key.offset = 0; |
|---|
| 3847 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 3848 | | - if (ret < 0) { |
|---|
| 3849 | | - return ret; |
|---|
| 3850 | | - } else if (ret > 0) { |
|---|
| 3851 | | - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { |
|---|
| 3852 | | - ret = btrfs_next_leaf(root, path); |
|---|
| 3853 | | - if (ret < 0) |
|---|
| 3854 | | - return ret; |
|---|
| 3855 | | - else if (ret > 0) |
|---|
| 3856 | | - goto copy_inline_extent; |
|---|
| 3857 | | - } |
|---|
| 3858 | | - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); |
|---|
| 3859 | | - if (key.objectid == btrfs_ino(BTRFS_I(dst)) && |
|---|
| 3860 | | - key.type == BTRFS_EXTENT_DATA_KEY) { |
|---|
| 3861 | | - ASSERT(key.offset > 0); |
|---|
| 3862 | | - return -EOPNOTSUPP; |
|---|
| 3863 | | - } |
|---|
| 3864 | | - } else if (i_size_read(dst) <= datal) { |
|---|
| 3865 | | - struct btrfs_file_extent_item *ei; |
|---|
| 3866 | | - u64 ext_len; |
|---|
| 3867 | | - |
|---|
| 3868 | | - /* |
|---|
| 3869 | | - * If the file size is <= datal, make sure there are no other |
|---|
| 3870 | | - * extents following (can happen do to an fallocate call with |
|---|
| 3871 | | - * the flag FALLOC_FL_KEEP_SIZE). |
|---|
| 3872 | | - */ |
|---|
| 3873 | | - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 3874 | | - struct btrfs_file_extent_item); |
|---|
| 3875 | | - /* |
|---|
| 3876 | | - * If it's an inline extent, it can not have other extents |
|---|
| 3877 | | - * following it. |
|---|
| 3878 | | - */ |
|---|
| 3879 | | - if (btrfs_file_extent_type(path->nodes[0], ei) == |
|---|
| 3880 | | - BTRFS_FILE_EXTENT_INLINE) |
|---|
| 3881 | | - goto copy_inline_extent; |
|---|
| 3882 | | - |
|---|
| 3883 | | - ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); |
|---|
| 3884 | | - if (ext_len > aligned_end) |
|---|
| 3885 | | - return -EOPNOTSUPP; |
|---|
| 3886 | | - |
|---|
| 3887 | | - ret = btrfs_next_item(root, path); |
|---|
| 3888 | | - if (ret < 0) { |
|---|
| 3889 | | - return ret; |
|---|
| 3890 | | - } else if (ret == 0) { |
|---|
| 3891 | | - btrfs_item_key_to_cpu(path->nodes[0], &key, |
|---|
| 3892 | | - path->slots[0]); |
|---|
| 3893 | | - if (key.objectid == btrfs_ino(BTRFS_I(dst)) && |
|---|
| 3894 | | - key.type == BTRFS_EXTENT_DATA_KEY) |
|---|
| 3895 | | - return -EOPNOTSUPP; |
|---|
| 3896 | | - } |
|---|
| 3897 | | - } |
|---|
| 3898 | | - |
|---|
| 3899 | | -copy_inline_extent: |
|---|
| 3900 | | - /* |
|---|
| 3901 | | - * We have no extent items, or we have an extent at offset 0 which may |
|---|
| 3902 | | - * or may not be inlined. All these cases are dealt the same way. |
|---|
| 3903 | | - */ |
|---|
| 3904 | | - if (i_size_read(dst) > datal) { |
|---|
| 3905 | | - /* |
|---|
| 3906 | | - * If the destination inode has an inline extent... |
|---|
| 3907 | | - * This would require copying the data from the source inline |
|---|
| 3908 | | - * extent into the beginning of the destination's inline extent. |
|---|
| 3909 | | - * But this is really complex, both extents can be compressed |
|---|
| 3910 | | - * or just one of them, which would require decompressing and |
|---|
| 3911 | | - * re-compressing data (which could increase the new compressed |
|---|
| 3912 | | - * size, not allowing the compressed data to fit anymore in an |
|---|
| 3913 | | - * inline extent). |
|---|
| 3914 | | - * So just don't support this case for now (it should be rare, |
|---|
| 3915 | | - * we are not really saving space when cloning inline extents). |
|---|
| 3916 | | - */ |
|---|
| 3917 | | - return -EOPNOTSUPP; |
|---|
| 3918 | | - } |
|---|
| 3919 | | - |
|---|
| 3920 | | - btrfs_release_path(path); |
|---|
| 3921 | | - ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); |
|---|
| 3922 | | - if (ret) |
|---|
| 3923 | | - return ret; |
|---|
| 3924 | | - ret = btrfs_insert_empty_item(trans, root, path, new_key, size); |
|---|
| 3925 | | - if (ret) |
|---|
| 3926 | | - return ret; |
|---|
| 3927 | | - |
|---|
| 3928 | | - if (skip) { |
|---|
| 3929 | | - const u32 start = btrfs_file_extent_calc_inline_size(0); |
|---|
| 3930 | | - |
|---|
| 3931 | | - memmove(inline_data + start, inline_data + start + skip, datal); |
|---|
| 3932 | | - } |
|---|
| 3933 | | - |
|---|
| 3934 | | - write_extent_buffer(path->nodes[0], inline_data, |
|---|
| 3935 | | - btrfs_item_ptr_offset(path->nodes[0], |
|---|
| 3936 | | - path->slots[0]), |
|---|
| 3937 | | - size); |
|---|
| 3938 | | - inode_add_bytes(dst, datal); |
|---|
| 3939 | | - |
|---|
| 3940 | | - return 0; |
|---|
| 3941 | | -} |
|---|
| 3942 | | - |
|---|
| 3943 | | -/** |
|---|
| 3944 | | - * btrfs_clone() - clone a range from inode file to another |
|---|
| 3945 | | - * |
|---|
| 3946 | | - * @src: Inode to clone from |
|---|
| 3947 | | - * @inode: Inode to clone to |
|---|
| 3948 | | - * @off: Offset within source to start clone from |
|---|
| 3949 | | - * @olen: Original length, passed by user, of range to clone |
|---|
| 3950 | | - * @olen_aligned: Block-aligned value of olen |
|---|
| 3951 | | - * @destoff: Offset within @inode to start clone |
|---|
| 3952 | | - * @no_time_update: Whether to update mtime/ctime on the target inode |
|---|
| 3953 | | - */ |
|---|
| 3954 | | -static int btrfs_clone(struct inode *src, struct inode *inode, |
|---|
| 3955 | | - const u64 off, const u64 olen, const u64 olen_aligned, |
|---|
| 3956 | | - const u64 destoff, int no_time_update) |
|---|
| 3957 | | -{ |
|---|
| 3958 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 3959 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 3960 | | - struct btrfs_path *path = NULL; |
|---|
| 3961 | | - struct extent_buffer *leaf; |
|---|
| 3962 | | - struct btrfs_trans_handle *trans; |
|---|
| 3963 | | - char *buf = NULL; |
|---|
| 3964 | | - struct btrfs_key key; |
|---|
| 3965 | | - u32 nritems; |
|---|
| 3966 | | - int slot; |
|---|
| 3967 | | - int ret; |
|---|
| 3968 | | - const u64 len = olen_aligned; |
|---|
| 3969 | | - u64 last_dest_end = destoff; |
|---|
| 3970 | | - |
|---|
| 3971 | | - ret = -ENOMEM; |
|---|
| 3972 | | - buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); |
|---|
| 3973 | | - if (!buf) |
|---|
| 3974 | | - return ret; |
|---|
| 3975 | | - |
|---|
| 3976 | | - path = btrfs_alloc_path(); |
|---|
| 3977 | | - if (!path) { |
|---|
| 3978 | | - kvfree(buf); |
|---|
| 3979 | | - return ret; |
|---|
| 3980 | | - } |
|---|
| 3981 | | - |
|---|
| 3982 | | - path->reada = READA_FORWARD; |
|---|
| 3983 | | - /* clone data */ |
|---|
| 3984 | | - key.objectid = btrfs_ino(BTRFS_I(src)); |
|---|
| 3985 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 3986 | | - key.offset = off; |
|---|
| 3987 | | - |
|---|
| 3988 | | - while (1) { |
|---|
| 3989 | | - u64 next_key_min_offset = key.offset + 1; |
|---|
| 3990 | | - |
|---|
| 3991 | | - /* |
|---|
| 3992 | | - * note the key will change type as we walk through the |
|---|
| 3993 | | - * tree. |
|---|
| 3994 | | - */ |
|---|
| 3995 | | - path->leave_spinning = 1; |
|---|
| 3996 | | - ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, |
|---|
| 3997 | | - 0, 0); |
|---|
| 3998 | | - if (ret < 0) |
|---|
| 3999 | | - goto out; |
|---|
| 4000 | | - /* |
|---|
| 4001 | | - * First search, if no extent item that starts at offset off was |
|---|
| 4002 | | - * found but the previous item is an extent item, it's possible |
|---|
| 4003 | | - * it might overlap our target range, therefore process it. |
|---|
| 4004 | | - */ |
|---|
| 4005 | | - if (key.offset == off && ret > 0 && path->slots[0] > 0) { |
|---|
| 4006 | | - btrfs_item_key_to_cpu(path->nodes[0], &key, |
|---|
| 4007 | | - path->slots[0] - 1); |
|---|
| 4008 | | - if (key.type == BTRFS_EXTENT_DATA_KEY) |
|---|
| 4009 | | - path->slots[0]--; |
|---|
| 4010 | | - } |
|---|
| 4011 | | - |
|---|
| 4012 | | - nritems = btrfs_header_nritems(path->nodes[0]); |
|---|
| 4013 | | -process_slot: |
|---|
| 4014 | | - if (path->slots[0] >= nritems) { |
|---|
| 4015 | | - ret = btrfs_next_leaf(BTRFS_I(src)->root, path); |
|---|
| 4016 | | - if (ret < 0) |
|---|
| 4017 | | - goto out; |
|---|
| 4018 | | - if (ret > 0) |
|---|
| 4019 | | - break; |
|---|
| 4020 | | - nritems = btrfs_header_nritems(path->nodes[0]); |
|---|
| 4021 | | - } |
|---|
| 4022 | | - leaf = path->nodes[0]; |
|---|
| 4023 | | - slot = path->slots[0]; |
|---|
| 4024 | | - |
|---|
| 4025 | | - btrfs_item_key_to_cpu(leaf, &key, slot); |
|---|
| 4026 | | - if (key.type > BTRFS_EXTENT_DATA_KEY || |
|---|
| 4027 | | - key.objectid != btrfs_ino(BTRFS_I(src))) |
|---|
| 4028 | | - break; |
|---|
| 4029 | | - |
|---|
| 4030 | | - if (key.type == BTRFS_EXTENT_DATA_KEY) { |
|---|
| 4031 | | - struct btrfs_file_extent_item *extent; |
|---|
| 4032 | | - int type; |
|---|
| 4033 | | - u32 size; |
|---|
| 4034 | | - struct btrfs_key new_key; |
|---|
| 4035 | | - u64 disko = 0, diskl = 0; |
|---|
| 4036 | | - u64 datao = 0, datal = 0; |
|---|
| 4037 | | - u8 comp; |
|---|
| 4038 | | - u64 drop_start; |
|---|
| 4039 | | - |
|---|
| 4040 | | - extent = btrfs_item_ptr(leaf, slot, |
|---|
| 4041 | | - struct btrfs_file_extent_item); |
|---|
| 4042 | | - comp = btrfs_file_extent_compression(leaf, extent); |
|---|
| 4043 | | - type = btrfs_file_extent_type(leaf, extent); |
|---|
| 4044 | | - if (type == BTRFS_FILE_EXTENT_REG || |
|---|
| 4045 | | - type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 4046 | | - disko = btrfs_file_extent_disk_bytenr(leaf, |
|---|
| 4047 | | - extent); |
|---|
| 4048 | | - diskl = btrfs_file_extent_disk_num_bytes(leaf, |
|---|
| 4049 | | - extent); |
|---|
| 4050 | | - datao = btrfs_file_extent_offset(leaf, extent); |
|---|
| 4051 | | - datal = btrfs_file_extent_num_bytes(leaf, |
|---|
| 4052 | | - extent); |
|---|
| 4053 | | - } else if (type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 4054 | | - /* take upper bound, may be compressed */ |
|---|
| 4055 | | - datal = btrfs_file_extent_ram_bytes(leaf, |
|---|
| 4056 | | - extent); |
|---|
| 4057 | | - } |
|---|
| 4058 | | - |
|---|
| 4059 | | - /* |
|---|
| 4060 | | - * The first search might have left us at an extent |
|---|
| 4061 | | - * item that ends before our target range's start, can |
|---|
| 4062 | | - * happen if we have holes and NO_HOLES feature enabled. |
|---|
| 4063 | | - */ |
|---|
| 4064 | | - if (key.offset + datal <= off) { |
|---|
| 4065 | | - path->slots[0]++; |
|---|
| 4066 | | - goto process_slot; |
|---|
| 4067 | | - } else if (key.offset >= off + len) { |
|---|
| 4068 | | - break; |
|---|
| 4069 | | - } |
|---|
| 4070 | | - next_key_min_offset = key.offset + datal; |
|---|
| 4071 | | - size = btrfs_item_size_nr(leaf, slot); |
|---|
| 4072 | | - read_extent_buffer(leaf, buf, |
|---|
| 4073 | | - btrfs_item_ptr_offset(leaf, slot), |
|---|
| 4074 | | - size); |
|---|
| 4075 | | - |
|---|
| 4076 | | - btrfs_release_path(path); |
|---|
| 4077 | | - path->leave_spinning = 0; |
|---|
| 4078 | | - |
|---|
| 4079 | | - memcpy(&new_key, &key, sizeof(new_key)); |
|---|
| 4080 | | - new_key.objectid = btrfs_ino(BTRFS_I(inode)); |
|---|
| 4081 | | - if (off <= key.offset) |
|---|
| 4082 | | - new_key.offset = key.offset + destoff - off; |
|---|
| 4083 | | - else |
|---|
| 4084 | | - new_key.offset = destoff; |
|---|
| 4085 | | - |
|---|
| 4086 | | - /* |
|---|
| 4087 | | - * Deal with a hole that doesn't have an extent item |
|---|
| 4088 | | - * that represents it (NO_HOLES feature enabled). |
|---|
| 4089 | | - * This hole is either in the middle of the cloning |
|---|
| 4090 | | - * range or at the beginning (fully overlaps it or |
|---|
| 4091 | | - * partially overlaps it). |
|---|
| 4092 | | - */ |
|---|
| 4093 | | - if (new_key.offset != last_dest_end) |
|---|
| 4094 | | - drop_start = last_dest_end; |
|---|
| 4095 | | - else |
|---|
| 4096 | | - drop_start = new_key.offset; |
|---|
| 4097 | | - |
|---|
| 4098 | | - /* |
|---|
| 4099 | | - * 1 - adjusting old extent (we may have to split it) |
|---|
| 4100 | | - * 1 - add new extent |
|---|
| 4101 | | - * 1 - inode update |
|---|
| 4102 | | - */ |
|---|
| 4103 | | - trans = btrfs_start_transaction(root, 3); |
|---|
| 4104 | | - if (IS_ERR(trans)) { |
|---|
| 4105 | | - ret = PTR_ERR(trans); |
|---|
| 4106 | | - goto out; |
|---|
| 4107 | | - } |
|---|
| 4108 | | - |
|---|
| 4109 | | - if (type == BTRFS_FILE_EXTENT_REG || |
|---|
| 4110 | | - type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 4111 | | - /* |
|---|
| 4112 | | - * a | --- range to clone ---| b |
|---|
| 4113 | | - * | ------------- extent ------------- | |
|---|
| 4114 | | - */ |
|---|
| 4115 | | - |
|---|
| 4116 | | - /* subtract range b */ |
|---|
| 4117 | | - if (key.offset + datal > off + len) |
|---|
| 4118 | | - datal = off + len - key.offset; |
|---|
| 4119 | | - |
|---|
| 4120 | | - /* subtract range a */ |
|---|
| 4121 | | - if (off > key.offset) { |
|---|
| 4122 | | - datao += off - key.offset; |
|---|
| 4123 | | - datal -= off - key.offset; |
|---|
| 4124 | | - } |
|---|
| 4125 | | - |
|---|
| 4126 | | - ret = btrfs_drop_extents(trans, root, inode, |
|---|
| 4127 | | - drop_start, |
|---|
| 4128 | | - new_key.offset + datal, |
|---|
| 4129 | | - 1); |
|---|
| 4130 | | - if (ret) { |
|---|
| 4131 | | - if (ret != -EOPNOTSUPP) |
|---|
| 4132 | | - btrfs_abort_transaction(trans, |
|---|
| 4133 | | - ret); |
|---|
| 4134 | | - btrfs_end_transaction(trans); |
|---|
| 4135 | | - goto out; |
|---|
| 4136 | | - } |
|---|
| 4137 | | - |
|---|
| 4138 | | - ret = btrfs_insert_empty_item(trans, root, path, |
|---|
| 4139 | | - &new_key, size); |
|---|
| 4140 | | - if (ret) { |
|---|
| 4141 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 4142 | | - btrfs_end_transaction(trans); |
|---|
| 4143 | | - goto out; |
|---|
| 4144 | | - } |
|---|
| 4145 | | - |
|---|
| 4146 | | - leaf = path->nodes[0]; |
|---|
| 4147 | | - slot = path->slots[0]; |
|---|
| 4148 | | - write_extent_buffer(leaf, buf, |
|---|
| 4149 | | - btrfs_item_ptr_offset(leaf, slot), |
|---|
| 4150 | | - size); |
|---|
| 4151 | | - |
|---|
| 4152 | | - extent = btrfs_item_ptr(leaf, slot, |
|---|
| 4153 | | - struct btrfs_file_extent_item); |
|---|
| 4154 | | - |
|---|
| 4155 | | - /* disko == 0 means it's a hole */ |
|---|
| 4156 | | - if (!disko) |
|---|
| 4157 | | - datao = 0; |
|---|
| 4158 | | - |
|---|
| 4159 | | - btrfs_set_file_extent_offset(leaf, extent, |
|---|
| 4160 | | - datao); |
|---|
| 4161 | | - btrfs_set_file_extent_num_bytes(leaf, extent, |
|---|
| 4162 | | - datal); |
|---|
| 4163 | | - |
|---|
| 4164 | | - if (disko) { |
|---|
| 4165 | | - inode_add_bytes(inode, datal); |
|---|
| 4166 | | - ret = btrfs_inc_extent_ref(trans, |
|---|
| 4167 | | - root, |
|---|
| 4168 | | - disko, diskl, 0, |
|---|
| 4169 | | - root->root_key.objectid, |
|---|
| 4170 | | - btrfs_ino(BTRFS_I(inode)), |
|---|
| 4171 | | - new_key.offset - datao); |
|---|
| 4172 | | - if (ret) { |
|---|
| 4173 | | - btrfs_abort_transaction(trans, |
|---|
| 4174 | | - ret); |
|---|
| 4175 | | - btrfs_end_transaction(trans); |
|---|
| 4176 | | - goto out; |
|---|
| 4177 | | - |
|---|
| 4178 | | - } |
|---|
| 4179 | | - } |
|---|
| 4180 | | - } else if (type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 4181 | | - u64 skip = 0; |
|---|
| 4182 | | - u64 trim = 0; |
|---|
| 4183 | | - |
|---|
| 4184 | | - if (off > key.offset) { |
|---|
| 4185 | | - skip = off - key.offset; |
|---|
| 4186 | | - new_key.offset += skip; |
|---|
| 4187 | | - } |
|---|
| 4188 | | - |
|---|
| 4189 | | - if (key.offset + datal > off + len) |
|---|
| 4190 | | - trim = key.offset + datal - (off + len); |
|---|
| 4191 | | - |
|---|
| 4192 | | - if (comp && (skip || trim)) { |
|---|
| 4193 | | - ret = -EINVAL; |
|---|
| 4194 | | - btrfs_end_transaction(trans); |
|---|
| 4195 | | - goto out; |
|---|
| 4196 | | - } |
|---|
| 4197 | | - size -= skip + trim; |
|---|
| 4198 | | - datal -= skip + trim; |
|---|
| 4199 | | - |
|---|
| 4200 | | - ret = clone_copy_inline_extent(inode, |
|---|
| 4201 | | - trans, path, |
|---|
| 4202 | | - &new_key, |
|---|
| 4203 | | - drop_start, |
|---|
| 4204 | | - datal, |
|---|
| 4205 | | - skip, size, buf); |
|---|
| 4206 | | - if (ret) { |
|---|
| 4207 | | - if (ret != -EOPNOTSUPP) |
|---|
| 4208 | | - btrfs_abort_transaction(trans, |
|---|
| 4209 | | - ret); |
|---|
| 4210 | | - btrfs_end_transaction(trans); |
|---|
| 4211 | | - goto out; |
|---|
| 4212 | | - } |
|---|
| 4213 | | - leaf = path->nodes[0]; |
|---|
| 4214 | | - slot = path->slots[0]; |
|---|
| 4215 | | - } |
|---|
| 4216 | | - |
|---|
| 4217 | | - /* If we have an implicit hole (NO_HOLES feature). */ |
|---|
| 4218 | | - if (drop_start < new_key.offset) |
|---|
| 4219 | | - clone_update_extent_map(BTRFS_I(inode), trans, |
|---|
| 4220 | | - NULL, drop_start, |
|---|
| 4221 | | - new_key.offset - drop_start); |
|---|
| 4222 | | - |
|---|
| 4223 | | - clone_update_extent_map(BTRFS_I(inode), trans, |
|---|
| 4224 | | - path, 0, 0); |
|---|
| 4225 | | - |
|---|
| 4226 | | - btrfs_mark_buffer_dirty(leaf); |
|---|
| 4227 | | - btrfs_release_path(path); |
|---|
| 4228 | | - |
|---|
| 4229 | | - last_dest_end = ALIGN(new_key.offset + datal, |
|---|
| 4230 | | - fs_info->sectorsize); |
|---|
| 4231 | | - ret = clone_finish_inode_update(trans, inode, |
|---|
| 4232 | | - last_dest_end, |
|---|
| 4233 | | - destoff, olen, |
|---|
| 4234 | | - no_time_update); |
|---|
| 4235 | | - if (ret) |
|---|
| 4236 | | - goto out; |
|---|
| 4237 | | - if (new_key.offset + datal >= destoff + len) |
|---|
| 4238 | | - break; |
|---|
| 4239 | | - } |
|---|
| 4240 | | - btrfs_release_path(path); |
|---|
| 4241 | | - key.offset = next_key_min_offset; |
|---|
| 4242 | | - |
|---|
| 4243 | | - if (fatal_signal_pending(current)) { |
|---|
| 4244 | | - ret = -EINTR; |
|---|
| 4245 | | - goto out; |
|---|
| 4246 | | - } |
|---|
| 4247 | | - |
|---|
| 4248 | | - cond_resched(); |
|---|
| 4249 | | - } |
|---|
| 4250 | | - ret = 0; |
|---|
| 4251 | | - |
|---|
| 4252 | | - if (last_dest_end < destoff + len) { |
|---|
| 4253 | | - /* |
|---|
| 4254 | | - * We have an implicit hole (NO_HOLES feature is enabled) that |
|---|
| 4255 | | - * fully or partially overlaps our cloning range at its end. |
|---|
| 4256 | | - */ |
|---|
| 4257 | | - btrfs_release_path(path); |
|---|
| 4258 | | - |
|---|
| 4259 | | - /* |
|---|
| 4260 | | - * 1 - remove extent(s) |
|---|
| 4261 | | - * 1 - inode update |
|---|
| 4262 | | - */ |
|---|
| 4263 | | - trans = btrfs_start_transaction(root, 2); |
|---|
| 4264 | | - if (IS_ERR(trans)) { |
|---|
| 4265 | | - ret = PTR_ERR(trans); |
|---|
| 4266 | | - goto out; |
|---|
| 4267 | | - } |
|---|
| 4268 | | - ret = btrfs_drop_extents(trans, root, inode, |
|---|
| 4269 | | - last_dest_end, destoff + len, 1); |
|---|
| 4270 | | - if (ret) { |
|---|
| 4271 | | - if (ret != -EOPNOTSUPP) |
|---|
| 4272 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 4273 | | - btrfs_end_transaction(trans); |
|---|
| 4274 | | - goto out; |
|---|
| 4275 | | - } |
|---|
| 4276 | | - clone_update_extent_map(BTRFS_I(inode), trans, NULL, |
|---|
| 4277 | | - last_dest_end, |
|---|
| 4278 | | - destoff + len - last_dest_end); |
|---|
| 4279 | | - ret = clone_finish_inode_update(trans, inode, destoff + len, |
|---|
| 4280 | | - destoff, olen, no_time_update); |
|---|
| 4281 | | - } |
|---|
| 4282 | | - |
|---|
| 4283 | | -out: |
|---|
| 4284 | | - btrfs_free_path(path); |
|---|
| 4285 | | - kvfree(buf); |
|---|
| 4286 | | - return ret; |
|---|
| 4287 | | -} |
|---|
| 4288 | | - |
|---|
| 4289 | | -static noinline int btrfs_clone_files(struct file *file, struct file *file_src, |
|---|
| 4290 | | - u64 off, u64 olen, u64 destoff) |
|---|
| 4291 | | -{ |
|---|
| 4292 | | - struct inode *inode = file_inode(file); |
|---|
| 4293 | | - struct inode *src = file_inode(file_src); |
|---|
| 4294 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 4295 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 4296 | | - int ret; |
|---|
| 4297 | | - u64 len = olen; |
|---|
| 4298 | | - u64 bs = fs_info->sb->s_blocksize; |
|---|
| 4299 | | - int same_inode = src == inode; |
|---|
| 4300 | | - |
|---|
| 4301 | | - /* |
|---|
| 4302 | | - * TODO: |
|---|
| 4303 | | - * - split compressed inline extents. annoying: we need to |
|---|
| 4304 | | - * decompress into destination's address_space (the file offset |
|---|
| 4305 | | - * may change, so source mapping won't do), then recompress (or |
|---|
| 4306 | | - * otherwise reinsert) a subrange. |
|---|
| 4307 | | - * |
|---|
| 4308 | | - * - split destination inode's inline extents. The inline extents can |
|---|
| 4309 | | - * be either compressed or non-compressed. |
|---|
| 4310 | | - */ |
|---|
| 4311 | | - |
|---|
| 4312 | | - if (btrfs_root_readonly(root)) |
|---|
| 4313 | | - return -EROFS; |
|---|
| 4314 | | - |
|---|
| 4315 | | - if (file_src->f_path.mnt != file->f_path.mnt || |
|---|
| 4316 | | - src->i_sb != inode->i_sb) |
|---|
| 4317 | | - return -EXDEV; |
|---|
| 4318 | | - |
|---|
| 4319 | | - if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
|---|
| 4320 | | - return -EISDIR; |
|---|
| 4321 | | - |
|---|
| 4322 | | - if (!same_inode) { |
|---|
| 4323 | | - btrfs_double_inode_lock(src, inode); |
|---|
| 4324 | | - } else { |
|---|
| 4325 | | - inode_lock(src); |
|---|
| 4326 | | - } |
|---|
| 4327 | | - |
|---|
| 4328 | | - /* don't make the dst file partly checksummed */ |
|---|
| 4329 | | - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != |
|---|
| 4330 | | - (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
|---|
| 4331 | | - ret = -EINVAL; |
|---|
| 4332 | | - goto out_unlock; |
|---|
| 4333 | | - } |
|---|
| 4334 | | - |
|---|
| 4335 | | - /* determine range to clone */ |
|---|
| 4336 | | - ret = -EINVAL; |
|---|
| 4337 | | - if (off + len > src->i_size || off + len < off) |
|---|
| 4338 | | - goto out_unlock; |
|---|
| 4339 | | - if (len == 0) |
|---|
| 4340 | | - olen = len = src->i_size - off; |
|---|
| 4341 | | - /* |
|---|
| 4342 | | - * If we extend to eof, continue to block boundary if and only if the |
|---|
| 4343 | | - * destination end offset matches the destination file's size, otherwise |
|---|
| 4344 | | - * we would be corrupting data by placing the eof block into the middle |
|---|
| 4345 | | - * of a file. |
|---|
| 4346 | | - */ |
|---|
| 4347 | | - if (off + len == src->i_size) { |
|---|
| 4348 | | - if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size) |
|---|
| 4349 | | - goto out_unlock; |
|---|
| 4350 | | - len = ALIGN(src->i_size, bs) - off; |
|---|
| 4351 | | - } |
|---|
| 4352 | | - |
|---|
| 4353 | | - if (len == 0) { |
|---|
| 4354 | | - ret = 0; |
|---|
| 4355 | | - goto out_unlock; |
|---|
| 4356 | | - } |
|---|
| 4357 | | - |
|---|
| 4358 | | - /* verify the end result is block aligned */ |
|---|
| 4359 | | - if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || |
|---|
| 4360 | | - !IS_ALIGNED(destoff, bs)) |
|---|
| 4361 | | - goto out_unlock; |
|---|
| 4362 | | - |
|---|
| 4363 | | - /* verify if ranges are overlapped within the same file */ |
|---|
| 4364 | | - if (same_inode) { |
|---|
| 4365 | | - if (destoff + len > off && destoff < off + len) |
|---|
| 4366 | | - goto out_unlock; |
|---|
| 4367 | | - } |
|---|
| 4368 | | - |
|---|
| 4369 | | - if (destoff > inode->i_size) { |
|---|
| 4370 | | - ret = btrfs_cont_expand(inode, inode->i_size, destoff); |
|---|
| 4371 | | - if (ret) |
|---|
| 4372 | | - goto out_unlock; |
|---|
| 4373 | | - } |
|---|
| 4374 | | - |
|---|
| 4375 | | - /* |
|---|
| 4376 | | - * Lock the target range too. Right after we replace the file extent |
|---|
| 4377 | | - * items in the fs tree (which now point to the cloned data), we might |
|---|
| 4378 | | - * have a worker replace them with extent items relative to a write |
|---|
| 4379 | | - * operation that was issued before this clone operation (i.e. confront |
|---|
| 4380 | | - * with inode.c:btrfs_finish_ordered_io). |
|---|
| 4381 | | - */ |
|---|
| 4382 | | - if (same_inode) { |
|---|
| 4383 | | - u64 lock_start = min_t(u64, off, destoff); |
|---|
| 4384 | | - u64 lock_len = max_t(u64, off, destoff) + len - lock_start; |
|---|
| 4385 | | - |
|---|
| 4386 | | - ret = lock_extent_range(src, lock_start, lock_len, true); |
|---|
| 4387 | | - } else { |
|---|
| 4388 | | - ret = btrfs_double_extent_lock(src, off, inode, destoff, len, |
|---|
| 4389 | | - true); |
|---|
| 4390 | | - } |
|---|
| 4391 | | - ASSERT(ret == 0); |
|---|
| 4392 | | - if (WARN_ON(ret)) { |
|---|
| 4393 | | - /* ranges in the io trees already unlocked */ |
|---|
| 4394 | | - goto out_unlock; |
|---|
| 4395 | | - } |
|---|
| 4396 | | - |
|---|
| 4397 | | - ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |
|---|
| 4398 | | - |
|---|
| 4399 | | - if (same_inode) { |
|---|
| 4400 | | - u64 lock_start = min_t(u64, off, destoff); |
|---|
| 4401 | | - u64 lock_end = max_t(u64, off, destoff) + len - 1; |
|---|
| 4402 | | - |
|---|
| 4403 | | - unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end); |
|---|
| 4404 | | - } else { |
|---|
| 4405 | | - btrfs_double_extent_unlock(src, off, inode, destoff, len); |
|---|
| 4406 | | - } |
|---|
| 4407 | | - /* |
|---|
| 4408 | | - * Truncate page cache pages so that future reads will see the cloned |
|---|
| 4409 | | - * data immediately and not the previous data. |
|---|
| 4410 | | - */ |
|---|
| 4411 | | - truncate_inode_pages_range(&inode->i_data, |
|---|
| 4412 | | - round_down(destoff, PAGE_SIZE), |
|---|
| 4413 | | - round_up(destoff + len, PAGE_SIZE) - 1); |
|---|
| 4414 | | -out_unlock: |
|---|
| 4415 | | - if (!same_inode) |
|---|
| 4416 | | - btrfs_double_inode_unlock(src, inode); |
|---|
| 4417 | | - else |
|---|
| 4418 | | - inode_unlock(src); |
|---|
| 4419 | | - return ret; |
|---|
| 4420 | | -} |
|---|
| 4421 | | - |
|---|
| 4422 | | -int btrfs_clone_file_range(struct file *src_file, loff_t off, |
|---|
| 4423 | | - struct file *dst_file, loff_t destoff, u64 len) |
|---|
| 4424 | | -{ |
|---|
| 4425 | | - return btrfs_clone_files(dst_file, src_file, off, len, destoff); |
|---|
| 4426 | | -} |
|---|
| 4427 | | - |
|---|
| 4428 | 3421 | static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) |
|---|
| 4429 | 3422 | { |
|---|
| 4430 | 3423 | struct inode *inode = file_inode(file); |
|---|
| .. | .. |
|---|
| 4433 | 3426 | struct btrfs_root *new_root; |
|---|
| 4434 | 3427 | struct btrfs_dir_item *di; |
|---|
| 4435 | 3428 | struct btrfs_trans_handle *trans; |
|---|
| 4436 | | - struct btrfs_path *path; |
|---|
| 4437 | | - struct btrfs_key location; |
|---|
| 3429 | + struct btrfs_path *path = NULL; |
|---|
| 4438 | 3430 | struct btrfs_disk_key disk_key; |
|---|
| 4439 | 3431 | u64 objectid = 0; |
|---|
| 4440 | 3432 | u64 dir_id; |
|---|
| .. | .. |
|---|
| 4455 | 3447 | if (!objectid) |
|---|
| 4456 | 3448 | objectid = BTRFS_FS_TREE_OBJECTID; |
|---|
| 4457 | 3449 | |
|---|
| 4458 | | - location.objectid = objectid; |
|---|
| 4459 | | - location.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 4460 | | - location.offset = (u64)-1; |
|---|
| 4461 | | - |
|---|
| 4462 | | - new_root = btrfs_read_fs_root_no_name(fs_info, &location); |
|---|
| 3450 | + new_root = btrfs_get_fs_root(fs_info, objectid, true); |
|---|
| 4463 | 3451 | if (IS_ERR(new_root)) { |
|---|
| 4464 | 3452 | ret = PTR_ERR(new_root); |
|---|
| 4465 | 3453 | goto out; |
|---|
| 4466 | 3454 | } |
|---|
| 4467 | | - if (!is_fstree(new_root->objectid)) { |
|---|
| 3455 | + if (!is_fstree(new_root->root_key.objectid)) { |
|---|
| 4468 | 3456 | ret = -ENOENT; |
|---|
| 4469 | | - goto out; |
|---|
| 3457 | + goto out_free; |
|---|
| 4470 | 3458 | } |
|---|
| 4471 | 3459 | |
|---|
| 4472 | 3460 | path = btrfs_alloc_path(); |
|---|
| 4473 | 3461 | if (!path) { |
|---|
| 4474 | 3462 | ret = -ENOMEM; |
|---|
| 4475 | | - goto out; |
|---|
| 3463 | + goto out_free; |
|---|
| 4476 | 3464 | } |
|---|
| 4477 | 3465 | path->leave_spinning = 1; |
|---|
| 4478 | 3466 | |
|---|
| 4479 | 3467 | trans = btrfs_start_transaction(root, 1); |
|---|
| 4480 | 3468 | if (IS_ERR(trans)) { |
|---|
| 4481 | | - btrfs_free_path(path); |
|---|
| 4482 | 3469 | ret = PTR_ERR(trans); |
|---|
| 4483 | | - goto out; |
|---|
| 3470 | + goto out_free; |
|---|
| 4484 | 3471 | } |
|---|
| 4485 | 3472 | |
|---|
| 4486 | 3473 | dir_id = btrfs_super_root_dir(fs_info->super_copy); |
|---|
| 4487 | 3474 | di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, |
|---|
| 4488 | 3475 | dir_id, "default", 7, 1); |
|---|
| 4489 | 3476 | if (IS_ERR_OR_NULL(di)) { |
|---|
| 4490 | | - btrfs_free_path(path); |
|---|
| 3477 | + btrfs_release_path(path); |
|---|
| 4491 | 3478 | btrfs_end_transaction(trans); |
|---|
| 4492 | 3479 | btrfs_err(fs_info, |
|---|
| 4493 | 3480 | "Umm, you don't have the default diritem, this isn't going to work"); |
|---|
| 4494 | 3481 | ret = -ENOENT; |
|---|
| 4495 | | - goto out; |
|---|
| 3482 | + goto out_free; |
|---|
| 4496 | 3483 | } |
|---|
| 4497 | 3484 | |
|---|
| 4498 | 3485 | btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); |
|---|
| 4499 | 3486 | btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); |
|---|
| 4500 | 3487 | btrfs_mark_buffer_dirty(path->nodes[0]); |
|---|
| 4501 | | - btrfs_free_path(path); |
|---|
| 3488 | + btrfs_release_path(path); |
|---|
| 4502 | 3489 | |
|---|
| 4503 | 3490 | btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL); |
|---|
| 4504 | 3491 | btrfs_end_transaction(trans); |
|---|
| 3492 | +out_free: |
|---|
| 3493 | + btrfs_put_root(new_root); |
|---|
| 3494 | + btrfs_free_path(path); |
|---|
| 4505 | 3495 | out: |
|---|
| 4506 | 3496 | mnt_drop_write_file(file); |
|---|
| 4507 | 3497 | return ret; |
|---|
| .. | .. |
|---|
| 4510 | 3500 | static void get_block_group_info(struct list_head *groups_list, |
|---|
| 4511 | 3501 | struct btrfs_ioctl_space_info *space) |
|---|
| 4512 | 3502 | { |
|---|
| 4513 | | - struct btrfs_block_group_cache *block_group; |
|---|
| 3503 | + struct btrfs_block_group *block_group; |
|---|
| 4514 | 3504 | |
|---|
| 4515 | 3505 | space->total_bytes = 0; |
|---|
| 4516 | 3506 | space->used_bytes = 0; |
|---|
| 4517 | 3507 | space->flags = 0; |
|---|
| 4518 | 3508 | list_for_each_entry(block_group, groups_list, list) { |
|---|
| 4519 | 3509 | space->flags = block_group->flags; |
|---|
| 4520 | | - space->total_bytes += block_group->key.offset; |
|---|
| 4521 | | - space->used_bytes += |
|---|
| 4522 | | - btrfs_block_group_used(&block_group->item); |
|---|
| 3510 | + space->total_bytes += block_group->length; |
|---|
| 3511 | + space->used_bytes += block_group->used; |
|---|
| 4523 | 3512 | } |
|---|
| 4524 | 3513 | } |
|---|
| 4525 | 3514 | |
|---|
| .. | .. |
|---|
| 4553 | 3542 | struct btrfs_space_info *tmp; |
|---|
| 4554 | 3543 | |
|---|
| 4555 | 3544 | info = NULL; |
|---|
| 4556 | | - rcu_read_lock(); |
|---|
| 4557 | | - list_for_each_entry_rcu(tmp, &fs_info->space_info, |
|---|
| 4558 | | - list) { |
|---|
| 3545 | + list_for_each_entry(tmp, &fs_info->space_info, list) { |
|---|
| 4559 | 3546 | if (tmp->flags == types[i]) { |
|---|
| 4560 | 3547 | info = tmp; |
|---|
| 4561 | 3548 | break; |
|---|
| 4562 | 3549 | } |
|---|
| 4563 | 3550 | } |
|---|
| 4564 | | - rcu_read_unlock(); |
|---|
| 4565 | 3551 | |
|---|
| 4566 | 3552 | if (!info) |
|---|
| 4567 | 3553 | continue; |
|---|
| .. | .. |
|---|
| 4609 | 3595 | break; |
|---|
| 4610 | 3596 | |
|---|
| 4611 | 3597 | info = NULL; |
|---|
| 4612 | | - rcu_read_lock(); |
|---|
| 4613 | | - list_for_each_entry_rcu(tmp, &fs_info->space_info, |
|---|
| 4614 | | - list) { |
|---|
| 3598 | + list_for_each_entry(tmp, &fs_info->space_info, list) { |
|---|
| 4615 | 3599 | if (tmp->flags == types[i]) { |
|---|
| 4616 | 3600 | info = tmp; |
|---|
| 4617 | 3601 | break; |
|---|
| 4618 | 3602 | } |
|---|
| 4619 | 3603 | } |
|---|
| 4620 | | - rcu_read_unlock(); |
|---|
| 4621 | 3604 | |
|---|
| 4622 | 3605 | if (!info) |
|---|
| 4623 | 3606 | continue; |
|---|
| .. | .. |
|---|
| 4732 | 3715 | &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, |
|---|
| 4733 | 3716 | 0); |
|---|
| 4734 | 3717 | |
|---|
| 3718 | + /* |
|---|
| 3719 | + * Copy scrub args to user space even if btrfs_scrub_dev() returned an |
|---|
| 3720 | + * error. This is important as it allows user space to know how much |
|---|
| 3721 | + * progress scrub has done. For example, if scrub is canceled we get |
|---|
| 3722 | + * -ECANCELED from btrfs_scrub_dev() and return that error back to user |
|---|
| 3723 | + * space. Later user space can inspect the progress from the structure |
|---|
| 3724 | + * btrfs_ioctl_scrub_args and resume scrub from where it left off |
|---|
| 3725 | + * previously (btrfs-progs does this). |
|---|
| 3726 | + * If we fail to copy the btrfs_ioctl_scrub_args structure to user space |
|---|
| 3727 | + * then return -EFAULT to signal the structure was not copied or it may |
|---|
| 3728 | + * be corrupt and unreliable due to a partial copy. |
|---|
| 3729 | + */ |
|---|
| 4735 | 3730 | if (copy_to_user(arg, sa, sizeof(*sa))) |
|---|
| 4736 | 3731 | ret = -EFAULT; |
|---|
| 4737 | 3732 | |
|---|
| .. | .. |
|---|
| 4765 | 3760 | |
|---|
| 4766 | 3761 | ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress); |
|---|
| 4767 | 3762 | |
|---|
| 4768 | | - if (copy_to_user(arg, sa, sizeof(*sa))) |
|---|
| 3763 | + if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) |
|---|
| 4769 | 3764 | ret = -EFAULT; |
|---|
| 4770 | 3765 | |
|---|
| 4771 | 3766 | kfree(sa); |
|---|
| .. | .. |
|---|
| 4789 | 3784 | |
|---|
| 4790 | 3785 | ret = btrfs_get_dev_stats(fs_info, sa); |
|---|
| 4791 | 3786 | |
|---|
| 4792 | | - if (copy_to_user(arg, sa, sizeof(*sa))) |
|---|
| 3787 | + if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) |
|---|
| 4793 | 3788 | ret = -EFAULT; |
|---|
| 4794 | 3789 | |
|---|
| 4795 | 3790 | kfree(sa); |
|---|
| .. | .. |
|---|
| 4815 | 3810 | ret = -EROFS; |
|---|
| 4816 | 3811 | goto out; |
|---|
| 4817 | 3812 | } |
|---|
| 4818 | | - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { |
|---|
| 3813 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) { |
|---|
| 4819 | 3814 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; |
|---|
| 4820 | 3815 | } else { |
|---|
| 4821 | 3816 | ret = btrfs_dev_replace_by_ioctl(fs_info, p); |
|---|
| 4822 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 3817 | + btrfs_exclop_finish(fs_info); |
|---|
| 4823 | 3818 | } |
|---|
| 4824 | 3819 | break; |
|---|
| 4825 | 3820 | case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: |
|---|
| .. | .. |
|---|
| 4835 | 3830 | break; |
|---|
| 4836 | 3831 | } |
|---|
| 4837 | 3832 | |
|---|
| 4838 | | - if (copy_to_user(arg, p, sizeof(*p))) |
|---|
| 3833 | + if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p))) |
|---|
| 4839 | 3834 | ret = -EFAULT; |
|---|
| 4840 | 3835 | out: |
|---|
| 4841 | 3836 | kfree(p); |
|---|
| .. | .. |
|---|
| 4886 | 3881 | ipath->fspath->val[i] = rel_ptr; |
|---|
| 4887 | 3882 | } |
|---|
| 4888 | 3883 | |
|---|
| 3884 | + btrfs_free_path(path); |
|---|
| 3885 | + path = NULL; |
|---|
| 4889 | 3886 | ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, |
|---|
| 4890 | 3887 | ipath->fspath, size); |
|---|
| 4891 | 3888 | if (ret) { |
|---|
| .. | .. |
|---|
| 4899 | 3896 | kfree(ipa); |
|---|
| 4900 | 3897 | |
|---|
| 4901 | 3898 | return ret; |
|---|
| 4902 | | -} |
|---|
| 4903 | | - |
|---|
| 4904 | | -static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) |
|---|
| 4905 | | -{ |
|---|
| 4906 | | - struct btrfs_data_container *inodes = ctx; |
|---|
| 4907 | | - const size_t c = 3 * sizeof(u64); |
|---|
| 4908 | | - |
|---|
| 4909 | | - if (inodes->bytes_left >= c) { |
|---|
| 4910 | | - inodes->bytes_left -= c; |
|---|
| 4911 | | - inodes->val[inodes->elem_cnt] = inum; |
|---|
| 4912 | | - inodes->val[inodes->elem_cnt + 1] = offset; |
|---|
| 4913 | | - inodes->val[inodes->elem_cnt + 2] = root; |
|---|
| 4914 | | - inodes->elem_cnt += 3; |
|---|
| 4915 | | - } else { |
|---|
| 4916 | | - inodes->bytes_missing += c - inodes->bytes_left; |
|---|
| 4917 | | - inodes->bytes_left = 0; |
|---|
| 4918 | | - inodes->elem_missed += 3; |
|---|
| 4919 | | - } |
|---|
| 4920 | | - |
|---|
| 4921 | | - return 0; |
|---|
| 4922 | 3899 | } |
|---|
| 4923 | 3900 | |
|---|
| 4924 | 3901 | static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, |
|---|
| .. | .. |
|---|
| 4956 | 3933 | size = min_t(u32, loi->size, SZ_16M); |
|---|
| 4957 | 3934 | } |
|---|
| 4958 | 3935 | |
|---|
| 3936 | + inodes = init_data_container(size); |
|---|
| 3937 | + if (IS_ERR(inodes)) { |
|---|
| 3938 | + ret = PTR_ERR(inodes); |
|---|
| 3939 | + goto out_loi; |
|---|
| 3940 | + } |
|---|
| 3941 | + |
|---|
| 4959 | 3942 | path = btrfs_alloc_path(); |
|---|
| 4960 | 3943 | if (!path) { |
|---|
| 4961 | 3944 | ret = -ENOMEM; |
|---|
| 4962 | 3945 | goto out; |
|---|
| 4963 | 3946 | } |
|---|
| 4964 | | - |
|---|
| 4965 | | - inodes = init_data_container(size); |
|---|
| 4966 | | - if (IS_ERR(inodes)) { |
|---|
| 4967 | | - ret = PTR_ERR(inodes); |
|---|
| 4968 | | - inodes = NULL; |
|---|
| 4969 | | - goto out; |
|---|
| 4970 | | - } |
|---|
| 4971 | | - |
|---|
| 4972 | 3947 | ret = iterate_inodes_from_logical(loi->logical, fs_info, path, |
|---|
| 4973 | | - build_ino_list, inodes, ignore_offset); |
|---|
| 3948 | + inodes, ignore_offset); |
|---|
| 3949 | + btrfs_free_path(path); |
|---|
| 4974 | 3950 | if (ret == -EINVAL) |
|---|
| 4975 | 3951 | ret = -ENOENT; |
|---|
| 4976 | 3952 | if (ret < 0) |
|---|
| .. | .. |
|---|
| 4982 | 3958 | ret = -EFAULT; |
|---|
| 4983 | 3959 | |
|---|
| 4984 | 3960 | out: |
|---|
| 4985 | | - btrfs_free_path(path); |
|---|
| 4986 | 3961 | kvfree(inodes); |
|---|
| 4987 | 3962 | out_loi: |
|---|
| 4988 | 3963 | kfree(loi); |
|---|
| .. | .. |
|---|
| 5030 | 4005 | return ret; |
|---|
| 5031 | 4006 | |
|---|
| 5032 | 4007 | again: |
|---|
| 5033 | | - if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { |
|---|
| 4008 | + if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { |
|---|
| 5034 | 4009 | mutex_lock(&fs_info->balance_mutex); |
|---|
| 5035 | 4010 | need_unlock = true; |
|---|
| 5036 | 4011 | goto locked; |
|---|
| .. | .. |
|---|
| 5076 | 4051 | } |
|---|
| 5077 | 4052 | |
|---|
| 5078 | 4053 | locked: |
|---|
| 5079 | | - BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)); |
|---|
| 5080 | 4054 | |
|---|
| 5081 | 4055 | if (arg) { |
|---|
| 5082 | 4056 | bargs = memdup_user(arg, sizeof(*bargs)); |
|---|
| .. | .. |
|---|
| 5131 | 4105 | |
|---|
| 5132 | 4106 | do_balance: |
|---|
| 5133 | 4107 | /* |
|---|
| 5134 | | - * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to |
|---|
| 5135 | | - * btrfs_balance. bctl is freed in reset_balance_state, or, if |
|---|
| 5136 | | - * restriper was paused all the way until unmount, in free_fs_info. |
|---|
| 5137 | | - * The flag should be cleared after reset_balance_state. |
|---|
| 4108 | + * Ownership of bctl and exclusive operation goes to btrfs_balance. |
|---|
| 4109 | + * bctl is freed in reset_balance_state, or, if restriper was paused |
|---|
| 4110 | + * all the way until unmount, in free_fs_info. The exclusive |
|---|
| 4111 | + * operation should be finished after reset_balance_state. |
|---|
| 5138 | 4112 | */ |
|---|
| 5139 | 4113 | need_unlock = false; |
|---|
| 5140 | 4114 | |
|---|
| 5141 | 4115 | ret = btrfs_balance(fs_info, bctl, bargs); |
|---|
| 5142 | 4116 | bctl = NULL; |
|---|
| 5143 | 4117 | |
|---|
| 5144 | | - if (arg) { |
|---|
| 4118 | + if ((ret == 0 || ret == -ECANCELED) && arg) { |
|---|
| 5145 | 4119 | if (copy_to_user(arg, bargs, sizeof(*bargs))) |
|---|
| 5146 | 4120 | ret = -EFAULT; |
|---|
| 5147 | 4121 | } |
|---|
| .. | .. |
|---|
| 5153 | 4127 | out_unlock: |
|---|
| 5154 | 4128 | mutex_unlock(&fs_info->balance_mutex); |
|---|
| 5155 | 4129 | if (need_unlock) |
|---|
| 5156 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 4130 | + btrfs_exclop_finish(fs_info); |
|---|
| 5157 | 4131 | out: |
|---|
| 5158 | 4132 | mnt_drop_write_file(file); |
|---|
| 5159 | 4133 | return ret; |
|---|
| .. | .. |
|---|
| 5430 | 4404 | return ret; |
|---|
| 5431 | 4405 | } |
|---|
| 5432 | 4406 | |
|---|
| 5433 | | -static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) |
|---|
| 4407 | +static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info, |
|---|
| 4408 | + void __user *arg) |
|---|
| 5434 | 4409 | { |
|---|
| 5435 | | - struct inode *inode = file_inode(file); |
|---|
| 5436 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 5437 | 4410 | struct btrfs_ioctl_quota_rescan_args *qsa; |
|---|
| 5438 | 4411 | int ret = 0; |
|---|
| 5439 | 4412 | |
|---|
| .. | .. |
|---|
| 5456 | 4429 | return ret; |
|---|
| 5457 | 4430 | } |
|---|
| 5458 | 4431 | |
|---|
| 5459 | | -static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) |
|---|
| 4432 | +static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info, |
|---|
| 4433 | + void __user *arg) |
|---|
| 5460 | 4434 | { |
|---|
| 5461 | | - struct inode *inode = file_inode(file); |
|---|
| 5462 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 5463 | | - |
|---|
| 5464 | 4435 | if (!capable(CAP_SYS_ADMIN)) |
|---|
| 5465 | 4436 | return -EPERM; |
|---|
| 5466 | 4437 | |
|---|
| .. | .. |
|---|
| 5632 | 4603 | return ret; |
|---|
| 5633 | 4604 | } |
|---|
| 5634 | 4605 | |
|---|
| 5635 | | -static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) |
|---|
| 4606 | +static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info, |
|---|
| 4607 | + void __user *arg) |
|---|
| 5636 | 4608 | { |
|---|
| 5637 | | - struct inode *inode = file_inode(file); |
|---|
| 5638 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 5639 | 4609 | size_t len; |
|---|
| 5640 | 4610 | int ret; |
|---|
| 5641 | 4611 | char label[BTRFS_LABEL_SIZE]; |
|---|
| .. | .. |
|---|
| 5719 | 4689 | return 0; |
|---|
| 5720 | 4690 | } |
|---|
| 5721 | 4691 | |
|---|
| 5722 | | -static int btrfs_ioctl_get_features(struct file *file, void __user *arg) |
|---|
| 4692 | +static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info, |
|---|
| 4693 | + void __user *arg) |
|---|
| 5723 | 4694 | { |
|---|
| 5724 | | - struct inode *inode = file_inode(file); |
|---|
| 5725 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 5726 | 4695 | struct btrfs_super_block *super_block = fs_info->super_copy; |
|---|
| 5727 | 4696 | struct btrfs_ioctl_feature_flags features; |
|---|
| 5728 | 4697 | |
|---|
| .. | .. |
|---|
| 5741 | 4710 | u64 change_mask, u64 flags, u64 supported_flags, |
|---|
| 5742 | 4711 | u64 safe_set, u64 safe_clear) |
|---|
| 5743 | 4712 | { |
|---|
| 5744 | | - const char *type = btrfs_feature_set_names[set]; |
|---|
| 4713 | + const char *type = btrfs_feature_set_name(set); |
|---|
| 5745 | 4714 | char *names; |
|---|
| 5746 | 4715 | u64 disallowed, unsupported; |
|---|
| 5747 | 4716 | u64 set_mask = flags & change_mask; |
|---|
| .. | .. |
|---|
| 5922 | 4891 | return btrfs_ioctl_setflags(file, argp); |
|---|
| 5923 | 4892 | case FS_IOC_GETVERSION: |
|---|
| 5924 | 4893 | return btrfs_ioctl_getversion(file, argp); |
|---|
| 4894 | + case FS_IOC_GETFSLABEL: |
|---|
| 4895 | + return btrfs_ioctl_get_fslabel(fs_info, argp); |
|---|
| 4896 | + case FS_IOC_SETFSLABEL: |
|---|
| 4897 | + return btrfs_ioctl_set_fslabel(file, argp); |
|---|
| 5925 | 4898 | case FITRIM: |
|---|
| 5926 | | - return btrfs_ioctl_fitrim(file, argp); |
|---|
| 4899 | + return btrfs_ioctl_fitrim(fs_info, argp); |
|---|
| 5927 | 4900 | case BTRFS_IOC_SNAP_CREATE: |
|---|
| 5928 | 4901 | return btrfs_ioctl_snap_create(file, argp, 0); |
|---|
| 5929 | 4902 | case BTRFS_IOC_SNAP_CREATE_V2: |
|---|
| .. | .. |
|---|
| 5933 | 4906 | case BTRFS_IOC_SUBVOL_CREATE_V2: |
|---|
| 5934 | 4907 | return btrfs_ioctl_snap_create_v2(file, argp, 1); |
|---|
| 5935 | 4908 | case BTRFS_IOC_SNAP_DESTROY: |
|---|
| 5936 | | - return btrfs_ioctl_snap_destroy(file, argp); |
|---|
| 4909 | + return btrfs_ioctl_snap_destroy(file, argp, false); |
|---|
| 4910 | + case BTRFS_IOC_SNAP_DESTROY_V2: |
|---|
| 4911 | + return btrfs_ioctl_snap_destroy(file, argp, true); |
|---|
| 5937 | 4912 | case BTRFS_IOC_SUBVOL_GETFLAGS: |
|---|
| 5938 | 4913 | return btrfs_ioctl_subvol_getflags(file, argp); |
|---|
| 5939 | 4914 | case BTRFS_IOC_SUBVOL_SETFLAGS: |
|---|
| .. | .. |
|---|
| 5975 | 4950 | case BTRFS_IOC_SYNC: { |
|---|
| 5976 | 4951 | int ret; |
|---|
| 5977 | 4952 | |
|---|
| 5978 | | - ret = btrfs_start_delalloc_roots(fs_info, -1); |
|---|
| 4953 | + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); |
|---|
| 5979 | 4954 | if (ret) |
|---|
| 5980 | 4955 | return ret; |
|---|
| 5981 | 4956 | ret = btrfs_sync_fs(inode->i_sb, 1); |
|---|
| .. | .. |
|---|
| 6028 | 5003 | case BTRFS_IOC_QUOTA_RESCAN: |
|---|
| 6029 | 5004 | return btrfs_ioctl_quota_rescan(file, argp); |
|---|
| 6030 | 5005 | case BTRFS_IOC_QUOTA_RESCAN_STATUS: |
|---|
| 6031 | | - return btrfs_ioctl_quota_rescan_status(file, argp); |
|---|
| 5006 | + return btrfs_ioctl_quota_rescan_status(fs_info, argp); |
|---|
| 6032 | 5007 | case BTRFS_IOC_QUOTA_RESCAN_WAIT: |
|---|
| 6033 | | - return btrfs_ioctl_quota_rescan_wait(file, argp); |
|---|
| 5008 | + return btrfs_ioctl_quota_rescan_wait(fs_info, argp); |
|---|
| 6034 | 5009 | case BTRFS_IOC_DEV_REPLACE: |
|---|
| 6035 | 5010 | return btrfs_ioctl_dev_replace(fs_info, argp); |
|---|
| 6036 | | - case BTRFS_IOC_GET_FSLABEL: |
|---|
| 6037 | | - return btrfs_ioctl_get_fslabel(file, argp); |
|---|
| 6038 | | - case BTRFS_IOC_SET_FSLABEL: |
|---|
| 6039 | | - return btrfs_ioctl_set_fslabel(file, argp); |
|---|
| 6040 | 5011 | case BTRFS_IOC_GET_SUPPORTED_FEATURES: |
|---|
| 6041 | 5012 | return btrfs_ioctl_get_supported_features(argp); |
|---|
| 6042 | 5013 | case BTRFS_IOC_GET_FEATURES: |
|---|
| 6043 | | - return btrfs_ioctl_get_features(file, argp); |
|---|
| 5014 | + return btrfs_ioctl_get_features(fs_info, argp); |
|---|
| 6044 | 5015 | case BTRFS_IOC_SET_FEATURES: |
|---|
| 6045 | 5016 | return btrfs_ioctl_set_features(file, argp); |
|---|
| 6046 | 5017 | case FS_IOC_FSGETXATTR: |
|---|