From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/fs/btrfs/ioctl.c | 2061 ++++++++++++++--------------------------------------------
1 files changed, 516 insertions(+), 1,545 deletions(-)
diff --git a/kernel/fs/btrfs/ioctl.c b/kernel/fs/btrfs/ioctl.c
index 717385b..a17076a 100644
--- a/kernel/fs/btrfs/ioctl.c
+++ b/kernel/fs/btrfs/ioctl.c
@@ -28,6 +28,7 @@
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
+#include "export.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
@@ -43,6 +44,9 @@
#include "qgroup.h"
#include "tree-log.h"
#include "compression.h"
+#include "space-info.h"
+#include "delalloc-space.h"
+#include "block-group.h"
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -82,10 +86,6 @@
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
#endif
-
-static int btrfs_clone(struct inode *src, struct inode *inode,
- u64 off, u64 olen, u64 olen_aligned, u64 destoff,
- int no_time_update);
/* Mask out flags that are inappropriate for the given type of inode. */
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
@@ -164,8 +164,11 @@
return 0;
}
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
-static int check_fsflags(unsigned int flags)
+/*
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
+ * the old and new flags are not conflicting
+ */
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -174,7 +177,17 @@
FS_NOCOW_FL))
return -EOPNOTSUPP;
+ /* COMPR in old and NOCOMP in new (or vice versa) is valid; both in new is not */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
+ return -EINVAL;
+
+ if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
+ return -EINVAL;
+
+ /* NOCOW and compression options are mutually exclusive */
+ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+ if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
return -EINVAL;
return 0;
@@ -189,9 +202,8 @@
struct btrfs_trans_handle *trans;
unsigned int fsflags, old_fsflags;
int ret;
- u64 old_flags;
- unsigned int old_i_flags;
- umode_t mode;
+ const char *comp = NULL;
+ u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@@ -202,76 +214,70 @@
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_fsflags(fsflags);
- if (ret)
- return ret;
-
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
-
- old_flags = binode->flags;
- old_i_flags = inode->i_flags;
- mode = inode->i_mode;
-
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
- if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
- goto out_unlock;
- }
- }
+ ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
+
+ ret = check_fsflags(old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
+
+ binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
- binode->flags |= BTRFS_INODE_SYNC;
+ binode_flags |= BTRFS_INODE_SYNC;
else
- binode->flags &= ~BTRFS_INODE_SYNC;
+ binode_flags &= ~BTRFS_INODE_SYNC;
if (fsflags & FS_IMMUTABLE_FL)
- binode->flags |= BTRFS_INODE_IMMUTABLE;
+ binode_flags |= BTRFS_INODE_IMMUTABLE;
else
- binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+ binode_flags &= ~BTRFS_INODE_IMMUTABLE;
if (fsflags & FS_APPEND_FL)
- binode->flags |= BTRFS_INODE_APPEND;
+ binode_flags |= BTRFS_INODE_APPEND;
else
- binode->flags &= ~BTRFS_INODE_APPEND;
+ binode_flags &= ~BTRFS_INODE_APPEND;
if (fsflags & FS_NODUMP_FL)
- binode->flags |= BTRFS_INODE_NODUMP;
+ binode_flags |= BTRFS_INODE_NODUMP;
else
- binode->flags &= ~BTRFS_INODE_NODUMP;
+ binode_flags &= ~BTRFS_INODE_NODUMP;
if (fsflags & FS_NOATIME_FL)
- binode->flags |= BTRFS_INODE_NOATIME;
+ binode_flags |= BTRFS_INODE_NOATIME;
else
- binode->flags &= ~BTRFS_INODE_NOATIME;
+ binode_flags &= ~BTRFS_INODE_NOATIME;
if (fsflags & FS_DIRSYNC_FL)
- binode->flags |= BTRFS_INODE_DIRSYNC;
+ binode_flags |= BTRFS_INODE_DIRSYNC;
else
- binode->flags &= ~BTRFS_INODE_DIRSYNC;
+ binode_flags &= ~BTRFS_INODE_DIRSYNC;
if (fsflags & FS_NOCOW_FL) {
- if (S_ISREG(mode)) {
+ if (S_ISREG(inode->i_mode)) {
/*
* It's safe to turn csums off here, no extents exist.
* Otherwise we want the flag to reflect the real COW
* status of the file and will not set it.
*/
if (inode->i_size == 0)
- binode->flags |= BTRFS_INODE_NODATACOW
- | BTRFS_INODE_NODATASUM;
+ binode_flags |= BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM;
} else {
- binode->flags |= BTRFS_INODE_NODATACOW;
+ binode_flags |= BTRFS_INODE_NODATACOW;
}
} else {
/*
* Revert back under same assumptions as above
*/
- if (S_ISREG(mode)) {
+ if (S_ISREG(inode->i_mode)) {
if (inode->i_size == 0)
- binode->flags &= ~(BTRFS_INODE_NODATACOW
- | BTRFS_INODE_NODATASUM);
+ binode_flags &= ~(BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM);
} else {
- binode->flags &= ~BTRFS_INODE_NODATACOW;
+ binode_flags &= ~BTRFS_INODE_NODATACOW;
}
}
@@ -281,52 +287,59 @@
* things smaller.
*/
if (fsflags & FS_NOCOMP_FL) {
- binode->flags &= ~BTRFS_INODE_COMPRESS;
- binode->flags |= BTRFS_INODE_NOCOMPRESS;
-
- ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
- if (ret && ret != -ENODATA)
- goto out_drop;
+ binode_flags &= ~BTRFS_INODE_COMPRESS;
+ binode_flags |= BTRFS_INODE_NOCOMPRESS;
} else if (fsflags & FS_COMPR_FL) {
- const char *comp;
- binode->flags |= BTRFS_INODE_COMPRESS;
- binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ if (IS_SWAPFILE(inode)) {
+ ret = -ETXTBSY;
+ goto out_unlock;
+ }
+
+ binode_flags |= BTRFS_INODE_COMPRESS;
+ binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
comp = btrfs_compress_type2str(fs_info->compress_type);
if (!comp || comp[0] == 0)
comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
-
- ret = btrfs_set_prop(inode, "btrfs.compression",
- comp, strlen(comp), 0);
- if (ret)
- goto out_drop;
-
} else {
- ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
- if (ret && ret != -ENODATA)
- goto out_drop;
- binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+ binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
- trans = btrfs_start_transaction(root, 1);
+ /*
+ * 1 for inode item
+ * 2 for properties
+ */
+ trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- goto out_drop;
+ goto out_unlock;
}
+ if (comp) {
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
+ strlen(comp), 0);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ } else {
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
+ 0, 0);
+ if (ret && ret != -ENODATA) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ }
+
+ binode->flags = binode_flags;
btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
+ out_end_trans:
btrfs_end_transaction(trans);
- out_drop:
- if (ret) {
- binode->flags = old_flags;
- inode->i_flags = old_i_flags;
- }
-
out_unlock:
inode_unlock(inode);
mnt_drop_write_file(file);
@@ -365,6 +378,18 @@
return 0;
}
+bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
+ enum btrfs_exclusive_operation type)
+{
+ return !cmpxchg(&fs_info->exclusive_operation, BTRFS_EXCLOP_NONE, type);
+}
+
+void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
+{
+ WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
+ sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
+}
+
/*
* Set the xflags from the internal inode flags. The remaining items of fsxattr
* are zeroed.
@@ -374,9 +399,7 @@
struct btrfs_inode *binode = BTRFS_I(file_inode(file));
struct fsxattr fa;
- memset(&fa, 0, sizeof(fa));
- fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
-
+ simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
if (copy_to_user(arg, &fa, sizeof(fa)))
return -EFAULT;
@@ -389,7 +412,7 @@
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- struct fsxattr fa;
+ struct fsxattr fa, old_fa;
unsigned old_flags;
unsigned old_i_flags;
int ret = 0;
@@ -400,7 +423,6 @@
if (btrfs_root_readonly(root))
return -EROFS;
- memset(&fa, 0, sizeof(fa));
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
@@ -420,13 +442,11 @@
old_flags = binode->flags;
old_i_flags = inode->i_flags;
- /* We need the capabilities to change append-only or immutable inode */
- if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
- (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
- !capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
+ simple_fill_fsxattr(&old_fa,
+ btrfs_inode_flags_to_xflags(binode->flags));
+ ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
+ if (ret)
goto out_unlock;
- }
if (fa.fsx_xflags & FS_XFLAG_SYNC)
binode->flags |= BTRFS_INODE_SYNC;
@@ -482,10 +502,9 @@
return put_user(inode->i_generation, arg);
}
-static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
+static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_device *device;
struct request_queue *q;
struct fstrim_range range;
@@ -544,7 +563,7 @@
return 0;
}
-int btrfs_is_empty_uuid(u8 *uuid)
+int __pure btrfs_is_empty_uuid(u8 *uuid)
{
int i;
@@ -558,7 +577,6 @@
static noinline int create_subvol(struct inode *dir,
struct dentry *dentry,
const char *name, int namelen,
- u64 *async_transid,
struct btrfs_qgroup_inherit *inherit)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -574,10 +592,10 @@
struct inode *inode;
int ret;
int err;
+ dev_t anon_dev = 0;
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
- uuid_le new_uuid;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
@@ -585,6 +603,10 @@
ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
if (ret)
+ goto fail_free;
+
+ ret = get_anon_bdev(&anon_dev);
+ if (ret < 0)
goto fail_free;
/*
@@ -608,7 +630,7 @@
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+ btrfs_subvolume_release_metadata(root, &block_rsv);
goto fail_free;
}
trans->block_rsv = &block_rsv;
@@ -618,7 +640,8 @@
if (ret)
goto fail;
- leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
+ leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0,
+ BTRFS_NESTING_NORMAL);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
@@ -647,8 +670,7 @@
btrfs_set_root_generation_v2(root_item,
btrfs_root_generation(root_item));
- uuid_le_gen(&new_uuid);
- memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
+ generate_random_guid(root_item->uuid);
btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
root_item->ctime = root_item->otime;
@@ -682,16 +704,20 @@
leaf = NULL;
key.offset = (u64)-1;
- new_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
if (IS_ERR(new_root)) {
+ free_anon_bdev(anon_dev);
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
goto fail;
}
+ /* Freeing will be done in btrfs_put_root() of new_root */
+ anon_dev = 0;
btrfs_record_root_in_trans(trans, new_root);
ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
+ btrfs_put_root(new_root);
if (ret) {
/* We potentially lose an unused inode item here */
btrfs_abort_transaction(trans, ret);
@@ -711,8 +737,7 @@
goto fail;
}
- ret = btrfs_insert_dir_item(trans, root,
- name, namelen, BTRFS_I(dir), &key,
+ ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
BTRFS_FT_DIR, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -742,16 +767,9 @@
kfree(root_item);
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+ btrfs_subvolume_release_metadata(root, &block_rsv);
- if (async_transid) {
- *async_transid = trans->transid;
- err = btrfs_commit_transaction_async(trans, 1);
- if (err)
- err = btrfs_commit_transaction(trans);
- } else {
- err = btrfs_commit_transaction(trans);
- }
+ err = btrfs_commit_transaction(trans);
if (err && !ret)
ret = err;
@@ -764,13 +782,14 @@
return ret;
fail_free:
+ if (anon_dev)
+ free_anon_bdev(anon_dev);
kfree(root_item);
return ret;
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
- struct dentry *dentry,
- u64 *async_transid, bool readonly,
+ struct dentry *dentry, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -778,15 +797,23 @@
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
- bool snapshot_force_cow = false;
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
+
+ if (atomic_read(&root->nr_swapfiles)) {
+ btrfs_warn(fs_info,
+ "cannot snapshot subvolume with active swapfile");
+ return -ETXTBSY;
+ }
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
if (!pending_snapshot)
return -ENOMEM;
+ ret = get_anon_bdev(&pending_snapshot->anon_dev);
+ if (ret < 0)
+ goto free_pending;
pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
GFP_KERNEL);
pending_snapshot->path = btrfs_alloc_path();
@@ -794,31 +821,6 @@
ret = -ENOMEM;
goto free_pending;
}
-
- /*
- * Force new buffered writes to reserve space even when NOCOW is
- * possible. This is to avoid later writeback (running dealloc) to
- * fallback to COW mode and unexpectedly fail with ENOSPC.
- */
- atomic_inc(&root->will_be_snapshotted);
- smp_mb__after_atomic();
- /* wait for no snapshot writes */
- wait_event(root->subv_writers->wait,
- percpu_counter_sum(&root->subv_writers->counter) == 0);
-
- ret = btrfs_start_delalloc_snapshot(root);
- if (ret)
- goto dec_and_free;
-
- /*
- * All previous writes have started writeback in NOCOW mode, so now
- * we force future writes to fallback to COW mode during snapshot
- * creation.
- */
- atomic_inc(&root->snapshot_force_cow);
- snapshot_force_cow = true;
-
- btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
@@ -834,7 +836,7 @@
&pending_snapshot->block_rsv, 8,
false);
if (ret)
- goto dec_and_free;
+ goto free_pending;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -852,14 +854,8 @@
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
spin_unlock(&fs_info->trans_lock);
- if (async_transid) {
- *async_transid = trans->transid;
- ret = btrfs_commit_transaction_async(trans, 1);
- if (ret)
- ret = btrfs_commit_transaction(trans);
- } else {
- ret = btrfs_commit_transaction(trans);
- }
+
+ ret = btrfs_commit_transaction(trans);
if (ret)
goto fail;
@@ -879,14 +875,16 @@
d_instantiate(dentry, inode);
ret = 0;
+ pending_snapshot->anon_dev = 0;
fail:
- btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
-dec_and_free:
- if (snapshot_force_cow)
- atomic_dec(&root->snapshot_force_cow);
- if (atomic_dec_and_test(&root->will_be_snapshotted))
- wake_up_var(&root->will_be_snapshotted);
+ /* Prevent double freeing of anon_dev */
+ if (ret && pending_snapshot->snap)
+ pending_snapshot->snap->anon_dev = 0;
+ btrfs_put_root(pending_snapshot->snap);
+ btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
free_pending:
+ if (pending_snapshot->anon_dev)
+ free_anon_bdev(pending_snapshot->anon_dev);
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
kfree(pending_snapshot);
@@ -964,7 +962,7 @@
static noinline int btrfs_mksubvol(const struct path *parent,
const char *name, int namelen,
struct btrfs_root *snap_src,
- u64 *async_transid, bool readonly,
+ bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
struct inode *dir = d_inode(parent->dentry);
@@ -1000,13 +998,11 @@
if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
goto out_up_read;
- if (snap_src) {
- error = create_snapshot(snap_src, dir, dentry,
- async_transid, readonly, inherit);
- } else {
- error = create_subvol(dir, dentry, name, namelen,
- async_transid, inherit);
- }
+ if (snap_src)
+ error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
+ else
+ error = create_subvol(dir, dentry, name, namelen, inherit);
+
if (!error)
fsnotify_mkdir(dir, dentry);
out_up_read:
@@ -1016,6 +1012,45 @@
out_unlock:
inode_unlock(dir);
return error;
+}
+
+static noinline int btrfs_mksnapshot(const struct path *parent,
+ const char *name, int namelen,
+ struct btrfs_root *root,
+ bool readonly,
+ struct btrfs_qgroup_inherit *inherit)
+{
+ int ret;
+ bool snapshot_force_cow = false;
+
+ /*
+ * Force new buffered writes to reserve space even when NOCOW is
+ * possible. This is to avoid later writeback (running delalloc) to
+ * fall back to COW mode and unexpectedly fail with ENOSPC.
+ */
+ btrfs_drew_read_lock(&root->snapshot_lock);
+
+ ret = btrfs_start_delalloc_snapshot(root);
+ if (ret)
+ goto out;
+
+ /*
+ * All previous writes have started writeback in NOCOW mode, so now
+ * we force future writes to fall back to COW mode during snapshot
+ * creation.
+ */
+ atomic_inc(&root->snapshot_force_cow);
+ snapshot_force_cow = true;
+
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+
+ ret = btrfs_mksubvol(parent, name, namelen,
+ root, readonly, inherit);
+out:
+ if (snapshot_force_cow)
+ atomic_dec(&root->snapshot_force_cow);
+ btrfs_drew_read_unlock(&root->snapshot_lock);
+ return ret;
}
/*
@@ -1139,7 +1174,7 @@
/* get the big lock and read metadata off disk */
lock_extent_bits(io_tree, start, end, &cached);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
unlock_extent_cached(io_tree, start, end, &cached);
if (IS_ERR(em))
@@ -1252,6 +1287,7 @@
u64 page_end;
u64 page_cnt;
u64 start = (u64)start_index << PAGE_SHIFT;
+ u64 search_start;
int ret;
int i;
int i_done;
@@ -1267,7 +1303,7 @@
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
start, page_cnt << PAGE_SHIFT);
if (ret)
return ret;
@@ -1288,7 +1324,7 @@
while (1) {
lock_extent_bits(tree, page_start, page_end,
&cached_state);
- ordered = btrfs_lookup_ordered_extent(inode,
+ ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
page_start);
unlock_extent_cached(tree, page_start, page_end,
&cached_state);
@@ -1296,7 +1332,7 @@
break;
unlock_page(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
lock_page(page);
/*
@@ -1348,16 +1384,49 @@
lock_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state);
+
+ /*
+ * When defragmenting we skip ranges that have holes or inline extents,
+ * (check should_defrag_range()), to avoid unnecessary IO and wasting
+ * space. At btrfs_defrag_file(), we check if a range should be defragged
+ * before locking the inode and then, if it should, we trigger a sync
+ * page cache readahead - we lock the inode only after that to avoid
+ * blocking for too long other tasks that possibly want to operate on
+ * other file ranges. But before we were able to get the inode lock,
+ * some other task may have punched a hole in the range, or we may have
+ * now an inline extent, in which case we should not defrag. So check
+ * for that here, where we have the inode and the range locked, and bail
+ * out if that happened.
+ */
+ search_start = page_start;
+ while (search_start < page_end) {
+ struct extent_map *em;
+
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
+ page_end - search_start);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out_unlock_range;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ free_extent_map(em);
+ /* Ok, 0 means we did not defrag anything */
+ ret = 0;
+ goto out_unlock_range;
+ }
+ search_start = extent_map_end(em);
+ free_extent_map(em);
+ }
+
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
- page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
- &cached_state);
+ page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 0, 0, &cached_state);
if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
- btrfs_delalloc_release_space(inode, data_reserved,
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
start, (page_cnt - i_done) << PAGE_SHIFT, true);
}
@@ -1379,12 +1448,16 @@
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return i_done;
+
+out_unlock_range:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ page_start, page_end - 1, &cached_state);
out:
for (i = 0; i < i_done; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_delalloc_release_space(inode, data_reserved,
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
start, page_cnt << PAGE_SHIFT, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
@@ -1424,7 +1497,7 @@
return -EINVAL;
if (do_compress) {
- if (range->compress_type > BTRFS_COMPRESS_TYPES)
+ if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES)
return -EINVAL;
if (range->compress_type)
compress_type = range->compress_type;
@@ -1530,9 +1603,13 @@
}
inode_lock(inode);
- if (do_compress)
- BTRFS_I(inode)->defrag_compress = compress_type;
- ret = cluster_pages_for_defrag(inode, pages, i, cluster);
+ if (IS_SWAPFILE(inode)) {
+ ret = -ETXTBSY;
+ } else {
+ if (do_compress)
+ BTRFS_I(inode)->defrag_compress = compress_type;
+ ret = cluster_pages_for_defrag(inode, pages, i, cluster);
+ }
if (ret < 0) {
inode_unlock(inode);
goto out_ra;
@@ -1623,7 +1700,7 @@
if (ret)
return ret;
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_RESIZE)) {
mnt_drop_write_file(file);
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
@@ -1717,9 +1794,6 @@
new_size = round_down(new_size, fs_info->sectorsize);
- btrfs_info_in_rcu(fs_info, "new size for %s is %llu",
- rcu_str_deref(device->name), new_size);
-
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -1732,17 +1806,22 @@
ret = btrfs_shrink_device(device, new_size);
} /* equal, nothing need to do */
+ if (ret == 0 && new_size != old_size)
+ btrfs_info_in_rcu(fs_info,
+ "resize device %s (devid %llu) from %llu to %llu",
+ rcu_str_deref(device->name), device->devid,
+ old_size, new_size);
out_free:
kfree(vol_args);
out:
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_exclop_finish(fs_info);
mnt_drop_write_file(file);
return ret;
}
-static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+static noinline int __btrfs_ioctl_snap_create(struct file *file,
const char *name, unsigned long fd, int subvol,
- u64 *transid, bool readonly,
+ bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
int namelen;
@@ -1769,7 +1848,7 @@
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, transid, readonly, inherit);
+ NULL, readonly, inherit);
} else {
struct fd src = fdget(fd);
struct inode *src_inode;
@@ -1790,9 +1869,9 @@
*/
ret = -EPERM;
} else {
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ ret = btrfs_mksnapshot(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
- transid, readonly, inherit);
+ readonly, inherit);
}
fdput(src);
}
@@ -1816,9 +1895,8 @@
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
- vol_args->fd, subvol,
- NULL, false, NULL);
+ ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
+ subvol, false, NULL);
kfree(vol_args);
return ret;
@@ -1829,8 +1907,6 @@
{
struct btrfs_ioctl_vol_args_v2 *vol_args;
int ret;
- u64 transid = 0;
- u64 *ptr = NULL;
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
@@ -1842,15 +1918,11 @@
return PTR_ERR(vol_args);
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
- if (vol_args->flags &
- ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
- BTRFS_SUBVOL_QGROUP_INHERIT)) {
+ if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
ret = -EOPNOTSUPP;
goto free_args;
}
- if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
- ptr = &transid;
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
@@ -1882,18 +1954,10 @@
}
}
- ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
- vol_args->fd, subvol, ptr,
- readonly, inherit);
+ ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
+ subvol, readonly, inherit);
if (ret)
goto free_inherit;
-
- if (ptr && copy_to_user(arg +
- offsetof(struct btrfs_ioctl_vol_args_v2,
- transid),
- ptr, sizeof(*ptr)))
- ret = -EFAULT;
-
free_inherit:
kfree(inherit);
free_args:
@@ -1949,11 +2013,6 @@
if (copy_from_user(&flags, arg, sizeof(flags))) {
ret = -EFAULT;
- goto out_drop_write;
- }
-
- if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
- ret = -EINVAL;
goto out_drop_write;
}
@@ -2112,7 +2171,7 @@
* problem. Otherwise we'll fault and then copy the buffer in
* properly this next time through
*/
- if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
ret = 0;
goto out;
}
@@ -2199,12 +2258,9 @@
if (sk->tree_id == 0) {
/* search the root of the inode that was passed */
- root = BTRFS_I(inode)->root;
+ root = btrfs_grab_root(BTRFS_I(inode)->root);
} else {
- key.objectid = sk->tree_id;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(info, &key);
+ root = btrfs_get_fs_root(info, sk->tree_id, true);
if (IS_ERR(root)) {
btrfs_free_path(path);
return PTR_ERR(root);
@@ -2238,6 +2294,7 @@
ret = 0;
err:
sk->nr_items = num_found;
+ btrfs_put_root(root);
btrfs_free_path(path);
return ret;
}
@@ -2341,12 +2398,10 @@
ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
- key.objectid = tree_id;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(info, &key);
+ root = btrfs_get_fs_root(info, tree_id, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
+ root = NULL;
goto out;
}
@@ -2397,6 +2452,7 @@
name[total_len] = '\0';
ret = 0;
out:
+ btrfs_put_root(root);
btrfs_free_path(path);
return ret;
}
@@ -2413,7 +2469,7 @@
unsigned long item_len;
struct btrfs_inode_ref *iref;
struct btrfs_root_ref *rref;
- struct btrfs_root *root;
+ struct btrfs_root *root = NULL;
struct btrfs_path *path;
struct btrfs_key key, key2;
struct extent_buffer *leaf;
@@ -2435,10 +2491,7 @@
if (dirid != upper_limit.objectid) {
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
- key.objectid = treeid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(fs_info, &key);
+ root = btrfs_get_fs_root(fs_info, treeid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
@@ -2450,15 +2503,15 @@
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
- goto out;
+ goto out_put;
} else if (ret > 0) {
ret = btrfs_previous_item(root, path, dirid,
BTRFS_INODE_REF_KEY);
if (ret < 0) {
- goto out;
+ goto out_put;
} else if (ret > 0) {
ret = -ENOENT;
- goto out;
+ goto out_put;
}
}
@@ -2472,7 +2525,7 @@
total_len += len + 1;
if (ptr < args->path) {
ret = -ENAMETOOLONG;
- goto out;
+ goto out_put;
}
*(ptr + len) = '/';
@@ -2483,10 +2536,10 @@
ret = btrfs_previous_item(root, path, dirid,
BTRFS_INODE_ITEM_KEY);
if (ret < 0) {
- goto out;
+ goto out_put;
} else if (ret > 0) {
ret = -ENOENT;
- goto out;
+ goto out_put;
}
leaf = path->nodes[0];
@@ -2494,26 +2547,26 @@
btrfs_item_key_to_cpu(leaf, &key2, slot);
if (key2.objectid != dirid) {
ret = -ENOENT;
- goto out;
+ goto out_put;
}
- temp_inode = btrfs_iget(sb, &key2, root, NULL);
+ temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
- goto out;
+ goto out_put;
}
ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
iput(temp_inode);
if (ret) {
ret = -EACCES;
- goto out;
+ goto out_put;
}
if (key.offset == upper_limit.objectid)
break;
if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
ret = -EACCES;
- goto out;
+ goto out_put;
}
btrfs_release_path(path);
@@ -2524,15 +2577,16 @@
memmove(args->path, ptr, total_len);
args->path[total_len] = '\0';
+ btrfs_put_root(root);
+ root = NULL;
btrfs_release_path(path);
}
/* Get the bottom subvolume's name from ROOT_REF */
- root = fs_info->tree_root;
key.objectid = treeid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = args->treeid;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0) {
goto out;
} else if (ret > 0) {
@@ -2559,6 +2613,8 @@
read_extent_buffer(leaf, args->name, item_off, item_len);
args->name[item_len] = 0;
+out_put:
+ btrfs_put_root(root);
out:
btrfs_free_path(path);
return ret;
@@ -2681,12 +2737,10 @@
/* Get root_item of inode's subvolume */
key.objectid = BTRFS_I(inode)->root->root_key.objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(fs_info, &key);
+ root = btrfs_get_fs_root(fs_info, key.objectid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
- goto out;
+ goto out_free;
}
root_item = &root->root_item;
@@ -2719,16 +2773,14 @@
if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
/* Search root tree for ROOT_BACKREF of this subvolume */
- root = fs_info->tree_root;
-
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = 0;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0) {
goto out;
} else if (path->slots[0] >=
btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_leaf(root, path);
+ ret = btrfs_next_leaf(fs_info->tree_root, path);
if (ret < 0) {
goto out;
} else if (ret > 0) {
@@ -2759,12 +2811,16 @@
}
}
+ btrfs_free_path(path);
+ path = NULL;
if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
ret = -EFAULT;
out:
+ btrfs_put_root(root);
+out_free:
btrfs_free_path(path);
- kzfree(subvol_info);
+ kfree(subvol_info);
return ret;
}
@@ -2849,6 +2905,8 @@
}
out:
+ btrfs_free_path(path);
+
if (!ret || ret == -EOVERFLOW) {
rootrefs->num_items = found;
/* update min_treeid for next search */
@@ -2860,13 +2918,13 @@
}
kfree(rootrefs);
- btrfs_free_path(path);
return ret;
}
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
- void __user *arg)
+ void __user *arg,
+ bool destroy_v2)
{
struct dentry *parent = file->f_path.dentry;
struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
@@ -2875,34 +2933,120 @@
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *dest = NULL;
- struct btrfs_ioctl_vol_args *vol_args;
- int namelen;
+ struct btrfs_ioctl_vol_args *vol_args = NULL;
+ struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
+ char *subvol_name, *subvol_name_ptr = NULL;
+ int subvol_namelen;
int err = 0;
+ bool destroy_parent = false;
- if (!S_ISDIR(dir->i_mode))
- return -ENOTDIR;
+ if (destroy_v2) {
+ vol_args2 = memdup_user(arg, sizeof(*vol_args2));
+ if (IS_ERR(vol_args2))
+ return PTR_ERR(vol_args2);
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
+ if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
- if (strchr(vol_args->name, '/') ||
- strncmp(vol_args->name, "..", namelen) == 0) {
- err = -EINVAL;
- goto out;
+ /*
+ * If SPEC_BY_ID is not set, we are looking for the subvolume by
+ * name, same as v1 currently does.
+ */
+ if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
+ vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0;
+ subvol_name = vol_args2->name;
+
+ err = mnt_want_write_file(file);
+ if (err)
+ goto out;
+ } else {
+ if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mnt_want_write_file(file);
+ if (err)
+ goto out;
+
+ dentry = btrfs_get_dentry(fs_info->sb,
+ BTRFS_FIRST_FREE_OBJECTID,
+ vol_args2->subvolid, 0, 0);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_drop_write;
+ }
+
+ /*
+ * Change the default parent since the subvolume being
+ * deleted can be outside of the current mount point.
+ */
+ parent = btrfs_get_parent(dentry);
+
+ /*
+ * At this point dentry->d_name can point to '/' if the
+ * subvolume we want to destroy is outside of the
+ * current mount point, so we need to release the
+ * current dentry and execute the lookup to return a new
+ * one with ->d_name pointing to the
+ * <mount point>/subvol_name.
+ */
+ dput(dentry);
+ if (IS_ERR(parent)) {
+ err = PTR_ERR(parent);
+ goto out_drop_write;
+ }
+ dir = d_inode(parent);
+
+ /*
+ * If v2 was used with SPEC_BY_ID, a new parent was
+ * allocated since the subvolume can be outside of the
+ * current mount point. Later on we need to release this
+ * new parent dentry.
+ */
+ destroy_parent = true;
+
+ subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
+ fs_info, vol_args2->subvolid);
+ if (IS_ERR(subvol_name_ptr)) {
+ err = PTR_ERR(subvol_name_ptr);
+ goto free_parent;
+ }
+ /* subvol_name_ptr is already NUL-terminated */
+ subvol_name = (char *)kbasename(subvol_name_ptr);
+ }
+ } else {
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+
+ vol_args->name[BTRFS_PATH_NAME_MAX] = 0;
+ subvol_name = vol_args->name;
+
+ err = mnt_want_write_file(file);
+ if (err)
+ goto out;
}
- err = mnt_want_write_file(file);
- if (err)
- goto out;
+ subvol_namelen = strlen(subvol_name);
+ if (strchr(subvol_name, '/') ||
+ strncmp(subvol_name, "..", subvol_namelen) == 0) {
+ err = -EINVAL;
+ goto free_subvol_name;
+ }
+
+ if (!S_ISDIR(dir->i_mode)) {
+ err = -ENOTDIR;
+ goto free_subvol_name;
+ }
err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
if (err == -EINTR)
- goto out_drop_write;
- dentry = lookup_one_len(vol_args->name, parent, namelen);
+ goto free_subvol_name;
+ dentry = lookup_one_len(subvol_name, parent, subvol_namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_unlock_dir;
@@ -2963,15 +3107,21 @@
err = btrfs_delete_subvolume(dir, dentry);
inode_unlock(inode);
if (!err)
- d_delete(dentry);
+ d_delete_notify(dir, dentry);
out_dput:
dput(dentry);
out_unlock_dir:
inode_unlock(dir);
+free_subvol_name:
+ kfree(subvol_name_ptr);
+free_parent:
+ if (destroy_parent)
+ dput(parent);
out_drop_write:
mnt_drop_write_file(file);
out:
+ kfree(vol_args2);
kfree(vol_args);
return err;
}
@@ -3056,7 +3206,7 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
vol_args = memdup_user(arg, sizeof(*vol_args));
@@ -3073,7 +3223,7 @@
kfree(vol_args);
out:
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_exclop_finish(fs_info);
return ret;
}
@@ -3097,13 +3247,12 @@
goto err_drop;
}
- /* Check for compatibility reject unknown flags */
- if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
+ if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
ret = -EOPNOTSUPP;
goto out;
}
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
@@ -3114,7 +3263,7 @@
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
}
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_exclop_finish(fs_info);
if (!ret) {
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
@@ -3145,7 +3294,7 @@
if (ret)
return ret;
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out_drop_write;
}
@@ -3163,7 +3312,7 @@
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
kfree(vol_args);
out:
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_exclop_finish(fs_info);
out_drop_write:
mnt_drop_write_file(file);
@@ -3176,11 +3325,15 @@
struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ u64 flags_in;
int ret = 0;
- fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
- if (!fi_args)
- return -ENOMEM;
+ fi_args = memdup_user(arg, sizeof(*fi_args));
+ if (IS_ERR(fi_args))
+ return PTR_ERR(fi_args);
+
+ flags_in = fi_args->flags;
+ memset(fi_args, 0, sizeof(*fi_args));
rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices;
@@ -3191,10 +3344,27 @@
}
rcu_read_unlock();
- memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
+ memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
fi_args->nodesize = fs_info->nodesize;
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
+
+ if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
+ fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
+ fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
+ }
+
+ if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
+ fi_args->generation = fs_info->generation;
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION;
+ }
+
+ if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
+ memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
+ sizeof(fi_args->metadata_uuid));
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID;
+ }
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;
@@ -3248,1183 +3418,6 @@
return ret;
}
-static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
-{
- struct page *page;
-
- page = grab_cache_page(inode->i_mapping, index);
- if (!page)
- return ERR_PTR(-ENOMEM);
-
- if (!PageUptodate(page)) {
- int ret;
-
- ret = btrfs_readpage(NULL, page);
- if (ret)
- return ERR_PTR(ret);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- put_page(page);
- return ERR_PTR(-EIO);
- }
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
- put_page(page);
- return ERR_PTR(-EAGAIN);
- }
- }
-
- return page;
-}
-
-static int gather_extent_pages(struct inode *inode, struct page **pages,
- int num_pages, u64 off)
-{
- int i;
- pgoff_t index = off >> PAGE_SHIFT;
-
- for (i = 0; i < num_pages; i++) {
-again:
- pages[i] = extent_same_get_page(inode, index + i);
- if (IS_ERR(pages[i])) {
- int err = PTR_ERR(pages[i]);
-
- if (err == -EAGAIN)
- goto again;
- pages[i] = NULL;
- return err;
- }
- }
- return 0;
-}
-
-static int lock_extent_range(struct inode *inode, u64 off, u64 len,
- bool retry_range_locking)
-{
- /*
- * Do any pending delalloc/csum calculations on inode, one way or
- * another, and lock file content.
- * The locking order is:
- *
- * 1) pages
- * 2) range in the inode's io tree
- */
- while (1) {
- struct btrfs_ordered_extent *ordered;
- lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- off + len - 1);
- if ((!ordered ||
- ordered->file_offset + ordered->len <= off ||
- ordered->file_offset >= off + len) &&
- !test_range_bit(&BTRFS_I(inode)->io_tree, off,
- off + len - 1, EXTENT_DELALLOC, 0, NULL)) {
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- break;
- }
- unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- if (!retry_range_locking)
- return -EAGAIN;
- btrfs_wait_ordered_range(inode, off, len);
- }
- return 0;
-}
-
-static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
-{
- inode_unlock(inode1);
- inode_unlock(inode2);
-}
-
-static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
-{
- if (inode1 < inode2)
- swap(inode1, inode2);
-
- inode_lock_nested(inode1, I_MUTEX_PARENT);
- inode_lock_nested(inode2, I_MUTEX_CHILD);
-}
-
-static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
-{
- unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
- unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
-}
-
-static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len,
- bool retry_range_locking)
-{
- int ret;
-
- if (inode1 < inode2) {
- swap(inode1, inode2);
- swap(loff1, loff2);
- }
- ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
- if (ret)
- return ret;
- ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
- if (ret)
- unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
- loff1 + len - 1);
- return ret;
-}
-
-struct cmp_pages {
- int num_pages;
- struct page **src_pages;
- struct page **dst_pages;
-};
-
-static void btrfs_cmp_data_free(struct cmp_pages *cmp)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < cmp->num_pages; i++) {
- pg = cmp->src_pages[i];
- if (pg) {
- unlock_page(pg);
- put_page(pg);
- cmp->src_pages[i] = NULL;
- }
- pg = cmp->dst_pages[i];
- if (pg) {
- unlock_page(pg);
- put_page(pg);
- cmp->dst_pages[i] = NULL;
- }
- }
-}
-
-static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
- struct inode *dst, u64 dst_loff,
- u64 len, struct cmp_pages *cmp)
-{
- int ret;
- int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
-
- cmp->num_pages = num_pages;
-
- ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
- if (ret)
- goto out;
-
- ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
-
-out:
- if (ret)
- btrfs_cmp_data_free(cmp);
- return ret;
-}
-
-static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
-{
- int ret = 0;
- int i;
- struct page *src_page, *dst_page;
- unsigned int cmp_len = PAGE_SIZE;
- void *addr, *dst_addr;
-
- i = 0;
- while (len) {
- if (len < PAGE_SIZE)
- cmp_len = len;
-
- BUG_ON(i >= cmp->num_pages);
-
- src_page = cmp->src_pages[i];
- dst_page = cmp->dst_pages[i];
- ASSERT(PageLocked(src_page));
- ASSERT(PageLocked(dst_page));
-
- addr = kmap_atomic(src_page);
- dst_addr = kmap_atomic(dst_page);
-
- flush_dcache_page(src_page);
- flush_dcache_page(dst_page);
-
- if (memcmp(addr, dst_addr, cmp_len))
- ret = -EBADE;
-
- kunmap_atomic(addr);
- kunmap_atomic(dst_addr);
-
- if (ret)
- break;
-
- len -= cmp_len;
- i++;
- }
-
- return ret;
-}
-
-static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
- u64 olen)
-{
- u64 len = *plen;
- u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
-
- if (off + olen > inode->i_size || off + olen < off)
- return -EINVAL;
-
- /* if we extend to eof, continue to block boundary */
- if (off + len == inode->i_size)
- *plen = len = ALIGN(inode->i_size, bs) - off;
-
- /* Check that we are block aligned - btrfs_clone() requires this */
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
- return -EINVAL;
-
- return 0;
-}
-
-static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
- struct inode *dst, u64 dst_loff,
- struct cmp_pages *cmp)
-{
- int ret;
- u64 len = olen;
- bool same_inode = (src == dst);
- u64 same_lock_start = 0;
- u64 same_lock_len = 0;
-
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- return ret;
-
- ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
- if (ret)
- return ret;
-
- if (same_inode) {
- /*
- * Single inode case wants the same checks, except we
- * don't want our length pushed out past i_size as
- * comparing that data range makes no sense.
- *
- * extent_same_check_offsets() will do this for an
- * unaligned length at i_size, so catch it here and
- * reject the request.
- *
- * This effectively means we require aligned extents
- * for the single-inode case, whereas the other cases
- * allow an unaligned length so long as it ends at
- * i_size.
- */
- if (len != olen)
- return -EINVAL;
-
- /* Check for overlapping ranges */
- if (dst_loff + len > loff && dst_loff < loff + len)
- return -EINVAL;
-
- same_lock_start = min_t(u64, loff, dst_loff);
- same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
- } else {
- /*
- * If the source and destination inodes are different, the
- * source's range end offset matches the source's i_size, that
- * i_size is not a multiple of the sector size, and the
- * destination range does not go past the destination's i_size,
- * we must round down the length to the nearest sector size
- * multiple. If we don't do this adjustment we end replacing
- * with zeroes the bytes in the range that starts at the
- * deduplication range's end offset and ends at the next sector
- * size multiple.
- */
- if (loff + olen == i_size_read(src) &&
- dst_loff + len < i_size_read(dst)) {
- const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
-
- len = round_down(i_size_read(src), sz) - loff;
- if (len == 0)
- return 0;
- olen = len;
- }
- }
-
-again:
- ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
- if (ret)
- return ret;
-
- if (same_inode)
- ret = lock_extent_range(src, same_lock_start, same_lock_len,
- false);
- else
- ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
- false);
- /*
- * If one of the inodes has dirty pages in the respective range or
- * ordered extents, we need to flush dellaloc and wait for all ordered
- * extents in the range. We must unlock the pages and the ranges in the
- * io trees to avoid deadlocks when flushing delalloc (requires locking
- * pages) and when waiting for ordered extents to complete (they require
- * range locking).
- */
- if (ret == -EAGAIN) {
- /*
- * Ranges in the io trees already unlocked. Now unlock all
- * pages before waiting for all IO to complete.
- */
- btrfs_cmp_data_free(cmp);
- if (same_inode) {
- btrfs_wait_ordered_range(src, same_lock_start,
- same_lock_len);
- } else {
- btrfs_wait_ordered_range(src, loff, len);
- btrfs_wait_ordered_range(dst, dst_loff, len);
- }
- goto again;
- }
- ASSERT(ret == 0);
- if (WARN_ON(ret)) {
- /* ranges in the io trees already unlocked */
- btrfs_cmp_data_free(cmp);
- return ret;
- }
-
- /* pass original length for comparison so we stay within i_size */
- ret = btrfs_cmp_data(olen, cmp);
- if (ret == 0)
- ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
-
- if (same_inode)
- unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
- same_lock_start + same_lock_len - 1);
- else
- btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
-
- btrfs_cmp_data_free(cmp);
-
- return ret;
-}
-
-#define BTRFS_MAX_DEDUPE_LEN SZ_16M
-
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
- struct inode *dst, u64 dst_loff)
-{
- int ret;
- struct cmp_pages cmp;
- int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
- bool same_inode = (src == dst);
- u64 i, tail_len, chunk_count;
-
- if (olen == 0)
- return 0;
-
- if (same_inode)
- inode_lock(src);
- else
- btrfs_double_inode_lock(src, dst);
-
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
- chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
- if (chunk_count == 0)
- num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
-
- /*
- * If deduping ranges in the same inode, locking rules make it
- * mandatory to always lock pages in ascending order to avoid deadlocks
- * with concurrent tasks (such as starting writeback/delalloc).
- */
- if (same_inode && dst_loff < loff)
- swap(loff, dst_loff);
-
- /*
- * We must gather up all the pages before we initiate our extent
- * locking. We use an array for the page pointers. Size of the array is
- * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN.
- */
- cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!cmp.src_pages || !cmp.dst_pages) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < chunk_count; i++) {
- ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
- dst, dst_loff, &cmp);
- if (ret)
- goto out_free;
-
- loff += BTRFS_MAX_DEDUPE_LEN;
- dst_loff += BTRFS_MAX_DEDUPE_LEN;
- }
-
- if (tail_len > 0)
- ret = btrfs_extent_same_range(src, loff, tail_len, dst,
- dst_loff, &cmp);
-
-out_free:
- kvfree(cmp.src_pages);
- kvfree(cmp.dst_pages);
-
-out_unlock:
- if (same_inode)
- inode_unlock(src);
- else
- btrfs_double_inode_unlock(src, dst);
-
- return ret;
-}
-
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
- struct file *dst_file, loff_t dst_loff,
- u64 olen)
-{
- struct inode *src = file_inode(src_file);
- struct inode *dst = file_inode(dst_file);
- u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-
- if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
- /*
- * Btrfs does not support blocksize < page_size. As a
- * result, btrfs_cmp_data() won't correctly handle
- * this situation without an update.
- */
- return -EINVAL;
- }
-
- return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
-}
-
-static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
- struct inode *inode,
- u64 endoff,
- const u64 destoff,
- const u64 olen,
- int no_time_update)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
-
- inode_inc_iversion(inode);
- if (!no_time_update)
- inode->i_mtime = inode->i_ctime = current_time(inode);
- /*
- * We round up to the block size at eof when determining which
- * extents to clone above, but shouldn't round up the file size.
- */
- if (endoff > destoff + olen)
- endoff = destoff + olen;
- if (endoff > inode->i_size)
- btrfs_i_size_write(BTRFS_I(inode), endoff);
-
- ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- goto out;
- }
- ret = btrfs_end_transaction(trans);
-out:
- return ret;
-}
-
-static void clone_update_extent_map(struct btrfs_inode *inode,
- const struct btrfs_trans_handle *trans,
- const struct btrfs_path *path,
- const u64 hole_offset,
- const u64 hole_len)
-{
- struct extent_map_tree *em_tree = &inode->extent_tree;
- struct extent_map *em;
- int ret;
-
- em = alloc_extent_map();
- if (!em) {
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
- return;
- }
-
- if (path) {
- struct btrfs_file_extent_item *fi;
-
- fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
- em->generation = -1;
- if (btrfs_file_extent_type(path->nodes[0], fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &inode->runtime_flags);
- } else {
- em->start = hole_offset;
- em->len = hole_len;
- em->ram_bytes = em->len;
- em->orig_start = hole_offset;
- em->block_start = EXTENT_MAP_HOLE;
- em->block_len = 0;
- em->orig_block_len = 0;
- em->compress_type = BTRFS_COMPRESS_NONE;
- em->generation = trans->transid;
- }
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, em->start,
- em->start + em->len - 1, 0);
- }
-
- if (ret)
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
-}
-
-/*
- * Make sure we do not end up inserting an inline extent into a file that has
- * already other (non-inline) extents. If a file has an inline extent it can
- * not have any other extents and the (single) inline extent must start at the
- * file offset 0. Failing to respect these rules will lead to file corruption,
- * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
- *
- * We can have extents that have been already written to disk or we can have
- * dirty ranges still in delalloc, in which case the extent maps and items are
- * created only when we run delalloc, and the delalloc ranges might fall outside
- * the range we are currently locking in the inode's io tree. So we check the
- * inode's i_size because of that (i_size updates are done while holding the
- * i_mutex, which we are holding here).
- * We also check to see if the inode has a size not greater than "datal" but has
- * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
- * protected against such concurrent fallocate calls by the i_mutex).
- *
- * If the file has no extents but a size greater than datal, do not allow the
- * copy because we would need turn the inline extent into a non-inline one (even
- * with NO_HOLES enabled). If we find our destination inode only has one inline
- * extent, just overwrite it with the source inline extent if its size is less
- * than the source extent's size, or we could copy the source inline extent's
- * data into the destination inode's inline extent if the later is greater then
- * the former.
- */
-static int clone_copy_inline_extent(struct inode *dst,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_key *new_key,
- const u64 drop_start,
- const u64 datal,
- const u64 skip,
- const u64 size,
- char *inline_data)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
- struct btrfs_root *root = BTRFS_I(dst)->root;
- const u64 aligned_end = ALIGN(new_key->offset + datal,
- fs_info->sectorsize);
- int ret;
- struct btrfs_key key;
-
- if (new_key->offset > 0)
- return -EOPNOTSUPP;
-
- key.objectid = btrfs_ino(BTRFS_I(dst));
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = 0;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- return ret;
- } else if (ret > 0) {
- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- return ret;
- else if (ret > 0)
- goto copy_inline_extent;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
- key.type == BTRFS_EXTENT_DATA_KEY) {
- ASSERT(key.offset > 0);
- return -EOPNOTSUPP;
- }
- } else if (i_size_read(dst) <= datal) {
- struct btrfs_file_extent_item *ei;
- u64 ext_len;
-
- /*
- * If the file size is <= datal, make sure there are no other
- * extents following (can happen do to an fallocate call with
- * the flag FALLOC_FL_KEEP_SIZE).
- */
- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
- /*
- * If it's an inline extent, it can not have other extents
- * following it.
- */
- if (btrfs_file_extent_type(path->nodes[0], ei) ==
- BTRFS_FILE_EXTENT_INLINE)
- goto copy_inline_extent;
-
- ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
- if (ext_len > aligned_end)
- return -EOPNOTSUPP;
-
- ret = btrfs_next_item(root, path);
- if (ret < 0) {
- return ret;
- } else if (ret == 0) {
- btrfs_item_key_to_cpu(path->nodes[0], &key,
- path->slots[0]);
- if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
- key.type == BTRFS_EXTENT_DATA_KEY)
- return -EOPNOTSUPP;
- }
- }
-
-copy_inline_extent:
- /*
- * We have no extent items, or we have an extent at offset 0 which may
- * or may not be inlined. All these cases are dealt the same way.
- */
- if (i_size_read(dst) > datal) {
- /*
- * If the destination inode has an inline extent...
- * This would require copying the data from the source inline
- * extent into the beginning of the destination's inline extent.
- * But this is really complex, both extents can be compressed
- * or just one of them, which would require decompressing and
- * re-compressing data (which could increase the new compressed
- * size, not allowing the compressed data to fit anymore in an
- * inline extent).
- * So just don't support this case for now (it should be rare,
- * we are not really saving space when cloning inline extents).
- */
- return -EOPNOTSUPP;
- }
-
- btrfs_release_path(path);
- ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
- if (ret)
- return ret;
- ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
- if (ret)
- return ret;
-
- if (skip) {
- const u32 start = btrfs_file_extent_calc_inline_size(0);
-
- memmove(inline_data + start, inline_data + start + skip, datal);
- }
-
- write_extent_buffer(path->nodes[0], inline_data,
- btrfs_item_ptr_offset(path->nodes[0],
- path->slots[0]),
- size);
- inode_add_bytes(dst, datal);
-
- return 0;
-}
-
-/**
- * btrfs_clone() - clone a range from inode file to another
- *
- * @src: Inode to clone from
- * @inode: Inode to clone to
- * @off: Offset within source to start clone from
- * @olen: Original length, passed by user, of range to clone
- * @olen_aligned: Block-aligned value of olen
- * @destoff: Offset within @inode to start clone
- * @no_time_update: Whether to update mtime/ctime on the target inode
- */
-static int btrfs_clone(struct inode *src, struct inode *inode,
- const u64 off, const u64 olen, const u64 olen_aligned,
- const u64 destoff, int no_time_update)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path = NULL;
- struct extent_buffer *leaf;
- struct btrfs_trans_handle *trans;
- char *buf = NULL;
- struct btrfs_key key;
- u32 nritems;
- int slot;
- int ret;
- const u64 len = olen_aligned;
- u64 last_dest_end = destoff;
-
- ret = -ENOMEM;
- buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
- if (!buf)
- return ret;
-
- path = btrfs_alloc_path();
- if (!path) {
- kvfree(buf);
- return ret;
- }
-
- path->reada = READA_FORWARD;
- /* clone data */
- key.objectid = btrfs_ino(BTRFS_I(src));
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = off;
-
- while (1) {
- u64 next_key_min_offset = key.offset + 1;
-
- /*
- * note the key will change type as we walk through the
- * tree.
- */
- path->leave_spinning = 1;
- ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
- 0, 0);
- if (ret < 0)
- goto out;
- /*
- * First search, if no extent item that starts at offset off was
- * found but the previous item is an extent item, it's possible
- * it might overlap our target range, therefore process it.
- */
- if (key.offset == off && ret > 0 && path->slots[0] > 0) {
- btrfs_item_key_to_cpu(path->nodes[0], &key,
- path->slots[0] - 1);
- if (key.type == BTRFS_EXTENT_DATA_KEY)
- path->slots[0]--;
- }
-
- nritems = btrfs_header_nritems(path->nodes[0]);
-process_slot:
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
- if (ret < 0)
- goto out;
- if (ret > 0)
- break;
- nritems = btrfs_header_nritems(path->nodes[0]);
- }
- leaf = path->nodes[0];
- slot = path->slots[0];
-
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.type > BTRFS_EXTENT_DATA_KEY ||
- key.objectid != btrfs_ino(BTRFS_I(src)))
- break;
-
- if (key.type == BTRFS_EXTENT_DATA_KEY) {
- struct btrfs_file_extent_item *extent;
- int type;
- u32 size;
- struct btrfs_key new_key;
- u64 disko = 0, diskl = 0;
- u64 datao = 0, datal = 0;
- u8 comp;
- u64 drop_start;
-
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
- comp = btrfs_file_extent_compression(leaf, extent);
- type = btrfs_file_extent_type(leaf, extent);
- if (type == BTRFS_FILE_EXTENT_REG ||
- type == BTRFS_FILE_EXTENT_PREALLOC) {
- disko = btrfs_file_extent_disk_bytenr(leaf,
- extent);
- diskl = btrfs_file_extent_disk_num_bytes(leaf,
- extent);
- datao = btrfs_file_extent_offset(leaf, extent);
- datal = btrfs_file_extent_num_bytes(leaf,
- extent);
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
- /* take upper bound, may be compressed */
- datal = btrfs_file_extent_ram_bytes(leaf,
- extent);
- }
-
- /*
- * The first search might have left us at an extent
- * item that ends before our target range's start, can
- * happen if we have holes and NO_HOLES feature enabled.
- */
- if (key.offset + datal <= off) {
- path->slots[0]++;
- goto process_slot;
- } else if (key.offset >= off + len) {
- break;
- }
- next_key_min_offset = key.offset + datal;
- size = btrfs_item_size_nr(leaf, slot);
- read_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
-
- btrfs_release_path(path);
- path->leave_spinning = 0;
-
- memcpy(&new_key, &key, sizeof(new_key));
- new_key.objectid = btrfs_ino(BTRFS_I(inode));
- if (off <= key.offset)
- new_key.offset = key.offset + destoff - off;
- else
- new_key.offset = destoff;
-
- /*
- * Deal with a hole that doesn't have an extent item
- * that represents it (NO_HOLES feature enabled).
- * This hole is either in the middle of the cloning
- * range or at the beginning (fully overlaps it or
- * partially overlaps it).
- */
- if (new_key.offset != last_dest_end)
- drop_start = last_dest_end;
- else
- drop_start = new_key.offset;
-
- /*
- * 1 - adjusting old extent (we may have to split it)
- * 1 - add new extent
- * 1 - inode update
- */
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
-
- if (type == BTRFS_FILE_EXTENT_REG ||
- type == BTRFS_FILE_EXTENT_PREALLOC) {
- /*
- * a | --- range to clone ---| b
- * | ------------- extent ------------- |
- */
-
- /* subtract range b */
- if (key.offset + datal > off + len)
- datal = off + len - key.offset;
-
- /* subtract range a */
- if (off > key.offset) {
- datao += off - key.offset;
- datal -= off - key.offset;
- }
-
- ret = btrfs_drop_extents(trans, root, inode,
- drop_start,
- new_key.offset + datal,
- 1);
- if (ret) {
- if (ret != -EOPNOTSUPP)
- btrfs_abort_transaction(trans,
- ret);
- btrfs_end_transaction(trans);
- goto out;
- }
-
- ret = btrfs_insert_empty_item(trans, root, path,
- &new_key, size);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- goto out;
- }
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- write_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
-
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
-
- /* disko == 0 means it's a hole */
- if (!disko)
- datao = 0;
-
- btrfs_set_file_extent_offset(leaf, extent,
- datao);
- btrfs_set_file_extent_num_bytes(leaf, extent,
- datal);
-
- if (disko) {
- inode_add_bytes(inode, datal);
- ret = btrfs_inc_extent_ref(trans,
- root,
- disko, diskl, 0,
- root->root_key.objectid,
- btrfs_ino(BTRFS_I(inode)),
- new_key.offset - datao);
- if (ret) {
- btrfs_abort_transaction(trans,
- ret);
- btrfs_end_transaction(trans);
- goto out;
-
- }
- }
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
- u64 skip = 0;
- u64 trim = 0;
-
- if (off > key.offset) {
- skip = off - key.offset;
- new_key.offset += skip;
- }
-
- if (key.offset + datal > off + len)
- trim = key.offset + datal - (off + len);
-
- if (comp && (skip || trim)) {
- ret = -EINVAL;
- btrfs_end_transaction(trans);
- goto out;
- }
- size -= skip + trim;
- datal -= skip + trim;
-
- ret = clone_copy_inline_extent(inode,
- trans, path,
- &new_key,
- drop_start,
- datal,
- skip, size, buf);
- if (ret) {
- if (ret != -EOPNOTSUPP)
- btrfs_abort_transaction(trans,
- ret);
- btrfs_end_transaction(trans);
- goto out;
- }
- leaf = path->nodes[0];
- slot = path->slots[0];
- }
-
- /* If we have an implicit hole (NO_HOLES feature). */
- if (drop_start < new_key.offset)
- clone_update_extent_map(BTRFS_I(inode), trans,
- NULL, drop_start,
- new_key.offset - drop_start);
-
- clone_update_extent_map(BTRFS_I(inode), trans,
- path, 0, 0);
-
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- last_dest_end = ALIGN(new_key.offset + datal,
- fs_info->sectorsize);
- ret = clone_finish_inode_update(trans, inode,
- last_dest_end,
- destoff, olen,
- no_time_update);
- if (ret)
- goto out;
- if (new_key.offset + datal >= destoff + len)
- break;
- }
- btrfs_release_path(path);
- key.offset = next_key_min_offset;
-
- if (fatal_signal_pending(current)) {
- ret = -EINTR;
- goto out;
- }
-
- cond_resched();
- }
- ret = 0;
-
- if (last_dest_end < destoff + len) {
- /*
- * We have an implicit hole (NO_HOLES feature is enabled) that
- * fully or partially overlaps our cloning range at its end.
- */
- btrfs_release_path(path);
-
- /*
- * 1 - remove extent(s)
- * 1 - inode update
- */
- trans = btrfs_start_transaction(root, 2);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
- ret = btrfs_drop_extents(trans, root, inode,
- last_dest_end, destoff + len, 1);
- if (ret) {
- if (ret != -EOPNOTSUPP)
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- goto out;
- }
- clone_update_extent_map(BTRFS_I(inode), trans, NULL,
- last_dest_end,
- destoff + len - last_dest_end);
- ret = clone_finish_inode_update(trans, inode, destoff + len,
- destoff, olen, no_time_update);
- }
-
-out:
- btrfs_free_path(path);
- kvfree(buf);
- return ret;
-}
-
-static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
- u64 off, u64 olen, u64 destoff)
-{
- struct inode *inode = file_inode(file);
- struct inode *src = file_inode(file_src);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
- u64 len = olen;
- u64 bs = fs_info->sb->s_blocksize;
- int same_inode = src == inode;
-
- /*
- * TODO:
- * - split compressed inline extents. annoying: we need to
- * decompress into destination's address_space (the file offset
- * may change, so source mapping won't do), then recompress (or
- * otherwise reinsert) a subrange.
- *
- * - split destination inode's inline extents. The inline extents can
- * be either compressed or non-compressed.
- */
-
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- if (file_src->f_path.mnt != file->f_path.mnt ||
- src->i_sb != inode->i_sb)
- return -EXDEV;
-
- if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- return -EISDIR;
-
- if (!same_inode) {
- btrfs_double_inode_lock(src, inode);
- } else {
- inode_lock(src);
- }
-
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- /* determine range to clone */
- ret = -EINVAL;
- if (off + len > src->i_size || off + len < off)
- goto out_unlock;
- if (len == 0)
- olen = len = src->i_size - off;
- /*
- * If we extend to eof, continue to block boundary if and only if the
- * destination end offset matches the destination file's size, otherwise
- * we would be corrupting data by placing the eof block into the middle
- * of a file.
- */
- if (off + len == src->i_size) {
- if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
- goto out_unlock;
- len = ALIGN(src->i_size, bs) - off;
- }
-
- if (len == 0) {
- ret = 0;
- goto out_unlock;
- }
-
- /* verify the end result is block aligned */
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
- !IS_ALIGNED(destoff, bs))
- goto out_unlock;
-
- /* verify if ranges are overlapped within the same file */
- if (same_inode) {
- if (destoff + len > off && destoff < off + len)
- goto out_unlock;
- }
-
- if (destoff > inode->i_size) {
- ret = btrfs_cont_expand(inode, inode->i_size, destoff);
- if (ret)
- goto out_unlock;
- }
-
- /*
- * Lock the target range too. Right after we replace the file extent
- * items in the fs tree (which now point to the cloned data), we might
- * have a worker replace them with extent items relative to a write
- * operation that was issued before this clone operation (i.e. confront
- * with inode.c:btrfs_finish_ordered_io).
- */
- if (same_inode) {
- u64 lock_start = min_t(u64, off, destoff);
- u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
-
- ret = lock_extent_range(src, lock_start, lock_len, true);
- } else {
- ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
- true);
- }
- ASSERT(ret == 0);
- if (WARN_ON(ret)) {
- /* ranges in the io trees already unlocked */
- goto out_unlock;
- }
-
- ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
-
- if (same_inode) {
- u64 lock_start = min_t(u64, off, destoff);
- u64 lock_end = max_t(u64, off, destoff) + len - 1;
-
- unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end);
- } else {
- btrfs_double_extent_unlock(src, off, inode, destoff, len);
- }
- /*
- * Truncate page cache pages so that future reads will see the cloned
- * data immediately and not the previous data.
- */
- truncate_inode_pages_range(&inode->i_data,
- round_down(destoff, PAGE_SIZE),
- round_up(destoff + len, PAGE_SIZE) - 1);
-out_unlock:
- if (!same_inode)
- btrfs_double_inode_unlock(src, inode);
- else
- inode_unlock(src);
- return ret;
-}
-
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
- struct file *dst_file, loff_t destoff, u64 len)
-{
- return btrfs_clone_files(dst_file, src_file, off, len, destoff);
-}
-
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
@@ -4433,8 +3426,7 @@
struct btrfs_root *new_root;
struct btrfs_dir_item *di;
struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct btrfs_key location;
+ struct btrfs_path *path = NULL;
struct btrfs_disk_key disk_key;
u64 objectid = 0;
u64 dir_id;
@@ -4455,53 +3447,51 @@
if (!objectid)
objectid = BTRFS_FS_TREE_OBJECTID;
- location.objectid = objectid;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
-
- new_root = btrfs_read_fs_root_no_name(fs_info, &location);
+ new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
goto out;
}
- if (!is_fstree(new_root->objectid)) {
+ if (!is_fstree(new_root->root_key.objectid)) {
ret = -ENOENT;
- goto out;
+ goto out_free;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out;
+ goto out_free;
}
path->leave_spinning = 1;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
- btrfs_free_path(path);
ret = PTR_ERR(trans);
- goto out;
+ goto out_free;
}
dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
dir_id, "default", 7, 1);
if (IS_ERR_OR_NULL(di)) {
- btrfs_free_path(path);
+ btrfs_release_path(path);
btrfs_end_transaction(trans);
btrfs_err(fs_info,
"Umm, you don't have the default diritem, this isn't going to work");
ret = -ENOENT;
- goto out;
+ goto out_free;
}
btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_free_path(path);
+ btrfs_release_path(path);
btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
btrfs_end_transaction(trans);
+out_free:
+ btrfs_put_root(new_root);
+ btrfs_free_path(path);
out:
mnt_drop_write_file(file);
return ret;
@@ -4510,16 +3500,15 @@
static void get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
space->total_bytes = 0;
space->used_bytes = 0;
space->flags = 0;
list_for_each_entry(block_group, groups_list, list) {
space->flags = block_group->flags;
- space->total_bytes += block_group->key.offset;
- space->used_bytes +=
- btrfs_block_group_used(&block_group->item);
+ space->total_bytes += block_group->length;
+ space->used_bytes += block_group->used;
}
}
@@ -4553,15 +3542,12 @@
struct btrfs_space_info *tmp;
info = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(tmp, &fs_info->space_info,
- list) {
+ list_for_each_entry(tmp, &fs_info->space_info, list) {
if (tmp->flags == types[i]) {
info = tmp;
break;
}
}
- rcu_read_unlock();
if (!info)
continue;
@@ -4609,15 +3595,12 @@
break;
info = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(tmp, &fs_info->space_info,
- list) {
+ list_for_each_entry(tmp, &fs_info->space_info, list) {
if (tmp->flags == types[i]) {
info = tmp;
break;
}
}
- rcu_read_unlock();
if (!info)
continue;
@@ -4732,6 +3715,18 @@
&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
0);
+ /*
+ * Copy scrub args to user space even if btrfs_scrub_dev() returned an
+ * error. This is important as it allows user space to know how much
+ * progress scrub has done. For example, if scrub is canceled we get
+ * -ECANCELED from btrfs_scrub_dev() and return that error back to user
+ * space. Later user space can inspect the progress from the structure
+ * btrfs_ioctl_scrub_args and resume scrub from where it left off
+ * previously (btrfs-progs does this).
+ * If we fail to copy the btrfs_ioctl_scrub_args structure to user space
+ * then return -EFAULT to signal the structure was not copied or it may
+ * be corrupt and unreliable due to a partial copy.
+ */
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
@@ -4765,7 +3760,7 @@
ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);
- if (copy_to_user(arg, sa, sizeof(*sa)))
+ if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
kfree(sa);
@@ -4789,7 +3784,7 @@
ret = btrfs_get_dev_stats(fs_info, sa);
- if (copy_to_user(arg, sa, sizeof(*sa)))
+ if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
kfree(sa);
@@ -4815,11 +3810,11 @@
ret = -EROFS;
goto out;
}
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else {
ret = btrfs_dev_replace_by_ioctl(fs_info, p);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_exclop_finish(fs_info);
}
break;
case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
@@ -4835,7 +3830,7 @@
break;
}
- if (copy_to_user(arg, p, sizeof(*p)))
+ if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p)))
ret = -EFAULT;
out:
kfree(p);
@@ -4886,6 +3881,8 @@
ipath->fspath->val[i] = rel_ptr;
}
+ btrfs_free_path(path);
+ path = NULL;
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
@@ -4899,26 +3896,6 @@
kfree(ipa);
return ret;
-}
-
-static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
-{
- struct btrfs_data_container *inodes = ctx;
- const size_t c = 3 * sizeof(u64);
-
- if (inodes->bytes_left >= c) {
- inodes->bytes_left -= c;
- inodes->val[inodes->elem_cnt] = inum;
- inodes->val[inodes->elem_cnt + 1] = offset;
- inodes->val[inodes->elem_cnt + 2] = root;
- inodes->elem_cnt += 3;
- } else {
- inodes->bytes_missing += c - inodes->bytes_left;
- inodes->bytes_left = 0;
- inodes->elem_missed += 3;
- }
-
- return 0;
}
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
@@ -4956,21 +3933,20 @@
size = min_t(u32, loi->size, SZ_16M);
}
+ inodes = init_data_container(size);
+ if (IS_ERR(inodes)) {
+ ret = PTR_ERR(inodes);
+ goto out_loi;
+ }
+
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
-
- inodes = init_data_container(size);
- if (IS_ERR(inodes)) {
- ret = PTR_ERR(inodes);
- inodes = NULL;
- goto out;
- }
-
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
- build_ino_list, inodes, ignore_offset);
+ inodes, ignore_offset);
+ btrfs_free_path(path);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -4982,7 +3958,6 @@
ret = -EFAULT;
out:
- btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
@@ -5030,7 +4005,7 @@
return ret;
again:
- if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
@@ -5076,7 +4051,6 @@
}
locked:
- BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
@@ -5131,17 +4105,17 @@
do_balance:
/*
- * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
- * btrfs_balance. bctl is freed in reset_balance_state, or, if
- * restriper was paused all the way until unmount, in free_fs_info.
- * The flag should be cleared after reset_balance_state.
+ * Ownership of bctl and the exclusive operation goes to btrfs_balance.
+ * bctl is freed in reset_balance_state, or, if restriper was paused
+ * all the way until unmount, in free_fs_info. The exclusive operation
+ * should be finished after reset_balance_state.
*/
need_unlock = false;
ret = btrfs_balance(fs_info, bctl, bargs);
bctl = NULL;
- if (arg) {
+ if ((ret == 0 || ret == -ECANCELED) && arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
}
@@ -5153,7 +4127,7 @@
out_unlock:
mutex_unlock(&fs_info->balance_mutex);
if (need_unlock)
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_exclop_finish(fs_info);
out:
mnt_drop_write_file(file);
return ret;
@@ -5430,10 +4404,9 @@
return ret;
}
-static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
+static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret = 0;
@@ -5456,11 +4429,9 @@
return ret;
}
-static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
+static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -5632,10 +4603,9 @@
return ret;
}
-static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
+static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
size_t len;
int ret;
char label[BTRFS_LABEL_SIZE];
@@ -5719,10 +4689,9 @@
return 0;
}
-static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
+static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_ioctl_feature_flags features;
@@ -5741,7 +4710,7 @@
u64 change_mask, u64 flags, u64 supported_flags,
u64 safe_set, u64 safe_clear)
{
- const char *type = btrfs_feature_set_names[set];
+ const char *type = btrfs_feature_set_name(set);
char *names;
u64 disallowed, unsupported;
u64 set_mask = flags & change_mask;
@@ -5922,8 +4891,12 @@
return btrfs_ioctl_setflags(file, argp);
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
+ case FS_IOC_GETFSLABEL:
+ return btrfs_ioctl_get_fslabel(fs_info, argp);
+ case FS_IOC_SETFSLABEL:
+ return btrfs_ioctl_set_fslabel(file, argp);
case FITRIM:
- return btrfs_ioctl_fitrim(file, argp);
+ return btrfs_ioctl_fitrim(fs_info, argp);
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SNAP_CREATE_V2:
@@ -5933,7 +4906,9 @@
case BTRFS_IOC_SUBVOL_CREATE_V2:
return btrfs_ioctl_snap_create_v2(file, argp, 1);
case BTRFS_IOC_SNAP_DESTROY:
- return btrfs_ioctl_snap_destroy(file, argp);
+ return btrfs_ioctl_snap_destroy(file, argp, false);
+ case BTRFS_IOC_SNAP_DESTROY_V2:
+ return btrfs_ioctl_snap_destroy(file, argp, true);
case BTRFS_IOC_SUBVOL_GETFLAGS:
return btrfs_ioctl_subvol_getflags(file, argp);
case BTRFS_IOC_SUBVOL_SETFLAGS:
@@ -5975,7 +4950,7 @@
case BTRFS_IOC_SYNC: {
int ret;
- ret = btrfs_start_delalloc_roots(fs_info, -1);
+ ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
if (ret)
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -6028,19 +5003,15 @@
case BTRFS_IOC_QUOTA_RESCAN:
return btrfs_ioctl_quota_rescan(file, argp);
case BTRFS_IOC_QUOTA_RESCAN_STATUS:
- return btrfs_ioctl_quota_rescan_status(file, argp);
+ return btrfs_ioctl_quota_rescan_status(fs_info, argp);
case BTRFS_IOC_QUOTA_RESCAN_WAIT:
- return btrfs_ioctl_quota_rescan_wait(file, argp);
+ return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
case BTRFS_IOC_DEV_REPLACE:
return btrfs_ioctl_dev_replace(fs_info, argp);
- case BTRFS_IOC_GET_FSLABEL:
- return btrfs_ioctl_get_fslabel(file, argp);
- case BTRFS_IOC_SET_FSLABEL:
- return btrfs_ioctl_set_fslabel(file, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
return btrfs_ioctl_get_supported_features(argp);
case BTRFS_IOC_GET_FEATURES:
- return btrfs_ioctl_get_features(file, argp);
+ return btrfs_ioctl_get_features(fs_info, argp);
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
case FS_IOC_FSGETXATTR:
--
Gitblit v1.6.2