From 01573e231f18eb2d99162747186f59511f56b64d Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 08 Dec 2023 10:40:48 +0000
Subject: [PATCH] 移去rt
---
kernel/fs/btrfs/super.c | 481 +++++++++++++++++++++++++++++++---------------------
1 files changed, 286 insertions(+), 195 deletions(-)
diff --git a/kernel/fs/btrfs/super.c b/kernel/fs/btrfs/super.c
index 521f6c2..310f18d 100644
--- a/kernel/fs/btrfs/super.c
+++ b/kernel/fs/btrfs/super.c
@@ -42,7 +42,11 @@
#include "dev-replace.h"
#include "free-space-cache.h"
#include "backref.h"
+#include "space-info.h"
+#include "sysfs.h"
#include "tests/btrfs-tests.h"
+#include "block-group.h"
+#include "discard.h"
#include "qgroup.h"
#define CREATE_TRACE_POINTS
@@ -63,28 +67,52 @@
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
-const char *btrfs_decode_error(int errno)
+/*
+ * Generally the error codes correspond to their respective errors, but there
+ * are a few special cases.
+ *
+ * EUCLEAN: Any sort of corruption that we encounter. The tree-checker for
+ * instance will return EUCLEAN if any of the blocks are corrupted in
+ * a way that is problematic. We want to reserve EUCLEAN for these
+ * sort of corruptions.
+ *
+ * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
+ * need to use EROFS for this case. We will have no idea of the
+ * original failure, that will have been reported at the time we tripped
+ * over the error. Each subsequent error that doesn't have any context
+ * of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
+ */
+const char * __attribute_const__ btrfs_decode_error(int errno)
{
char *errstr = "unknown";
switch (errno) {
- case -EIO:
+ case -ENOENT: /* -2 */
+ errstr = "No such entry";
+ break;
+ case -EIO: /* -5 */
errstr = "IO failure";
break;
- case -ENOMEM:
+ case -ENOMEM: /* -12*/
errstr = "Out of memory";
break;
- case -EROFS:
- errstr = "Readonly filesystem";
- break;
- case -EEXIST:
+ case -EEXIST: /* -17 */
errstr = "Object already exists";
break;
- case -ENOSPC:
+ case -ENOSPC: /* -28 */
errstr = "No space left";
break;
- case -ENOENT:
- errstr = "No such entry";
+ case -EROFS: /* -30 */
+ errstr = "Readonly filesystem";
+ break;
+ case -EOPNOTSUPP: /* -95 */
+ errstr = "Operation not supported";
+ break;
+ case -EUCLEAN: /* -117 */
+ errstr = "Filesystem corrupted";
+ break;
+ case -EDQUOT: /* -122 */
+ errstr = "Quota exceeded";
break;
}
@@ -93,7 +121,7 @@
/*
* __btrfs_handle_fs_error decodes expected errors from the caller and
- * invokes the approciate error response.
+ * invokes the appropriate error response.
*/
__cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
@@ -143,6 +171,8 @@
if (sb_rdonly(sb))
return;
+ btrfs_discard_stop(fs_info);
+
/* btrfs handle error by forcing the filesystem readonly */
sb->s_flags |= SB_RDONLY;
btrfs_info(fs_info, "forced readonly");
@@ -151,7 +181,7 @@
* although there is no way to update the progress. It would add the
* risk of a deadlock, therefore the canceling is omitted. The only
* penalty is that some I/O remains active until the procedure
- * completes. The next time when the filesystem is mounted writeable
+ * completes. The next time when the filesystem is mounted writable
* again, the device replace operation continues.
*/
}
@@ -184,7 +214,7 @@
RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};
-void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
struct va_format vaf;
@@ -238,7 +268,7 @@
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- trans->aborted = errno;
+ WRITE_ONCE(trans->aborted, errno);
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
if (!trans->dirty && list_empty(&trans->new_bgs)) {
@@ -310,7 +340,7 @@
Opt_datasum, Opt_nodatasum,
Opt_defrag, Opt_nodefrag,
Opt_discard, Opt_nodiscard,
- Opt_nologreplay,
+ Opt_discard_mode,
Opt_norecovery,
Opt_ratio,
Opt_rescan_uuid_tree,
@@ -324,13 +354,15 @@
Opt_subvolid,
Opt_thread_pool,
Opt_treelog, Opt_notreelog,
- Opt_usebackuproot,
Opt_user_subvol_rm_allowed,
+ /* Rescue options */
+ Opt_rescue,
+ Opt_usebackuproot,
+ Opt_nologreplay,
+
/* Deprecated options */
- Opt_alloc_start,
Opt_recovery,
- Opt_subvolrootid,
/* Debugging options */
Opt_check_integrity,
@@ -372,8 +404,8 @@
{Opt_defrag, "autodefrag"},
{Opt_nodefrag, "noautodefrag"},
{Opt_discard, "discard"},
+ {Opt_discard_mode, "discard=%s"},
{Opt_nodiscard, "nodiscard"},
- {Opt_nologreplay, "nologreplay"},
{Opt_norecovery, "norecovery"},
{Opt_ratio, "metadata_ratio=%u"},
{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
@@ -391,13 +423,17 @@
{Opt_thread_pool, "thread_pool=%u"},
{Opt_treelog, "treelog"},
{Opt_notreelog, "notreelog"},
- {Opt_usebackuproot, "usebackuproot"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+ /* Rescue options */
+ {Opt_rescue, "rescue=%s"},
+ /* Deprecated, with alias rescue=nologreplay */
+ {Opt_nologreplay, "nologreplay"},
+ /* Deprecated, with alias rescue=usebackuproot */
+ {Opt_usebackuproot, "usebackuproot"},
+
/* Deprecated options */
- {Opt_alloc_start, "alloc_start=%s"},
{Opt_recovery, "recovery"},
- {Opt_subvolrootid, "subvolrootid=%d"},
/* Debugging options */
{Opt_check_integrity, "check_int"},
@@ -415,6 +451,55 @@
#endif
{Opt_err, NULL},
};
+
+static const match_table_t rescue_tokens = {
+ {Opt_usebackuproot, "usebackuproot"},
+ {Opt_nologreplay, "nologreplay"},
+ {Opt_err, NULL},
+};
+
+static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
+{
+ char *opts;
+ char *orig;
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int ret = 0;
+
+ opts = kstrdup(options, GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+ orig = opts;
+
+ while ((p = strsep(&opts, ":")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+ token = match_token(p, rescue_tokens, args);
+ switch (token){
+ case Opt_usebackuproot:
+ btrfs_info(info,
+ "trying to use backup root at mount time");
+ btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
+ break;
+ case Opt_nologreplay:
+ btrfs_set_and_info(info, NOLOGREPLAY,
+ "disabling log replay at mount time");
+ break;
+ case Opt_err:
+ btrfs_info(info, "unrecognized rescue option '%s'", p);
+ ret = -EINVAL;
+ goto out;
+ default:
+ break;
+ }
+
+ }
+out:
+ kfree(orig);
+ return ret;
+}
/*
* Regular mount options parser. Everything that is needed only when
@@ -463,7 +548,6 @@
case Opt_subvol:
case Opt_subvol_empty:
case Opt_subvolid:
- case Opt_subvolrootid:
case Opt_device:
/*
* These are parsed by btrfs_parse_subvol_options or
@@ -507,7 +591,7 @@
case Opt_compress_force:
case Opt_compress_force_type:
compress_force = true;
- /* Fallthrough */
+ fallthrough;
case Opt_compress:
case Opt_compress_type:
saved_compress_type = btrfs_test_opt(info,
@@ -531,7 +615,9 @@
if (token != Opt_compress &&
token != Opt_compress_force)
info->compress_level =
- btrfs_compress_str2level(args[0].from);
+ btrfs_compress_str2level(
+ BTRFS_COMPRESS_ZLIB,
+ args[0].from + 4);
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
@@ -545,9 +631,13 @@
btrfs_clear_opt(info->mount_opt, NODATASUM);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
no_compress = 0;
- } else if (strcmp(args[0].from, "zstd") == 0) {
+ } else if (strncmp(args[0].from, "zstd", 4) == 0) {
compress_type = "zstd";
info->compress_type = BTRFS_COMPRESS_ZSTD;
+ info->compress_level =
+ btrfs_compress_str2level(
+ BTRFS_COMPRESS_ZSTD,
+ args[0].from + 4);
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
@@ -562,6 +652,8 @@
compress_force = false;
no_compress++;
} else {
+ btrfs_err(info, "unrecognized compression value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -604,7 +696,7 @@
btrfs_set_opt(info->mount_opt, NOSSD);
btrfs_clear_and_info(info, SSD,
"not using ssd optimizations");
- /* Fallthrough */
+ fallthrough;
case Opt_nossd_spread:
btrfs_clear_and_info(info, SSD_SPREAD,
"not using spread ssd allocation scheme");
@@ -620,8 +712,11 @@
case Opt_thread_pool:
ret = match_int(&args[0], &intarg);
if (ret) {
+ btrfs_err(info, "unrecognized thread_pool value %s",
+ args[0].from);
goto out;
} else if (intarg == 0) {
+ btrfs_err(info, "invalid value 0 for thread_pool");
ret = -EINVAL;
goto out;
}
@@ -645,10 +740,6 @@
goto out;
}
break;
- case Opt_alloc_start:
- btrfs_info(info,
- "option alloc_start is obsolete, ignored");
- break;
case Opt_acl:
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
info->sb->s_flags |= SB_POSIXACL;
@@ -671,6 +762,8 @@
break;
case Opt_norecovery:
case Opt_nologreplay:
+ btrfs_warn(info,
+ "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
@@ -684,19 +777,38 @@
break;
case Opt_ratio:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized metadata_ratio value %s",
+ args[0].from);
goto out;
+ }
info->metadata_ratio = intarg;
btrfs_info(info, "metadata ratio %u",
info->metadata_ratio);
break;
case Opt_discard:
- btrfs_set_and_info(info, DISCARD,
- "turning on discard");
+ case Opt_discard_mode:
+ if (token == Opt_discard ||
+ strcmp(args[0].from, "sync") == 0) {
+ btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
+ btrfs_set_and_info(info, DISCARD_SYNC,
+ "turning on sync discard");
+ } else if (strcmp(args[0].from, "async") == 0) {
+ btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
+ btrfs_set_and_info(info, DISCARD_ASYNC,
+ "turning on async discard");
+ } else {
+ btrfs_err(info, "unrecognized discard mode value %s",
+ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
break;
case Opt_nodiscard:
- btrfs_clear_and_info(info, DISCARD,
+ btrfs_clear_and_info(info, DISCARD_SYNC,
"turning off discard");
+ btrfs_clear_and_info(info, DISCARD_ASYNC,
+ "turning off async discard");
break;
case Opt_space_cache:
case Opt_space_cache_version:
@@ -712,6 +824,8 @@
btrfs_set_and_info(info, FREE_SPACE_TREE,
"enabling free space tree");
} else {
+ btrfs_err(info, "unrecognized space_cache value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -730,6 +844,8 @@
}
break;
case Opt_inode_cache:
+ btrfs_warn(info,
+ "the 'inode_cache' option is deprecated and will have no effect from 5.11");
btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
"enabling inode map caching");
break;
@@ -759,10 +875,11 @@
"disabling auto defrag");
break;
case Opt_recovery:
- btrfs_warn(info,
- "'recovery' is deprecated, use 'usebackuproot' instead");
- /* fall through */
case Opt_usebackuproot:
+ btrfs_warn(info,
+ "'%s' is deprecated, use 'rescue=usebackuproot' instead",
+ token == Opt_recovery ? "recovery" :
+ "usebackuproot");
btrfs_info(info,
"trying to use backup root at mount time");
btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
@@ -784,8 +901,12 @@
break;
case Opt_check_integrity_print_mask:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info,
+ "unrecognized check_integrity_print_mask value %s",
+ args[0].from);
goto out;
+ }
info->check_integrity_print_mask = intarg;
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
@@ -800,13 +921,15 @@
goto out;
#endif
case Opt_fatal_errors:
- if (strcmp(args[0].from, "panic") == 0)
+ if (strcmp(args[0].from, "panic") == 0) {
btrfs_set_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else if (strcmp(args[0].from, "bug") == 0)
+ } else if (strcmp(args[0].from, "bug") == 0) {
btrfs_clear_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else {
+ } else {
+ btrfs_err(info, "unrecognized fatal_errors value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -814,8 +937,12 @@
case Opt_commit_interval:
intarg = 0;
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized commit_interval value %s",
+ args[0].from);
+ ret = -EINVAL;
goto out;
+ }
if (intarg == 0) {
btrfs_info(info,
"using default commit interval %us",
@@ -826,6 +953,14 @@
intarg);
}
info->commit_interval = intarg;
+ break;
+ case Opt_rescue:
+ ret = parse_rescue_options(info, args[0].from);
+ if (ret < 0) {
+ btrfs_err(info, "unrecognized rescue value %s",
+ args[0].from);
+ goto out;
+ }
break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
@@ -850,7 +985,7 @@
break;
#endif
case Opt_err:
- btrfs_info(info, "unrecognized mount option '%s'", p);
+ btrfs_err(info, "unrecognized mount option '%s'", p);
ret = -EINVAL;
goto out;
default:
@@ -988,9 +1123,6 @@
*subvol_objectid = subvolid;
break;
- case Opt_subvolrootid:
- pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
- break;
default:
break;
}
@@ -1005,7 +1137,7 @@
u64 subvol_objectid)
{
struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_root *fs_root;
+ struct btrfs_root *fs_root = NULL;
struct btrfs_root_ref *root_ref;
struct btrfs_inode_ref *inode_ref;
struct btrfs_key key;
@@ -1070,12 +1202,10 @@
dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
btrfs_release_path(path);
- key.objectid = subvol_objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
if (IS_ERR(fs_root)) {
ret = PTR_ERR(fs_root);
+ fs_root = NULL;
goto err;
}
@@ -1120,6 +1250,8 @@
ptr[0] = '/';
btrfs_release_path(path);
}
+ btrfs_put_root(fs_root);
+ fs_root = NULL;
}
btrfs_free_path(path);
@@ -1132,6 +1264,7 @@
return name;
err:
+ btrfs_put_root(fs_root);
btrfs_free_path(path);
kfree(name);
return ERR_PTR(ret);
@@ -1184,7 +1317,6 @@
{
struct inode *inode;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_key key;
int err;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -1212,10 +1344,7 @@
return err;
}
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
+ inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail_close;
@@ -1316,11 +1445,13 @@
if (btrfs_test_opt(info, NOTREELOG))
seq_puts(seq, ",notreelog");
if (btrfs_test_opt(info, NOLOGREPLAY))
- seq_puts(seq, ",nologreplay");
+ seq_puts(seq, ",rescue=nologreplay");
if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
- if (btrfs_test_opt(info, DISCARD))
+ if (btrfs_test_opt(info, DISCARD_SYNC))
seq_puts(seq, ",discard");
+ if (btrfs_test_opt(info, DISCARD_ASYNC))
+ seq_puts(seq, ",discard=async");
if (!(info->sb->s_flags & SB_POSIXACL))
seq_puts(seq, ",noacl");
if (btrfs_test_opt(info, SPACE_CACHE))
@@ -1405,7 +1536,7 @@
}
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
- const char *device_name, struct vfsmount *mnt)
+ struct vfsmount *mnt)
{
struct dentry *root;
int ret;
@@ -1469,56 +1600,6 @@
return root;
}
-static int parse_security_options(char *orig_opts,
- struct security_mnt_opts *sec_opts)
-{
- char *secdata = NULL;
- int ret = 0;
-
- secdata = alloc_secdata();
- if (!secdata)
- return -ENOMEM;
- ret = security_sb_copy_data(orig_opts, secdata);
- if (ret) {
- free_secdata(secdata);
- return ret;
- }
- ret = security_sb_parse_opts_str(secdata, sec_opts);
- free_secdata(secdata);
- return ret;
-}
-
-static int setup_security_options(struct btrfs_fs_info *fs_info,
- struct super_block *sb,
- struct security_mnt_opts *sec_opts)
-{
- int ret = 0;
-
- /*
- * Call security_sb_set_mnt_opts() to check whether new sec_opts
- * is valid.
- */
- ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
- if (ret)
- return ret;
-
-#ifdef CONFIG_SECURITY
- if (!fs_info->security_opts.num_mnt_opts) {
- /* first time security setup, copy sec_opts to fs_info */
- memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
- } else {
- /*
- * Since SELinux (the only one supporting security_mnt_opts)
- * does NOT support changing context during remount/mount of
- * the same sb, this must be the same or part of the same
- * security options, just free it.
- */
- security_free_mnt_opts(sec_opts);
- }
-#endif
- return ret;
-}
-
/*
* Find a superblock for the given device / mount point.
*
@@ -1533,16 +1614,15 @@
struct btrfs_device *device = NULL;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
- struct security_mnt_opts new_sec_opts;
+ void *new_sec_opts = NULL;
fmode_t mode = FMODE_READ;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- security_init_mnt_opts(&new_sec_opts);
if (data) {
- error = parse_security_options(data, &new_sec_opts);
+ error = security_sb_eat_lsm_opts(data, &new_sec_opts);
if (error)
return ERR_PTR(error);
}
@@ -1550,18 +1630,20 @@
/*
* Setup a dummy root and fs_info for test/set super. This is because
* we don't actually fill this stuff out until open_ctree, but we need
- * it for searching for existing supers, so this lets us do that and
- * then open_ctree will properly initialize everything later.
+ * then open_ctree will properly initialize the file system specific
+ * settings later. btrfs_init_fs_info initializes the static elements
+ * of the fs_info (locks and such) to make cleanup easier if we find a
+ * superblock with our given fs_devices later on at sget() time.
*/
fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
if (!fs_info) {
error = -ENOMEM;
goto error_sec_opts;
}
+ btrfs_init_fs_info(fs_info);
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
- security_init_mnt_opts(&fs_info->security_opts);
if (!fs_info->super_copy || !fs_info->super_for_commit) {
error = -ENOMEM;
goto error_fs_info;
@@ -1604,24 +1686,22 @@
if (s->s_root) {
btrfs_close_devices(fs_devices);
- free_fs_info(fs_info);
+ btrfs_free_fs_info(fs_info);
if ((flags ^ s->s_flags) & SB_RDONLY)
error = -EBUSY;
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
btrfs_sb(s)->bdev_holder = fs_type;
+ if (!strstr(crc32c_impl(), "generic"))
+ set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
error = btrfs_fill_super(s, fs_devices, data);
}
+ if (!error)
+ error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
+ security_free_mnt_opts(&new_sec_opts);
if (error) {
deactivate_locked_super(s);
- goto error_sec_opts;
- }
-
- fs_info = btrfs_sb(s);
- error = setup_security_options(fs_info, s, &new_sec_opts);
- if (error) {
- deactivate_locked_super(s);
- goto error_sec_opts;
+ return ERR_PTR(error);
}
return dget(s->s_root);
@@ -1629,7 +1709,7 @@
error_close_devices:
btrfs_close_devices(fs_devices);
error_fs_info:
- free_fs_info(fs_info);
+ btrfs_free_fs_info(fs_info);
error_sec_opts:
security_free_mnt_opts(&new_sec_opts);
return ERR_PTR(error);
@@ -1662,13 +1742,9 @@
{
struct vfsmount *mnt_root;
struct dentry *root;
- fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
-
- if (!(flags & SB_RDONLY))
- mode |= FMODE_WRITE;
error = btrfs_parse_subvol_options(data, &subvol_name,
&subvol_objectid);
@@ -1710,7 +1786,7 @@
}
/* mount_subvol() will free subvol_name and mnt_root */
- root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
+ root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
out:
return root;
@@ -1729,7 +1805,6 @@
btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
@@ -1741,11 +1816,6 @@
btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
new_pool_size);
-}
-
-static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
-{
- set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
@@ -1774,7 +1844,13 @@
btrfs_cleanup_defrag_inodes(fs_info);
}
- clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+ /* If we toggled discard async */
+ if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
+ btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ btrfs_discard_resume(fs_info);
+ else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
+ !btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ btrfs_discard_cleanup(fs_info);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -1790,21 +1866,17 @@
int ret;
sync_filesystem(sb);
- btrfs_remount_prepare(fs_info);
+ set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
if (data) {
- struct security_mnt_opts new_sec_opts;
+ void *new_sec_opts = NULL;
- security_init_mnt_opts(&new_sec_opts);
- ret = parse_security_options(data, &new_sec_opts);
+ ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
+ if (!ret)
+ ret = security_sb_remount(sb, new_sec_opts);
+ security_free_mnt_opts(&new_sec_opts);
if (ret)
goto restore;
- ret = setup_security_options(fs_info, sb,
- &new_sec_opts);
- if (ret) {
- security_free_mnt_opts(&new_sec_opts);
- goto restore;
- }
}
ret = btrfs_parse_options(fs_info, data, *flags);
@@ -1824,6 +1896,9 @@
* the filesystem is busy.
*/
cancel_work_sync(&fs_info->async_reclaim_work);
+ cancel_work_sync(&fs_info->async_data_reclaim_work);
+
+ btrfs_discard_cleanup(fs_info);
/* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem);
@@ -1870,7 +1945,7 @@
if (!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
- "too many missing devices, writeable remount is not allowed");
+ "too many missing devices, writable remount is not allowed");
ret = -EACCES;
goto restore;
}
@@ -1920,8 +1995,16 @@
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
}
out:
+ /*
+ * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
+ * since the absence of the flag means it can be toggled off by remount.
+ */
+ *flags |= SB_I_VERSION;
+
wake_up_process(fs_info->transaction_kthread);
btrfs_remount_cleanup(fs_info, old_opts);
+ clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+
return 0;
restore:
@@ -1936,6 +2019,8 @@
old_thread_pool_size, fs_info->thread_pool_size);
fs_info->metadata_ratio = old_metadata_ratio;
btrfs_remount_cleanup(fs_info, old_opts);
+ clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+
return ret;
}
@@ -1975,12 +2060,12 @@
struct btrfs_device_info *devices_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
- u64 skip_space;
u64 type;
u64 avail_space;
u64 min_stripe_size;
- int min_stripes = 1, num_stripes = 1;
+ int num_stripes = 1;
int i = 0, nr_devices;
+ const struct btrfs_raid_attr *rattr;
/*
* We aren't under the device list lock, so this is racy-ish, but good
@@ -2004,21 +2089,21 @@
/* calc min stripe number for data space allocation */
type = btrfs_data_alloc_profile(fs_info);
- if (type & BTRFS_BLOCK_GROUP_RAID0) {
- min_stripes = 2;
- num_stripes = nr_devices;
- } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
- min_stripes = 2;
- num_stripes = 2;
- } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
- min_stripes = 4;
- num_stripes = 4;
- }
+ rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
- if (type & BTRFS_BLOCK_GROUP_DUP)
- min_stripe_size = 2 * BTRFS_STRIPE_LEN;
- else
- min_stripe_size = BTRFS_STRIPE_LEN;
+ if (type & BTRFS_BLOCK_GROUP_RAID0)
+ num_stripes = nr_devices;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1)
+ num_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
+ num_stripes = 3;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1C4)
+ num_stripes = 4;
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
+ num_stripes = 4;
+
+ /* Adjust for more than 1 stripe per device */
+ min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
@@ -2034,27 +2119,20 @@
avail_space = device->total_bytes - device->bytes_used;
/* align with stripe_len */
- avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
- avail_space *= BTRFS_STRIPE_LEN;
+ avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
/*
* In order to avoid overwriting the superblock on the drive,
* btrfs starts at an offset of at least 1MB when doing chunk
* allocation.
+ *
+ * This ensures we have at least min_stripe_size free space
+ * after excluding 1MB.
*/
- skip_space = SZ_1M;
-
- /*
- * we can use the free space in [0, skip_space - 1], subtract
- * it from the total.
- */
- if (avail_space && avail_space >= skip_space)
- avail_space -= skip_space;
- else
- avail_space = 0;
-
- if (avail_space < min_stripe_size)
+ if (avail_space <= SZ_1M + min_stripe_size)
continue;
+
+ avail_space -= SZ_1M;
devices_info[i].dev = device;
devices_info[i].max_avail = avail_space;
@@ -2069,9 +2147,8 @@
i = nr_devices - 1;
avail_space = 0;
- while (nr_devices >= min_stripes) {
- if (num_stripes > nr_devices)
- num_stripes = nr_devices;
+ while (nr_devices >= rattr->devs_min) {
+ num_stripes = min(num_stripes, nr_devices);
if (devices_info[i].max_avail >= min_stripe_size) {
int j;
@@ -2108,21 +2185,19 @@
{
struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
struct btrfs_super_block *disk_super = fs_info->super_copy;
- struct list_head *head = &fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
u64 total_free_data = 0;
u64 total_free_meta = 0;
int bits = dentry->d_sb->s_blocksize_bits;
- __be32 *fsid = (__be32 *)fs_info->fsid;
+ __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
unsigned factor = 1;
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
int ret;
u64 thresh = 0;
int mixed = 0;
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
+ list_for_each_entry(found, &fs_info->space_info, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
int i;
@@ -2150,8 +2225,6 @@
total_used += found->disk_used;
}
-
- rcu_read_unlock();
buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
buf->f_blocks >>= bits;
@@ -2209,8 +2282,10 @@
buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
/* Mask in the root object ID too, to disambiguate subvols */
- buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
- buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
+ buf->f_fsid.val[0] ^=
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
+ buf->f_fsid.val[1] ^=
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid;
return 0;
}
@@ -2219,7 +2294,7 @@
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
kill_anon_super(sb);
- free_fs_info(fs_info);
+ btrfs_free_fs_info(fs_info);
}
static struct file_system_type btrfs_fs_type = {
@@ -2252,7 +2327,7 @@
}
/*
- * used by btrfsctl to scan devices when no FS is mounted
+ * Used by /dev/btrfs-control for devices ioctls.
*/
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
@@ -2276,6 +2351,9 @@
&btrfs_root_fs_type);
ret = PTR_ERR_OR_ZERO(device);
mutex_unlock(&uuid_mutex);
+ break;
+ case BTRFS_IOC_FORGET_DEV:
+ ret = btrfs_forget_devices(vol->name);
break;
case BTRFS_IOC_DEVICES_READY:
mutex_lock(&uuid_mutex);
@@ -2340,7 +2418,7 @@
* device_list_mutex here as we only read the device data and the list
* is protected by RCU. Even if a device is deleted during the list
* traversals, we'll get valid data, the freeing callback will wait at
- * least until until the rcu_read_unlock.
+ * least until the rcu_read_unlock.
*/
rcu_read_lock();
list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
@@ -2369,6 +2447,7 @@
.show_devname = btrfs_show_devname,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
+ .free_inode = btrfs_free_inode,
.statfs = btrfs_statfs,
.remount_fs = btrfs_remount,
.freeze_fs = btrfs_freeze,
@@ -2378,7 +2457,7 @@
static const struct file_operations btrfs_ctl_fops = {
.open = btrfs_control_open,
.unlocked_ioctl = btrfs_control_ioctl,
- .compat_ioctl = btrfs_control_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
@@ -2441,9 +2520,13 @@
if (err)
goto free_cachep;
- err = extent_map_init();
+ err = extent_state_cache_init();
if (err)
goto free_extent_io;
+
+ err = extent_map_init();
+ if (err)
+ goto free_extent_state_cache;
err = ordered_data_init();
if (err)
@@ -2503,6 +2586,8 @@
ordered_data_exit();
free_extent_map:
extent_map_exit();
+free_extent_state_cache:
+ extent_state_cache_exit();
free_extent_io:
extent_io_exit();
free_cachep:
@@ -2523,6 +2608,7 @@
btrfs_prelim_ref_exit();
ordered_data_exit();
extent_map_exit();
+ extent_state_cache_exit();
extent_io_exit();
btrfs_interface_exit();
btrfs_end_io_wq_exit();
@@ -2536,3 +2622,8 @@
module_exit(exit_btrfs_fs)
MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);
+MODULE_SOFTDEP("pre: crc32c");
+MODULE_SOFTDEP("pre: xxhash64");
+MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: blake2b-256");
--
Gitblit v1.6.2