From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/btrfs/super.c | 481 +++++++++++++++++++++++++++++++--------------------- 1 files changed, 285 insertions(+), 196 deletions(-) diff --git a/kernel/fs/btrfs/super.c b/kernel/fs/btrfs/super.c index 521f6c2..95b7115 100644 --- a/kernel/fs/btrfs/super.c +++ b/kernel/fs/btrfs/super.c @@ -42,7 +42,11 @@ #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" +#include "space-info.h" +#include "sysfs.h" #include "tests/btrfs-tests.h" +#include "block-group.h" +#include "discard.h" #include "qgroup.h" #define CREATE_TRACE_POINTS @@ -63,28 +67,52 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data); -const char *btrfs_decode_error(int errno) +/* + * Generally the error codes correspond to their respective errors, but there + * are a few special cases. + * + * EUCLEAN: Any sort of corruption that we encounter. The tree-checker for + * instance will return EUCLEAN if any of the blocks are corrupted in + * a way that is problematic. We want to reserve EUCLEAN for these + * sort of corruptions. + * + * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we + * need to use EROFS for this case. We will have no idea of the + * original failure, that will have been reported at the time we tripped + * over the error. Each subsequent error that doesn't have any context + * of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR. + */ +const char * __attribute_const__ btrfs_decode_error(int errno) { char *errstr = "unknown"; switch (errno) { - case -EIO: + case -ENOENT: /* -2 */ + errstr = "No such entry"; + break; + case -EIO: /* -5 */ errstr = "IO failure"; break; - case -ENOMEM: + case -ENOMEM: /* -12*/ errstr = "Out of memory"; break; - case -EROFS: - errstr = "Readonly filesystem"; - break; - case -EEXIST: + case -EEXIST: /* -17 */ errstr = "Object already exists"; break; - case -ENOSPC: + case -ENOSPC: /* -28 */ errstr = "No space left"; break; - case -ENOENT: - errstr = "No such entry"; + case -EROFS: /* -30 */ + errstr = "Readonly filesystem"; + break; + case -EOPNOTSUPP: /* -95 */ + errstr = "Operation not supported"; + break; + case -EUCLEAN: /* -117 */ + errstr = "Filesystem corrupted"; + break; + case -EDQUOT: /* -122 */ + errstr = "Quota exceeded"; break; } @@ -93,7 +121,7 @@ /* * __btrfs_handle_fs_error decodes expected errors from the caller and - * invokes the approciate error response. + * invokes the appropriate error response. */ __cold void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, @@ -143,6 +171,8 @@ if (sb_rdonly(sb)) return; + btrfs_discard_stop(fs_info); + /* btrfs handle error by forcing the filesystem readonly */ sb->s_flags |= SB_RDONLY; btrfs_info(fs_info, "forced readonly"); @@ -151,7 +181,7 @@ * although there is no way to update the progress. It would add the * risk of a deadlock, therefore the canceling is omitted. The only * penalty is that some I/O remains active until the procedure - * completes. The next time when the filesystem is mounted writeable + * completes. The next time when the filesystem is mounted writable * again, the device replace operation continues. */ } @@ -184,7 +214,7 @@ RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100), }; -void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) +void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) { char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0"; struct va_format vaf; @@ -238,7 +268,7 @@ { struct btrfs_fs_info *fs_info = trans->fs_info; - trans->aborted = errno; + WRITE_ONCE(trans->aborted, errno); /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ if (!trans->dirty && list_empty(&trans->new_bgs)) { @@ -310,7 +340,7 @@ Opt_datasum, Opt_nodatasum, Opt_defrag, Opt_nodefrag, Opt_discard, Opt_nodiscard, - Opt_nologreplay, + Opt_discard_mode, Opt_norecovery, Opt_ratio, Opt_rescan_uuid_tree, @@ -324,13 +354,15 @@ Opt_subvolid, Opt_thread_pool, Opt_treelog, Opt_notreelog, - Opt_usebackuproot, Opt_user_subvol_rm_allowed, + /* Rescue options */ + Opt_rescue, + Opt_usebackuproot, + Opt_nologreplay, + /* Deprecated options */ - Opt_alloc_start, Opt_recovery, - Opt_subvolrootid, /* Debugging options */ Opt_check_integrity, @@ -372,8 +404,8 @@ {Opt_defrag, "autodefrag"}, {Opt_nodefrag, "noautodefrag"}, {Opt_discard, "discard"}, + {Opt_discard_mode, "discard=%s"}, {Opt_nodiscard, "nodiscard"}, - {Opt_nologreplay, "nologreplay"}, {Opt_norecovery, "norecovery"}, {Opt_ratio, "metadata_ratio=%u"}, {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, @@ -391,13 +423,17 @@ {Opt_thread_pool, "thread_pool=%u"}, {Opt_treelog, "treelog"}, {Opt_notreelog, "notreelog"}, - {Opt_usebackuproot, "usebackuproot"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, + /* Rescue options */ + {Opt_rescue, "rescue=%s"}, + /* Deprecated, with alias rescue=nologreplay */ + {Opt_nologreplay, "nologreplay"}, + /* Deprecated, with alias rescue=usebackuproot */ + {Opt_usebackuproot, "usebackuproot"}, + /* Deprecated options */ - {Opt_alloc_start, "alloc_start=%s"}, {Opt_recovery, "recovery"}, - {Opt_subvolrootid, "subvolrootid=%d"}, /* Debugging options */ {Opt_check_integrity, "check_int"}, @@ -415,6 +451,55 @@ #endif {Opt_err, NULL}, }; + +static const match_table_t rescue_tokens = { + {Opt_usebackuproot, "usebackuproot"}, + {Opt_nologreplay, "nologreplay"}, + {Opt_err, NULL}, +}; + +static int parse_rescue_options(struct btrfs_fs_info *info, const char *options) +{ + char *opts; + char *orig; + char *p; + substring_t args[MAX_OPT_ARGS]; + int ret = 0; + + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + orig = opts; + + while ((p = strsep(&opts, ":")) != NULL) { + int token; + + if (!*p) + continue; + token = match_token(p, rescue_tokens, args); + switch (token){ + case Opt_usebackuproot: + btrfs_info(info, + "trying to use backup root at mount time"); + btrfs_set_opt(info->mount_opt, USEBACKUPROOT); + break; + case Opt_nologreplay: + btrfs_set_and_info(info, NOLOGREPLAY, + "disabling log replay at mount time"); + break; + case Opt_err: + btrfs_info(info, "unrecognized rescue option '%s'", p); + ret = -EINVAL; + goto out; + default: + break; + } + + } +out: + kfree(orig); + return ret; +} /* * Regular mount options parser. Everything that is needed only when @@ -463,7 +548,6 @@ case Opt_subvol: case Opt_subvol_empty: case Opt_subvolid: - case Opt_subvolrootid: case Opt_device: /* * These are parsed by btrfs_parse_subvol_options or @@ -507,7 +591,7 @@ case Opt_compress_force: case Opt_compress_force_type: compress_force = true; - /* Fallthrough */ + fallthrough; case Opt_compress: case Opt_compress_type: saved_compress_type = btrfs_test_opt(info, @@ -531,7 +615,9 @@ if (token != Opt_compress && token != Opt_compress_force) info->compress_level = - btrfs_compress_str2level(args[0].from); + btrfs_compress_str2level( + BTRFS_COMPRESS_ZLIB, + args[0].from + 4); btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); @@ -545,9 +631,13 @@ btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_set_fs_incompat(info, COMPRESS_LZO); no_compress = 0; - } else if (strcmp(args[0].from, "zstd") == 0) { + } else if (strncmp(args[0].from, "zstd", 4) == 0) { compress_type = "zstd"; info->compress_type = BTRFS_COMPRESS_ZSTD; + info->compress_level = + btrfs_compress_str2level( + BTRFS_COMPRESS_ZSTD, + args[0].from + 4); btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); @@ -562,6 +652,8 @@ compress_force = false; no_compress++; } else { + btrfs_err(info, "unrecognized compression value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -604,7 +696,7 @@ btrfs_set_opt(info->mount_opt, NOSSD); btrfs_clear_and_info(info, SSD, "not using ssd optimizations"); - /* Fallthrough */ + fallthrough; case Opt_nossd_spread: btrfs_clear_and_info(info, SSD_SPREAD, "not using spread ssd allocation scheme"); @@ -620,8 +712,11 @@ case Opt_thread_pool: ret = match_int(&args[0], &intarg); if (ret) { + btrfs_err(info, "unrecognized thread_pool value %s", + args[0].from); goto out; } else if (intarg == 0) { + btrfs_err(info, "invalid value 0 for thread_pool"); ret = -EINVAL; goto out; } @@ -645,10 +740,6 @@ goto out; } break; - case Opt_alloc_start: - btrfs_info(info, - "option alloc_start is obsolete, ignored"); - break; case Opt_acl: #ifdef CONFIG_BTRFS_FS_POSIX_ACL info->sb->s_flags |= SB_POSIXACL; @@ -671,6 +762,8 @@ break; case Opt_norecovery: case Opt_nologreplay: + btrfs_warn(info, + "'nologreplay' is deprecated, use 'rescue=nologreplay' instead"); btrfs_set_and_info(info, NOLOGREPLAY, "disabling log replay at mount time"); break; @@ -684,19 +777,38 @@ break; case Opt_ratio: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized metadata_ratio value %s", + args[0].from); goto out; + } info->metadata_ratio = intarg; btrfs_info(info, "metadata ratio %u", info->metadata_ratio); break; case Opt_discard: - btrfs_set_and_info(info, DISCARD, - "turning on discard"); + case Opt_discard_mode: + if (token == Opt_discard || + strcmp(args[0].from, "sync") == 0) { + btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC); + btrfs_set_and_info(info, DISCARD_SYNC, + "turning on sync discard"); + } else if (strcmp(args[0].from, "async") == 0) { + btrfs_clear_opt(info->mount_opt, DISCARD_SYNC); + btrfs_set_and_info(info, DISCARD_ASYNC, + "turning on async discard"); + } else { + btrfs_err(info, "unrecognized discard mode value %s", + args[0].from); + ret = -EINVAL; + goto out; + } break; case Opt_nodiscard: - btrfs_clear_and_info(info, DISCARD, + btrfs_clear_and_info(info, DISCARD_SYNC, "turning off discard"); + btrfs_clear_and_info(info, DISCARD_ASYNC, + "turning off async discard"); break; case Opt_space_cache: case Opt_space_cache_version: @@ -712,6 +824,8 @@ btrfs_set_and_info(info, FREE_SPACE_TREE, "enabling free space tree"); } else { + btrfs_err(info, "unrecognized space_cache value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -730,6 +844,8 @@ } break; case Opt_inode_cache: + btrfs_warn(info, + "the 'inode_cache' option is deprecated and will have no effect from 5.11"); btrfs_set_pending_and_info(info, INODE_MAP_CACHE, "enabling inode map caching"); break; @@ -759,10 +875,11 @@ "disabling auto defrag"); break; case Opt_recovery: - btrfs_warn(info, - "'recovery' is deprecated, use 'usebackuproot' instead"); - /* fall through */ case Opt_usebackuproot: + btrfs_warn(info, + "'%s' is deprecated, use 'rescue=usebackuproot' instead", + token == Opt_recovery ? "recovery" : + "usebackuproot"); btrfs_info(info, "trying to use backup root at mount time"); btrfs_set_opt(info->mount_opt, USEBACKUPROOT); @@ -784,8 +901,12 @@ break; case Opt_check_integrity_print_mask: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, + "unrecognized check_integrity_print_mask value %s", + args[0].from); goto out; + } info->check_integrity_print_mask = intarg; btrfs_info(info, "check_integrity_print_mask 0x%x", info->check_integrity_print_mask); @@ -800,13 +921,15 @@ goto out; #endif case Opt_fatal_errors: - if (strcmp(args[0].from, "panic") == 0) + if (strcmp(args[0].from, "panic") == 0) { btrfs_set_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else if (strcmp(args[0].from, "bug") == 0) + } else if (strcmp(args[0].from, "bug") == 0) { btrfs_clear_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else { + } else { + btrfs_err(info, "unrecognized fatal_errors value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -814,8 +937,12 @@ case Opt_commit_interval: intarg = 0; ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized commit_interval value %s", + args[0].from); + ret = -EINVAL; goto out; + } if (intarg == 0) { btrfs_info(info, "using default commit interval %us", @@ -826,6 +953,14 @@ intarg); } info->commit_interval = intarg; + break; + case Opt_rescue: + ret = parse_rescue_options(info, args[0].from); + if (ret < 0) { + btrfs_err(info, "unrecognized rescue value %s", + args[0].from); + goto out; + } break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment_all: @@ -850,7 +985,7 @@ break; #endif case Opt_err: - btrfs_info(info, "unrecognized mount option '%s'", p); + btrfs_err(info, "unrecognized mount option '%s'", p); ret = -EINVAL; goto out; default: @@ -988,9 +1123,6 @@ *subvol_objectid = subvolid; break; - case Opt_subvolrootid: - pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n"); - break; default: break; } @@ -1005,7 +1137,7 @@ u64 subvol_objectid) { struct btrfs_root *root = fs_info->tree_root; - struct btrfs_root *fs_root; + struct btrfs_root *fs_root = NULL; struct btrfs_root_ref *root_ref; struct btrfs_inode_ref *inode_ref; struct btrfs_key key; @@ -1070,12 +1202,10 @@ dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); btrfs_release_path(path); - key.objectid = subvol_objectid; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - fs_root = btrfs_read_fs_root_no_name(fs_info, &key); + fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true); if (IS_ERR(fs_root)) { ret = PTR_ERR(fs_root); + fs_root = NULL; goto err; } @@ -1120,6 +1250,8 @@ ptr[0] = '/'; btrfs_release_path(path); } + btrfs_put_root(fs_root); + fs_root = NULL; } btrfs_free_path(path); @@ -1132,6 +1264,7 @@ return name; err: + btrfs_put_root(fs_root); btrfs_free_path(path); kfree(name); return ERR_PTR(ret); @@ -1184,7 +1317,6 @@ { struct inode *inode; struct btrfs_fs_info *fs_info = btrfs_sb(sb); - struct btrfs_key key; int err; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -1212,10 +1344,7 @@ return err; } - key.objectid = BTRFS_FIRST_FREE_OBJECTID; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL); + inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto fail_close; @@ -1316,11 +1445,13 @@ if (btrfs_test_opt(info, NOTREELOG)) seq_puts(seq, ",notreelog"); if (btrfs_test_opt(info, NOLOGREPLAY)) - seq_puts(seq, ",nologreplay"); + seq_puts(seq, ",rescue=nologreplay"); if (btrfs_test_opt(info, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); - if (btrfs_test_opt(info, DISCARD)) + if (btrfs_test_opt(info, DISCARD_SYNC)) seq_puts(seq, ",discard"); + if (btrfs_test_opt(info, DISCARD_ASYNC)) + seq_puts(seq, ",discard=async"); if (!(info->sb->s_flags & SB_POSIXACL)) seq_puts(seq, ",noacl"); if (btrfs_test_opt(info, SPACE_CACHE)) @@ -1405,7 +1536,7 @@ } static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, - const char *device_name, struct vfsmount *mnt) + struct vfsmount *mnt) { struct dentry *root; int ret; @@ -1469,56 +1600,6 @@ return root; } -static int parse_security_options(char *orig_opts, - struct security_mnt_opts *sec_opts) -{ - char *secdata = NULL; - int ret = 0; - - secdata = alloc_secdata(); - if (!secdata) - return -ENOMEM; - ret = security_sb_copy_data(orig_opts, secdata); - if (ret) { - free_secdata(secdata); - return ret; - } - ret = security_sb_parse_opts_str(secdata, sec_opts); - free_secdata(secdata); - return ret; -} - -static int setup_security_options(struct btrfs_fs_info *fs_info, - struct super_block *sb, - struct security_mnt_opts *sec_opts) -{ - int ret = 0; - - /* - * Call security_sb_set_mnt_opts() to check whether new sec_opts - * is valid. - */ - ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); - if (ret) - return ret; - -#ifdef CONFIG_SECURITY - if (!fs_info->security_opts.num_mnt_opts) { - /* first time security setup, copy sec_opts to fs_info */ - memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); - } else { - /* - * Since SELinux (the only one supporting security_mnt_opts) - * does NOT support changing context during remount/mount of - * the same sb, this must be the same or part of the same - * security options, just free it. - */ - security_free_mnt_opts(sec_opts); - } -#endif - return ret; -} - /* * Find a superblock for the given device / mount point. * @@ -1533,16 +1614,15 @@ struct btrfs_device *device = NULL; struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; - struct security_mnt_opts new_sec_opts; + void *new_sec_opts = NULL; fmode_t mode = FMODE_READ; int error = 0; if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - security_init_mnt_opts(&new_sec_opts); if (data) { - error = parse_security_options(data, &new_sec_opts); + error = security_sb_eat_lsm_opts(data, &new_sec_opts); if (error) return ERR_PTR(error); } @@ -1550,18 +1630,20 @@ /* * Setup a dummy root and fs_info for test/set super. This is because * we don't actually fill this stuff out until open_ctree, but we need - * it for searching for existing supers, so this lets us do that and - * then open_ctree will properly initialize everything later. + * then open_ctree will properly initialize the file system specific + * settings later. btrfs_init_fs_info initializes the static elements + * of the fs_info (locks and such) to make cleanup easier if we find a + * superblock with our given fs_devices later on at sget() time. */ fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); if (!fs_info) { error = -ENOMEM; goto error_sec_opts; } + btrfs_init_fs_info(fs_info); fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); - security_init_mnt_opts(&fs_info->security_opts); if (!fs_info->super_copy || !fs_info->super_for_commit) { error = -ENOMEM; goto error_fs_info; @@ -1604,7 +1686,7 @@ if (s->s_root) { btrfs_close_devices(fs_devices); - free_fs_info(fs_info); + btrfs_free_fs_info(fs_info); if ((flags ^ s->s_flags) & SB_RDONLY) error = -EBUSY; } else { @@ -1612,16 +1694,12 @@ btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data); } + if (!error) + error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL); + security_free_mnt_opts(&new_sec_opts); if (error) { deactivate_locked_super(s); - goto error_sec_opts; - } - - fs_info = btrfs_sb(s); - error = setup_security_options(fs_info, s, &new_sec_opts); - if (error) { - deactivate_locked_super(s); - goto error_sec_opts; + return ERR_PTR(error); } return dget(s->s_root); @@ -1629,7 +1707,7 @@ error_close_devices: btrfs_close_devices(fs_devices); error_fs_info: - free_fs_info(fs_info); + btrfs_free_fs_info(fs_info); error_sec_opts: security_free_mnt_opts(&new_sec_opts); return ERR_PTR(error); @@ -1662,13 +1740,9 @@ { struct vfsmount *mnt_root; struct dentry *root; - fmode_t mode = FMODE_READ; char *subvol_name = NULL; u64 subvol_objectid = 0; int error = 0; - - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; error = btrfs_parse_subvol_options(data, &subvol_name, &subvol_objectid); @@ -1710,7 +1784,7 @@ } /* mount_subvol() will free subvol_name and mnt_root */ - root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root); + root = mount_subvol(subvol_name, subvol_objectid, mnt_root); out: return root; @@ -1729,7 +1803,6 @@ btrfs_workqueue_set_max(fs_info->workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); @@ -1741,11 +1814,6 @@ btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers, new_pool_size); -} - -static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) -{ - set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); } static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, @@ -1774,7 +1842,13 @@ btrfs_cleanup_defrag_inodes(fs_info); } - clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); + /* If we toggled discard async */ + if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && + btrfs_test_opt(fs_info, DISCARD_ASYNC)) + btrfs_discard_resume(fs_info); + else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && + !btrfs_test_opt(fs_info, DISCARD_ASYNC)) + btrfs_discard_cleanup(fs_info); } static int btrfs_remount(struct super_block *sb, int *flags, char *data) @@ -1790,21 +1864,17 @@ int ret; sync_filesystem(sb); - btrfs_remount_prepare(fs_info); + set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); if (data) { - struct security_mnt_opts new_sec_opts; + void *new_sec_opts = NULL; - security_init_mnt_opts(&new_sec_opts); - ret = parse_security_options(data, &new_sec_opts); + ret = security_sb_eat_lsm_opts(data, &new_sec_opts); + if (!ret) + ret = security_sb_remount(sb, new_sec_opts); + security_free_mnt_opts(&new_sec_opts); if (ret) goto restore; - ret = setup_security_options(fs_info, sb, - &new_sec_opts); - if (ret) { - security_free_mnt_opts(&new_sec_opts); - goto restore; - } } ret = btrfs_parse_options(fs_info, data, *flags); @@ -1824,6 +1894,9 @@ * the filesystem is busy. */ cancel_work_sync(&fs_info->async_reclaim_work); + cancel_work_sync(&fs_info->async_data_reclaim_work); + + btrfs_discard_cleanup(fs_info); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); @@ -1870,7 +1943,7 @@ if (!btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, - "too many missing devices, writeable remount is not allowed"); + "too many missing devices, writable remount is not allowed"); ret = -EACCES; goto restore; } @@ -1920,8 +1993,16 @@ set_bit(BTRFS_FS_OPEN, &fs_info->flags); } out: + /* + * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, + * since the absence of the flag means it can be toggled off by remount. + */ + *flags |= SB_I_VERSION; + wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_opts); + clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); + return 0; restore: @@ -1936,6 +2017,8 @@ old_thread_pool_size, fs_info->thread_pool_size); fs_info->metadata_ratio = old_metadata_ratio; btrfs_remount_cleanup(fs_info, old_opts); + clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); + return ret; } @@ -1975,12 +2058,12 @@ struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; - u64 skip_space; u64 type; u64 avail_space; u64 min_stripe_size; - int min_stripes = 1, num_stripes = 1; + int num_stripes = 1; int i = 0, nr_devices; + const struct btrfs_raid_attr *rattr; /* * We aren't under the device list lock, so this is racy-ish, but good @@ -2004,21 +2087,21 @@ /* calc min stripe number for data space allocation */ type = btrfs_data_alloc_profile(fs_info); - if (type & BTRFS_BLOCK_GROUP_RAID0) { - min_stripes = 2; - num_stripes = nr_devices; - } else if (type & BTRFS_BLOCK_GROUP_RAID1) { - min_stripes = 2; - num_stripes = 2; - } else if (type & BTRFS_BLOCK_GROUP_RAID10) { - min_stripes = 4; - num_stripes = 4; - } + rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)]; - if (type & BTRFS_BLOCK_GROUP_DUP) - min_stripe_size = 2 * BTRFS_STRIPE_LEN; - else - min_stripe_size = BTRFS_STRIPE_LEN; + if (type & BTRFS_BLOCK_GROUP_RAID0) + num_stripes = nr_devices; + else if (type & BTRFS_BLOCK_GROUP_RAID1) + num_stripes = 2; + else if (type & BTRFS_BLOCK_GROUP_RAID1C3) + num_stripes = 3; + else if (type & BTRFS_BLOCK_GROUP_RAID1C4) + num_stripes = 4; + else if (type & BTRFS_BLOCK_GROUP_RAID10) + num_stripes = 4; + + /* Adjust for more than 1 stripe per device */ + min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN; rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { @@ -2034,27 +2117,20 @@ avail_space = device->total_bytes - device->bytes_used; /* align with stripe_len */ - avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN); - avail_space *= BTRFS_STRIPE_LEN; + avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN); /* * In order to avoid overwriting the superblock on the drive, * btrfs starts at an offset of at least 1MB when doing chunk * allocation. + * + * This ensures we have at least min_stripe_size free space + * after excluding 1MB. */ - skip_space = SZ_1M; - - /* - * we can use the free space in [0, skip_space - 1], subtract - * it from the total. - */ - if (avail_space && avail_space >= skip_space) - avail_space -= skip_space; - else - avail_space = 0; - - if (avail_space < min_stripe_size) + if (avail_space <= SZ_1M + min_stripe_size) continue; + + avail_space -= SZ_1M; devices_info[i].dev = device; devices_info[i].max_avail = avail_space; @@ -2069,9 +2145,8 @@ i = nr_devices - 1; avail_space = 0; - while (nr_devices >= min_stripes) { - if (num_stripes > nr_devices) - num_stripes = nr_devices; + while (nr_devices >= rattr->devs_min) { + num_stripes = min(num_stripes, nr_devices); if (devices_info[i].max_avail >= min_stripe_size) { int j; @@ -2108,21 +2183,19 @@ { struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = fs_info->super_copy; - struct list_head *head = &fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; u64 total_free_meta = 0; int bits = dentry->d_sb->s_blocksize_bits; - __be32 *fsid = (__be32 *)fs_info->fsid; + __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; u64 thresh = 0; int mixed = 0; - rcu_read_lock(); - list_for_each_entry_rcu(found, head, list) { + list_for_each_entry(found, &fs_info->space_info, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { int i; @@ -2150,8 +2223,6 @@ total_used += found->disk_used; } - - rcu_read_unlock(); buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); buf->f_blocks >>= bits; @@ -2196,7 +2267,7 @@ * calculated f_bavail. */ if (!mixed && block_rsv->space_info->full && - total_free_meta - thresh < block_rsv->size) + (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size)) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; @@ -2209,8 +2280,10 @@ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); /* Mask in the root object ID too, to disambiguate subvols */ - buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32; - buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid; + buf->f_fsid.val[0] ^= + BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32; + buf->f_fsid.val[1] ^= + BTRFS_I(d_inode(dentry))->root->root_key.objectid; return 0; } @@ -2219,7 +2292,7 @@ { struct btrfs_fs_info *fs_info = btrfs_sb(sb); kill_anon_super(sb); - free_fs_info(fs_info); + btrfs_free_fs_info(fs_info); } static struct file_system_type btrfs_fs_type = { @@ -2252,7 +2325,7 @@ } /* - * used by btrfsctl to scan devices when no FS is mounted + * Used by /dev/btrfs-control for devices ioctls. */ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -2276,6 +2349,9 @@ &btrfs_root_fs_type); ret = PTR_ERR_OR_ZERO(device); mutex_unlock(&uuid_mutex); + break; + case BTRFS_IOC_FORGET_DEV: + ret = btrfs_forget_devices(vol->name); break; case BTRFS_IOC_DEVICES_READY: mutex_lock(&uuid_mutex); @@ -2340,7 +2416,7 @@ * device_list_mutex here as we only read the device data and the list * is protected by RCU. Even if a device is deleted during the list * traversals, we'll get valid data, the freeing callback will wait at - * least until until the rcu_read_unlock. + * least until the rcu_read_unlock. */ rcu_read_lock(); list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) { @@ -2369,6 +2445,7 @@ .show_devname = btrfs_show_devname, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, + .free_inode = btrfs_free_inode, .statfs = btrfs_statfs, .remount_fs = btrfs_remount, .freeze_fs = btrfs_freeze, @@ -2378,7 +2455,7 @@ static const struct file_operations btrfs_ctl_fops = { .open = btrfs_control_open, .unlocked_ioctl = btrfs_control_ioctl, - .compat_ioctl = btrfs_control_ioctl, + .compat_ioctl = compat_ptr_ioctl, .owner = THIS_MODULE, .llseek = noop_llseek, }; @@ -2441,9 +2518,13 @@ if (err) goto free_cachep; - err = extent_map_init(); + err = extent_state_cache_init(); if (err) goto free_extent_io; + + err = extent_map_init(); + if (err) + goto free_extent_state_cache; err = ordered_data_init(); if (err) @@ -2503,6 +2584,8 @@ ordered_data_exit(); free_extent_map: extent_map_exit(); +free_extent_state_cache: + extent_state_cache_exit(); free_extent_io: extent_io_exit(); free_cachep: @@ -2523,6 +2606,7 @@ btrfs_prelim_ref_exit(); ordered_data_exit(); extent_map_exit(); + extent_state_cache_exit(); extent_io_exit(); btrfs_interface_exit(); btrfs_end_io_wq_exit(); @@ -2536,3 +2620,8 @@ module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); +MODULE_SOFTDEP("pre: crc32c"); +MODULE_SOFTDEP("pre: xxhash64"); +MODULE_SOFTDEP("pre: sha256"); +MODULE_SOFTDEP("pre: blake2b-256"); -- Gitblit v1.6.2