hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/btrfs/super.c
....@@ -42,7 +42,11 @@
4242 #include "dev-replace.h"
4343 #include "free-space-cache.h"
4444 #include "backref.h"
45
+#include "space-info.h"
46
+#include "sysfs.h"
4547 #include "tests/btrfs-tests.h"
48
+#include "block-group.h"
49
+#include "discard.h"
4650
4751 #include "qgroup.h"
4852 #define CREATE_TRACE_POINTS
....@@ -63,28 +67,52 @@
6367
6468 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
6569
66
-const char *btrfs_decode_error(int errno)
70
+/*
71
+ * Generally the error codes correspond to their respective errors, but there
72
+ * are a few special cases.
73
+ *
74
+ * EUCLEAN: Any sort of corruption that we encounter. The tree-checker for
75
+ * instance will return EUCLEAN if any of the blocks are corrupted in
76
+ * a way that is problematic. We want to reserve EUCLEAN for these
77
+ * sort of corruptions.
78
+ *
79
+ * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
80
+ * need to use EROFS for this case. We will have no idea of the
81
+ * original failure, that will have been reported at the time we tripped
82
+ * over the error. Each subsequent error that doesn't have any context
83
+ * of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
84
+ */
85
+const char * __attribute_const__ btrfs_decode_error(int errno)
6786 {
6887 char *errstr = "unknown";
6988
7089 switch (errno) {
71
- case -EIO:
90
+ case -ENOENT: /* -2 */
91
+ errstr = "No such entry";
92
+ break;
93
+ case -EIO: /* -5 */
7294 errstr = "IO failure";
7395 break;
74
- case -ENOMEM:
96
+ case -ENOMEM: /* -12*/
7597 errstr = "Out of memory";
7698 break;
77
- case -EROFS:
78
- errstr = "Readonly filesystem";
79
- break;
80
- case -EEXIST:
99
+ case -EEXIST: /* -17 */
81100 errstr = "Object already exists";
82101 break;
83
- case -ENOSPC:
102
+ case -ENOSPC: /* -28 */
84103 errstr = "No space left";
85104 break;
86
- case -ENOENT:
87
- errstr = "No such entry";
105
+ case -EROFS: /* -30 */
106
+ errstr = "Readonly filesystem";
107
+ break;
108
+ case -EOPNOTSUPP: /* -95 */
109
+ errstr = "Operation not supported";
110
+ break;
111
+ case -EUCLEAN: /* -117 */
112
+ errstr = "Filesystem corrupted";
113
+ break;
114
+ case -EDQUOT: /* -122 */
115
+ errstr = "Quota exceeded";
88116 break;
89117 }
90118
....@@ -93,7 +121,7 @@
93121
94122 /*
95123 * __btrfs_handle_fs_error decodes expected errors from the caller and
96
- * invokes the approciate error response.
124
+ * invokes the appropriate error response.
97125 */
98126 __cold
99127 void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
....@@ -143,6 +171,8 @@
143171 if (sb_rdonly(sb))
144172 return;
145173
174
+ btrfs_discard_stop(fs_info);
175
+
146176 /* btrfs handle error by forcing the filesystem readonly */
147177 sb->s_flags |= SB_RDONLY;
148178 btrfs_info(fs_info, "forced readonly");
....@@ -151,7 +181,7 @@
151181 * although there is no way to update the progress. It would add the
152182 * risk of a deadlock, therefore the canceling is omitted. The only
153183 * penalty is that some I/O remains active until the procedure
154
- * completes. The next time when the filesystem is mounted writeable
184
+ * completes. The next time when the filesystem is mounted writable
155185 * again, the device replace operation continues.
156186 */
157187 }
....@@ -184,7 +214,7 @@
184214 RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
185215 };
186216
187
-void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
217
+void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
188218 {
189219 char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
190220 struct va_format vaf;
....@@ -238,7 +268,7 @@
238268 {
239269 struct btrfs_fs_info *fs_info = trans->fs_info;
240270
241
- trans->aborted = errno;
271
+ WRITE_ONCE(trans->aborted, errno);
242272 /* Nothing used. The other threads that have joined this
243273 * transaction may be able to continue. */
244274 if (!trans->dirty && list_empty(&trans->new_bgs)) {
....@@ -310,7 +340,7 @@
310340 Opt_datasum, Opt_nodatasum,
311341 Opt_defrag, Opt_nodefrag,
312342 Opt_discard, Opt_nodiscard,
313
- Opt_nologreplay,
343
+ Opt_discard_mode,
314344 Opt_norecovery,
315345 Opt_ratio,
316346 Opt_rescan_uuid_tree,
....@@ -324,13 +354,15 @@
324354 Opt_subvolid,
325355 Opt_thread_pool,
326356 Opt_treelog, Opt_notreelog,
327
- Opt_usebackuproot,
328357 Opt_user_subvol_rm_allowed,
329358
359
+ /* Rescue options */
360
+ Opt_rescue,
361
+ Opt_usebackuproot,
362
+ Opt_nologreplay,
363
+
330364 /* Deprecated options */
331
- Opt_alloc_start,
332365 Opt_recovery,
333
- Opt_subvolrootid,
334366
335367 /* Debugging options */
336368 Opt_check_integrity,
....@@ -372,8 +404,8 @@
372404 {Opt_defrag, "autodefrag"},
373405 {Opt_nodefrag, "noautodefrag"},
374406 {Opt_discard, "discard"},
407
+ {Opt_discard_mode, "discard=%s"},
375408 {Opt_nodiscard, "nodiscard"},
376
- {Opt_nologreplay, "nologreplay"},
377409 {Opt_norecovery, "norecovery"},
378410 {Opt_ratio, "metadata_ratio=%u"},
379411 {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
....@@ -391,13 +423,17 @@
391423 {Opt_thread_pool, "thread_pool=%u"},
392424 {Opt_treelog, "treelog"},
393425 {Opt_notreelog, "notreelog"},
394
- {Opt_usebackuproot, "usebackuproot"},
395426 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
396427
428
+ /* Rescue options */
429
+ {Opt_rescue, "rescue=%s"},
430
+ /* Deprecated, with alias rescue=nologreplay */
431
+ {Opt_nologreplay, "nologreplay"},
432
+ /* Deprecated, with alias rescue=usebackuproot */
433
+ {Opt_usebackuproot, "usebackuproot"},
434
+
397435 /* Deprecated options */
398
- {Opt_alloc_start, "alloc_start=%s"},
399436 {Opt_recovery, "recovery"},
400
- {Opt_subvolrootid, "subvolrootid=%d"},
401437
402438 /* Debugging options */
403439 {Opt_check_integrity, "check_int"},
....@@ -415,6 +451,55 @@
415451 #endif
416452 {Opt_err, NULL},
417453 };
454
+
455
+static const match_table_t rescue_tokens = { /* sub-option names matched by parse_rescue_options() */
456
+ {Opt_usebackuproot, "usebackuproot"},
457
+ {Opt_nologreplay, "nologreplay"},
458
+ {Opt_err, NULL},
459
+};
460
+
461
+static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
462
+{ /* Parse a ':'-separated list of rescue sub-options; returns 0, -ENOMEM or -EINVAL. */
463
+ char *opts;
464
+ char *orig;
465
+ char *p;
466
+ substring_t args[MAX_OPT_ARGS];
467
+ int ret = 0;
468
+
469
+ opts = kstrdup(options, GFP_KERNEL); /* private copy: options is const and strsep() below writes into the string */
470
+ if (!opts)
471
+ return -ENOMEM;
472
+ orig = opts; /* strsep() advances opts, so keep the start for kfree() */
473
+
474
+ while ((p = strsep(&opts, ":")) != NULL) {
475
+ int token;
476
+
477
+ if (!*p) /* empty token (e.g. "a::b" or a trailing ':'): ignore it */
478
+ continue;
479
+ token = match_token(p, rescue_tokens, args);
480
+ switch (token){
481
+ case Opt_usebackuproot:
482
+ btrfs_info(info,
483
+ "trying to use backup root at mount time");
484
+ btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
485
+ break;
486
+ case Opt_nologreplay:
487
+ btrfs_set_and_info(info, NOLOGREPLAY,
488
+ "disabling log replay at mount time");
489
+ break;
490
+ case Opt_err: /* p matched no named entry in rescue_tokens */
491
+ btrfs_info(info, "unrecognized rescue option '%s'", p);
492
+ ret = -EINVAL;
493
+ goto out;
494
+ default:
495
+ break;
496
+ }
497
+
498
+ }
499
+out: /* single exit: the copy is freed on both the success and the error path */
500
+ kfree(orig);
501
+ return ret;
502
+}
418503
419504 /*
420505 * Regular mount options parser. Everything that is needed only when
....@@ -463,7 +548,6 @@
463548 case Opt_subvol:
464549 case Opt_subvol_empty:
465550 case Opt_subvolid:
466
- case Opt_subvolrootid:
467551 case Opt_device:
468552 /*
469553 * These are parsed by btrfs_parse_subvol_options or
....@@ -507,7 +591,7 @@
507591 case Opt_compress_force:
508592 case Opt_compress_force_type:
509593 compress_force = true;
510
- /* Fallthrough */
594
+ fallthrough;
511595 case Opt_compress:
512596 case Opt_compress_type:
513597 saved_compress_type = btrfs_test_opt(info,
....@@ -531,7 +615,9 @@
531615 if (token != Opt_compress &&
532616 token != Opt_compress_force)
533617 info->compress_level =
534
- btrfs_compress_str2level(args[0].from);
618
+ btrfs_compress_str2level(
619
+ BTRFS_COMPRESS_ZLIB,
620
+ args[0].from + 4);
535621 btrfs_set_opt(info->mount_opt, COMPRESS);
536622 btrfs_clear_opt(info->mount_opt, NODATACOW);
537623 btrfs_clear_opt(info->mount_opt, NODATASUM);
....@@ -545,9 +631,13 @@
545631 btrfs_clear_opt(info->mount_opt, NODATASUM);
546632 btrfs_set_fs_incompat(info, COMPRESS_LZO);
547633 no_compress = 0;
548
- } else if (strcmp(args[0].from, "zstd") == 0) {
634
+ } else if (strncmp(args[0].from, "zstd", 4) == 0) {
549635 compress_type = "zstd";
550636 info->compress_type = BTRFS_COMPRESS_ZSTD;
637
+ info->compress_level =
638
+ btrfs_compress_str2level(
639
+ BTRFS_COMPRESS_ZSTD,
640
+ args[0].from + 4);
551641 btrfs_set_opt(info->mount_opt, COMPRESS);
552642 btrfs_clear_opt(info->mount_opt, NODATACOW);
553643 btrfs_clear_opt(info->mount_opt, NODATASUM);
....@@ -562,6 +652,8 @@
562652 compress_force = false;
563653 no_compress++;
564654 } else {
655
+ btrfs_err(info, "unrecognized compression value %s",
656
+ args[0].from);
565657 ret = -EINVAL;
566658 goto out;
567659 }
....@@ -604,7 +696,7 @@
604696 btrfs_set_opt(info->mount_opt, NOSSD);
605697 btrfs_clear_and_info(info, SSD,
606698 "not using ssd optimizations");
607
- /* Fallthrough */
699
+ fallthrough;
608700 case Opt_nossd_spread:
609701 btrfs_clear_and_info(info, SSD_SPREAD,
610702 "not using spread ssd allocation scheme");
....@@ -620,8 +712,11 @@
620712 case Opt_thread_pool:
621713 ret = match_int(&args[0], &intarg);
622714 if (ret) {
715
+ btrfs_err(info, "unrecognized thread_pool value %s",
716
+ args[0].from);
623717 goto out;
624718 } else if (intarg == 0) {
719
+ btrfs_err(info, "invalid value 0 for thread_pool");
625720 ret = -EINVAL;
626721 goto out;
627722 }
....@@ -645,10 +740,6 @@
645740 goto out;
646741 }
647742 break;
648
- case Opt_alloc_start:
649
- btrfs_info(info,
650
- "option alloc_start is obsolete, ignored");
651
- break;
652743 case Opt_acl:
653744 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
654745 info->sb->s_flags |= SB_POSIXACL;
....@@ -671,6 +762,8 @@
671762 break;
672763 case Opt_norecovery:
673764 case Opt_nologreplay:
765
+ btrfs_warn(info,
766
+ "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
674767 btrfs_set_and_info(info, NOLOGREPLAY,
675768 "disabling log replay at mount time");
676769 break;
....@@ -684,19 +777,38 @@
684777 break;
685778 case Opt_ratio:
686779 ret = match_int(&args[0], &intarg);
687
- if (ret)
780
+ if (ret) {
781
+ btrfs_err(info, "unrecognized metadata_ratio value %s",
782
+ args[0].from);
688783 goto out;
784
+ }
689785 info->metadata_ratio = intarg;
690786 btrfs_info(info, "metadata ratio %u",
691787 info->metadata_ratio);
692788 break;
693789 case Opt_discard:
694
- btrfs_set_and_info(info, DISCARD,
695
- "turning on discard");
790
+ case Opt_discard_mode:
791
+ if (token == Opt_discard ||
792
+ strcmp(args[0].from, "sync") == 0) {
793
+ btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
794
+ btrfs_set_and_info(info, DISCARD_SYNC,
795
+ "turning on sync discard");
796
+ } else if (strcmp(args[0].from, "async") == 0) {
797
+ btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
798
+ btrfs_set_and_info(info, DISCARD_ASYNC,
799
+ "turning on async discard");
800
+ } else {
801
+ btrfs_err(info, "unrecognized discard mode value %s",
802
+ args[0].from);
803
+ ret = -EINVAL;
804
+ goto out;
805
+ }
696806 break;
697807 case Opt_nodiscard:
698
- btrfs_clear_and_info(info, DISCARD,
808
+ btrfs_clear_and_info(info, DISCARD_SYNC,
699809 "turning off discard");
810
+ btrfs_clear_and_info(info, DISCARD_ASYNC,
811
+ "turning off async discard");
700812 break;
701813 case Opt_space_cache:
702814 case Opt_space_cache_version:
....@@ -712,6 +824,8 @@
712824 btrfs_set_and_info(info, FREE_SPACE_TREE,
713825 "enabling free space tree");
714826 } else {
827
+ btrfs_err(info, "unrecognized space_cache value %s",
828
+ args[0].from);
715829 ret = -EINVAL;
716830 goto out;
717831 }
....@@ -730,6 +844,8 @@
730844 }
731845 break;
732846 case Opt_inode_cache:
847
+ btrfs_warn(info,
848
+ "the 'inode_cache' option is deprecated and will have no effect from 5.11");
733849 btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
734850 "enabling inode map caching");
735851 break;
....@@ -759,10 +875,11 @@
759875 "disabling auto defrag");
760876 break;
761877 case Opt_recovery:
762
- btrfs_warn(info,
763
- "'recovery' is deprecated, use 'usebackuproot' instead");
764
- /* fall through */
765878 case Opt_usebackuproot:
879
+ btrfs_warn(info,
880
+ "'%s' is deprecated, use 'rescue=usebackuproot' instead",
881
+ token == Opt_recovery ? "recovery" :
882
+ "usebackuproot");
766883 btrfs_info(info,
767884 "trying to use backup root at mount time");
768885 btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
....@@ -784,8 +901,12 @@
784901 break;
785902 case Opt_check_integrity_print_mask:
786903 ret = match_int(&args[0], &intarg);
787
- if (ret)
904
+ if (ret) {
905
+ btrfs_err(info,
906
+ "unrecognized check_integrity_print_mask value %s",
907
+ args[0].from);
788908 goto out;
909
+ }
789910 info->check_integrity_print_mask = intarg;
790911 btrfs_info(info, "check_integrity_print_mask 0x%x",
791912 info->check_integrity_print_mask);
....@@ -800,13 +921,15 @@
800921 goto out;
801922 #endif
802923 case Opt_fatal_errors:
803
- if (strcmp(args[0].from, "panic") == 0)
924
+ if (strcmp(args[0].from, "panic") == 0) {
804925 btrfs_set_opt(info->mount_opt,
805926 PANIC_ON_FATAL_ERROR);
806
- else if (strcmp(args[0].from, "bug") == 0)
927
+ } else if (strcmp(args[0].from, "bug") == 0) {
807928 btrfs_clear_opt(info->mount_opt,
808929 PANIC_ON_FATAL_ERROR);
809
- else {
930
+ } else {
931
+ btrfs_err(info, "unrecognized fatal_errors value %s",
932
+ args[0].from);
810933 ret = -EINVAL;
811934 goto out;
812935 }
....@@ -814,8 +937,12 @@
814937 case Opt_commit_interval:
815938 intarg = 0;
816939 ret = match_int(&args[0], &intarg);
817
- if (ret)
940
+ if (ret) {
941
+ btrfs_err(info, "unrecognized commit_interval value %s",
942
+ args[0].from);
943
+ ret = -EINVAL;
818944 goto out;
945
+ }
819946 if (intarg == 0) {
820947 btrfs_info(info,
821948 "using default commit interval %us",
....@@ -826,6 +953,14 @@
826953 intarg);
827954 }
828955 info->commit_interval = intarg;
956
+ break;
957
+ case Opt_rescue:
958
+ ret = parse_rescue_options(info, args[0].from);
959
+ if (ret < 0) {
960
+ btrfs_err(info, "unrecognized rescue value %s",
961
+ args[0].from);
962
+ goto out;
963
+ }
829964 break;
830965 #ifdef CONFIG_BTRFS_DEBUG
831966 case Opt_fragment_all:
....@@ -850,7 +985,7 @@
850985 break;
851986 #endif
852987 case Opt_err:
853
- btrfs_info(info, "unrecognized mount option '%s'", p);
988
+ btrfs_err(info, "unrecognized mount option '%s'", p);
854989 ret = -EINVAL;
855990 goto out;
856991 default:
....@@ -988,9 +1123,6 @@
9881123
9891124 *subvol_objectid = subvolid;
9901125 break;
991
- case Opt_subvolrootid:
992
- pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
993
- break;
9941126 default:
9951127 break;
9961128 }
....@@ -1005,7 +1137,7 @@
10051137 u64 subvol_objectid)
10061138 {
10071139 struct btrfs_root *root = fs_info->tree_root;
1008
- struct btrfs_root *fs_root;
1140
+ struct btrfs_root *fs_root = NULL;
10091141 struct btrfs_root_ref *root_ref;
10101142 struct btrfs_inode_ref *inode_ref;
10111143 struct btrfs_key key;
....@@ -1070,12 +1202,10 @@
10701202 dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
10711203 btrfs_release_path(path);
10721204
1073
- key.objectid = subvol_objectid;
1074
- key.type = BTRFS_ROOT_ITEM_KEY;
1075
- key.offset = (u64)-1;
1076
- fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
1205
+ fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
10771206 if (IS_ERR(fs_root)) {
10781207 ret = PTR_ERR(fs_root);
1208
+ fs_root = NULL;
10791209 goto err;
10801210 }
10811211
....@@ -1120,6 +1250,8 @@
11201250 ptr[0] = '/';
11211251 btrfs_release_path(path);
11221252 }
1253
+ btrfs_put_root(fs_root);
1254
+ fs_root = NULL;
11231255 }
11241256
11251257 btrfs_free_path(path);
....@@ -1132,6 +1264,7 @@
11321264 return name;
11331265
11341266 err:
1267
+ btrfs_put_root(fs_root);
11351268 btrfs_free_path(path);
11361269 kfree(name);
11371270 return ERR_PTR(ret);
....@@ -1184,7 +1317,6 @@
11841317 {
11851318 struct inode *inode;
11861319 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1187
- struct btrfs_key key;
11881320 int err;
11891321
11901322 sb->s_maxbytes = MAX_LFS_FILESIZE;
....@@ -1212,10 +1344,7 @@
12121344 return err;
12131345 }
12141346
1215
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
1216
- key.type = BTRFS_INODE_ITEM_KEY;
1217
- key.offset = 0;
1218
- inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
1347
+ inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
12191348 if (IS_ERR(inode)) {
12201349 err = PTR_ERR(inode);
12211350 goto fail_close;
....@@ -1316,11 +1445,13 @@
13161445 if (btrfs_test_opt(info, NOTREELOG))
13171446 seq_puts(seq, ",notreelog");
13181447 if (btrfs_test_opt(info, NOLOGREPLAY))
1319
- seq_puts(seq, ",nologreplay");
1448
+ seq_puts(seq, ",rescue=nologreplay");
13201449 if (btrfs_test_opt(info, FLUSHONCOMMIT))
13211450 seq_puts(seq, ",flushoncommit");
1322
- if (btrfs_test_opt(info, DISCARD))
1451
+ if (btrfs_test_opt(info, DISCARD_SYNC))
13231452 seq_puts(seq, ",discard");
1453
+ if (btrfs_test_opt(info, DISCARD_ASYNC))
1454
+ seq_puts(seq, ",discard=async");
13241455 if (!(info->sb->s_flags & SB_POSIXACL))
13251456 seq_puts(seq, ",noacl");
13261457 if (btrfs_test_opt(info, SPACE_CACHE))
....@@ -1405,7 +1536,7 @@
14051536 }
14061537
14071538 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
1408
- const char *device_name, struct vfsmount *mnt)
1539
+ struct vfsmount *mnt)
14091540 {
14101541 struct dentry *root;
14111542 int ret;
....@@ -1469,56 +1600,6 @@
14691600 return root;
14701601 }
14711602
1472
-static int parse_security_options(char *orig_opts,
1473
- struct security_mnt_opts *sec_opts)
1474
-{
1475
- char *secdata = NULL;
1476
- int ret = 0;
1477
-
1478
- secdata = alloc_secdata();
1479
- if (!secdata)
1480
- return -ENOMEM;
1481
- ret = security_sb_copy_data(orig_opts, secdata);
1482
- if (ret) {
1483
- free_secdata(secdata);
1484
- return ret;
1485
- }
1486
- ret = security_sb_parse_opts_str(secdata, sec_opts);
1487
- free_secdata(secdata);
1488
- return ret;
1489
-}
1490
-
1491
-static int setup_security_options(struct btrfs_fs_info *fs_info,
1492
- struct super_block *sb,
1493
- struct security_mnt_opts *sec_opts)
1494
-{
1495
- int ret = 0;
1496
-
1497
- /*
1498
- * Call security_sb_set_mnt_opts() to check whether new sec_opts
1499
- * is valid.
1500
- */
1501
- ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
1502
- if (ret)
1503
- return ret;
1504
-
1505
-#ifdef CONFIG_SECURITY
1506
- if (!fs_info->security_opts.num_mnt_opts) {
1507
- /* first time security setup, copy sec_opts to fs_info */
1508
- memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
1509
- } else {
1510
- /*
1511
- * Since SELinux (the only one supporting security_mnt_opts)
1512
- * does NOT support changing context during remount/mount of
1513
- * the same sb, this must be the same or part of the same
1514
- * security options, just free it.
1515
- */
1516
- security_free_mnt_opts(sec_opts);
1517
- }
1518
-#endif
1519
- return ret;
1520
-}
1521
-
15221603 /*
15231604 * Find a superblock for the given device / mount point.
15241605 *
....@@ -1533,16 +1614,15 @@
15331614 struct btrfs_device *device = NULL;
15341615 struct btrfs_fs_devices *fs_devices = NULL;
15351616 struct btrfs_fs_info *fs_info = NULL;
1536
- struct security_mnt_opts new_sec_opts;
1617
+ void *new_sec_opts = NULL;
15371618 fmode_t mode = FMODE_READ;
15381619 int error = 0;
15391620
15401621 if (!(flags & SB_RDONLY))
15411622 mode |= FMODE_WRITE;
15421623
1543
- security_init_mnt_opts(&new_sec_opts);
15441624 if (data) {
1545
- error = parse_security_options(data, &new_sec_opts);
1625
+ error = security_sb_eat_lsm_opts(data, &new_sec_opts);
15461626 if (error)
15471627 return ERR_PTR(error);
15481628 }
....@@ -1550,18 +1630,20 @@
15501630 /*
15511631 * Setup a dummy root and fs_info for test/set super. This is because
15521632 * we don't actually fill this stuff out until open_ctree, but we need
1553
- * it for searching for existing supers, so this lets us do that and
1554
- * then open_ctree will properly initialize everything later.
1633
+ * it now; open_ctree will properly initialize the file system specific
1634
+ * settings later. btrfs_init_fs_info initializes the static elements
1635
+ * of the fs_info (locks and such) to make cleanup easier if we find a
1636
+ * superblock with our given fs_devices later on at sget() time.
15551637 */
15561638 fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
15571639 if (!fs_info) {
15581640 error = -ENOMEM;
15591641 goto error_sec_opts;
15601642 }
1643
+ btrfs_init_fs_info(fs_info);
15611644
15621645 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
15631646 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1564
- security_init_mnt_opts(&fs_info->security_opts);
15651647 if (!fs_info->super_copy || !fs_info->super_for_commit) {
15661648 error = -ENOMEM;
15671649 goto error_fs_info;
....@@ -1604,7 +1686,7 @@
16041686
16051687 if (s->s_root) {
16061688 btrfs_close_devices(fs_devices);
1607
- free_fs_info(fs_info);
1689
+ btrfs_free_fs_info(fs_info);
16081690 if ((flags ^ s->s_flags) & SB_RDONLY)
16091691 error = -EBUSY;
16101692 } else {
....@@ -1612,16 +1694,12 @@
16121694 btrfs_sb(s)->bdev_holder = fs_type;
16131695 error = btrfs_fill_super(s, fs_devices, data);
16141696 }
1697
+ if (!error)
1698
+ error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
1699
+ security_free_mnt_opts(&new_sec_opts);
16151700 if (error) {
16161701 deactivate_locked_super(s);
1617
- goto error_sec_opts;
1618
- }
1619
-
1620
- fs_info = btrfs_sb(s);
1621
- error = setup_security_options(fs_info, s, &new_sec_opts);
1622
- if (error) {
1623
- deactivate_locked_super(s);
1624
- goto error_sec_opts;
1702
+ return ERR_PTR(error);
16251703 }
16261704
16271705 return dget(s->s_root);
....@@ -1629,7 +1707,7 @@
16291707 error_close_devices:
16301708 btrfs_close_devices(fs_devices);
16311709 error_fs_info:
1632
- free_fs_info(fs_info);
1710
+ btrfs_free_fs_info(fs_info);
16331711 error_sec_opts:
16341712 security_free_mnt_opts(&new_sec_opts);
16351713 return ERR_PTR(error);
....@@ -1662,13 +1740,9 @@
16621740 {
16631741 struct vfsmount *mnt_root;
16641742 struct dentry *root;
1665
- fmode_t mode = FMODE_READ;
16661743 char *subvol_name = NULL;
16671744 u64 subvol_objectid = 0;
16681745 int error = 0;
1669
-
1670
- if (!(flags & SB_RDONLY))
1671
- mode |= FMODE_WRITE;
16721746
16731747 error = btrfs_parse_subvol_options(data, &subvol_name,
16741748 &subvol_objectid);
....@@ -1710,7 +1784,7 @@
17101784 }
17111785
17121786 /* mount_subvol() will free subvol_name and mnt_root */
1713
- root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
1787
+ root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
17141788
17151789 out:
17161790 return root;
....@@ -1729,7 +1803,6 @@
17291803
17301804 btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
17311805 btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
1732
- btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
17331806 btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
17341807 btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
17351808 btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
....@@ -1741,11 +1814,6 @@
17411814 btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
17421815 btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
17431816 new_pool_size);
1744
-}
1745
-
1746
-static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
1747
-{
1748
- set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
17491817 }
17501818
17511819 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
....@@ -1774,7 +1842,13 @@
17741842 btrfs_cleanup_defrag_inodes(fs_info);
17751843 }
17761844
1777
- clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1845
+ /* If we toggled discard async */
1846
+ if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
1847
+ btrfs_test_opt(fs_info, DISCARD_ASYNC))
1848
+ btrfs_discard_resume(fs_info);
1849
+ else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
1850
+ !btrfs_test_opt(fs_info, DISCARD_ASYNC))
1851
+ btrfs_discard_cleanup(fs_info);
17781852 }
17791853
17801854 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
....@@ -1790,21 +1864,17 @@
17901864 int ret;
17911865
17921866 sync_filesystem(sb);
1793
- btrfs_remount_prepare(fs_info);
1867
+ set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
17941868
17951869 if (data) {
1796
- struct security_mnt_opts new_sec_opts;
1870
+ void *new_sec_opts = NULL;
17971871
1798
- security_init_mnt_opts(&new_sec_opts);
1799
- ret = parse_security_options(data, &new_sec_opts);
1872
+ ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
1873
+ if (!ret)
1874
+ ret = security_sb_remount(sb, new_sec_opts);
1875
+ security_free_mnt_opts(&new_sec_opts);
18001876 if (ret)
18011877 goto restore;
1802
- ret = setup_security_options(fs_info, sb,
1803
- &new_sec_opts);
1804
- if (ret) {
1805
- security_free_mnt_opts(&new_sec_opts);
1806
- goto restore;
1807
- }
18081878 }
18091879
18101880 ret = btrfs_parse_options(fs_info, data, *flags);
....@@ -1824,6 +1894,9 @@
18241894 * the filesystem is busy.
18251895 */
18261896 cancel_work_sync(&fs_info->async_reclaim_work);
1897
+ cancel_work_sync(&fs_info->async_data_reclaim_work);
1898
+
1899
+ btrfs_discard_cleanup(fs_info);
18271900
18281901 /* wait for the uuid_scan task to finish */
18291902 down(&fs_info->uuid_tree_rescan_sem);
....@@ -1870,7 +1943,7 @@
18701943
18711944 if (!btrfs_check_rw_degradable(fs_info, NULL)) {
18721945 btrfs_warn(fs_info,
1873
- "too many missing devices, writeable remount is not allowed");
1946
+ "too many missing devices, writable remount is not allowed");
18741947 ret = -EACCES;
18751948 goto restore;
18761949 }
....@@ -1920,8 +1993,16 @@
19201993 set_bit(BTRFS_FS_OPEN, &fs_info->flags);
19211994 }
19221995 out:
1996
+ /*
1997
+ * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
1998
+ * since the absence of the flag means it can be toggled off by remount.
1999
+ */
2000
+ *flags |= SB_I_VERSION;
2001
+
19232002 wake_up_process(fs_info->transaction_kthread);
19242003 btrfs_remount_cleanup(fs_info, old_opts);
2004
+ clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2005
+
19252006 return 0;
19262007
19272008 restore:
....@@ -1936,6 +2017,8 @@
19362017 old_thread_pool_size, fs_info->thread_pool_size);
19372018 fs_info->metadata_ratio = old_metadata_ratio;
19382019 btrfs_remount_cleanup(fs_info, old_opts);
2020
+ clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2021
+
19392022 return ret;
19402023 }
19412024
....@@ -1975,12 +2058,12 @@
19752058 struct btrfs_device_info *devices_info;
19762059 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
19772060 struct btrfs_device *device;
1978
- u64 skip_space;
19792061 u64 type;
19802062 u64 avail_space;
19812063 u64 min_stripe_size;
1982
- int min_stripes = 1, num_stripes = 1;
2064
+ int num_stripes = 1;
19832065 int i = 0, nr_devices;
2066
+ const struct btrfs_raid_attr *rattr;
19842067
19852068 /*
19862069 * We aren't under the device list lock, so this is racy-ish, but good
....@@ -2004,21 +2087,21 @@
20042087
20052088 /* calc min stripe number for data space allocation */
20062089 type = btrfs_data_alloc_profile(fs_info);
2007
- if (type & BTRFS_BLOCK_GROUP_RAID0) {
2008
- min_stripes = 2;
2009
- num_stripes = nr_devices;
2010
- } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
2011
- min_stripes = 2;
2012
- num_stripes = 2;
2013
- } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
2014
- min_stripes = 4;
2015
- num_stripes = 4;
2016
- }
2090
+ rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
20172091
2018
- if (type & BTRFS_BLOCK_GROUP_DUP)
2019
- min_stripe_size = 2 * BTRFS_STRIPE_LEN;
2020
- else
2021
- min_stripe_size = BTRFS_STRIPE_LEN;
2092
+ if (type & BTRFS_BLOCK_GROUP_RAID0)
2093
+ num_stripes = nr_devices;
2094
+ else if (type & BTRFS_BLOCK_GROUP_RAID1)
2095
+ num_stripes = 2;
2096
+ else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
2097
+ num_stripes = 3;
2098
+ else if (type & BTRFS_BLOCK_GROUP_RAID1C4)
2099
+ num_stripes = 4;
2100
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
2101
+ num_stripes = 4;
2102
+
2103
+ /* Adjust for more than 1 stripe per device */
2104
+ min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
20222105
20232106 rcu_read_lock();
20242107 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
....@@ -2034,27 +2117,20 @@
20342117 avail_space = device->total_bytes - device->bytes_used;
20352118
20362119 /* align with stripe_len */
2037
- avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
2038
- avail_space *= BTRFS_STRIPE_LEN;
2120
+ avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
20392121
20402122 /*
20412123 * In order to avoid overwriting the superblock on the drive,
20422124 * btrfs starts at an offset of at least 1MB when doing chunk
20432125 * allocation.
2126
+ *
2127
+ * This ensures we have at least min_stripe_size free space
2128
+ * after excluding 1MB.
20442129 */
2045
- skip_space = SZ_1M;
2046
-
2047
- /*
2048
- * we can use the free space in [0, skip_space - 1], subtract
2049
- * it from the total.
2050
- */
2051
- if (avail_space && avail_space >= skip_space)
2052
- avail_space -= skip_space;
2053
- else
2054
- avail_space = 0;
2055
-
2056
- if (avail_space < min_stripe_size)
2130
+ if (avail_space <= SZ_1M + min_stripe_size)
20572131 continue;
2132
+
2133
+ avail_space -= SZ_1M;
20582134
20592135 devices_info[i].dev = device;
20602136 devices_info[i].max_avail = avail_space;
....@@ -2069,9 +2145,8 @@
20692145
20702146 i = nr_devices - 1;
20712147 avail_space = 0;
2072
- while (nr_devices >= min_stripes) {
2073
- if (num_stripes > nr_devices)
2074
- num_stripes = nr_devices;
2148
+ while (nr_devices >= rattr->devs_min) {
2149
+ num_stripes = min(num_stripes, nr_devices);
20752150
20762151 if (devices_info[i].max_avail >= min_stripe_size) {
20772152 int j;
....@@ -2108,21 +2183,19 @@
21082183 {
21092184 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
21102185 struct btrfs_super_block *disk_super = fs_info->super_copy;
2111
- struct list_head *head = &fs_info->space_info;
21122186 struct btrfs_space_info *found;
21132187 u64 total_used = 0;
21142188 u64 total_free_data = 0;
21152189 u64 total_free_meta = 0;
21162190 int bits = dentry->d_sb->s_blocksize_bits;
2117
- __be32 *fsid = (__be32 *)fs_info->fsid;
2191
+ __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
21182192 unsigned factor = 1;
21192193 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
21202194 int ret;
21212195 u64 thresh = 0;
21222196 int mixed = 0;
21232197
2124
- rcu_read_lock();
2125
- list_for_each_entry_rcu(found, head, list) {
2198
+ list_for_each_entry(found, &fs_info->space_info, list) {
21262199 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
21272200 int i;
21282201
....@@ -2150,8 +2223,6 @@
21502223
21512224 total_used += found->disk_used;
21522225 }
2153
-
2154
- rcu_read_unlock();
21552226
21562227 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
21572228 buf->f_blocks >>= bits;
....@@ -2196,7 +2267,7 @@
21962267 * calculated f_bavail.
21972268 */
21982269 if (!mixed && block_rsv->space_info->full &&
2199
- total_free_meta - thresh < block_rsv->size)
2270
+ (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
22002271 buf->f_bavail = 0;
22012272
22022273 buf->f_type = BTRFS_SUPER_MAGIC;
....@@ -2209,8 +2280,10 @@
22092280 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
22102281 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
22112282 /* Mask in the root object ID too, to disambiguate subvols */
2212
- buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
2213
- buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
2283
+ buf->f_fsid.val[0] ^=
2284
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
2285
+ buf->f_fsid.val[1] ^=
2286
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid;
22142287
22152288 return 0;
22162289 }
....@@ -2219,7 +2292,7 @@
22192292 {
22202293 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
22212294 kill_anon_super(sb);
2222
- free_fs_info(fs_info);
2295
+ btrfs_free_fs_info(fs_info);
22232296 }
22242297
22252298 static struct file_system_type btrfs_fs_type = {
....@@ -2252,7 +2325,7 @@
22522325 }
22532326
22542327 /*
2255
- * used by btrfsctl to scan devices when no FS is mounted
2328
+ * Used by /dev/btrfs-control for devices ioctls.
22562329 */
22572330 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
22582331 unsigned long arg)
....@@ -2276,6 +2349,9 @@
22762349 &btrfs_root_fs_type);
22772350 ret = PTR_ERR_OR_ZERO(device);
22782351 mutex_unlock(&uuid_mutex);
2352
+ break;
2353
+ case BTRFS_IOC_FORGET_DEV:
2354
+ ret = btrfs_forget_devices(vol->name);
22792355 break;
22802356 case BTRFS_IOC_DEVICES_READY:
22812357 mutex_lock(&uuid_mutex);
....@@ -2340,7 +2416,7 @@
23402416 * device_list_mutex here as we only read the device data and the list
23412417 * is protected by RCU. Even if a device is deleted during the list
23422418 * traversals, we'll get valid data, the freeing callback will wait at
2343
- * least until until the rcu_read_unlock.
2419
+ * least until the rcu_read_unlock.
23442420 */
23452421 rcu_read_lock();
23462422 list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
....@@ -2369,6 +2445,7 @@
23692445 .show_devname = btrfs_show_devname,
23702446 .alloc_inode = btrfs_alloc_inode,
23712447 .destroy_inode = btrfs_destroy_inode,
2448
+ .free_inode = btrfs_free_inode,
23722449 .statfs = btrfs_statfs,
23732450 .remount_fs = btrfs_remount,
23742451 .freeze_fs = btrfs_freeze,
....@@ -2378,7 +2455,7 @@
23782455 static const struct file_operations btrfs_ctl_fops = {
23792456 .open = btrfs_control_open,
23802457 .unlocked_ioctl = btrfs_control_ioctl,
2381
- .compat_ioctl = btrfs_control_ioctl,
2458
+ .compat_ioctl = compat_ptr_ioctl,
23822459 .owner = THIS_MODULE,
23832460 .llseek = noop_llseek,
23842461 };
....@@ -2441,9 +2518,13 @@
24412518 if (err)
24422519 goto free_cachep;
24432520
2444
- err = extent_map_init();
2521
+ err = extent_state_cache_init();
24452522 if (err)
24462523 goto free_extent_io;
2524
+
2525
+ err = extent_map_init();
2526
+ if (err)
2527
+ goto free_extent_state_cache;
24472528
24482529 err = ordered_data_init();
24492530 if (err)
....@@ -2503,6 +2584,8 @@
25032584 ordered_data_exit();
25042585 free_extent_map:
25052586 extent_map_exit();
2587
+free_extent_state_cache:
2588
+ extent_state_cache_exit();
25062589 free_extent_io:
25072590 extent_io_exit();
25082591 free_cachep:
....@@ -2523,6 +2606,7 @@
25232606 btrfs_prelim_ref_exit();
25242607 ordered_data_exit();
25252608 extent_map_exit();
2609
+ extent_state_cache_exit();
25262610 extent_io_exit();
25272611 btrfs_interface_exit();
25282612 btrfs_end_io_wq_exit();
....@@ -2536,3 +2620,8 @@
25362620 module_exit(exit_btrfs_fs)
25372621
25382622 MODULE_LICENSE("GPL");
2623
+MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);
2624
+MODULE_SOFTDEP("pre: crc32c");
2625
+MODULE_SOFTDEP("pre: xxhash64");
2626
+MODULE_SOFTDEP("pre: sha256");
2627
+MODULE_SOFTDEP("pre: blake2b-256");