hc
2024-02-19 1c055e55a242a33e574e48be530e06770a210dcd
kernel/fs/btrfs/ioctl.c
....@@ -28,6 +28,7 @@
2828 #include <linux/iversion.h>
2929 #include "ctree.h"
3030 #include "disk-io.h"
31
+#include "export.h"
3132 #include "transaction.h"
3233 #include "btrfs_inode.h"
3334 #include "print-tree.h"
....@@ -43,6 +44,9 @@
4344 #include "qgroup.h"
4445 #include "tree-log.h"
4546 #include "compression.h"
47
+#include "space-info.h"
48
+#include "delalloc-space.h"
49
+#include "block-group.h"
4650
4751 #ifdef CONFIG_64BIT
4852 /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
....@@ -82,10 +86,6 @@
8286 #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
8387 struct btrfs_ioctl_send_args_32)
8488 #endif
85
-
86
-static int btrfs_clone(struct inode *src, struct inode *inode,
87
- u64 off, u64 olen, u64 olen_aligned, u64 destoff,
88
- int no_time_update);
8989
9090 /* Mask out flags that are inappropriate for the given type of inode. */
9191 static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
....@@ -164,8 +164,11 @@
164164 return 0;
165165 }
166166
167
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
168
-static int check_fsflags(unsigned int flags)
167
+/*
168
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
169
+ * the old and new flags are not conflicting
170
+ */
171
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
169172 {
170173 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
171174 FS_NOATIME_FL | FS_NODUMP_FL | \
....@@ -174,7 +177,17 @@
174177 FS_NOCOW_FL))
175178 return -EOPNOTSUPP;
176179
180
+ /* COMPR and NOCOMP on new/old are valid */
177181 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
182
+ return -EINVAL;
183
+
184
+ if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
185
+ return -EINVAL;
186
+
187
+ /* NOCOW and compression options are mutually exclusive */
188
+ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
189
+ return -EINVAL;
190
+ if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
178191 return -EINVAL;
179192
180193 return 0;
....@@ -189,9 +202,8 @@
189202 struct btrfs_trans_handle *trans;
190203 unsigned int fsflags, old_fsflags;
191204 int ret;
192
- u64 old_flags;
193
- unsigned int old_i_flags;
194
- umode_t mode;
205
+ const char *comp = NULL;
206
+ u32 binode_flags;
195207
196208 if (!inode_owner_or_capable(inode))
197209 return -EPERM;
....@@ -202,76 +214,70 @@
202214 if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
203215 return -EFAULT;
204216
205
- ret = check_fsflags(fsflags);
206
- if (ret)
207
- return ret;
208
-
209217 ret = mnt_want_write_file(file);
210218 if (ret)
211219 return ret;
212220
213221 inode_lock(inode);
214
-
215
- old_flags = binode->flags;
216
- old_i_flags = inode->i_flags;
217
- mode = inode->i_mode;
218
-
219222 fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
220223 old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
221
- if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
222
- if (!capable(CAP_LINUX_IMMUTABLE)) {
223
- ret = -EPERM;
224
- goto out_unlock;
225
- }
226
- }
227224
225
+ ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
226
+ if (ret)
227
+ goto out_unlock;
228
+
229
+ ret = check_fsflags(old_fsflags, fsflags);
230
+ if (ret)
231
+ goto out_unlock;
232
+
233
+ binode_flags = binode->flags;
228234 if (fsflags & FS_SYNC_FL)
229
- binode->flags |= BTRFS_INODE_SYNC;
235
+ binode_flags |= BTRFS_INODE_SYNC;
230236 else
231
- binode->flags &= ~BTRFS_INODE_SYNC;
237
+ binode_flags &= ~BTRFS_INODE_SYNC;
232238 if (fsflags & FS_IMMUTABLE_FL)
233
- binode->flags |= BTRFS_INODE_IMMUTABLE;
239
+ binode_flags |= BTRFS_INODE_IMMUTABLE;
234240 else
235
- binode->flags &= ~BTRFS_INODE_IMMUTABLE;
241
+ binode_flags &= ~BTRFS_INODE_IMMUTABLE;
236242 if (fsflags & FS_APPEND_FL)
237
- binode->flags |= BTRFS_INODE_APPEND;
243
+ binode_flags |= BTRFS_INODE_APPEND;
238244 else
239
- binode->flags &= ~BTRFS_INODE_APPEND;
245
+ binode_flags &= ~BTRFS_INODE_APPEND;
240246 if (fsflags & FS_NODUMP_FL)
241
- binode->flags |= BTRFS_INODE_NODUMP;
247
+ binode_flags |= BTRFS_INODE_NODUMP;
242248 else
243
- binode->flags &= ~BTRFS_INODE_NODUMP;
249
+ binode_flags &= ~BTRFS_INODE_NODUMP;
244250 if (fsflags & FS_NOATIME_FL)
245
- binode->flags |= BTRFS_INODE_NOATIME;
251
+ binode_flags |= BTRFS_INODE_NOATIME;
246252 else
247
- binode->flags &= ~BTRFS_INODE_NOATIME;
253
+ binode_flags &= ~BTRFS_INODE_NOATIME;
248254 if (fsflags & FS_DIRSYNC_FL)
249
- binode->flags |= BTRFS_INODE_DIRSYNC;
255
+ binode_flags |= BTRFS_INODE_DIRSYNC;
250256 else
251
- binode->flags &= ~BTRFS_INODE_DIRSYNC;
257
+ binode_flags &= ~BTRFS_INODE_DIRSYNC;
252258 if (fsflags & FS_NOCOW_FL) {
253
- if (S_ISREG(mode)) {
259
+ if (S_ISREG(inode->i_mode)) {
254260 /*
255261 * It's safe to turn csums off here, no extents exist.
256262 * Otherwise we want the flag to reflect the real COW
257263 * status of the file and will not set it.
258264 */
259265 if (inode->i_size == 0)
260
- binode->flags |= BTRFS_INODE_NODATACOW
261
- | BTRFS_INODE_NODATASUM;
266
+ binode_flags |= BTRFS_INODE_NODATACOW |
267
+ BTRFS_INODE_NODATASUM;
262268 } else {
263
- binode->flags |= BTRFS_INODE_NODATACOW;
269
+ binode_flags |= BTRFS_INODE_NODATACOW;
264270 }
265271 } else {
266272 /*
267273 * Revert back under same assumptions as above
268274 */
269
- if (S_ISREG(mode)) {
275
+ if (S_ISREG(inode->i_mode)) {
270276 if (inode->i_size == 0)
271
- binode->flags &= ~(BTRFS_INODE_NODATACOW
272
- | BTRFS_INODE_NODATASUM);
277
+ binode_flags &= ~(BTRFS_INODE_NODATACOW |
278
+ BTRFS_INODE_NODATASUM);
273279 } else {
274
- binode->flags &= ~BTRFS_INODE_NODATACOW;
280
+ binode_flags &= ~BTRFS_INODE_NODATACOW;
275281 }
276282 }
277283
....@@ -281,52 +287,59 @@
281287 * things smaller.
282288 */
283289 if (fsflags & FS_NOCOMP_FL) {
284
- binode->flags &= ~BTRFS_INODE_COMPRESS;
285
- binode->flags |= BTRFS_INODE_NOCOMPRESS;
286
-
287
- ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
288
- if (ret && ret != -ENODATA)
289
- goto out_drop;
290
+ binode_flags &= ~BTRFS_INODE_COMPRESS;
291
+ binode_flags |= BTRFS_INODE_NOCOMPRESS;
290292 } else if (fsflags & FS_COMPR_FL) {
291
- const char *comp;
292293
293
- binode->flags |= BTRFS_INODE_COMPRESS;
294
- binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
294
+ if (IS_SWAPFILE(inode)) {
295
+ ret = -ETXTBSY;
296
+ goto out_unlock;
297
+ }
298
+
299
+ binode_flags |= BTRFS_INODE_COMPRESS;
300
+ binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
295301
296302 comp = btrfs_compress_type2str(fs_info->compress_type);
297303 if (!comp || comp[0] == 0)
298304 comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
299
-
300
- ret = btrfs_set_prop(inode, "btrfs.compression",
301
- comp, strlen(comp), 0);
302
- if (ret)
303
- goto out_drop;
304
-
305305 } else {
306
- ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
307
- if (ret && ret != -ENODATA)
308
- goto out_drop;
309
- binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
306
+ binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
310307 }
311308
312
- trans = btrfs_start_transaction(root, 1);
309
+ /*
310
+ * 1 for inode item
311
+ * 2 for properties
312
+ */
313
+ trans = btrfs_start_transaction(root, 3);
313314 if (IS_ERR(trans)) {
314315 ret = PTR_ERR(trans);
315
- goto out_drop;
316
+ goto out_unlock;
316317 }
317318
319
+ if (comp) {
320
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
321
+ strlen(comp), 0);
322
+ if (ret) {
323
+ btrfs_abort_transaction(trans, ret);
324
+ goto out_end_trans;
325
+ }
326
+ } else {
327
+ ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
328
+ 0, 0);
329
+ if (ret && ret != -ENODATA) {
330
+ btrfs_abort_transaction(trans, ret);
331
+ goto out_end_trans;
332
+ }
333
+ }
334
+
335
+ binode->flags = binode_flags;
318336 btrfs_sync_inode_flags_to_i_flags(inode);
319337 inode_inc_iversion(inode);
320338 inode->i_ctime = current_time(inode);
321339 ret = btrfs_update_inode(trans, root, inode);
322340
341
+ out_end_trans:
323342 btrfs_end_transaction(trans);
324
- out_drop:
325
- if (ret) {
326
- binode->flags = old_flags;
327
- inode->i_flags = old_i_flags;
328
- }
329
-
330343 out_unlock:
331344 inode_unlock(inode);
332345 mnt_drop_write_file(file);
....@@ -365,6 +378,18 @@
365378 return 0;
366379 }
367380
/*
 * Try to claim the filesystem-wide exclusive operation slot for @type.
 *
 * cmpxchg() stores @type in fs_info->exclusive_operation only when the slot
 * currently holds BTRFS_EXCLOP_NONE, and returns the value it found there.
 * Negating that value yields true when the slot was claimed and false when
 * another operation already owns it; a caller in this file that passes
 * BTRFS_EXCLOP_RESIZE backs out with BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
 * on false.
 *
 * NOTE(review): the negation only means "slot was free" if
 * BTRFS_EXCLOP_NONE is 0 - confirm against the enum definition.
 */
381
+bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
382
+ enum btrfs_exclusive_operation type)
383
+{
384
+ return !cmpxchg(&fs_info->exclusive_operation, BTRFS_EXCLOP_NONE, type);
385
+}
386
+
/*
 * Release the exclusive operation slot.
 *
 * Unconditionally resets fs_info->exclusive_operation to BTRFS_EXCLOP_NONE
 * (no check that the caller is the current owner), then calls sysfs_notify()
 * for the "exclusive_operation" attribute of fs_devices->fsid_kobj.
 * Presumably this lets userspace waiting on that attribute observe the
 * change - confirm against the sysfs attribute definition.
 */
387
+void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
388
+{
389
+ WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
390
+ sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
391
+}
392
+
368393 /*
369394 * Set the xflags from the internal inode flags. The remaining items of fsxattr
370395 * are zeroed.
....@@ -374,9 +399,7 @@
374399 struct btrfs_inode *binode = BTRFS_I(file_inode(file));
375400 struct fsxattr fa;
376401
377
- memset(&fa, 0, sizeof(fa));
378
- fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
379
-
402
+ simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
380403 if (copy_to_user(arg, &fa, sizeof(fa)))
381404 return -EFAULT;
382405
....@@ -389,7 +412,7 @@
389412 struct btrfs_inode *binode = BTRFS_I(inode);
390413 struct btrfs_root *root = binode->root;
391414 struct btrfs_trans_handle *trans;
392
- struct fsxattr fa;
415
+ struct fsxattr fa, old_fa;
393416 unsigned old_flags;
394417 unsigned old_i_flags;
395418 int ret = 0;
....@@ -400,7 +423,6 @@
400423 if (btrfs_root_readonly(root))
401424 return -EROFS;
402425
403
- memset(&fa, 0, sizeof(fa));
404426 if (copy_from_user(&fa, arg, sizeof(fa)))
405427 return -EFAULT;
406428
....@@ -420,13 +442,11 @@
420442 old_flags = binode->flags;
421443 old_i_flags = inode->i_flags;
422444
423
- /* We need the capabilities to change append-only or immutable inode */
424
- if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
425
- (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
426
- !capable(CAP_LINUX_IMMUTABLE)) {
427
- ret = -EPERM;
445
+ simple_fill_fsxattr(&old_fa,
446
+ btrfs_inode_flags_to_xflags(binode->flags));
447
+ ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
448
+ if (ret)
428449 goto out_unlock;
429
- }
430450
431451 if (fa.fsx_xflags & FS_XFLAG_SYNC)
432452 binode->flags |= BTRFS_INODE_SYNC;
....@@ -482,10 +502,9 @@
482502 return put_user(inode->i_generation, arg);
483503 }
484504
485
-static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
505
+static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
506
+ void __user *arg)
486507 {
487
- struct inode *inode = file_inode(file);
488
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
489508 struct btrfs_device *device;
490509 struct request_queue *q;
491510 struct fstrim_range range;
....@@ -544,7 +563,7 @@
544563 return 0;
545564 }
546565
547
-int btrfs_is_empty_uuid(u8 *uuid)
566
+int __pure btrfs_is_empty_uuid(u8 *uuid)
548567 {
549568 int i;
550569
....@@ -558,7 +577,6 @@
558577 static noinline int create_subvol(struct inode *dir,
559578 struct dentry *dentry,
560579 const char *name, int namelen,
561
- u64 *async_transid,
562580 struct btrfs_qgroup_inherit *inherit)
563581 {
564582 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
....@@ -574,10 +592,10 @@
574592 struct inode *inode;
575593 int ret;
576594 int err;
595
+ dev_t anon_dev = 0;
577596 u64 objectid;
578597 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
579598 u64 index = 0;
580
- uuid_le new_uuid;
581599
582600 root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
583601 if (!root_item)
....@@ -585,6 +603,10 @@
585603
586604 ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
587605 if (ret)
606
+ goto fail_free;
607
+
608
+ ret = get_anon_bdev(&anon_dev);
609
+ if (ret < 0)
588610 goto fail_free;
589611
590612 /*
....@@ -608,7 +630,7 @@
608630 trans = btrfs_start_transaction(root, 0);
609631 if (IS_ERR(trans)) {
610632 ret = PTR_ERR(trans);
611
- btrfs_subvolume_release_metadata(fs_info, &block_rsv);
633
+ btrfs_subvolume_release_metadata(root, &block_rsv);
612634 goto fail_free;
613635 }
614636 trans->block_rsv = &block_rsv;
....@@ -618,7 +640,8 @@
618640 if (ret)
619641 goto fail;
620642
621
- leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
643
+ leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0,
644
+ BTRFS_NESTING_NORMAL);
622645 if (IS_ERR(leaf)) {
623646 ret = PTR_ERR(leaf);
624647 goto fail;
....@@ -647,8 +670,7 @@
647670
648671 btrfs_set_root_generation_v2(root_item,
649672 btrfs_root_generation(root_item));
650
- uuid_le_gen(&new_uuid);
651
- memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
673
+ generate_random_guid(root_item->uuid);
652674 btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
653675 btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
654676 root_item->ctime = root_item->otime;
....@@ -682,16 +704,20 @@
682704 leaf = NULL;
683705
684706 key.offset = (u64)-1;
685
- new_root = btrfs_read_fs_root_no_name(fs_info, &key);
707
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
686708 if (IS_ERR(new_root)) {
709
+ free_anon_bdev(anon_dev);
687710 ret = PTR_ERR(new_root);
688711 btrfs_abort_transaction(trans, ret);
689712 goto fail;
690713 }
714
+ /* Freeing will be done in btrfs_put_root() of new_root */
715
+ anon_dev = 0;
691716
692717 btrfs_record_root_in_trans(trans, new_root);
693718
694719 ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
720
+ btrfs_put_root(new_root);
695721 if (ret) {
696722 /* We potentially lose an unused inode item here */
697723 btrfs_abort_transaction(trans, ret);
....@@ -711,8 +737,7 @@
711737 goto fail;
712738 }
713739
714
- ret = btrfs_insert_dir_item(trans, root,
715
- name, namelen, BTRFS_I(dir), &key,
740
+ ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
716741 BTRFS_FT_DIR, index);
717742 if (ret) {
718743 btrfs_abort_transaction(trans, ret);
....@@ -742,16 +767,9 @@
742767 kfree(root_item);
743768 trans->block_rsv = NULL;
744769 trans->bytes_reserved = 0;
745
- btrfs_subvolume_release_metadata(fs_info, &block_rsv);
770
+ btrfs_subvolume_release_metadata(root, &block_rsv);
746771
747
- if (async_transid) {
748
- *async_transid = trans->transid;
749
- err = btrfs_commit_transaction_async(trans, 1);
750
- if (err)
751
- err = btrfs_commit_transaction(trans);
752
- } else {
753
- err = btrfs_commit_transaction(trans);
754
- }
772
+ err = btrfs_commit_transaction(trans);
755773 if (err && !ret)
756774 ret = err;
757775
....@@ -764,13 +782,14 @@
764782 return ret;
765783
766784 fail_free:
785
+ if (anon_dev)
786
+ free_anon_bdev(anon_dev);
767787 kfree(root_item);
768788 return ret;
769789 }
770790
771791 static int create_snapshot(struct btrfs_root *root, struct inode *dir,
772
- struct dentry *dentry,
773
- u64 *async_transid, bool readonly,
792
+ struct dentry *dentry, bool readonly,
774793 struct btrfs_qgroup_inherit *inherit)
775794 {
776795 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
....@@ -778,15 +797,23 @@
778797 struct btrfs_pending_snapshot *pending_snapshot;
779798 struct btrfs_trans_handle *trans;
780799 int ret;
781
- bool snapshot_force_cow = false;
782800
783
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
801
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
784802 return -EINVAL;
803
+
804
+ if (atomic_read(&root->nr_swapfiles)) {
805
+ btrfs_warn(fs_info,
806
+ "cannot snapshot subvolume with active swapfile");
807
+ return -ETXTBSY;
808
+ }
785809
786810 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
787811 if (!pending_snapshot)
788812 return -ENOMEM;
789813
814
+ ret = get_anon_bdev(&pending_snapshot->anon_dev);
815
+ if (ret < 0)
816
+ goto free_pending;
790817 pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
791818 GFP_KERNEL);
792819 pending_snapshot->path = btrfs_alloc_path();
....@@ -794,31 +821,6 @@
794821 ret = -ENOMEM;
795822 goto free_pending;
796823 }
797
-
798
- /*
799
- * Force new buffered writes to reserve space even when NOCOW is
800
- * possible. This is to avoid later writeback (running dealloc) to
801
- * fallback to COW mode and unexpectedly fail with ENOSPC.
802
- */
803
- atomic_inc(&root->will_be_snapshotted);
804
- smp_mb__after_atomic();
805
- /* wait for no snapshot writes */
806
- wait_event(root->subv_writers->wait,
807
- percpu_counter_sum(&root->subv_writers->counter) == 0);
808
-
809
- ret = btrfs_start_delalloc_snapshot(root);
810
- if (ret)
811
- goto dec_and_free;
812
-
813
- /*
814
- * All previous writes have started writeback in NOCOW mode, so now
815
- * we force future writes to fallback to COW mode during snapshot
816
- * creation.
817
- */
818
- atomic_inc(&root->snapshot_force_cow);
819
- snapshot_force_cow = true;
820
-
821
- btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
822824
823825 btrfs_init_block_rsv(&pending_snapshot->block_rsv,
824826 BTRFS_BLOCK_RSV_TEMP);
....@@ -834,7 +836,7 @@
834836 &pending_snapshot->block_rsv, 8,
835837 false);
836838 if (ret)
837
- goto dec_and_free;
839
+ goto free_pending;
838840
839841 pending_snapshot->dentry = dentry;
840842 pending_snapshot->root = root;
....@@ -852,14 +854,8 @@
852854 list_add(&pending_snapshot->list,
853855 &trans->transaction->pending_snapshots);
854856 spin_unlock(&fs_info->trans_lock);
855
- if (async_transid) {
856
- *async_transid = trans->transid;
857
- ret = btrfs_commit_transaction_async(trans, 1);
858
- if (ret)
859
- ret = btrfs_commit_transaction(trans);
860
- } else {
861
- ret = btrfs_commit_transaction(trans);
862
- }
857
+
858
+ ret = btrfs_commit_transaction(trans);
863859 if (ret)
864860 goto fail;
865861
....@@ -879,14 +875,16 @@
879875
880876 d_instantiate(dentry, inode);
881877 ret = 0;
878
+ pending_snapshot->anon_dev = 0;
882879 fail:
883
- btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
884
-dec_and_free:
885
- if (snapshot_force_cow)
886
- atomic_dec(&root->snapshot_force_cow);
887
- if (atomic_dec_and_test(&root->will_be_snapshotted))
888
- wake_up_var(&root->will_be_snapshotted);
880
+ /* Prevent double freeing of anon_dev */
881
+ if (ret && pending_snapshot->snap)
882
+ pending_snapshot->snap->anon_dev = 0;
883
+ btrfs_put_root(pending_snapshot->snap);
884
+ btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
889885 free_pending:
886
+ if (pending_snapshot->anon_dev)
887
+ free_anon_bdev(pending_snapshot->anon_dev);
890888 kfree(pending_snapshot->root_item);
891889 btrfs_free_path(pending_snapshot->path);
892890 kfree(pending_snapshot);
....@@ -964,7 +962,7 @@
964962 static noinline int btrfs_mksubvol(const struct path *parent,
965963 const char *name, int namelen,
966964 struct btrfs_root *snap_src,
967
- u64 *async_transid, bool readonly,
965
+ bool readonly,
968966 struct btrfs_qgroup_inherit *inherit)
969967 {
970968 struct inode *dir = d_inode(parent->dentry);
....@@ -1000,13 +998,11 @@
1000998 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
1001999 goto out_up_read;
10021000
1003
- if (snap_src) {
1004
- error = create_snapshot(snap_src, dir, dentry,
1005
- async_transid, readonly, inherit);
1006
- } else {
1007
- error = create_subvol(dir, dentry, name, namelen,
1008
- async_transid, inherit);
1009
- }
1001
+ if (snap_src)
1002
+ error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
1003
+ else
1004
+ error = create_subvol(dir, dentry, name, namelen, inherit);
1005
+
10101006 if (!error)
10111007 fsnotify_mkdir(dir, dentry);
10121008 out_up_read:
....@@ -1016,6 +1012,45 @@
10161012 out_unlock:
10171013 inode_unlock(dir);
10181014 return error;
1015
+}
1016
+
/*
 * Create a snapshot of @root named @name under @parent.
 *
 * This wraps btrfs_mksubvol(), which receives @root as its snapshot source,
 * with the preparation that must surround snapshot creation:
 *   1. take root->snapshot_lock with btrfs_drew_read_lock();
 *   2. call btrfs_start_delalloc_snapshot() on @root;
 *   3. raise root->snapshot_force_cow and wait for ordered extents.
 * Steps 3 and 1 are undone, in that order, on the way out. The local
 * snapshot_force_cow flag records whether the counter was actually raised,
 * so the early-failure path does not decrement it.
 *
 * @readonly and @inherit are passed through to btrfs_mksubvol() untouched.
 *
 * Returns the error from btrfs_start_delalloc_snapshot() if it fails,
 * otherwise whatever btrfs_mksubvol() returns.
 */
1017
+static noinline int btrfs_mksnapshot(const struct path *parent,
1018
+ const char *name, int namelen,
1019
+ struct btrfs_root *root,
1020
+ bool readonly,
1021
+ struct btrfs_qgroup_inherit *inherit)
1022
+{
1023
+ int ret;
1024
+ bool snapshot_force_cow = false;
1025
+
1026
+ /*
1027
+ * Force new buffered writes to reserve space even when NOCOW is
1028
+ * possible. This keeps later writeback (running delalloc) from
1029
+ * falling back to COW mode and unexpectedly failing with ENOSPC.
1030
+ */
1031
+ btrfs_drew_read_lock(&root->snapshot_lock);
1032
+
1033
+ ret = btrfs_start_delalloc_snapshot(root);
1034
+ if (ret)
1035
+ goto out;
1036
+
1037
+ /*
1038
+ * All previous writes have started writeback in NOCOW mode, so now
1039
+ * we force future writes to fall back to COW mode during snapshot
1040
+ * creation.
1041
+ */
1042
+ atomic_inc(&root->snapshot_force_cow);
1043
+ snapshot_force_cow = true;
1044
+
1045
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
1046
+
1047
+ ret = btrfs_mksubvol(parent, name, namelen,
1048
+ root, readonly, inherit);
1049
+out:
1050
+ if (snapshot_force_cow)
1051
+ atomic_dec(&root->snapshot_force_cow);
1052
+ btrfs_drew_read_unlock(&root->snapshot_lock);
1053
+ return ret;
10191054 }
10201055
10211056 /*
....@@ -1139,7 +1174,7 @@
11391174
11401175 /* get the big lock and read metadata off disk */
11411176 lock_extent_bits(io_tree, start, end, &cached);
1142
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
1177
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
11431178 unlock_extent_cached(io_tree, start, end, &cached);
11441179
11451180 if (IS_ERR(em))
....@@ -1252,6 +1287,7 @@
12521287 u64 page_end;
12531288 u64 page_cnt;
12541289 u64 start = (u64)start_index << PAGE_SHIFT;
1290
+ u64 search_start;
12551291 int ret;
12561292 int i;
12571293 int i_done;
....@@ -1267,7 +1303,7 @@
12671303
12681304 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
12691305
1270
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
1306
+ ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
12711307 start, page_cnt << PAGE_SHIFT);
12721308 if (ret)
12731309 return ret;
....@@ -1288,7 +1324,7 @@
12881324 while (1) {
12891325 lock_extent_bits(tree, page_start, page_end,
12901326 &cached_state);
1291
- ordered = btrfs_lookup_ordered_extent(inode,
1327
+ ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
12921328 page_start);
12931329 unlock_extent_cached(tree, page_start, page_end,
12941330 &cached_state);
....@@ -1296,7 +1332,7 @@
12961332 break;
12971333
12981334 unlock_page(page);
1299
- btrfs_start_ordered_extent(inode, ordered, 1);
1335
+ btrfs_start_ordered_extent(ordered, 1);
13001336 btrfs_put_ordered_extent(ordered);
13011337 lock_page(page);
13021338 /*
....@@ -1348,16 +1384,49 @@
13481384
13491385 lock_extent_bits(&BTRFS_I(inode)->io_tree,
13501386 page_start, page_end - 1, &cached_state);
1387
+
1388
+ /*
1389
+ * When defragmenting we skip ranges that have holes or inline extents,
1390
+ * (check should_defrag_range()), to avoid unnecessary IO and wasting
1391
+ * space. At btrfs_defrag_file(), we check if a range should be defragged
1392
+ * before locking the inode and then, if it should, we trigger a sync
1393
+ * page cache readahead - we lock the inode only after that to avoid
1394
+ * blocking for too long other tasks that possibly want to operate on
1395
+ * other file ranges. But before we were able to get the inode lock,
1396
+ * some other task may have punched a hole in the range, or we may have
1397
+ * now an inline extent, in which case we should not defrag. So check
1398
+ * for that here, where we have the inode and the range locked, and bail
1399
+ * out if that happened.
1400
+ */
1401
+ search_start = page_start;
1402
+ while (search_start < page_end) {
1403
+ struct extent_map *em;
1404
+
1405
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
1406
+ page_end - search_start);
1407
+ if (IS_ERR(em)) {
1408
+ ret = PTR_ERR(em);
1409
+ goto out_unlock_range;
1410
+ }
1411
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1412
+ free_extent_map(em);
1413
+ /* Ok, 0 means we did not defrag anything */
1414
+ ret = 0;
1415
+ goto out_unlock_range;
1416
+ }
1417
+ search_start = extent_map_end(em);
1418
+ free_extent_map(em);
1419
+ }
1420
+
13511421 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
1352
- page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
1353
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
1354
- &cached_state);
1422
+ page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
1423
+ EXTENT_DEFRAG, 0, 0, &cached_state);
13551424
13561425 if (i_done != page_cnt) {
13571426 spin_lock(&BTRFS_I(inode)->lock);
13581427 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
13591428 spin_unlock(&BTRFS_I(inode)->lock);
1360
- btrfs_delalloc_release_space(inode, data_reserved,
1429
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
13611430 start, (page_cnt - i_done) << PAGE_SHIFT, true);
13621431 }
13631432
....@@ -1379,12 +1448,16 @@
13791448 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
13801449 extent_changeset_free(data_reserved);
13811450 return i_done;
1451
+
1452
+out_unlock_range:
1453
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1454
+ page_start, page_end - 1, &cached_state);
13821455 out:
13831456 for (i = 0; i < i_done; i++) {
13841457 unlock_page(pages[i]);
13851458 put_page(pages[i]);
13861459 }
1387
- btrfs_delalloc_release_space(inode, data_reserved,
1460
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
13881461 start, page_cnt << PAGE_SHIFT, true);
13891462 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
13901463 extent_changeset_free(data_reserved);
....@@ -1424,7 +1497,7 @@
14241497 return -EINVAL;
14251498
14261499 if (do_compress) {
1427
- if (range->compress_type > BTRFS_COMPRESS_TYPES)
1500
+ if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES)
14281501 return -EINVAL;
14291502 if (range->compress_type)
14301503 compress_type = range->compress_type;
....@@ -1530,9 +1603,13 @@
15301603 }
15311604
15321605 inode_lock(inode);
1533
- if (do_compress)
1534
- BTRFS_I(inode)->defrag_compress = compress_type;
1535
- ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1606
+ if (IS_SWAPFILE(inode)) {
1607
+ ret = -ETXTBSY;
1608
+ } else {
1609
+ if (do_compress)
1610
+ BTRFS_I(inode)->defrag_compress = compress_type;
1611
+ ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1612
+ }
15361613 if (ret < 0) {
15371614 inode_unlock(inode);
15381615 goto out_ra;
....@@ -1623,7 +1700,7 @@
16231700 if (ret)
16241701 return ret;
16251702
1626
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
1703
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_RESIZE)) {
16271704 mnt_drop_write_file(file);
16281705 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
16291706 }
....@@ -1717,9 +1794,6 @@
17171794
17181795 new_size = round_down(new_size, fs_info->sectorsize);
17191796
1720
- btrfs_info_in_rcu(fs_info, "new size for %s is %llu",
1721
- rcu_str_deref(device->name), new_size);
1722
-
17231797 if (new_size > old_size) {
17241798 trans = btrfs_start_transaction(root, 0);
17251799 if (IS_ERR(trans)) {
....@@ -1732,17 +1806,22 @@
17321806 ret = btrfs_shrink_device(device, new_size);
17331807 } /* equal, nothing need to do */
17341808
1809
+ if (ret == 0 && new_size != old_size)
1810
+ btrfs_info_in_rcu(fs_info,
1811
+ "resize device %s (devid %llu) from %llu to %llu",
1812
+ rcu_str_deref(device->name), device->devid,
1813
+ old_size, new_size);
17351814 out_free:
17361815 kfree(vol_args);
17371816 out:
1738
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
1817
+ btrfs_exclop_finish(fs_info);
17391818 mnt_drop_write_file(file);
17401819 return ret;
17411820 }
17421821
1743
-static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1822
+static noinline int __btrfs_ioctl_snap_create(struct file *file,
17441823 const char *name, unsigned long fd, int subvol,
1745
- u64 *transid, bool readonly,
1824
+ bool readonly,
17461825 struct btrfs_qgroup_inherit *inherit)
17471826 {
17481827 int namelen;
....@@ -1769,7 +1848,7 @@
17691848
17701849 if (subvol) {
17711850 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1772
- NULL, transid, readonly, inherit);
1851
+ NULL, readonly, inherit);
17731852 } else {
17741853 struct fd src = fdget(fd);
17751854 struct inode *src_inode;
....@@ -1790,9 +1869,9 @@
17901869 */
17911870 ret = -EPERM;
17921871 } else {
1793
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
1872
+ ret = btrfs_mksnapshot(&file->f_path, name, namelen,
17941873 BTRFS_I(src_inode)->root,
1795
- transid, readonly, inherit);
1874
+ readonly, inherit);
17961875 }
17971876 fdput(src);
17981877 }
....@@ -1816,9 +1895,8 @@
18161895 return PTR_ERR(vol_args);
18171896 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
18181897
1819
- ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1820
- vol_args->fd, subvol,
1821
- NULL, false, NULL);
1898
+ ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
1899
+ subvol, false, NULL);
18221900
18231901 kfree(vol_args);
18241902 return ret;
....@@ -1829,8 +1907,6 @@
18291907 {
18301908 struct btrfs_ioctl_vol_args_v2 *vol_args;
18311909 int ret;
1832
- u64 transid = 0;
1833
- u64 *ptr = NULL;
18341910 bool readonly = false;
18351911 struct btrfs_qgroup_inherit *inherit = NULL;
18361912
....@@ -1842,15 +1918,11 @@
18421918 return PTR_ERR(vol_args);
18431919 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
18441920
1845
- if (vol_args->flags &
1846
- ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
1847
- BTRFS_SUBVOL_QGROUP_INHERIT)) {
1921
+ if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
18481922 ret = -EOPNOTSUPP;
18491923 goto free_args;
18501924 }
18511925
1852
- if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
1853
- ptr = &transid;
18541926 if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
18551927 readonly = true;
18561928 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
....@@ -1882,18 +1954,10 @@
18821954 }
18831955 }
18841956
1885
- ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1886
- vol_args->fd, subvol, ptr,
1887
- readonly, inherit);
1957
+ ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
1958
+ subvol, readonly, inherit);
18881959 if (ret)
18891960 goto free_inherit;
1890
-
1891
- if (ptr && copy_to_user(arg +
1892
- offsetof(struct btrfs_ioctl_vol_args_v2,
1893
- transid),
1894
- ptr, sizeof(*ptr)))
1895
- ret = -EFAULT;
1896
-
18971961 free_inherit:
18981962 kfree(inherit);
18991963 free_args:
....@@ -1949,11 +2013,6 @@
19492013
19502014 if (copy_from_user(&flags, arg, sizeof(flags))) {
19512015 ret = -EFAULT;
1952
- goto out_drop_write;
1953
- }
1954
-
1955
- if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
1956
- ret = -EINVAL;
19572016 goto out_drop_write;
19582017 }
19592018
....@@ -2112,7 +2171,7 @@
21122171 * problem. Otherwise we'll fault and then copy the buffer in
21132172 * properly this next time through
21142173 */
2115
- if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) {
2174
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
21162175 ret = 0;
21172176 goto out;
21182177 }
....@@ -2199,12 +2258,9 @@
21992258
22002259 if (sk->tree_id == 0) {
22012260 /* search the root of the inode that was passed */
2202
- root = BTRFS_I(inode)->root;
2261
+ root = btrfs_grab_root(BTRFS_I(inode)->root);
22032262 } else {
2204
- key.objectid = sk->tree_id;
2205
- key.type = BTRFS_ROOT_ITEM_KEY;
2206
- key.offset = (u64)-1;
2207
- root = btrfs_read_fs_root_no_name(info, &key);
2263
+ root = btrfs_get_fs_root(info, sk->tree_id, true);
22082264 if (IS_ERR(root)) {
22092265 btrfs_free_path(path);
22102266 return PTR_ERR(root);
....@@ -2238,6 +2294,7 @@
22382294 ret = 0;
22392295 err:
22402296 sk->nr_items = num_found;
2297
+ btrfs_put_root(root);
22412298 btrfs_free_path(path);
22422299 return ret;
22432300 }
....@@ -2341,12 +2398,10 @@
23412398
23422399 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
23432400
2344
- key.objectid = tree_id;
2345
- key.type = BTRFS_ROOT_ITEM_KEY;
2346
- key.offset = (u64)-1;
2347
- root = btrfs_read_fs_root_no_name(info, &key);
2401
+ root = btrfs_get_fs_root(info, tree_id, true);
23482402 if (IS_ERR(root)) {
23492403 ret = PTR_ERR(root);
2404
+ root = NULL;
23502405 goto out;
23512406 }
23522407
....@@ -2397,6 +2452,7 @@
23972452 name[total_len] = '\0';
23982453 ret = 0;
23992454 out:
2455
+ btrfs_put_root(root);
24002456 btrfs_free_path(path);
24012457 return ret;
24022458 }
....@@ -2413,7 +2469,7 @@
24132469 unsigned long item_len;
24142470 struct btrfs_inode_ref *iref;
24152471 struct btrfs_root_ref *rref;
2416
- struct btrfs_root *root;
2472
+ struct btrfs_root *root = NULL;
24172473 struct btrfs_path *path;
24182474 struct btrfs_key key, key2;
24192475 struct extent_buffer *leaf;
....@@ -2435,10 +2491,7 @@
24352491 if (dirid != upper_limit.objectid) {
24362492 ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
24372493
2438
- key.objectid = treeid;
2439
- key.type = BTRFS_ROOT_ITEM_KEY;
2440
- key.offset = (u64)-1;
2441
- root = btrfs_read_fs_root_no_name(fs_info, &key);
2494
+ root = btrfs_get_fs_root(fs_info, treeid, true);
24422495 if (IS_ERR(root)) {
24432496 ret = PTR_ERR(root);
24442497 goto out;
....@@ -2450,15 +2503,15 @@
24502503 while (1) {
24512504 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
24522505 if (ret < 0) {
2453
- goto out;
2506
+ goto out_put;
24542507 } else if (ret > 0) {
24552508 ret = btrfs_previous_item(root, path, dirid,
24562509 BTRFS_INODE_REF_KEY);
24572510 if (ret < 0) {
2458
- goto out;
2511
+ goto out_put;
24592512 } else if (ret > 0) {
24602513 ret = -ENOENT;
2461
- goto out;
2514
+ goto out_put;
24622515 }
24632516 }
24642517
....@@ -2472,7 +2525,7 @@
24722525 total_len += len + 1;
24732526 if (ptr < args->path) {
24742527 ret = -ENAMETOOLONG;
2475
- goto out;
2528
+ goto out_put;
24762529 }
24772530
24782531 *(ptr + len) = '/';
....@@ -2483,10 +2536,10 @@
24832536 ret = btrfs_previous_item(root, path, dirid,
24842537 BTRFS_INODE_ITEM_KEY);
24852538 if (ret < 0) {
2486
- goto out;
2539
+ goto out_put;
24872540 } else if (ret > 0) {
24882541 ret = -ENOENT;
2489
- goto out;
2542
+ goto out_put;
24902543 }
24912544
24922545 leaf = path->nodes[0];
....@@ -2494,29 +2547,35 @@
24942547 btrfs_item_key_to_cpu(leaf, &key2, slot);
24952548 if (key2.objectid != dirid) {
24962549 ret = -ENOENT;
2497
- goto out;
2550
+ goto out_put;
24982551 }
24992552
2500
- temp_inode = btrfs_iget(sb, &key2, root, NULL);
2553
+ /*
2554
+ * We don't need the path anymore, so release it and
2555
+ * avoid deadlocks and lockdep warnings in case
2556
+ * btrfs_iget() needs to lookup the inode from its root
2557
+ * btree and lock the same leaf.
2558
+ */
2559
+ btrfs_release_path(path);
2560
+ temp_inode = btrfs_iget(sb, key2.objectid, root);
25012561 if (IS_ERR(temp_inode)) {
25022562 ret = PTR_ERR(temp_inode);
2503
- goto out;
2563
+ goto out_put;
25042564 }
25052565 ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
25062566 iput(temp_inode);
25072567 if (ret) {
25082568 ret = -EACCES;
2509
- goto out;
2569
+ goto out_put;
25102570 }
25112571
25122572 if (key.offset == upper_limit.objectid)
25132573 break;
25142574 if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
25152575 ret = -EACCES;
2516
- goto out;
2576
+ goto out_put;
25172577 }
25182578
2519
- btrfs_release_path(path);
25202579 key.objectid = key.offset;
25212580 key.offset = (u64)-1;
25222581 dirid = key.objectid;
....@@ -2524,15 +2583,16 @@
25242583
25252584 memmove(args->path, ptr, total_len);
25262585 args->path[total_len] = '\0';
2586
+ btrfs_put_root(root);
2587
+ root = NULL;
25272588 btrfs_release_path(path);
25282589 }
25292590
25302591 /* Get the bottom subvolume's name from ROOT_REF */
2531
- root = fs_info->tree_root;
25322592 key.objectid = treeid;
25332593 key.type = BTRFS_ROOT_REF_KEY;
25342594 key.offset = args->treeid;
2535
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2595
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
25362596 if (ret < 0) {
25372597 goto out;
25382598 } else if (ret > 0) {
....@@ -2559,6 +2619,8 @@
25592619 read_extent_buffer(leaf, args->name, item_off, item_len);
25602620 args->name[item_len] = 0;
25612621
2622
+out_put:
2623
+ btrfs_put_root(root);
25622624 out:
25632625 btrfs_free_path(path);
25642626 return ret;
....@@ -2681,12 +2743,10 @@
26812743
26822744 /* Get root_item of inode's subvolume */
26832745 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
2684
- key.type = BTRFS_ROOT_ITEM_KEY;
2685
- key.offset = (u64)-1;
2686
- root = btrfs_read_fs_root_no_name(fs_info, &key);
2746
+ root = btrfs_get_fs_root(fs_info, key.objectid, true);
26872747 if (IS_ERR(root)) {
26882748 ret = PTR_ERR(root);
2689
- goto out;
2749
+ goto out_free;
26902750 }
26912751 root_item = &root->root_item;
26922752
....@@ -2719,16 +2779,14 @@
27192779
27202780 if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
27212781 /* Search root tree for ROOT_BACKREF of this subvolume */
2722
- root = fs_info->tree_root;
2723
-
27242782 key.type = BTRFS_ROOT_BACKREF_KEY;
27252783 key.offset = 0;
2726
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2784
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
27272785 if (ret < 0) {
27282786 goto out;
27292787 } else if (path->slots[0] >=
27302788 btrfs_header_nritems(path->nodes[0])) {
2731
- ret = btrfs_next_leaf(root, path);
2789
+ ret = btrfs_next_leaf(fs_info->tree_root, path);
27322790 if (ret < 0) {
27332791 goto out;
27342792 } else if (ret > 0) {
....@@ -2759,12 +2817,16 @@
27592817 }
27602818 }
27612819
2820
+ btrfs_free_path(path);
2821
+ path = NULL;
27622822 if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
27632823 ret = -EFAULT;
27642824
27652825 out:
2826
+ btrfs_put_root(root);
2827
+out_free:
27662828 btrfs_free_path(path);
2767
- kzfree(subvol_info);
2829
+ kfree(subvol_info);
27682830 return ret;
27692831 }
27702832
....@@ -2849,6 +2911,8 @@
28492911 }
28502912
28512913 out:
2914
+ btrfs_free_path(path);
2915
+
28522916 if (!ret || ret == -EOVERFLOW) {
28532917 rootrefs->num_items = found;
28542918 /* update min_treeid for next search */
....@@ -2860,13 +2924,13 @@
28602924 }
28612925
28622926 kfree(rootrefs);
2863
- btrfs_free_path(path);
28642927
28652928 return ret;
28662929 }
28672930
28682931 static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2869
- void __user *arg)
2932
+ void __user *arg,
2933
+ bool destroy_v2)
28702934 {
28712935 struct dentry *parent = file->f_path.dentry;
28722936 struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
....@@ -2875,34 +2939,120 @@
28752939 struct inode *inode;
28762940 struct btrfs_root *root = BTRFS_I(dir)->root;
28772941 struct btrfs_root *dest = NULL;
2878
- struct btrfs_ioctl_vol_args *vol_args;
2879
- int namelen;
2942
+ struct btrfs_ioctl_vol_args *vol_args = NULL;
2943
+ struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
2944
+ char *subvol_name, *subvol_name_ptr = NULL;
2945
+ int subvol_namelen;
28802946 int err = 0;
2947
+ bool destroy_parent = false;
28812948
2882
- if (!S_ISDIR(dir->i_mode))
2883
- return -ENOTDIR;
2949
+ if (destroy_v2) {
2950
+ vol_args2 = memdup_user(arg, sizeof(*vol_args2));
2951
+ if (IS_ERR(vol_args2))
2952
+ return PTR_ERR(vol_args2);
28842953
2885
- vol_args = memdup_user(arg, sizeof(*vol_args));
2886
- if (IS_ERR(vol_args))
2887
- return PTR_ERR(vol_args);
2954
+ if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
2955
+ err = -EOPNOTSUPP;
2956
+ goto out;
2957
+ }
28882958
2889
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2890
- namelen = strlen(vol_args->name);
2891
- if (strchr(vol_args->name, '/') ||
2892
- strncmp(vol_args->name, "..", namelen) == 0) {
2893
- err = -EINVAL;
2894
- goto out;
2959
+ /*
2960
+ * If SPEC_BY_ID is not set, we are looking for the subvolume by
2961
+ * name, same as v1 currently does.
2962
+ */
2963
+ if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
2964
+ vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0;
2965
+ subvol_name = vol_args2->name;
2966
+
2967
+ err = mnt_want_write_file(file);
2968
+ if (err)
2969
+ goto out;
2970
+ } else {
2971
+ if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
2972
+ err = -EINVAL;
2973
+ goto out;
2974
+ }
2975
+
2976
+ err = mnt_want_write_file(file);
2977
+ if (err)
2978
+ goto out;
2979
+
2980
+ dentry = btrfs_get_dentry(fs_info->sb,
2981
+ BTRFS_FIRST_FREE_OBJECTID,
2982
+ vol_args2->subvolid, 0, 0);
2983
+ if (IS_ERR(dentry)) {
2984
+ err = PTR_ERR(dentry);
2985
+ goto out_drop_write;
2986
+ }
2987
+
2988
+ /*
2989
+ * Change the default parent since the subvolume being
2990
+ * deleted can be outside of the current mount point.
2991
+ */
2992
+ parent = btrfs_get_parent(dentry);
2993
+
2994
+ /*
2995
+ * At this point dentry->d_name can point to '/' if the
2996
+ * subvolume we want to destroy is outsite of the
2997
+ * current mount point, so we need to release the
2998
+ * current dentry and execute the lookup to return a new
2999
+ * one with ->d_name pointing to the
3000
+ * <mount point>/subvol_name.
3001
+ */
3002
+ dput(dentry);
3003
+ if (IS_ERR(parent)) {
3004
+ err = PTR_ERR(parent);
3005
+ goto out_drop_write;
3006
+ }
3007
+ dir = d_inode(parent);
3008
+
3009
+ /*
3010
+ * If v2 was used with SPEC_BY_ID, a new parent was
3011
+ * allocated since the subvolume can be outside of the
3012
+ * current mount point. Later on we need to release this
3013
+ * new parent dentry.
3014
+ */
3015
+ destroy_parent = true;
3016
+
3017
+ subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
3018
+ fs_info, vol_args2->subvolid);
3019
+ if (IS_ERR(subvol_name_ptr)) {
3020
+ err = PTR_ERR(subvol_name_ptr);
3021
+ goto free_parent;
3022
+ }
3023
+ /* subvol_name_ptr is already NULL termined */
3024
+ subvol_name = (char *)kbasename(subvol_name_ptr);
3025
+ }
3026
+ } else {
3027
+ vol_args = memdup_user(arg, sizeof(*vol_args));
3028
+ if (IS_ERR(vol_args))
3029
+ return PTR_ERR(vol_args);
3030
+
3031
+ vol_args->name[BTRFS_PATH_NAME_MAX] = 0;
3032
+ subvol_name = vol_args->name;
3033
+
3034
+ err = mnt_want_write_file(file);
3035
+ if (err)
3036
+ goto out;
28953037 }
28963038
2897
- err = mnt_want_write_file(file);
2898
- if (err)
2899
- goto out;
3039
+ subvol_namelen = strlen(subvol_name);
29003040
3041
+ if (strchr(subvol_name, '/') ||
3042
+ strncmp(subvol_name, "..", subvol_namelen) == 0) {
3043
+ err = -EINVAL;
3044
+ goto free_subvol_name;
3045
+ }
3046
+
3047
+ if (!S_ISDIR(dir->i_mode)) {
3048
+ err = -ENOTDIR;
3049
+ goto free_subvol_name;
3050
+ }
29013051
29023052 err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
29033053 if (err == -EINTR)
2904
- goto out_drop_write;
2905
- dentry = lookup_one_len(vol_args->name, parent, namelen);
3054
+ goto free_subvol_name;
3055
+ dentry = lookup_one_len(subvol_name, parent, subvol_namelen);
29063056 if (IS_ERR(dentry)) {
29073057 err = PTR_ERR(dentry);
29083058 goto out_unlock_dir;
....@@ -2963,15 +3113,21 @@
29633113 err = btrfs_delete_subvolume(dir, dentry);
29643114 inode_unlock(inode);
29653115 if (!err)
2966
- d_delete(dentry);
3116
+ d_delete_notify(dir, dentry);
29673117
29683118 out_dput:
29693119 dput(dentry);
29703120 out_unlock_dir:
29713121 inode_unlock(dir);
3122
+free_subvol_name:
3123
+ kfree(subvol_name_ptr);
3124
+free_parent:
3125
+ if (destroy_parent)
3126
+ dput(parent);
29723127 out_drop_write:
29733128 mnt_drop_write_file(file);
29743129 out:
3130
+ kfree(vol_args2);
29753131 kfree(vol_args);
29763132 return err;
29773133 }
....@@ -3056,7 +3212,7 @@
30563212 if (!capable(CAP_SYS_ADMIN))
30573213 return -EPERM;
30583214
3059
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
3215
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD))
30603216 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
30613217
30623218 vol_args = memdup_user(arg, sizeof(*vol_args));
....@@ -3073,7 +3229,7 @@
30733229
30743230 kfree(vol_args);
30753231 out:
3076
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
3232
+ btrfs_exclop_finish(fs_info);
30773233 return ret;
30783234 }
30793235
....@@ -3097,13 +3253,12 @@
30973253 goto err_drop;
30983254 }
30993255
3100
- /* Check for compatibility reject unknown flags */
3101
- if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
3256
+ if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
31023257 ret = -EOPNOTSUPP;
31033258 goto out;
31043259 }
31053260
3106
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
3261
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
31073262 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
31083263 goto out;
31093264 }
....@@ -3114,7 +3269,7 @@
31143269 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
31153270 ret = btrfs_rm_device(fs_info, vol_args->name, 0);
31163271 }
3117
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
3272
+ btrfs_exclop_finish(fs_info);
31183273
31193274 if (!ret) {
31203275 if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
....@@ -3145,7 +3300,7 @@
31453300 if (ret)
31463301 return ret;
31473302
3148
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
3303
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
31493304 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
31503305 goto out_drop_write;
31513306 }
....@@ -3163,7 +3318,7 @@
31633318 btrfs_info(fs_info, "disk deleted %s", vol_args->name);
31643319 kfree(vol_args);
31653320 out:
3166
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
3321
+ btrfs_exclop_finish(fs_info);
31673322 out_drop_write:
31683323 mnt_drop_write_file(file);
31693324
....@@ -3176,11 +3331,15 @@
31763331 struct btrfs_ioctl_fs_info_args *fi_args;
31773332 struct btrfs_device *device;
31783333 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
3334
+ u64 flags_in;
31793335 int ret = 0;
31803336
3181
- fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
3182
- if (!fi_args)
3183
- return -ENOMEM;
3337
+ fi_args = memdup_user(arg, sizeof(*fi_args));
3338
+ if (IS_ERR(fi_args))
3339
+ return PTR_ERR(fi_args);
3340
+
3341
+ flags_in = fi_args->flags;
3342
+ memset(fi_args, 0, sizeof(*fi_args));
31843343
31853344 rcu_read_lock();
31863345 fi_args->num_devices = fs_devices->num_devices;
....@@ -3191,10 +3350,27 @@
31913350 }
31923351 rcu_read_unlock();
31933352
3194
- memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
3353
+ memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
31953354 fi_args->nodesize = fs_info->nodesize;
31963355 fi_args->sectorsize = fs_info->sectorsize;
31973356 fi_args->clone_alignment = fs_info->sectorsize;
3357
+
3358
+ if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
3359
+ fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
3360
+ fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
3361
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
3362
+ }
3363
+
3364
+ if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
3365
+ fi_args->generation = fs_info->generation;
3366
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION;
3367
+ }
3368
+
3369
+ if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
3370
+ memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
3371
+ sizeof(fi_args->metadata_uuid));
3372
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID;
3373
+ }
31983374
31993375 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
32003376 ret = -EFAULT;
....@@ -3231,13 +3407,10 @@
32313407 di_args->bytes_used = btrfs_device_get_bytes_used(dev);
32323408 di_args->total_bytes = btrfs_device_get_total_bytes(dev);
32333409 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
3234
- if (dev->name) {
3235
- strncpy(di_args->path, rcu_str_deref(dev->name),
3236
- sizeof(di_args->path) - 1);
3237
- di_args->path[sizeof(di_args->path) - 1] = 0;
3238
- } else {
3410
+ if (dev->name)
3411
+ strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path));
3412
+ else
32393413 di_args->path[0] = '\0';
3240
- }
32413414
32423415 out:
32433416 rcu_read_unlock();
....@@ -3248,1183 +3421,6 @@
32483421 return ret;
32493422 }
32503423
3251
-static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
3252
-{
3253
- struct page *page;
3254
-
3255
- page = grab_cache_page(inode->i_mapping, index);
3256
- if (!page)
3257
- return ERR_PTR(-ENOMEM);
3258
-
3259
- if (!PageUptodate(page)) {
3260
- int ret;
3261
-
3262
- ret = btrfs_readpage(NULL, page);
3263
- if (ret)
3264
- return ERR_PTR(ret);
3265
- lock_page(page);
3266
- if (!PageUptodate(page)) {
3267
- unlock_page(page);
3268
- put_page(page);
3269
- return ERR_PTR(-EIO);
3270
- }
3271
- if (page->mapping != inode->i_mapping) {
3272
- unlock_page(page);
3273
- put_page(page);
3274
- return ERR_PTR(-EAGAIN);
3275
- }
3276
- }
3277
-
3278
- return page;
3279
-}
3280
-
3281
-static int gather_extent_pages(struct inode *inode, struct page **pages,
3282
- int num_pages, u64 off)
3283
-{
3284
- int i;
3285
- pgoff_t index = off >> PAGE_SHIFT;
3286
-
3287
- for (i = 0; i < num_pages; i++) {
3288
-again:
3289
- pages[i] = extent_same_get_page(inode, index + i);
3290
- if (IS_ERR(pages[i])) {
3291
- int err = PTR_ERR(pages[i]);
3292
-
3293
- if (err == -EAGAIN)
3294
- goto again;
3295
- pages[i] = NULL;
3296
- return err;
3297
- }
3298
- }
3299
- return 0;
3300
-}
3301
-
3302
-static int lock_extent_range(struct inode *inode, u64 off, u64 len,
3303
- bool retry_range_locking)
3304
-{
3305
- /*
3306
- * Do any pending delalloc/csum calculations on inode, one way or
3307
- * another, and lock file content.
3308
- * The locking order is:
3309
- *
3310
- * 1) pages
3311
- * 2) range in the inode's io tree
3312
- */
3313
- while (1) {
3314
- struct btrfs_ordered_extent *ordered;
3315
- lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
3316
- ordered = btrfs_lookup_first_ordered_extent(inode,
3317
- off + len - 1);
3318
- if ((!ordered ||
3319
- ordered->file_offset + ordered->len <= off ||
3320
- ordered->file_offset >= off + len) &&
3321
- !test_range_bit(&BTRFS_I(inode)->io_tree, off,
3322
- off + len - 1, EXTENT_DELALLOC, 0, NULL)) {
3323
- if (ordered)
3324
- btrfs_put_ordered_extent(ordered);
3325
- break;
3326
- }
3327
- unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
3328
- if (ordered)
3329
- btrfs_put_ordered_extent(ordered);
3330
- if (!retry_range_locking)
3331
- return -EAGAIN;
3332
- btrfs_wait_ordered_range(inode, off, len);
3333
- }
3334
- return 0;
3335
-}
3336
-
3337
-static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
3338
-{
3339
- inode_unlock(inode1);
3340
- inode_unlock(inode2);
3341
-}
3342
-
3343
-static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
3344
-{
3345
- if (inode1 < inode2)
3346
- swap(inode1, inode2);
3347
-
3348
- inode_lock_nested(inode1, I_MUTEX_PARENT);
3349
- inode_lock_nested(inode2, I_MUTEX_CHILD);
3350
-}
3351
-
3352
-static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
3353
- struct inode *inode2, u64 loff2, u64 len)
3354
-{
3355
- unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
3356
- unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
3357
-}
3358
-
3359
-static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
3360
- struct inode *inode2, u64 loff2, u64 len,
3361
- bool retry_range_locking)
3362
-{
3363
- int ret;
3364
-
3365
- if (inode1 < inode2) {
3366
- swap(inode1, inode2);
3367
- swap(loff1, loff2);
3368
- }
3369
- ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
3370
- if (ret)
3371
- return ret;
3372
- ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
3373
- if (ret)
3374
- unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
3375
- loff1 + len - 1);
3376
- return ret;
3377
-}
3378
-
3379
-struct cmp_pages {
3380
- int num_pages;
3381
- struct page **src_pages;
3382
- struct page **dst_pages;
3383
-};
3384
-
3385
-static void btrfs_cmp_data_free(struct cmp_pages *cmp)
3386
-{
3387
- int i;
3388
- struct page *pg;
3389
-
3390
- for (i = 0; i < cmp->num_pages; i++) {
3391
- pg = cmp->src_pages[i];
3392
- if (pg) {
3393
- unlock_page(pg);
3394
- put_page(pg);
3395
- cmp->src_pages[i] = NULL;
3396
- }
3397
- pg = cmp->dst_pages[i];
3398
- if (pg) {
3399
- unlock_page(pg);
3400
- put_page(pg);
3401
- cmp->dst_pages[i] = NULL;
3402
- }
3403
- }
3404
-}
3405
-
3406
-static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
3407
- struct inode *dst, u64 dst_loff,
3408
- u64 len, struct cmp_pages *cmp)
3409
-{
3410
- int ret;
3411
- int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
3412
-
3413
- cmp->num_pages = num_pages;
3414
-
3415
- ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
3416
- if (ret)
3417
- goto out;
3418
-
3419
- ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
3420
-
3421
-out:
3422
- if (ret)
3423
- btrfs_cmp_data_free(cmp);
3424
- return ret;
3425
-}
3426
-
3427
-static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
3428
-{
3429
- int ret = 0;
3430
- int i;
3431
- struct page *src_page, *dst_page;
3432
- unsigned int cmp_len = PAGE_SIZE;
3433
- void *addr, *dst_addr;
3434
-
3435
- i = 0;
3436
- while (len) {
3437
- if (len < PAGE_SIZE)
3438
- cmp_len = len;
3439
-
3440
- BUG_ON(i >= cmp->num_pages);
3441
-
3442
- src_page = cmp->src_pages[i];
3443
- dst_page = cmp->dst_pages[i];
3444
- ASSERT(PageLocked(src_page));
3445
- ASSERT(PageLocked(dst_page));
3446
-
3447
- addr = kmap_atomic(src_page);
3448
- dst_addr = kmap_atomic(dst_page);
3449
-
3450
- flush_dcache_page(src_page);
3451
- flush_dcache_page(dst_page);
3452
-
3453
- if (memcmp(addr, dst_addr, cmp_len))
3454
- ret = -EBADE;
3455
-
3456
- kunmap_atomic(addr);
3457
- kunmap_atomic(dst_addr);
3458
-
3459
- if (ret)
3460
- break;
3461
-
3462
- len -= cmp_len;
3463
- i++;
3464
- }
3465
-
3466
- return ret;
3467
-}
3468
-
3469
-static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
3470
- u64 olen)
3471
-{
3472
- u64 len = *plen;
3473
- u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
3474
-
3475
- if (off + olen > inode->i_size || off + olen < off)
3476
- return -EINVAL;
3477
-
3478
- /* if we extend to eof, continue to block boundary */
3479
- if (off + len == inode->i_size)
3480
- *plen = len = ALIGN(inode->i_size, bs) - off;
3481
-
3482
- /* Check that we are block aligned - btrfs_clone() requires this */
3483
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
3484
- return -EINVAL;
3485
-
3486
- return 0;
3487
-}
3488
-
3489
-static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
3490
- struct inode *dst, u64 dst_loff,
3491
- struct cmp_pages *cmp)
3492
-{
3493
- int ret;
3494
- u64 len = olen;
3495
- bool same_inode = (src == dst);
3496
- u64 same_lock_start = 0;
3497
- u64 same_lock_len = 0;
3498
-
3499
- ret = extent_same_check_offsets(src, loff, &len, olen);
3500
- if (ret)
3501
- return ret;
3502
-
3503
- ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
3504
- if (ret)
3505
- return ret;
3506
-
3507
- if (same_inode) {
3508
- /*
3509
- * Single inode case wants the same checks, except we
3510
- * don't want our length pushed out past i_size as
3511
- * comparing that data range makes no sense.
3512
- *
3513
- * extent_same_check_offsets() will do this for an
3514
- * unaligned length at i_size, so catch it here and
3515
- * reject the request.
3516
- *
3517
- * This effectively means we require aligned extents
3518
- * for the single-inode case, whereas the other cases
3519
- * allow an unaligned length so long as it ends at
3520
- * i_size.
3521
- */
3522
- if (len != olen)
3523
- return -EINVAL;
3524
-
3525
- /* Check for overlapping ranges */
3526
- if (dst_loff + len > loff && dst_loff < loff + len)
3527
- return -EINVAL;
3528
-
3529
- same_lock_start = min_t(u64, loff, dst_loff);
3530
- same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
3531
- } else {
3532
- /*
3533
- * If the source and destination inodes are different, the
3534
- * source's range end offset matches the source's i_size, that
3535
- * i_size is not a multiple of the sector size, and the
3536
- * destination range does not go past the destination's i_size,
3537
- * we must round down the length to the nearest sector size
3538
- * multiple. If we don't do this adjustment we end replacing
3539
- * with zeroes the bytes in the range that starts at the
3540
- * deduplication range's end offset and ends at the next sector
3541
- * size multiple.
3542
- */
3543
- if (loff + olen == i_size_read(src) &&
3544
- dst_loff + len < i_size_read(dst)) {
3545
- const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
3546
-
3547
- len = round_down(i_size_read(src), sz) - loff;
3548
- if (len == 0)
3549
- return 0;
3550
- olen = len;
3551
- }
3552
- }
3553
-
3554
-again:
3555
- ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
3556
- if (ret)
3557
- return ret;
3558
-
3559
- if (same_inode)
3560
- ret = lock_extent_range(src, same_lock_start, same_lock_len,
3561
- false);
3562
- else
3563
- ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
3564
- false);
3565
- /*
3566
- * If one of the inodes has dirty pages in the respective range or
3567
- * ordered extents, we need to flush dellaloc and wait for all ordered
3568
- * extents in the range. We must unlock the pages and the ranges in the
3569
- * io trees to avoid deadlocks when flushing delalloc (requires locking
3570
- * pages) and when waiting for ordered extents to complete (they require
3571
- * range locking).
3572
- */
3573
- if (ret == -EAGAIN) {
3574
- /*
3575
- * Ranges in the io trees already unlocked. Now unlock all
3576
- * pages before waiting for all IO to complete.
3577
- */
3578
- btrfs_cmp_data_free(cmp);
3579
- if (same_inode) {
3580
- btrfs_wait_ordered_range(src, same_lock_start,
3581
- same_lock_len);
3582
- } else {
3583
- btrfs_wait_ordered_range(src, loff, len);
3584
- btrfs_wait_ordered_range(dst, dst_loff, len);
3585
- }
3586
- goto again;
3587
- }
3588
- ASSERT(ret == 0);
3589
- if (WARN_ON(ret)) {
3590
- /* ranges in the io trees already unlocked */
3591
- btrfs_cmp_data_free(cmp);
3592
- return ret;
3593
- }
3594
-
3595
- /* pass original length for comparison so we stay within i_size */
3596
- ret = btrfs_cmp_data(olen, cmp);
3597
- if (ret == 0)
3598
- ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
3599
-
3600
- if (same_inode)
3601
- unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
3602
- same_lock_start + same_lock_len - 1);
3603
- else
3604
- btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
3605
-
3606
- btrfs_cmp_data_free(cmp);
3607
-
3608
- return ret;
3609
-}
3610
-
3611
-#define BTRFS_MAX_DEDUPE_LEN SZ_16M
3612
-
3613
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
3614
- struct inode *dst, u64 dst_loff)
3615
-{
3616
- int ret;
3617
- struct cmp_pages cmp;
3618
- int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
3619
- bool same_inode = (src == dst);
3620
- u64 i, tail_len, chunk_count;
3621
-
3622
- if (olen == 0)
3623
- return 0;
3624
-
3625
- if (same_inode)
3626
- inode_lock(src);
3627
- else
3628
- btrfs_double_inode_lock(src, dst);
3629
-
3630
- /* don't make the dst file partly checksummed */
3631
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
3632
- (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
3633
- ret = -EINVAL;
3634
- goto out_unlock;
3635
- }
3636
-
3637
- tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
3638
- chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
3639
- if (chunk_count == 0)
3640
- num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
3641
-
3642
- /*
3643
- * If deduping ranges in the same inode, locking rules make it
3644
- * mandatory to always lock pages in ascending order to avoid deadlocks
3645
- * with concurrent tasks (such as starting writeback/delalloc).
3646
- */
3647
- if (same_inode && dst_loff < loff)
3648
- swap(loff, dst_loff);
3649
-
3650
- /*
3651
- * We must gather up all the pages before we initiate our extent
3652
- * locking. We use an array for the page pointers. Size of the array is
3653
- * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN.
3654
- */
3655
- cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
3656
- GFP_KERNEL | __GFP_ZERO);
3657
- cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
3658
- GFP_KERNEL | __GFP_ZERO);
3659
- if (!cmp.src_pages || !cmp.dst_pages) {
3660
- ret = -ENOMEM;
3661
- goto out_free;
3662
- }
3663
-
3664
- for (i = 0; i < chunk_count; i++) {
3665
- ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
3666
- dst, dst_loff, &cmp);
3667
- if (ret)
3668
- goto out_free;
3669
-
3670
- loff += BTRFS_MAX_DEDUPE_LEN;
3671
- dst_loff += BTRFS_MAX_DEDUPE_LEN;
3672
- }
3673
-
3674
- if (tail_len > 0)
3675
- ret = btrfs_extent_same_range(src, loff, tail_len, dst,
3676
- dst_loff, &cmp);
3677
-
3678
-out_free:
3679
- kvfree(cmp.src_pages);
3680
- kvfree(cmp.dst_pages);
3681
-
3682
-out_unlock:
3683
- if (same_inode)
3684
- inode_unlock(src);
3685
- else
3686
- btrfs_double_inode_unlock(src, dst);
3687
-
3688
- return ret;
3689
-}
3690
-
3691
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
3692
- struct file *dst_file, loff_t dst_loff,
3693
- u64 olen)
3694
-{
3695
- struct inode *src = file_inode(src_file);
3696
- struct inode *dst = file_inode(dst_file);
3697
- u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
3698
-
3699
- if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
3700
- /*
3701
- * Btrfs does not support blocksize < page_size. As a
3702
- * result, btrfs_cmp_data() won't correctly handle
3703
- * this situation without an update.
3704
- */
3705
- return -EINVAL;
3706
- }
3707
-
3708
- return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
3709
-}
3710
-
3711
-static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
3712
- struct inode *inode,
3713
- u64 endoff,
3714
- const u64 destoff,
3715
- const u64 olen,
3716
- int no_time_update)
3717
-{
3718
- struct btrfs_root *root = BTRFS_I(inode)->root;
3719
- int ret;
3720
-
3721
- inode_inc_iversion(inode);
3722
- if (!no_time_update)
3723
- inode->i_mtime = inode->i_ctime = current_time(inode);
3724
- /*
3725
- * We round up to the block size at eof when determining which
3726
- * extents to clone above, but shouldn't round up the file size.
3727
- */
3728
- if (endoff > destoff + olen)
3729
- endoff = destoff + olen;
3730
- if (endoff > inode->i_size)
3731
- btrfs_i_size_write(BTRFS_I(inode), endoff);
3732
-
3733
- ret = btrfs_update_inode(trans, root, inode);
3734
- if (ret) {
3735
- btrfs_abort_transaction(trans, ret);
3736
- btrfs_end_transaction(trans);
3737
- goto out;
3738
- }
3739
- ret = btrfs_end_transaction(trans);
3740
-out:
3741
- return ret;
3742
-}
3743
-
3744
-static void clone_update_extent_map(struct btrfs_inode *inode,
3745
- const struct btrfs_trans_handle *trans,
3746
- const struct btrfs_path *path,
3747
- const u64 hole_offset,
3748
- const u64 hole_len)
3749
-{
3750
- struct extent_map_tree *em_tree = &inode->extent_tree;
3751
- struct extent_map *em;
3752
- int ret;
3753
-
3754
- em = alloc_extent_map();
3755
- if (!em) {
3756
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
3757
- return;
3758
- }
3759
-
3760
- if (path) {
3761
- struct btrfs_file_extent_item *fi;
3762
-
3763
- fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3764
- struct btrfs_file_extent_item);
3765
- btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
3766
- em->generation = -1;
3767
- if (btrfs_file_extent_type(path->nodes[0], fi) ==
3768
- BTRFS_FILE_EXTENT_INLINE)
3769
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3770
- &inode->runtime_flags);
3771
- } else {
3772
- em->start = hole_offset;
3773
- em->len = hole_len;
3774
- em->ram_bytes = em->len;
3775
- em->orig_start = hole_offset;
3776
- em->block_start = EXTENT_MAP_HOLE;
3777
- em->block_len = 0;
3778
- em->orig_block_len = 0;
3779
- em->compress_type = BTRFS_COMPRESS_NONE;
3780
- em->generation = trans->transid;
3781
- }
3782
-
3783
- while (1) {
3784
- write_lock(&em_tree->lock);
3785
- ret = add_extent_mapping(em_tree, em, 1);
3786
- write_unlock(&em_tree->lock);
3787
- if (ret != -EEXIST) {
3788
- free_extent_map(em);
3789
- break;
3790
- }
3791
- btrfs_drop_extent_cache(inode, em->start,
3792
- em->start + em->len - 1, 0);
3793
- }
3794
-
3795
- if (ret)
3796
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
3797
-}
3798
-
3799
-/*
3800
- * Make sure we do not end up inserting an inline extent into a file that has
3801
- * already other (non-inline) extents. If a file has an inline extent it can
3802
- * not have any other extents and the (single) inline extent must start at the
3803
- * file offset 0. Failing to respect these rules will lead to file corruption,
3804
- * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
3805
- *
3806
- * We can have extents that have been already written to disk or we can have
3807
- * dirty ranges still in delalloc, in which case the extent maps and items are
3808
- * created only when we run delalloc, and the delalloc ranges might fall outside
3809
- * the range we are currently locking in the inode's io tree. So we check the
3810
- * inode's i_size because of that (i_size updates are done while holding the
3811
- * i_mutex, which we are holding here).
3812
- * We also check to see if the inode has a size not greater than "datal" but has
3813
- * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
3814
- * protected against such concurrent fallocate calls by the i_mutex).
3815
- *
3816
- * If the file has no extents but a size greater than datal, do not allow the
3817
- * copy because we would need turn the inline extent into a non-inline one (even
3818
- * with NO_HOLES enabled). If we find our destination inode only has one inline
3819
- * extent, just overwrite it with the source inline extent if its size is less
3820
- * than the source extent's size, or we could copy the source inline extent's
3821
- * data into the destination inode's inline extent if the later is greater then
3822
- * the former.
3823
- */
3824
-static int clone_copy_inline_extent(struct inode *dst,
3825
- struct btrfs_trans_handle *trans,
3826
- struct btrfs_path *path,
3827
- struct btrfs_key *new_key,
3828
- const u64 drop_start,
3829
- const u64 datal,
3830
- const u64 skip,
3831
- const u64 size,
3832
- char *inline_data)
3833
-{
3834
- struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
3835
- struct btrfs_root *root = BTRFS_I(dst)->root;
3836
- const u64 aligned_end = ALIGN(new_key->offset + datal,
3837
- fs_info->sectorsize);
3838
- int ret;
3839
- struct btrfs_key key;
3840
-
3841
- if (new_key->offset > 0)
3842
- return -EOPNOTSUPP;
3843
-
3844
- key.objectid = btrfs_ino(BTRFS_I(dst));
3845
- key.type = BTRFS_EXTENT_DATA_KEY;
3846
- key.offset = 0;
3847
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3848
- if (ret < 0) {
3849
- return ret;
3850
- } else if (ret > 0) {
3851
- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3852
- ret = btrfs_next_leaf(root, path);
3853
- if (ret < 0)
3854
- return ret;
3855
- else if (ret > 0)
3856
- goto copy_inline_extent;
3857
- }
3858
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3859
- if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
3860
- key.type == BTRFS_EXTENT_DATA_KEY) {
3861
- ASSERT(key.offset > 0);
3862
- return -EOPNOTSUPP;
3863
- }
3864
- } else if (i_size_read(dst) <= datal) {
3865
- struct btrfs_file_extent_item *ei;
3866
- u64 ext_len;
3867
-
3868
- /*
3869
- * If the file size is <= datal, make sure there are no other
3870
- * extents following (can happen do to an fallocate call with
3871
- * the flag FALLOC_FL_KEEP_SIZE).
3872
- */
3873
- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3874
- struct btrfs_file_extent_item);
3875
- /*
3876
- * If it's an inline extent, it can not have other extents
3877
- * following it.
3878
- */
3879
- if (btrfs_file_extent_type(path->nodes[0], ei) ==
3880
- BTRFS_FILE_EXTENT_INLINE)
3881
- goto copy_inline_extent;
3882
-
3883
- ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
3884
- if (ext_len > aligned_end)
3885
- return -EOPNOTSUPP;
3886
-
3887
- ret = btrfs_next_item(root, path);
3888
- if (ret < 0) {
3889
- return ret;
3890
- } else if (ret == 0) {
3891
- btrfs_item_key_to_cpu(path->nodes[0], &key,
3892
- path->slots[0]);
3893
- if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
3894
- key.type == BTRFS_EXTENT_DATA_KEY)
3895
- return -EOPNOTSUPP;
3896
- }
3897
- }
3898
-
3899
-copy_inline_extent:
3900
- /*
3901
- * We have no extent items, or we have an extent at offset 0 which may
3902
- * or may not be inlined. All these cases are dealt the same way.
3903
- */
3904
- if (i_size_read(dst) > datal) {
3905
- /*
3906
- * If the destination inode has an inline extent...
3907
- * This would require copying the data from the source inline
3908
- * extent into the beginning of the destination's inline extent.
3909
- * But this is really complex, both extents can be compressed
3910
- * or just one of them, which would require decompressing and
3911
- * re-compressing data (which could increase the new compressed
3912
- * size, not allowing the compressed data to fit anymore in an
3913
- * inline extent).
3914
- * So just don't support this case for now (it should be rare,
3915
- * we are not really saving space when cloning inline extents).
3916
- */
3917
- return -EOPNOTSUPP;
3918
- }
3919
-
3920
- btrfs_release_path(path);
3921
- ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
3922
- if (ret)
3923
- return ret;
3924
- ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
3925
- if (ret)
3926
- return ret;
3927
-
3928
- if (skip) {
3929
- const u32 start = btrfs_file_extent_calc_inline_size(0);
3930
-
3931
- memmove(inline_data + start, inline_data + start + skip, datal);
3932
- }
3933
-
3934
- write_extent_buffer(path->nodes[0], inline_data,
3935
- btrfs_item_ptr_offset(path->nodes[0],
3936
- path->slots[0]),
3937
- size);
3938
- inode_add_bytes(dst, datal);
3939
-
3940
- return 0;
3941
-}
3942
-
3943
-/**
3944
- * btrfs_clone() - clone a range from inode file to another
3945
- *
3946
- * @src: Inode to clone from
3947
- * @inode: Inode to clone to
3948
- * @off: Offset within source to start clone from
3949
- * @olen: Original length, passed by user, of range to clone
3950
- * @olen_aligned: Block-aligned value of olen
3951
- * @destoff: Offset within @inode to start clone
3952
- * @no_time_update: Whether to update mtime/ctime on the target inode
3953
- */
3954
-static int btrfs_clone(struct inode *src, struct inode *inode,
3955
- const u64 off, const u64 olen, const u64 olen_aligned,
3956
- const u64 destoff, int no_time_update)
3957
-{
3958
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3959
- struct btrfs_root *root = BTRFS_I(inode)->root;
3960
- struct btrfs_path *path = NULL;
3961
- struct extent_buffer *leaf;
3962
- struct btrfs_trans_handle *trans;
3963
- char *buf = NULL;
3964
- struct btrfs_key key;
3965
- u32 nritems;
3966
- int slot;
3967
- int ret;
3968
- const u64 len = olen_aligned;
3969
- u64 last_dest_end = destoff;
3970
-
3971
- ret = -ENOMEM;
3972
- buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
3973
- if (!buf)
3974
- return ret;
3975
-
3976
- path = btrfs_alloc_path();
3977
- if (!path) {
3978
- kvfree(buf);
3979
- return ret;
3980
- }
3981
-
3982
- path->reada = READA_FORWARD;
3983
- /* clone data */
3984
- key.objectid = btrfs_ino(BTRFS_I(src));
3985
- key.type = BTRFS_EXTENT_DATA_KEY;
3986
- key.offset = off;
3987
-
3988
- while (1) {
3989
- u64 next_key_min_offset = key.offset + 1;
3990
-
3991
- /*
3992
- * note the key will change type as we walk through the
3993
- * tree.
3994
- */
3995
- path->leave_spinning = 1;
3996
- ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
3997
- 0, 0);
3998
- if (ret < 0)
3999
- goto out;
4000
- /*
4001
- * First search, if no extent item that starts at offset off was
4002
- * found but the previous item is an extent item, it's possible
4003
- * it might overlap our target range, therefore process it.
4004
- */
4005
- if (key.offset == off && ret > 0 && path->slots[0] > 0) {
4006
- btrfs_item_key_to_cpu(path->nodes[0], &key,
4007
- path->slots[0] - 1);
4008
- if (key.type == BTRFS_EXTENT_DATA_KEY)
4009
- path->slots[0]--;
4010
- }
4011
-
4012
- nritems = btrfs_header_nritems(path->nodes[0]);
4013
-process_slot:
4014
- if (path->slots[0] >= nritems) {
4015
- ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
4016
- if (ret < 0)
4017
- goto out;
4018
- if (ret > 0)
4019
- break;
4020
- nritems = btrfs_header_nritems(path->nodes[0]);
4021
- }
4022
- leaf = path->nodes[0];
4023
- slot = path->slots[0];
4024
-
4025
- btrfs_item_key_to_cpu(leaf, &key, slot);
4026
- if (key.type > BTRFS_EXTENT_DATA_KEY ||
4027
- key.objectid != btrfs_ino(BTRFS_I(src)))
4028
- break;
4029
-
4030
- if (key.type == BTRFS_EXTENT_DATA_KEY) {
4031
- struct btrfs_file_extent_item *extent;
4032
- int type;
4033
- u32 size;
4034
- struct btrfs_key new_key;
4035
- u64 disko = 0, diskl = 0;
4036
- u64 datao = 0, datal = 0;
4037
- u8 comp;
4038
- u64 drop_start;
4039
-
4040
- extent = btrfs_item_ptr(leaf, slot,
4041
- struct btrfs_file_extent_item);
4042
- comp = btrfs_file_extent_compression(leaf, extent);
4043
- type = btrfs_file_extent_type(leaf, extent);
4044
- if (type == BTRFS_FILE_EXTENT_REG ||
4045
- type == BTRFS_FILE_EXTENT_PREALLOC) {
4046
- disko = btrfs_file_extent_disk_bytenr(leaf,
4047
- extent);
4048
- diskl = btrfs_file_extent_disk_num_bytes(leaf,
4049
- extent);
4050
- datao = btrfs_file_extent_offset(leaf, extent);
4051
- datal = btrfs_file_extent_num_bytes(leaf,
4052
- extent);
4053
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
4054
- /* take upper bound, may be compressed */
4055
- datal = btrfs_file_extent_ram_bytes(leaf,
4056
- extent);
4057
- }
4058
-
4059
- /*
4060
- * The first search might have left us at an extent
4061
- * item that ends before our target range's start, can
4062
- * happen if we have holes and NO_HOLES feature enabled.
4063
- */
4064
- if (key.offset + datal <= off) {
4065
- path->slots[0]++;
4066
- goto process_slot;
4067
- } else if (key.offset >= off + len) {
4068
- break;
4069
- }
4070
- next_key_min_offset = key.offset + datal;
4071
- size = btrfs_item_size_nr(leaf, slot);
4072
- read_extent_buffer(leaf, buf,
4073
- btrfs_item_ptr_offset(leaf, slot),
4074
- size);
4075
-
4076
- btrfs_release_path(path);
4077
- path->leave_spinning = 0;
4078
-
4079
- memcpy(&new_key, &key, sizeof(new_key));
4080
- new_key.objectid = btrfs_ino(BTRFS_I(inode));
4081
- if (off <= key.offset)
4082
- new_key.offset = key.offset + destoff - off;
4083
- else
4084
- new_key.offset = destoff;
4085
-
4086
- /*
4087
- * Deal with a hole that doesn't have an extent item
4088
- * that represents it (NO_HOLES feature enabled).
4089
- * This hole is either in the middle of the cloning
4090
- * range or at the beginning (fully overlaps it or
4091
- * partially overlaps it).
4092
- */
4093
- if (new_key.offset != last_dest_end)
4094
- drop_start = last_dest_end;
4095
- else
4096
- drop_start = new_key.offset;
4097
-
4098
- /*
4099
- * 1 - adjusting old extent (we may have to split it)
4100
- * 1 - add new extent
4101
- * 1 - inode update
4102
- */
4103
- trans = btrfs_start_transaction(root, 3);
4104
- if (IS_ERR(trans)) {
4105
- ret = PTR_ERR(trans);
4106
- goto out;
4107
- }
4108
-
4109
- if (type == BTRFS_FILE_EXTENT_REG ||
4110
- type == BTRFS_FILE_EXTENT_PREALLOC) {
4111
- /*
4112
- * a | --- range to clone ---| b
4113
- * | ------------- extent ------------- |
4114
- */
4115
-
4116
- /* subtract range b */
4117
- if (key.offset + datal > off + len)
4118
- datal = off + len - key.offset;
4119
-
4120
- /* subtract range a */
4121
- if (off > key.offset) {
4122
- datao += off - key.offset;
4123
- datal -= off - key.offset;
4124
- }
4125
-
4126
- ret = btrfs_drop_extents(trans, root, inode,
4127
- drop_start,
4128
- new_key.offset + datal,
4129
- 1);
4130
- if (ret) {
4131
- if (ret != -EOPNOTSUPP)
4132
- btrfs_abort_transaction(trans,
4133
- ret);
4134
- btrfs_end_transaction(trans);
4135
- goto out;
4136
- }
4137
-
4138
- ret = btrfs_insert_empty_item(trans, root, path,
4139
- &new_key, size);
4140
- if (ret) {
4141
- btrfs_abort_transaction(trans, ret);
4142
- btrfs_end_transaction(trans);
4143
- goto out;
4144
- }
4145
-
4146
- leaf = path->nodes[0];
4147
- slot = path->slots[0];
4148
- write_extent_buffer(leaf, buf,
4149
- btrfs_item_ptr_offset(leaf, slot),
4150
- size);
4151
-
4152
- extent = btrfs_item_ptr(leaf, slot,
4153
- struct btrfs_file_extent_item);
4154
-
4155
- /* disko == 0 means it's a hole */
4156
- if (!disko)
4157
- datao = 0;
4158
-
4159
- btrfs_set_file_extent_offset(leaf, extent,
4160
- datao);
4161
- btrfs_set_file_extent_num_bytes(leaf, extent,
4162
- datal);
4163
-
4164
- if (disko) {
4165
- inode_add_bytes(inode, datal);
4166
- ret = btrfs_inc_extent_ref(trans,
4167
- root,
4168
- disko, diskl, 0,
4169
- root->root_key.objectid,
4170
- btrfs_ino(BTRFS_I(inode)),
4171
- new_key.offset - datao);
4172
- if (ret) {
4173
- btrfs_abort_transaction(trans,
4174
- ret);
4175
- btrfs_end_transaction(trans);
4176
- goto out;
4177
-
4178
- }
4179
- }
4180
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
4181
- u64 skip = 0;
4182
- u64 trim = 0;
4183
-
4184
- if (off > key.offset) {
4185
- skip = off - key.offset;
4186
- new_key.offset += skip;
4187
- }
4188
-
4189
- if (key.offset + datal > off + len)
4190
- trim = key.offset + datal - (off + len);
4191
-
4192
- if (comp && (skip || trim)) {
4193
- ret = -EINVAL;
4194
- btrfs_end_transaction(trans);
4195
- goto out;
4196
- }
4197
- size -= skip + trim;
4198
- datal -= skip + trim;
4199
-
4200
- ret = clone_copy_inline_extent(inode,
4201
- trans, path,
4202
- &new_key,
4203
- drop_start,
4204
- datal,
4205
- skip, size, buf);
4206
- if (ret) {
4207
- if (ret != -EOPNOTSUPP)
4208
- btrfs_abort_transaction(trans,
4209
- ret);
4210
- btrfs_end_transaction(trans);
4211
- goto out;
4212
- }
4213
- leaf = path->nodes[0];
4214
- slot = path->slots[0];
4215
- }
4216
-
4217
- /* If we have an implicit hole (NO_HOLES feature). */
4218
- if (drop_start < new_key.offset)
4219
- clone_update_extent_map(BTRFS_I(inode), trans,
4220
- NULL, drop_start,
4221
- new_key.offset - drop_start);
4222
-
4223
- clone_update_extent_map(BTRFS_I(inode), trans,
4224
- path, 0, 0);
4225
-
4226
- btrfs_mark_buffer_dirty(leaf);
4227
- btrfs_release_path(path);
4228
-
4229
- last_dest_end = ALIGN(new_key.offset + datal,
4230
- fs_info->sectorsize);
4231
- ret = clone_finish_inode_update(trans, inode,
4232
- last_dest_end,
4233
- destoff, olen,
4234
- no_time_update);
4235
- if (ret)
4236
- goto out;
4237
- if (new_key.offset + datal >= destoff + len)
4238
- break;
4239
- }
4240
- btrfs_release_path(path);
4241
- key.offset = next_key_min_offset;
4242
-
4243
- if (fatal_signal_pending(current)) {
4244
- ret = -EINTR;
4245
- goto out;
4246
- }
4247
-
4248
- cond_resched();
4249
- }
4250
- ret = 0;
4251
-
4252
- if (last_dest_end < destoff + len) {
4253
- /*
4254
- * We have an implicit hole (NO_HOLES feature is enabled) that
4255
- * fully or partially overlaps our cloning range at its end.
4256
- */
4257
- btrfs_release_path(path);
4258
-
4259
- /*
4260
- * 1 - remove extent(s)
4261
- * 1 - inode update
4262
- */
4263
- trans = btrfs_start_transaction(root, 2);
4264
- if (IS_ERR(trans)) {
4265
- ret = PTR_ERR(trans);
4266
- goto out;
4267
- }
4268
- ret = btrfs_drop_extents(trans, root, inode,
4269
- last_dest_end, destoff + len, 1);
4270
- if (ret) {
4271
- if (ret != -EOPNOTSUPP)
4272
- btrfs_abort_transaction(trans, ret);
4273
- btrfs_end_transaction(trans);
4274
- goto out;
4275
- }
4276
- clone_update_extent_map(BTRFS_I(inode), trans, NULL,
4277
- last_dest_end,
4278
- destoff + len - last_dest_end);
4279
- ret = clone_finish_inode_update(trans, inode, destoff + len,
4280
- destoff, olen, no_time_update);
4281
- }
4282
-
4283
-out:
4284
- btrfs_free_path(path);
4285
- kvfree(buf);
4286
- return ret;
4287
-}
4288
-
4289
-static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
4290
- u64 off, u64 olen, u64 destoff)
4291
-{
4292
- struct inode *inode = file_inode(file);
4293
- struct inode *src = file_inode(file_src);
4294
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4295
- struct btrfs_root *root = BTRFS_I(inode)->root;
4296
- int ret;
4297
- u64 len = olen;
4298
- u64 bs = fs_info->sb->s_blocksize;
4299
- int same_inode = src == inode;
4300
-
4301
- /*
4302
- * TODO:
4303
- * - split compressed inline extents. annoying: we need to
4304
- * decompress into destination's address_space (the file offset
4305
- * may change, so source mapping won't do), then recompress (or
4306
- * otherwise reinsert) a subrange.
4307
- *
4308
- * - split destination inode's inline extents. The inline extents can
4309
- * be either compressed or non-compressed.
4310
- */
4311
-
4312
- if (btrfs_root_readonly(root))
4313
- return -EROFS;
4314
-
4315
- if (file_src->f_path.mnt != file->f_path.mnt ||
4316
- src->i_sb != inode->i_sb)
4317
- return -EXDEV;
4318
-
4319
- if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
4320
- return -EISDIR;
4321
-
4322
- if (!same_inode) {
4323
- btrfs_double_inode_lock(src, inode);
4324
- } else {
4325
- inode_lock(src);
4326
- }
4327
-
4328
- /* don't make the dst file partly checksummed */
4329
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
4330
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
4331
- ret = -EINVAL;
4332
- goto out_unlock;
4333
- }
4334
-
4335
- /* determine range to clone */
4336
- ret = -EINVAL;
4337
- if (off + len > src->i_size || off + len < off)
4338
- goto out_unlock;
4339
- if (len == 0)
4340
- olen = len = src->i_size - off;
4341
- /*
4342
- * If we extend to eof, continue to block boundary if and only if the
4343
- * destination end offset matches the destination file's size, otherwise
4344
- * we would be corrupting data by placing the eof block into the middle
4345
- * of a file.
4346
- */
4347
- if (off + len == src->i_size) {
4348
- if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
4349
- goto out_unlock;
4350
- len = ALIGN(src->i_size, bs) - off;
4351
- }
4352
-
4353
- if (len == 0) {
4354
- ret = 0;
4355
- goto out_unlock;
4356
- }
4357
-
4358
- /* verify the end result is block aligned */
4359
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
4360
- !IS_ALIGNED(destoff, bs))
4361
- goto out_unlock;
4362
-
4363
- /* verify if ranges are overlapped within the same file */
4364
- if (same_inode) {
4365
- if (destoff + len > off && destoff < off + len)
4366
- goto out_unlock;
4367
- }
4368
-
4369
- if (destoff > inode->i_size) {
4370
- ret = btrfs_cont_expand(inode, inode->i_size, destoff);
4371
- if (ret)
4372
- goto out_unlock;
4373
- }
4374
-
4375
- /*
4376
- * Lock the target range too. Right after we replace the file extent
4377
- * items in the fs tree (which now point to the cloned data), we might
4378
- * have a worker replace them with extent items relative to a write
4379
- * operation that was issued before this clone operation (i.e. confront
4380
- * with inode.c:btrfs_finish_ordered_io).
4381
- */
4382
- if (same_inode) {
4383
- u64 lock_start = min_t(u64, off, destoff);
4384
- u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
4385
-
4386
- ret = lock_extent_range(src, lock_start, lock_len, true);
4387
- } else {
4388
- ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
4389
- true);
4390
- }
4391
- ASSERT(ret == 0);
4392
- if (WARN_ON(ret)) {
4393
- /* ranges in the io trees already unlocked */
4394
- goto out_unlock;
4395
- }
4396
-
4397
- ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
4398
-
4399
- if (same_inode) {
4400
- u64 lock_start = min_t(u64, off, destoff);
4401
- u64 lock_end = max_t(u64, off, destoff) + len - 1;
4402
-
4403
- unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end);
4404
- } else {
4405
- btrfs_double_extent_unlock(src, off, inode, destoff, len);
4406
- }
4407
- /*
4408
- * Truncate page cache pages so that future reads will see the cloned
4409
- * data immediately and not the previous data.
4410
- */
4411
- truncate_inode_pages_range(&inode->i_data,
4412
- round_down(destoff, PAGE_SIZE),
4413
- round_up(destoff + len, PAGE_SIZE) - 1);
4414
-out_unlock:
4415
- if (!same_inode)
4416
- btrfs_double_inode_unlock(src, inode);
4417
- else
4418
- inode_unlock(src);
4419
- return ret;
4420
-}
4421
-
4422
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
4423
- struct file *dst_file, loff_t destoff, u64 len)
4424
-{
4425
- return btrfs_clone_files(dst_file, src_file, off, len, destoff);
4426
-}
4427
-
44283424 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
44293425 {
44303426 struct inode *inode = file_inode(file);
....@@ -4433,8 +3429,7 @@
44333429 struct btrfs_root *new_root;
44343430 struct btrfs_dir_item *di;
44353431 struct btrfs_trans_handle *trans;
4436
- struct btrfs_path *path;
4437
- struct btrfs_key location;
3432
+ struct btrfs_path *path = NULL;
44383433 struct btrfs_disk_key disk_key;
44393434 u64 objectid = 0;
44403435 u64 dir_id;
....@@ -4455,53 +3450,51 @@
44553450 if (!objectid)
44563451 objectid = BTRFS_FS_TREE_OBJECTID;
44573452
4458
- location.objectid = objectid;
4459
- location.type = BTRFS_ROOT_ITEM_KEY;
4460
- location.offset = (u64)-1;
4461
-
4462
- new_root = btrfs_read_fs_root_no_name(fs_info, &location);
3453
+ new_root = btrfs_get_fs_root(fs_info, objectid, true);
44633454 if (IS_ERR(new_root)) {
44643455 ret = PTR_ERR(new_root);
44653456 goto out;
44663457 }
4467
- if (!is_fstree(new_root->objectid)) {
3458
+ if (!is_fstree(new_root->root_key.objectid)) {
44683459 ret = -ENOENT;
4469
- goto out;
3460
+ goto out_free;
44703461 }
44713462
44723463 path = btrfs_alloc_path();
44733464 if (!path) {
44743465 ret = -ENOMEM;
4475
- goto out;
3466
+ goto out_free;
44763467 }
44773468 path->leave_spinning = 1;
44783469
44793470 trans = btrfs_start_transaction(root, 1);
44803471 if (IS_ERR(trans)) {
4481
- btrfs_free_path(path);
44823472 ret = PTR_ERR(trans);
4483
- goto out;
3473
+ goto out_free;
44843474 }
44853475
44863476 dir_id = btrfs_super_root_dir(fs_info->super_copy);
44873477 di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
44883478 dir_id, "default", 7, 1);
44893479 if (IS_ERR_OR_NULL(di)) {
4490
- btrfs_free_path(path);
3480
+ btrfs_release_path(path);
44913481 btrfs_end_transaction(trans);
44923482 btrfs_err(fs_info,
44933483 "Umm, you don't have the default diritem, this isn't going to work");
44943484 ret = -ENOENT;
4495
- goto out;
3485
+ goto out_free;
44963486 }
44973487
44983488 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
44993489 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
45003490 btrfs_mark_buffer_dirty(path->nodes[0]);
4501
- btrfs_free_path(path);
3491
+ btrfs_release_path(path);
45023492
45033493 btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
45043494 btrfs_end_transaction(trans);
3495
+out_free:
3496
+ btrfs_put_root(new_root);
3497
+ btrfs_free_path(path);
45053498 out:
45063499 mnt_drop_write_file(file);
45073500 return ret;
....@@ -4510,16 +3503,15 @@
45103503 static void get_block_group_info(struct list_head *groups_list,
45113504 struct btrfs_ioctl_space_info *space)
45123505 {
4513
- struct btrfs_block_group_cache *block_group;
3506
+ struct btrfs_block_group *block_group;
45143507
45153508 space->total_bytes = 0;
45163509 space->used_bytes = 0;
45173510 space->flags = 0;
45183511 list_for_each_entry(block_group, groups_list, list) {
45193512 space->flags = block_group->flags;
4520
- space->total_bytes += block_group->key.offset;
4521
- space->used_bytes +=
4522
- btrfs_block_group_used(&block_group->item);
3513
+ space->total_bytes += block_group->length;
3514
+ space->used_bytes += block_group->used;
45233515 }
45243516 }
45253517
....@@ -4553,15 +3545,12 @@
45533545 struct btrfs_space_info *tmp;
45543546
45553547 info = NULL;
4556
- rcu_read_lock();
4557
- list_for_each_entry_rcu(tmp, &fs_info->space_info,
4558
- list) {
3548
+ list_for_each_entry(tmp, &fs_info->space_info, list) {
45593549 if (tmp->flags == types[i]) {
45603550 info = tmp;
45613551 break;
45623552 }
45633553 }
4564
- rcu_read_unlock();
45653554
45663555 if (!info)
45673556 continue;
....@@ -4609,15 +3598,12 @@
46093598 break;
46103599
46113600 info = NULL;
4612
- rcu_read_lock();
4613
- list_for_each_entry_rcu(tmp, &fs_info->space_info,
4614
- list) {
3601
+ list_for_each_entry(tmp, &fs_info->space_info, list) {
46153602 if (tmp->flags == types[i]) {
46163603 info = tmp;
46173604 break;
46183605 }
46193606 }
4620
- rcu_read_unlock();
46213607
46223608 if (!info)
46233609 continue;
....@@ -4722,6 +3708,11 @@
47223708 if (IS_ERR(sa))
47233709 return PTR_ERR(sa);
47243710
3711
+ if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
3712
+ ret = -EOPNOTSUPP;
3713
+ goto out;
3714
+ }
3715
+
47253716 if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
47263717 ret = mnt_want_write_file(file);
47273718 if (ret)
....@@ -4732,6 +3723,18 @@
47323723 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
47333724 0);
47343725
3726
+ /*
3727
+ * Copy scrub args to user space even if btrfs_scrub_dev() returned an
3728
+ * error. This is important as it allows user space to know how much
3729
+ * progress scrub has done. For example, if scrub is canceled we get
3730
+ * -ECANCELED from btrfs_scrub_dev() and return that error back to user
3731
+ * space. Later user space can inspect the progress from the structure
3732
+ * btrfs_ioctl_scrub_args and resume scrub from where it left off
3733
+ * previously (btrfs-progs does this).
3734
+ * If we fail to copy the btrfs_ioctl_scrub_args structure to user space
3735
+ * then return -EFAULT to signal the structure was not copied or it may
3736
+ * be corrupt and unreliable due to a partial copy.
3737
+ */
47353738 if (copy_to_user(arg, sa, sizeof(*sa)))
47363739 ret = -EFAULT;
47373740
....@@ -4765,7 +3768,7 @@
47653768
47663769 ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);
47673770
4768
- if (copy_to_user(arg, sa, sizeof(*sa)))
3771
+ if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
47693772 ret = -EFAULT;
47703773
47713774 kfree(sa);
....@@ -4789,7 +3792,7 @@
47893792
47903793 ret = btrfs_get_dev_stats(fs_info, sa);
47913794
4792
- if (copy_to_user(arg, sa, sizeof(*sa)))
3795
+ if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
47933796 ret = -EFAULT;
47943797
47953798 kfree(sa);
....@@ -4815,11 +3818,11 @@
48153818 ret = -EROFS;
48163819 goto out;
48173820 }
4818
- if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
3821
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) {
48193822 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
48203823 } else {
48213824 ret = btrfs_dev_replace_by_ioctl(fs_info, p);
4822
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
3825
+ btrfs_exclop_finish(fs_info);
48233826 }
48243827 break;
48253828 case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
....@@ -4835,7 +3838,7 @@
48353838 break;
48363839 }
48373840
4838
- if (copy_to_user(arg, p, sizeof(*p)))
3841
+ if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p)))
48393842 ret = -EFAULT;
48403843 out:
48413844 kfree(p);
....@@ -4886,6 +3889,8 @@
48863889 ipath->fspath->val[i] = rel_ptr;
48873890 }
48883891
3892
+ btrfs_free_path(path);
3893
+ path = NULL;
48893894 ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
48903895 ipath->fspath, size);
48913896 if (ret) {
....@@ -4899,26 +3904,6 @@
48993904 kfree(ipa);
49003905
49013906 return ret;
4902
-}
4903
-
4904
-static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
4905
-{
4906
- struct btrfs_data_container *inodes = ctx;
4907
- const size_t c = 3 * sizeof(u64);
4908
-
4909
- if (inodes->bytes_left >= c) {
4910
- inodes->bytes_left -= c;
4911
- inodes->val[inodes->elem_cnt] = inum;
4912
- inodes->val[inodes->elem_cnt + 1] = offset;
4913
- inodes->val[inodes->elem_cnt + 2] = root;
4914
- inodes->elem_cnt += 3;
4915
- } else {
4916
- inodes->bytes_missing += c - inodes->bytes_left;
4917
- inodes->bytes_left = 0;
4918
- inodes->elem_missed += 3;
4919
- }
4920
-
4921
- return 0;
49223907 }
49233908
49243909 static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
....@@ -4956,21 +3941,20 @@
49563941 size = min_t(u32, loi->size, SZ_16M);
49573942 }
49583943
3944
+ inodes = init_data_container(size);
3945
+ if (IS_ERR(inodes)) {
3946
+ ret = PTR_ERR(inodes);
3947
+ goto out_loi;
3948
+ }
3949
+
49593950 path = btrfs_alloc_path();
49603951 if (!path) {
49613952 ret = -ENOMEM;
49623953 goto out;
49633954 }
4964
-
4965
- inodes = init_data_container(size);
4966
- if (IS_ERR(inodes)) {
4967
- ret = PTR_ERR(inodes);
4968
- inodes = NULL;
4969
- goto out;
4970
- }
4971
-
49723955 ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
4973
- build_ino_list, inodes, ignore_offset);
3956
+ inodes, ignore_offset);
3957
+ btrfs_free_path(path);
49743958 if (ret == -EINVAL)
49753959 ret = -ENOENT;
49763960 if (ret < 0)
....@@ -4982,7 +3966,6 @@
49823966 ret = -EFAULT;
49833967
49843968 out:
4985
- btrfs_free_path(path);
49863969 kvfree(inodes);
49873970 out_loi:
49883971 kfree(loi);
....@@ -5030,7 +4013,7 @@
50304013 return ret;
50314014
50324015 again:
5033
- if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
4016
+ if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
50344017 mutex_lock(&fs_info->balance_mutex);
50354018 need_unlock = true;
50364019 goto locked;
....@@ -5076,7 +4059,6 @@
50764059 }
50774060
50784061 locked:
5079
- BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
50804062
50814063 if (arg) {
50824064 bargs = memdup_user(arg, sizeof(*bargs));
....@@ -5131,17 +4113,17 @@
51314113
51324114 do_balance:
51334115 /*
5134
- * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
5135
- * btrfs_balance. bctl is freed in reset_balance_state, or, if
5136
- * restriper was paused all the way until unmount, in free_fs_info.
5137
- * The flag should be cleared after reset_balance_state.
4116
+ * Ownership of bctl and exclusive operation goes to btrfs_balance.
4117
+ * bctl is freed in reset_balance_state, or, if restriper was paused
4118
+ * all the way until unmount, in free_fs_info. The flag should be
4119
+ * cleared after reset_balance_state.
51384120 */
51394121 need_unlock = false;
51404122
51414123 ret = btrfs_balance(fs_info, bctl, bargs);
51424124 bctl = NULL;
51434125
5144
- if (arg) {
4126
+ if ((ret == 0 || ret == -ECANCELED) && arg) {
51454127 if (copy_to_user(arg, bargs, sizeof(*bargs)))
51464128 ret = -EFAULT;
51474129 }
....@@ -5153,7 +4135,7 @@
51534135 out_unlock:
51544136 mutex_unlock(&fs_info->balance_mutex);
51554137 if (need_unlock)
5156
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4138
+ btrfs_exclop_finish(fs_info);
51574139 out:
51584140 mnt_drop_write_file(file);
51594141 return ret;
....@@ -5283,7 +4265,9 @@
52834265 }
52844266
52854267 /* update qgroup status and info */
4268
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
52864269 err = btrfs_run_qgroups(trans);
4270
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
52874271 if (err < 0)
52884272 btrfs_handle_fs_error(fs_info, err,
52894273 "failed to update qgroup status and info");
....@@ -5430,10 +4414,9 @@
54304414 return ret;
54314415 }
54324416
5433
-static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
4417
+static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
4418
+ void __user *arg)
54344419 {
5435
- struct inode *inode = file_inode(file);
5436
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
54374420 struct btrfs_ioctl_quota_rescan_args *qsa;
54384421 int ret = 0;
54394422
....@@ -5456,11 +4439,9 @@
54564439 return ret;
54574440 }
54584441
5459
-static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
4442
+static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
4443
+ void __user *arg)
54604444 {
5461
- struct inode *inode = file_inode(file);
5462
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5463
-
54644445 if (!capable(CAP_SYS_ADMIN))
54654446 return -EPERM;
54664447
....@@ -5632,10 +4613,9 @@
56324613 return ret;
56334614 }
56344615
5635
-static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
4616
+static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
4617
+ void __user *arg)
56364618 {
5637
- struct inode *inode = file_inode(file);
5638
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
56394619 size_t len;
56404620 int ret;
56414621 char label[BTRFS_LABEL_SIZE];
....@@ -5719,10 +4699,9 @@
57194699 return 0;
57204700 }
57214701
5722
-static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
4702
+static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
4703
+ void __user *arg)
57234704 {
5724
- struct inode *inode = file_inode(file);
5725
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
57264705 struct btrfs_super_block *super_block = fs_info->super_copy;
57274706 struct btrfs_ioctl_feature_flags features;
57284707
....@@ -5741,7 +4720,7 @@
57414720 u64 change_mask, u64 flags, u64 supported_flags,
57424721 u64 safe_set, u64 safe_clear)
57434722 {
5744
- const char *type = btrfs_feature_set_names[set];
4723
+ const char *type = btrfs_feature_set_name(set);
57454724 char *names;
57464725 u64 disallowed, unsupported;
57474726 u64 set_mask = flags & change_mask;
....@@ -5922,8 +4901,12 @@
59224901 return btrfs_ioctl_setflags(file, argp);
59234902 case FS_IOC_GETVERSION:
59244903 return btrfs_ioctl_getversion(file, argp);
4904
+ case FS_IOC_GETFSLABEL:
4905
+ return btrfs_ioctl_get_fslabel(fs_info, argp);
4906
+ case FS_IOC_SETFSLABEL:
4907
+ return btrfs_ioctl_set_fslabel(file, argp);
59254908 case FITRIM:
5926
- return btrfs_ioctl_fitrim(file, argp);
4909
+ return btrfs_ioctl_fitrim(fs_info, argp);
59274910 case BTRFS_IOC_SNAP_CREATE:
59284911 return btrfs_ioctl_snap_create(file, argp, 0);
59294912 case BTRFS_IOC_SNAP_CREATE_V2:
....@@ -5933,7 +4916,9 @@
59334916 case BTRFS_IOC_SUBVOL_CREATE_V2:
59344917 return btrfs_ioctl_snap_create_v2(file, argp, 1);
59354918 case BTRFS_IOC_SNAP_DESTROY:
5936
- return btrfs_ioctl_snap_destroy(file, argp);
4919
+ return btrfs_ioctl_snap_destroy(file, argp, false);
4920
+ case BTRFS_IOC_SNAP_DESTROY_V2:
4921
+ return btrfs_ioctl_snap_destroy(file, argp, true);
59374922 case BTRFS_IOC_SUBVOL_GETFLAGS:
59384923 return btrfs_ioctl_subvol_getflags(file, argp);
59394924 case BTRFS_IOC_SUBVOL_SETFLAGS:
....@@ -5975,7 +4960,7 @@
59754960 case BTRFS_IOC_SYNC: {
59764961 int ret;
59774962
5978
- ret = btrfs_start_delalloc_roots(fs_info, -1);
4963
+ ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
59794964 if (ret)
59804965 return ret;
59814966 ret = btrfs_sync_fs(inode->i_sb, 1);
....@@ -6028,19 +5013,15 @@
60285013 case BTRFS_IOC_QUOTA_RESCAN:
60295014 return btrfs_ioctl_quota_rescan(file, argp);
60305015 case BTRFS_IOC_QUOTA_RESCAN_STATUS:
6031
- return btrfs_ioctl_quota_rescan_status(file, argp);
5016
+ return btrfs_ioctl_quota_rescan_status(fs_info, argp);
60325017 case BTRFS_IOC_QUOTA_RESCAN_WAIT:
6033
- return btrfs_ioctl_quota_rescan_wait(file, argp);
5018
+ return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
60345019 case BTRFS_IOC_DEV_REPLACE:
60355020 return btrfs_ioctl_dev_replace(fs_info, argp);
6036
- case BTRFS_IOC_GET_FSLABEL:
6037
- return btrfs_ioctl_get_fslabel(file, argp);
6038
- case BTRFS_IOC_SET_FSLABEL:
6039
- return btrfs_ioctl_set_fslabel(file, argp);
60405021 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
60415022 return btrfs_ioctl_get_supported_features(argp);
60425023 case BTRFS_IOC_GET_FEATURES:
6043
- return btrfs_ioctl_get_features(file, argp);
5024
+ return btrfs_ioctl_get_features(fs_info, argp);
60445025 case BTRFS_IOC_SET_FEATURES:
60455026 return btrfs_ioctl_set_features(file, argp);
60465027 case FS_IOC_FSGETXATTR: