.. | .. |
---|
122 | 122 | |
---|
123 | 123 | struct file_ra_state ra; |
---|
124 | 124 | |
---|
125 | | - char *read_buf; |
---|
126 | | - |
---|
127 | 125 | /* |
---|
128 | 126 | * We process inodes by their increasing order, so if before an |
---|
129 | 127 | * incremental send we reverse the parent/child relationship of |
---|
.. | .. |
---|
268 | 266 | int need_later_update; |
---|
269 | 267 | int name_len; |
---|
270 | 268 | char name[]; |
---|
| 269 | +}; |
---|
| 270 | + |
---|
| 271 | +#define ADVANCE 1 |
---|
| 272 | +#define ADVANCE_ONLY_NEXT -1 |
---|
| 273 | + |
---|
| 274 | +enum btrfs_compare_tree_result { |
---|
| 275 | + BTRFS_COMPARE_TREE_NEW, |
---|
| 276 | + BTRFS_COMPARE_TREE_DELETED, |
---|
| 277 | + BTRFS_COMPARE_TREE_CHANGED, |
---|
| 278 | + BTRFS_COMPARE_TREE_SAME, |
---|
271 | 279 | }; |
---|
272 | 280 | |
---|
273 | 281 | __cold |
---|
.. | .. |
---|
570 | 578 | return -EOVERFLOW; |
---|
571 | 579 | |
---|
572 | 580 | hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); |
---|
573 | | - hdr->tlv_type = cpu_to_le16(attr); |
---|
574 | | - hdr->tlv_len = cpu_to_le16(len); |
---|
| 581 | + put_unaligned_le16(attr, &hdr->tlv_type); |
---|
| 582 | + put_unaligned_le16(len, &hdr->tlv_len); |
---|
575 | 583 | memcpy(hdr + 1, data, len); |
---|
576 | 584 | sctx->send_size += total_len; |
---|
577 | 585 | |
---|
.. | .. |
---|
681 | 689 | |
---|
682 | 690 | sctx->send_size += sizeof(*hdr); |
---|
683 | 691 | hdr = (struct btrfs_cmd_header *)sctx->send_buf; |
---|
684 | | - hdr->cmd = cpu_to_le16(cmd); |
---|
| 692 | + put_unaligned_le16(cmd, &hdr->cmd); |
---|
685 | 693 | |
---|
686 | 694 | return 0; |
---|
687 | 695 | } |
---|
.. | .. |
---|
693 | 701 | u32 crc; |
---|
694 | 702 | |
---|
695 | 703 | hdr = (struct btrfs_cmd_header *)sctx->send_buf; |
---|
696 | | - hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); |
---|
697 | | - hdr->crc = 0; |
---|
| 704 | + put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len); |
---|
| 705 | + put_unaligned_le32(0, &hdr->crc); |
---|
698 | 706 | |
---|
699 | | - crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
---|
700 | | - hdr->crc = cpu_to_le32(crc); |
---|
| 707 | + crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
---|
| 708 | + put_unaligned_le32(crc, &hdr->crc); |
---|
701 | 709 | |
---|
702 | 710 | ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, |
---|
703 | 711 | &sctx->send_off); |
---|
704 | 712 | |
---|
705 | 713 | sctx->total_send_size += sctx->send_size; |
---|
706 | | - sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; |
---|
| 714 | + sctx->cmd_send_size[get_unaligned_le16(&hdr->cmd)] += sctx->send_size; |
---|
707 | 715 | sctx->send_size = 0; |
---|
708 | 716 | |
---|
709 | 717 | return ret; |
---|
.. | .. |
---|
1170 | 1178 | struct backref_ctx { |
---|
1171 | 1179 | struct send_ctx *sctx; |
---|
1172 | 1180 | |
---|
1173 | | - struct btrfs_path *path; |
---|
1174 | 1181 | /* number of total found references */ |
---|
1175 | 1182 | u64 found; |
---|
1176 | 1183 | |
---|
.. | .. |
---|
1196 | 1203 | u64 root = (u64)(uintptr_t)key; |
---|
1197 | 1204 | struct clone_root *cr = (struct clone_root *)elt; |
---|
1198 | 1205 | |
---|
1199 | | - if (root < cr->root->objectid) |
---|
| 1206 | + if (root < cr->root->root_key.objectid) |
---|
1200 | 1207 | return -1; |
---|
1201 | | - if (root > cr->root->objectid) |
---|
| 1208 | + if (root > cr->root->root_key.objectid) |
---|
1202 | 1209 | return 1; |
---|
1203 | 1210 | return 0; |
---|
1204 | 1211 | } |
---|
.. | .. |
---|
1208 | 1215 | struct clone_root *cr1 = (struct clone_root *)e1; |
---|
1209 | 1216 | struct clone_root *cr2 = (struct clone_root *)e2; |
---|
1210 | 1217 | |
---|
1211 | | - if (cr1->root->objectid < cr2->root->objectid) |
---|
| 1218 | + if (cr1->root->root_key.objectid < cr2->root->root_key.objectid) |
---|
1212 | 1219 | return -1; |
---|
1213 | | - if (cr1->root->objectid > cr2->root->objectid) |
---|
| 1220 | + if (cr1->root->root_key.objectid > cr2->root->root_key.objectid) |
---|
1214 | 1221 | return 1; |
---|
1215 | 1222 | return 0; |
---|
1216 | 1223 | } |
---|
.. | .. |
---|
1223 | 1230 | { |
---|
1224 | 1231 | struct backref_ctx *bctx = ctx_; |
---|
1225 | 1232 | struct clone_root *found; |
---|
1226 | | - int ret; |
---|
1227 | | - u64 i_size; |
---|
1228 | 1233 | |
---|
1229 | 1234 | /* First check if the root is in the list of accepted clone sources */ |
---|
1230 | 1235 | found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, |
---|
.. | .. |
---|
1241 | 1246 | } |
---|
1242 | 1247 | |
---|
1243 | 1248 | /* |
---|
1244 | | - * There are inodes that have extents that lie behind its i_size. Don't |
---|
1245 | | - * accept clones from these extents. |
---|
1246 | | - */ |
---|
1247 | | - ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, |
---|
1248 | | - NULL, NULL, NULL); |
---|
1249 | | - btrfs_release_path(bctx->path); |
---|
1250 | | - if (ret < 0) |
---|
1251 | | - return ret; |
---|
1252 | | - |
---|
1253 | | - if (offset + bctx->data_offset + bctx->extent_len > i_size) |
---|
1254 | | - return 0; |
---|
1255 | | - |
---|
1256 | | - /* |
---|
1257 | 1249 | * Make sure we don't consider clones from send_root that are |
---|
1258 | 1250 | * behind the current inode/offset. |
---|
1259 | 1251 | */ |
---|
1260 | 1252 | if (found->root == bctx->sctx->send_root) { |
---|
1261 | 1253 | /* |
---|
1262 | | - * TODO for the moment we don't accept clones from the inode |
---|
1263 | | - * that is currently send. We may change this when |
---|
1264 | | - * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same |
---|
1265 | | - * file. |
---|
| 1254 | + * If the source inode was not yet processed we can't issue a |
---|
| 1255 | + * clone operation, as the source extent does not exist yet at |
---|
| 1256 | + * the destination of the stream. |
---|
1266 | 1257 | */ |
---|
1267 | | - if (ino >= bctx->cur_objectid) |
---|
| 1258 | + if (ino > bctx->cur_objectid) |
---|
| 1259 | + return 0; |
---|
| 1260 | + /* |
---|
| 1261 | + * We clone from the inode currently being sent as long as the |
---|
| 1262 | + * source extent is already processed, otherwise we could try |
---|
| 1263 | + * to clone from an extent that does not exist yet at the |
---|
| 1264 | + * destination of the stream. |
---|
| 1265 | + */ |
---|
| 1266 | + if (ino == bctx->cur_objectid && |
---|
| 1267 | + offset + bctx->extent_len > |
---|
| 1268 | + bctx->sctx->cur_inode_next_write_offset) |
---|
1268 | 1269 | return 0; |
---|
1269 | 1270 | } |
---|
1270 | 1271 | |
---|
.. | .. |
---|
1329 | 1330 | ret = -ENOMEM; |
---|
1330 | 1331 | goto out; |
---|
1331 | 1332 | } |
---|
1332 | | - |
---|
1333 | | - backref_ctx->path = tmp_path; |
---|
1334 | 1333 | |
---|
1335 | 1334 | if (data_offset >= ino_size) { |
---|
1336 | 1335 | /* |
---|
.. | .. |
---|
1718 | 1717 | |
---|
1719 | 1718 | di = btrfs_lookup_dir_item(NULL, root, path, |
---|
1720 | 1719 | dir, name, name_len, 0); |
---|
1721 | | - if (!di) { |
---|
1722 | | - ret = -ENOENT; |
---|
1723 | | - goto out; |
---|
1724 | | - } |
---|
1725 | | - if (IS_ERR(di)) { |
---|
1726 | | - ret = PTR_ERR(di); |
---|
| 1720 | + if (IS_ERR_OR_NULL(di)) { |
---|
| 1721 | + ret = di ? PTR_ERR(di) : -ENOENT; |
---|
1727 | 1722 | goto out; |
---|
1728 | 1723 | } |
---|
1729 | 1724 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
---|
.. | .. |
---|
2267 | 2262 | * inodes "orphan" name instead of the real name and stop. Same with new inodes |
---|
2268 | 2263 | * that were not created yet and overwritten inodes/refs. |
---|
2269 | 2264 | * |
---|
2270 | | - * When do we have have orphan inodes: |
---|
| 2265 | + * When do we have orphan inodes: |
---|
2271 | 2266 | * 1. When an inode is freshly created and thus no valid refs are available yet |
---|
2272 | 2267 | * 2. When a directory lost all it's refs (deleted) but still has dir items |
---|
2273 | 2268 | * inside which were not processed yet (pending for move/delete). If anyone |
---|
.. | .. |
---|
2371 | 2366 | return -ENOMEM; |
---|
2372 | 2367 | } |
---|
2373 | 2368 | |
---|
2374 | | - key.objectid = send_root->objectid; |
---|
| 2369 | + key.objectid = send_root->root_key.objectid; |
---|
2375 | 2370 | key.type = BTRFS_ROOT_BACKREF_KEY; |
---|
2376 | 2371 | key.offset = 0; |
---|
2377 | 2372 | |
---|
.. | .. |
---|
2387 | 2382 | leaf = path->nodes[0]; |
---|
2388 | 2383 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
---|
2389 | 2384 | if (key.type != BTRFS_ROOT_BACKREF_KEY || |
---|
2390 | | - key.objectid != send_root->objectid) { |
---|
| 2385 | + key.objectid != send_root->root_key.objectid) { |
---|
2391 | 2386 | ret = -ENOENT; |
---|
2392 | 2387 | goto out; |
---|
2393 | 2388 | } |
---|
.. | .. |
---|
3957 | 3952 | goto out; |
---|
3958 | 3953 | } |
---|
3959 | 3954 | |
---|
| 3955 | + /* |
---|
| 3956 | + * Before doing any rename and link operations, do a first pass on the |
---|
| 3957 | + * new references to orphanize any unprocessed inodes that may have a |
---|
| 3958 | + * reference that conflicts with one of the new references of the current |
---|
| 3959 | + * inode. This needs to happen first because a new reference may conflict |
---|
| 3960 | + * with the old reference of a parent directory, so we must make sure |
---|
| 3961 | + * that the path used for link and rename commands doesn't use an |
---|
| 3962 | + * orphanized name when an ancestor was not yet orphanized. |
---|
| 3963 | + * |
---|
| 3964 | + * Example: |
---|
| 3965 | + * |
---|
| 3966 | + * Parent snapshot: |
---|
| 3967 | + * |
---|
| 3968 | + * . (ino 256) |
---|
| 3969 | + * |----- testdir/ (ino 259) |
---|
| 3970 | + * | |----- a (ino 257) |
---|
| 3971 | + * | |
---|
| 3972 | + * |----- b (ino 258) |
---|
| 3973 | + * |
---|
| 3974 | + * Send snapshot: |
---|
| 3975 | + * |
---|
| 3976 | + * . (ino 256) |
---|
| 3977 | + * |----- testdir_2/ (ino 259) |
---|
| 3978 | + * | |----- a (ino 260) |
---|
| 3979 | + * | |
---|
| 3980 | + * |----- testdir (ino 257) |
---|
| 3981 | + * |----- b (ino 257) |
---|
| 3982 | + * |----- b2 (ino 258) |
---|
| 3983 | + * |
---|
| 3984 | + * Processing the new reference for inode 257 with name "b" may happen |
---|
| 3985 | + * before processing the new reference with name "testdir". If so, we |
---|
| 3986 | + * must make sure that by the time we send a link command to create the |
---|
| 3987 | + * hard link "b", inode 259 was already orphanized, since the generated |
---|
| 3988 | + * path in "valid_path" already contains the orphanized name for 259. |
---|
| 3989 | + * We are processing inode 257, so only later when processing 259 we do |
---|
| 3990 | + * the rename operation to change its temporary (orphanized) name to |
---|
| 3991 | + * "testdir_2". |
---|
| 3992 | + */ |
---|
3960 | 3993 | list_for_each_entry(cur, &sctx->new_refs, list) { |
---|
3961 | | - /* |
---|
3962 | | - * We may have refs where the parent directory does not exist |
---|
3963 | | - * yet. This happens if the parent directories inum is higher |
---|
3964 | | - * the the current inum. To handle this case, we create the |
---|
3965 | | - * parent directory out of order. But we need to check if this |
---|
3966 | | - * did already happen before due to other refs in the same dir. |
---|
3967 | | - */ |
---|
3968 | 3994 | ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); |
---|
3969 | 3995 | if (ret < 0) |
---|
3970 | 3996 | goto out; |
---|
3971 | | - if (ret == inode_state_will_create) { |
---|
3972 | | - ret = 0; |
---|
3973 | | - /* |
---|
3974 | | - * First check if any of the current inodes refs did |
---|
3975 | | - * already create the dir. |
---|
3976 | | - */ |
---|
3977 | | - list_for_each_entry(cur2, &sctx->new_refs, list) { |
---|
3978 | | - if (cur == cur2) |
---|
3979 | | - break; |
---|
3980 | | - if (cur2->dir == cur->dir) { |
---|
3981 | | - ret = 1; |
---|
3982 | | - break; |
---|
3983 | | - } |
---|
3984 | | - } |
---|
3985 | | - |
---|
3986 | | - /* |
---|
3987 | | - * If that did not happen, check if a previous inode |
---|
3988 | | - * did already create the dir. |
---|
3989 | | - */ |
---|
3990 | | - if (!ret) |
---|
3991 | | - ret = did_create_dir(sctx, cur->dir); |
---|
3992 | | - if (ret < 0) |
---|
3993 | | - goto out; |
---|
3994 | | - if (!ret) { |
---|
3995 | | - ret = send_create_inode(sctx, cur->dir); |
---|
3996 | | - if (ret < 0) |
---|
3997 | | - goto out; |
---|
3998 | | - } |
---|
3999 | | - } |
---|
| 3997 | + if (ret == inode_state_will_create) |
---|
| 3998 | + continue; |
---|
4000 | 3999 | |
---|
4001 | 4000 | /* |
---|
4002 | | - * Check if this new ref would overwrite the first ref of |
---|
4003 | | - * another unprocessed inode. If yes, orphanize the |
---|
4004 | | - * overwritten inode. If we find an overwritten ref that is |
---|
4005 | | - * not the first ref, simply unlink it. |
---|
| 4001 | + * Check if this new ref would overwrite the first ref of another |
---|
| 4002 | + * unprocessed inode. If yes, orphanize the overwritten inode. |
---|
| 4003 | + * If we find an overwritten ref that is not the first ref, |
---|
| 4004 | + * simply unlink it. |
---|
4006 | 4005 | */ |
---|
4007 | 4006 | ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, |
---|
4008 | 4007 | cur->name, cur->name_len, |
---|
.. | .. |
---|
4093 | 4092 | goto out; |
---|
4094 | 4093 | } |
---|
4095 | 4094 | ret = send_unlink(sctx, cur->full_path); |
---|
| 4095 | + if (ret < 0) |
---|
| 4096 | + goto out; |
---|
| 4097 | + } |
---|
| 4098 | + } |
---|
| 4099 | + |
---|
| 4100 | + } |
---|
| 4101 | + |
---|
| 4102 | + list_for_each_entry(cur, &sctx->new_refs, list) { |
---|
| 4103 | + /* |
---|
| 4104 | + * We may have refs where the parent directory does not exist |
---|
| 4105 | + * yet. This happens if the parent directories inum is higher |
---|
| 4106 | + * than the current inum. To handle this case, we create the |
---|
| 4107 | + * parent directory out of order. But we need to check if this |
---|
| 4108 | + * did already happen before due to other refs in the same dir. |
---|
| 4109 | + */ |
---|
| 4110 | + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); |
---|
| 4111 | + if (ret < 0) |
---|
| 4112 | + goto out; |
---|
| 4113 | + if (ret == inode_state_will_create) { |
---|
| 4114 | + ret = 0; |
---|
| 4115 | + /* |
---|
| 4116 | + * First check if any of the current inodes refs did |
---|
| 4117 | + * already create the dir. |
---|
| 4118 | + */ |
---|
| 4119 | + list_for_each_entry(cur2, &sctx->new_refs, list) { |
---|
| 4120 | + if (cur == cur2) |
---|
| 4121 | + break; |
---|
| 4122 | + if (cur2->dir == cur->dir) { |
---|
| 4123 | + ret = 1; |
---|
| 4124 | + break; |
---|
| 4125 | + } |
---|
| 4126 | + } |
---|
| 4127 | + |
---|
| 4128 | + /* |
---|
| 4129 | + * If that did not happen, check if a previous inode |
---|
| 4130 | + * did already create the dir. |
---|
| 4131 | + */ |
---|
| 4132 | + if (!ret) |
---|
| 4133 | + ret = did_create_dir(sctx, cur->dir); |
---|
| 4134 | + if (ret < 0) |
---|
| 4135 | + goto out; |
---|
| 4136 | + if (!ret) { |
---|
| 4137 | + ret = send_create_inode(sctx, cur->dir); |
---|
4096 | 4138 | if (ret < 0) |
---|
4097 | 4139 | goto out; |
---|
4098 | 4140 | } |
---|
.. | .. |
---|
4893 | 4935 | return ret; |
---|
4894 | 4936 | } |
---|
4895 | 4937 | |
---|
4896 | | -static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) |
---|
| 4938 | +static inline u64 max_send_read_size(const struct send_ctx *sctx) |
---|
| 4939 | +{ |
---|
| 4940 | + return sctx->send_max_size - SZ_16K; |
---|
| 4941 | +} |
---|
| 4942 | + |
---|
| 4943 | +static int put_data_header(struct send_ctx *sctx, u32 len) |
---|
| 4944 | +{ |
---|
| 4945 | + struct btrfs_tlv_header *hdr; |
---|
| 4946 | + |
---|
| 4947 | + if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len) |
---|
| 4948 | + return -EOVERFLOW; |
---|
| 4949 | + hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size); |
---|
| 4950 | + put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type); |
---|
| 4951 | + put_unaligned_le16(len, &hdr->tlv_len); |
---|
| 4952 | + sctx->send_size += sizeof(*hdr); |
---|
| 4953 | + return 0; |
---|
| 4954 | +} |
---|
| 4955 | + |
---|
| 4956 | +static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) |
---|
4897 | 4957 | { |
---|
4898 | 4958 | struct btrfs_root *root = sctx->send_root; |
---|
4899 | 4959 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
4900 | 4960 | struct inode *inode; |
---|
4901 | 4961 | struct page *page; |
---|
4902 | 4962 | char *addr; |
---|
4903 | | - struct btrfs_key key; |
---|
4904 | 4963 | pgoff_t index = offset >> PAGE_SHIFT; |
---|
4905 | 4964 | pgoff_t last_index; |
---|
4906 | | - unsigned pg_offset = offset & ~PAGE_MASK; |
---|
4907 | | - ssize_t ret = 0; |
---|
| 4965 | + unsigned pg_offset = offset_in_page(offset); |
---|
| 4966 | + int ret; |
---|
4908 | 4967 | |
---|
4909 | | - key.objectid = sctx->cur_ino; |
---|
4910 | | - key.type = BTRFS_INODE_ITEM_KEY; |
---|
4911 | | - key.offset = 0; |
---|
| 4968 | + ret = put_data_header(sctx, len); |
---|
| 4969 | + if (ret) |
---|
| 4970 | + return ret; |
---|
4912 | 4971 | |
---|
4913 | | - inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
---|
| 4972 | + inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); |
---|
4914 | 4973 | if (IS_ERR(inode)) |
---|
4915 | 4974 | return PTR_ERR(inode); |
---|
4916 | | - |
---|
4917 | | - if (offset + len > i_size_read(inode)) { |
---|
4918 | | - if (offset > i_size_read(inode)) |
---|
4919 | | - len = 0; |
---|
4920 | | - else |
---|
4921 | | - len = offset - i_size_read(inode); |
---|
4922 | | - } |
---|
4923 | | - if (len == 0) |
---|
4924 | | - goto out; |
---|
4925 | 4975 | |
---|
4926 | 4976 | last_index = (offset + len - 1) >> PAGE_SHIFT; |
---|
4927 | 4977 | |
---|
.. | .. |
---|
4967 | 5017 | } |
---|
4968 | 5018 | |
---|
4969 | 5019 | addr = kmap(page); |
---|
4970 | | - memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); |
---|
| 5020 | + memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset, |
---|
| 5021 | + cur_len); |
---|
4971 | 5022 | kunmap(page); |
---|
4972 | 5023 | unlock_page(page); |
---|
4973 | 5024 | put_page(page); |
---|
4974 | 5025 | index++; |
---|
4975 | 5026 | pg_offset = 0; |
---|
4976 | 5027 | len -= cur_len; |
---|
4977 | | - ret += cur_len; |
---|
| 5028 | + sctx->send_size += cur_len; |
---|
4978 | 5029 | } |
---|
4979 | | -out: |
---|
4980 | 5030 | iput(inode); |
---|
4981 | 5031 | return ret; |
---|
4982 | 5032 | } |
---|
.. | .. |
---|
4990 | 5040 | struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; |
---|
4991 | 5041 | int ret = 0; |
---|
4992 | 5042 | struct fs_path *p; |
---|
4993 | | - ssize_t num_read = 0; |
---|
4994 | 5043 | |
---|
4995 | 5044 | p = fs_path_alloc(); |
---|
4996 | 5045 | if (!p) |
---|
4997 | 5046 | return -ENOMEM; |
---|
4998 | 5047 | |
---|
4999 | 5048 | btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); |
---|
5000 | | - |
---|
5001 | | - num_read = fill_read_buf(sctx, offset, len); |
---|
5002 | | - if (num_read <= 0) { |
---|
5003 | | - if (num_read < 0) |
---|
5004 | | - ret = num_read; |
---|
5005 | | - goto out; |
---|
5006 | | - } |
---|
5007 | 5049 | |
---|
5008 | 5050 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
---|
5009 | 5051 | if (ret < 0) |
---|
.. | .. |
---|
5015 | 5057 | |
---|
5016 | 5058 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
---|
5017 | 5059 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
---|
5018 | | - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); |
---|
| 5060 | + ret = put_file_data(sctx, offset, len); |
---|
| 5061 | + if (ret < 0) |
---|
| 5062 | + goto out; |
---|
5019 | 5063 | |
---|
5020 | 5064 | ret = send_cmd(sctx); |
---|
5021 | 5065 | |
---|
5022 | 5066 | tlv_put_failure: |
---|
5023 | 5067 | out: |
---|
5024 | 5068 | fs_path_free(p); |
---|
5025 | | - if (ret < 0) |
---|
5026 | | - return ret; |
---|
5027 | | - return num_read; |
---|
| 5069 | + return ret; |
---|
5028 | 5070 | } |
---|
5029 | 5071 | |
---|
5030 | 5072 | /* |
---|
.. | .. |
---|
5040 | 5082 | |
---|
5041 | 5083 | btrfs_debug(sctx->send_root->fs_info, |
---|
5042 | 5084 | "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu", |
---|
5043 | | - offset, len, clone_root->root->objectid, clone_root->ino, |
---|
5044 | | - clone_root->offset); |
---|
| 5085 | + offset, len, clone_root->root->root_key.objectid, |
---|
| 5086 | + clone_root->ino, clone_root->offset); |
---|
5045 | 5087 | |
---|
5046 | 5088 | p = fs_path_alloc(); |
---|
5047 | 5089 | if (!p) |
---|
.. | .. |
---|
5136 | 5178 | static int send_hole(struct send_ctx *sctx, u64 end) |
---|
5137 | 5179 | { |
---|
5138 | 5180 | struct fs_path *p = NULL; |
---|
| 5181 | + u64 read_size = max_send_read_size(sctx); |
---|
5139 | 5182 | u64 offset = sctx->cur_inode_last_extent; |
---|
5140 | | - u64 len; |
---|
5141 | 5183 | int ret = 0; |
---|
5142 | 5184 | |
---|
5143 | 5185 | /* |
---|
.. | .. |
---|
5164 | 5206 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); |
---|
5165 | 5207 | if (ret < 0) |
---|
5166 | 5208 | goto tlv_put_failure; |
---|
5167 | | - memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); |
---|
5168 | 5209 | while (offset < end) { |
---|
5169 | | - len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); |
---|
| 5210 | + u64 len = min(end - offset, read_size); |
---|
5170 | 5211 | |
---|
5171 | 5212 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
---|
5172 | 5213 | if (ret < 0) |
---|
5173 | 5214 | break; |
---|
5174 | 5215 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
---|
5175 | 5216 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
---|
5176 | | - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); |
---|
| 5217 | + ret = put_data_header(sctx, len); |
---|
| 5218 | + if (ret < 0) |
---|
| 5219 | + break; |
---|
| 5220 | + memset(sctx->send_buf + sctx->send_size, 0, len); |
---|
| 5221 | + sctx->send_size += len; |
---|
5177 | 5222 | ret = send_cmd(sctx); |
---|
5178 | 5223 | if (ret < 0) |
---|
5179 | 5224 | break; |
---|
.. | .. |
---|
5189 | 5234 | const u64 offset, |
---|
5190 | 5235 | const u64 len) |
---|
5191 | 5236 | { |
---|
| 5237 | + u64 read_size = max_send_read_size(sctx); |
---|
5192 | 5238 | u64 sent = 0; |
---|
5193 | 5239 | |
---|
5194 | 5240 | if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) |
---|
5195 | 5241 | return send_update_extent(sctx, offset, len); |
---|
5196 | 5242 | |
---|
5197 | 5243 | while (sent < len) { |
---|
5198 | | - u64 size = len - sent; |
---|
| 5244 | + u64 size = min(len - sent, read_size); |
---|
5199 | 5245 | int ret; |
---|
5200 | 5246 | |
---|
5201 | | - if (size > BTRFS_SEND_READ_SIZE) |
---|
5202 | | - size = BTRFS_SEND_READ_SIZE; |
---|
5203 | 5247 | ret = send_write(sctx, offset + sent, size); |
---|
5204 | 5248 | if (ret < 0) |
---|
5205 | 5249 | return ret; |
---|
5206 | | - if (!ret) |
---|
5207 | | - break; |
---|
5208 | | - sent += ret; |
---|
| 5250 | + sent += size; |
---|
5209 | 5251 | } |
---|
5210 | 5252 | return 0; |
---|
5211 | 5253 | } |
---|
.. | .. |
---|
5278 | 5320 | struct btrfs_path *path; |
---|
5279 | 5321 | struct btrfs_key key; |
---|
5280 | 5322 | int ret; |
---|
| 5323 | + u64 clone_src_i_size = 0; |
---|
5281 | 5324 | |
---|
5282 | 5325 | /* |
---|
5283 | 5326 | * Prevent cloning from a zero offset with a length matching the sector |
---|
.. | .. |
---|
5301 | 5344 | path = alloc_path_for_send(); |
---|
5302 | 5345 | if (!path) |
---|
5303 | 5346 | return -ENOMEM; |
---|
| 5347 | + |
---|
| 5348 | + /* |
---|
| 5349 | + * There are inodes that have extents that lie behind its i_size. Don't |
---|
| 5350 | + * accept clones from these extents. |
---|
| 5351 | + */ |
---|
| 5352 | + ret = __get_inode_info(clone_root->root, path, clone_root->ino, |
---|
| 5353 | + &clone_src_i_size, NULL, NULL, NULL, NULL, NULL); |
---|
| 5354 | + btrfs_release_path(path); |
---|
| 5355 | + if (ret < 0) |
---|
| 5356 | + goto out; |
---|
5304 | 5357 | |
---|
5305 | 5358 | /* |
---|
5306 | 5359 | * We can't send a clone operation for the entire range if we find |
---|
.. | .. |
---|
5344 | 5397 | u8 type; |
---|
5345 | 5398 | u64 ext_len; |
---|
5346 | 5399 | u64 clone_len; |
---|
| 5400 | + u64 clone_data_offset; |
---|
| 5401 | + bool crossed_src_i_size = false; |
---|
5347 | 5402 | |
---|
5348 | 5403 | if (slot >= btrfs_header_nritems(leaf)) { |
---|
5349 | 5404 | ret = btrfs_next_leaf(clone_root->root, path); |
---|
.. | .. |
---|
5397 | 5452 | if (key.offset >= clone_root->offset + len) |
---|
5398 | 5453 | break; |
---|
5399 | 5454 | |
---|
| 5455 | + if (key.offset >= clone_src_i_size) |
---|
| 5456 | + break; |
---|
| 5457 | + |
---|
| 5458 | + if (key.offset + ext_len > clone_src_i_size) { |
---|
| 5459 | + ext_len = clone_src_i_size - key.offset; |
---|
| 5460 | + crossed_src_i_size = true; |
---|
| 5461 | + } |
---|
| 5462 | + |
---|
| 5463 | + clone_data_offset = btrfs_file_extent_offset(leaf, ei); |
---|
| 5464 | + if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { |
---|
| 5465 | + clone_root->offset = key.offset; |
---|
| 5466 | + if (clone_data_offset < data_offset && |
---|
| 5467 | + clone_data_offset + ext_len > data_offset) { |
---|
| 5468 | + u64 extent_offset; |
---|
| 5469 | + |
---|
| 5470 | + extent_offset = data_offset - clone_data_offset; |
---|
| 5471 | + ext_len -= extent_offset; |
---|
| 5472 | + clone_data_offset += extent_offset; |
---|
| 5473 | + clone_root->offset += extent_offset; |
---|
| 5474 | + } |
---|
| 5475 | + } |
---|
| 5476 | + |
---|
5400 | 5477 | clone_len = min_t(u64, ext_len, len); |
---|
5401 | 5478 | |
---|
5402 | 5479 | if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && |
---|
5403 | | - btrfs_file_extent_offset(leaf, ei) == data_offset) |
---|
5404 | | - ret = send_clone(sctx, offset, clone_len, clone_root); |
---|
5405 | | - else |
---|
| 5480 | + clone_data_offset == data_offset) { |
---|
| 5481 | + const u64 src_end = clone_root->offset + clone_len; |
---|
| 5482 | + const u64 sectorsize = SZ_64K; |
---|
| 5483 | + |
---|
| 5484 | + /* |
---|
| 5485 | + * We can't clone the last block, when its size is not |
---|
| 5486 | + * sector size aligned, into the middle of a file. If we |
---|
| 5487 | + * do so, the receiver will get a failure (-EINVAL) when |
---|
| 5488 | + * trying to clone or will silently corrupt the data in |
---|
| 5489 | + * the destination file if it's on a kernel without the |
---|
| 5490 | + * fix introduced by commit ac765f83f1397646 |
---|
| 5491 | + * ("Btrfs: fix data corruption due to cloning of eof |
---|
| 5492 | + * block"). |
---|
| 5493 | + * |
---|
| 5494 | + * So issue a clone of the aligned down range plus a |
---|
| 5495 | + * regular write for the eof block, if we hit that case. |
---|
| 5496 | + * |
---|
| 5497 | + * Also, we use the maximum possible sector size, 64K, |
---|
| 5498 | + * because we don't know what's the sector size of the |
---|
| 5499 | + * filesystem that receives the stream, so we have to |
---|
| 5500 | + * assume the largest possible sector size. |
---|
| 5501 | + */ |
---|
| 5502 | + if (src_end == clone_src_i_size && |
---|
| 5503 | + !IS_ALIGNED(src_end, sectorsize) && |
---|
| 5504 | + offset + clone_len < sctx->cur_inode_size) { |
---|
| 5505 | + u64 slen; |
---|
| 5506 | + |
---|
| 5507 | + slen = ALIGN_DOWN(src_end - clone_root->offset, |
---|
| 5508 | + sectorsize); |
---|
| 5509 | + if (slen > 0) { |
---|
| 5510 | + ret = send_clone(sctx, offset, slen, |
---|
| 5511 | + clone_root); |
---|
| 5512 | + if (ret < 0) |
---|
| 5513 | + goto out; |
---|
| 5514 | + } |
---|
| 5515 | + ret = send_extent_data(sctx, offset + slen, |
---|
| 5516 | + clone_len - slen); |
---|
| 5517 | + } else { |
---|
| 5518 | + ret = send_clone(sctx, offset, clone_len, |
---|
| 5519 | + clone_root); |
---|
| 5520 | + } |
---|
| 5521 | + } else if (crossed_src_i_size && clone_len < len) { |
---|
| 5522 | + /* |
---|
| 5523 | + * If we are at i_size of the clone source inode and we |
---|
| 5524 | + * can not clone from it, terminate the loop. This is |
---|
| 5525 | + * to avoid sending two write operations, one with a |
---|
| 5526 | + * length matching clone_len and the final one after |
---|
| 5527 | + * this loop with a length of len - clone_len. |
---|
| 5528 | + * |
---|
| 5529 | + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED |
---|
| 5530 | + * was passed to the send ioctl), this helps avoid |
---|
| 5531 | + * sending an encoded write for an offset that is not |
---|
| 5532 | + * sector size aligned, in case the i_size of the source |
---|
| 5533 | + * inode is not sector size aligned. That will make the |
---|
| 5534 | + * receiver fallback to decompression of the data and |
---|
| 5535 | + * writing it using regular buffered IO, therefore while |
---|
| 5536 | + * not incorrect, it's not optimal due decompression and |
---|
| 5537 | + * possible re-compression at the receiver. |
---|
| 5538 | + */ |
---|
| 5539 | + break; |
---|
| 5540 | + } else { |
---|
5406 | 5541 | ret = send_extent_data(sctx, offset, clone_len); |
---|
| 5542 | + } |
---|
5407 | 5543 | |
---|
5408 | 5544 | if (ret < 0) |
---|
5409 | 5545 | goto out; |
---|
.. | .. |
---|
5413 | 5549 | break; |
---|
5414 | 5550 | offset += clone_len; |
---|
5415 | 5551 | clone_root->offset += clone_len; |
---|
| 5552 | + |
---|
| 5553 | + /* |
---|
| 5554 | + * If we are cloning from the file we are currently processing, |
---|
| 5555 | + * and using the send root as the clone root, we must stop once |
---|
| 5556 | + * the current clone offset reaches the current eof of the file |
---|
| 5557 | + * at the receiver, otherwise we would issue an invalid clone |
---|
| 5558 | + * operation (source range going beyond eof) and cause the |
---|
| 5559 | + * receiver to fail. So if we reach the current eof, bail out |
---|
| 5560 | + * and fallback to a regular write. |
---|
| 5561 | + */ |
---|
| 5562 | + if (clone_root->root == sctx->send_root && |
---|
| 5563 | + clone_root->ino == sctx->cur_ino && |
---|
| 5564 | + clone_root->offset >= sctx->cur_inode_next_write_offset) |
---|
| 5565 | + break; |
---|
| 5566 | + |
---|
5416 | 5567 | data_offset += clone_len; |
---|
5417 | 5568 | next: |
---|
5418 | 5569 | path->slots[0]++; |
---|
.. | .. |
---|
5433 | 5584 | struct clone_root *clone_root) |
---|
5434 | 5585 | { |
---|
5435 | 5586 | int ret = 0; |
---|
5436 | | - struct btrfs_file_extent_item *ei; |
---|
5437 | 5587 | u64 offset = key->offset; |
---|
5438 | | - u64 len; |
---|
5439 | | - u8 type; |
---|
| 5588 | + u64 end; |
---|
5440 | 5589 | u64 bs = sctx->send_root->fs_info->sb->s_blocksize; |
---|
5441 | 5590 | |
---|
5442 | | - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
---|
5443 | | - struct btrfs_file_extent_item); |
---|
5444 | | - type = btrfs_file_extent_type(path->nodes[0], ei); |
---|
5445 | | - if (type == BTRFS_FILE_EXTENT_INLINE) { |
---|
5446 | | - len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); |
---|
5447 | | - /* |
---|
5448 | | - * it is possible the inline item won't cover the whole page, |
---|
5449 | | - * but there may be items after this page. Make |
---|
5450 | | - * sure to send the whole thing |
---|
5451 | | - */ |
---|
5452 | | - len = PAGE_ALIGN(len); |
---|
5453 | | - } else { |
---|
5454 | | - len = btrfs_file_extent_num_bytes(path->nodes[0], ei); |
---|
5455 | | - } |
---|
| 5591 | + end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); |
---|
| 5592 | + if (offset >= end) |
---|
| 5593 | + return 0; |
---|
5456 | 5594 | |
---|
5457 | | - if (offset >= sctx->cur_inode_size) { |
---|
5458 | | - ret = 0; |
---|
5459 | | - goto out; |
---|
5460 | | - } |
---|
5461 | | - if (offset + len > sctx->cur_inode_size) |
---|
5462 | | - len = sctx->cur_inode_size - offset; |
---|
5463 | | - if (len == 0) { |
---|
5464 | | - ret = 0; |
---|
5465 | | - goto out; |
---|
5466 | | - } |
---|
5467 | | - |
---|
5468 | | - if (clone_root && IS_ALIGNED(offset + len, bs)) { |
---|
| 5595 | + if (clone_root && IS_ALIGNED(end, bs)) { |
---|
| 5596 | + struct btrfs_file_extent_item *ei; |
---|
5469 | 5597 | u64 disk_byte; |
---|
5470 | 5598 | u64 data_offset; |
---|
5471 | 5599 | |
---|
| 5600 | + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
---|
| 5601 | + struct btrfs_file_extent_item); |
---|
5472 | 5602 | disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); |
---|
5473 | 5603 | data_offset = btrfs_file_extent_offset(path->nodes[0], ei); |
---|
5474 | 5604 | ret = clone_range(sctx, clone_root, disk_byte, data_offset, |
---|
5475 | | - offset, len); |
---|
| 5605 | + offset, end - offset); |
---|
5476 | 5606 | } else { |
---|
5477 | | - ret = send_extent_data(sctx, offset, len); |
---|
| 5607 | + ret = send_extent_data(sctx, offset, end - offset); |
---|
5478 | 5608 | } |
---|
5479 | | - sctx->cur_inode_next_write_offset = offset + len; |
---|
5480 | | -out: |
---|
| 5609 | + sctx->cur_inode_next_write_offset = end; |
---|
5481 | 5610 | return ret; |
---|
5482 | 5611 | } |
---|
5483 | 5612 | |
---|
.. | .. |
---|
5675 | 5804 | { |
---|
5676 | 5805 | struct btrfs_path *path; |
---|
5677 | 5806 | struct btrfs_root *root = sctx->send_root; |
---|
5678 | | - struct btrfs_file_extent_item *fi; |
---|
5679 | 5807 | struct btrfs_key key; |
---|
5680 | | - u64 extent_end; |
---|
5681 | | - u8 type; |
---|
5682 | 5808 | int ret; |
---|
5683 | 5809 | |
---|
5684 | 5810 | path = alloc_path_for_send(); |
---|
.. | .. |
---|
5698 | 5824 | if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) |
---|
5699 | 5825 | goto out; |
---|
5700 | 5826 | |
---|
5701 | | - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], |
---|
5702 | | - struct btrfs_file_extent_item); |
---|
5703 | | - type = btrfs_file_extent_type(path->nodes[0], fi); |
---|
5704 | | - if (type == BTRFS_FILE_EXTENT_INLINE) { |
---|
5705 | | - u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); |
---|
5706 | | - extent_end = ALIGN(key.offset + size, |
---|
5707 | | - sctx->send_root->fs_info->sectorsize); |
---|
5708 | | - } else { |
---|
5709 | | - extent_end = key.offset + |
---|
5710 | | - btrfs_file_extent_num_bytes(path->nodes[0], fi); |
---|
5711 | | - } |
---|
5712 | | - sctx->cur_inode_last_extent = extent_end; |
---|
| 5827 | + sctx->cur_inode_last_extent = btrfs_file_extent_end(path); |
---|
5713 | 5828 | out: |
---|
5714 | 5829 | btrfs_free_path(path); |
---|
5715 | 5830 | return ret; |
---|
.. | .. |
---|
5763 | 5878 | break; |
---|
5764 | 5879 | |
---|
5765 | 5880 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
---|
5766 | | - if (btrfs_file_extent_type(leaf, fi) == |
---|
5767 | | - BTRFS_FILE_EXTENT_INLINE) { |
---|
5768 | | - u64 size = btrfs_file_extent_ram_bytes(leaf, fi); |
---|
5769 | | - |
---|
5770 | | - extent_end = ALIGN(key.offset + size, |
---|
5771 | | - root->fs_info->sectorsize); |
---|
5772 | | - } else { |
---|
5773 | | - extent_end = key.offset + |
---|
5774 | | - btrfs_file_extent_num_bytes(leaf, fi); |
---|
5775 | | - } |
---|
| 5881 | + extent_end = btrfs_file_extent_end(path); |
---|
5776 | 5882 | if (extent_end <= start) |
---|
5777 | 5883 | goto next; |
---|
5778 | 5884 | if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) { |
---|
.. | .. |
---|
5793 | 5899 | static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, |
---|
5794 | 5900 | struct btrfs_key *key) |
---|
5795 | 5901 | { |
---|
5796 | | - struct btrfs_file_extent_item *fi; |
---|
5797 | | - u64 extent_end; |
---|
5798 | | - u8 type; |
---|
5799 | 5902 | int ret = 0; |
---|
5800 | 5903 | |
---|
5801 | 5904 | if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) |
---|
.. | .. |
---|
5805 | 5908 | ret = get_last_extent(sctx, key->offset - 1); |
---|
5806 | 5909 | if (ret) |
---|
5807 | 5910 | return ret; |
---|
5808 | | - } |
---|
5809 | | - |
---|
5810 | | - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], |
---|
5811 | | - struct btrfs_file_extent_item); |
---|
5812 | | - type = btrfs_file_extent_type(path->nodes[0], fi); |
---|
5813 | | - if (type == BTRFS_FILE_EXTENT_INLINE) { |
---|
5814 | | - u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); |
---|
5815 | | - extent_end = ALIGN(key->offset + size, |
---|
5816 | | - sctx->send_root->fs_info->sectorsize); |
---|
5817 | | - } else { |
---|
5818 | | - extent_end = key->offset + |
---|
5819 | | - btrfs_file_extent_num_bytes(path->nodes[0], fi); |
---|
5820 | 5911 | } |
---|
5821 | 5912 | |
---|
5822 | 5913 | if (path->slots[0] == 0 && |
---|
.. | .. |
---|
5844 | 5935 | else |
---|
5845 | 5936 | ret = 0; |
---|
5846 | 5937 | } |
---|
5847 | | - sctx->cur_inode_last_extent = extent_end; |
---|
| 5938 | + sctx->cur_inode_last_extent = btrfs_file_extent_end(path); |
---|
5848 | 5939 | return ret; |
---|
5849 | 5940 | } |
---|
5850 | 5941 | |
---|
.. | .. |
---|
6654 | 6745 | return ret; |
---|
6655 | 6746 | } |
---|
6656 | 6747 | |
---|
| 6748 | +static int tree_move_down(struct btrfs_path *path, int *level) |
---|
| 6749 | +{ |
---|
| 6750 | + struct extent_buffer *eb; |
---|
| 6751 | + |
---|
| 6752 | + BUG_ON(*level == 0); |
---|
| 6753 | + eb = btrfs_read_node_slot(path->nodes[*level], path->slots[*level]); |
---|
| 6754 | + if (IS_ERR(eb)) |
---|
| 6755 | + return PTR_ERR(eb); |
---|
| 6756 | + |
---|
| 6757 | + path->nodes[*level - 1] = eb; |
---|
| 6758 | + path->slots[*level - 1] = 0; |
---|
| 6759 | + (*level)--; |
---|
| 6760 | + return 0; |
---|
| 6761 | +} |
---|
| 6762 | + |
---|
| 6763 | +static int tree_move_next_or_upnext(struct btrfs_path *path, |
---|
| 6764 | + int *level, int root_level) |
---|
| 6765 | +{ |
---|
| 6766 | + int ret = 0; |
---|
| 6767 | + int nritems; |
---|
| 6768 | + nritems = btrfs_header_nritems(path->nodes[*level]); |
---|
| 6769 | + |
---|
| 6770 | + path->slots[*level]++; |
---|
| 6771 | + |
---|
| 6772 | + while (path->slots[*level] >= nritems) { |
---|
| 6773 | + if (*level == root_level) |
---|
| 6774 | + return -1; |
---|
| 6775 | + |
---|
| 6776 | + /* move upnext */ |
---|
| 6777 | + path->slots[*level] = 0; |
---|
| 6778 | + free_extent_buffer(path->nodes[*level]); |
---|
| 6779 | + path->nodes[*level] = NULL; |
---|
| 6780 | + (*level)++; |
---|
| 6781 | + path->slots[*level]++; |
---|
| 6782 | + |
---|
| 6783 | + nritems = btrfs_header_nritems(path->nodes[*level]); |
---|
| 6784 | + ret = 1; |
---|
| 6785 | + } |
---|
| 6786 | + return ret; |
---|
| 6787 | +} |
---|
| 6788 | + |
---|
| 6789 | +/* |
---|
| 6790 | + * Returns 1 if it had to move up and next. 0 is returned if it moved only next |
---|
| 6791 | + * or down. |
---|
| 6792 | + */ |
---|
| 6793 | +static int tree_advance(struct btrfs_path *path, |
---|
| 6794 | + int *level, int root_level, |
---|
| 6795 | + int allow_down, |
---|
| 6796 | + struct btrfs_key *key) |
---|
| 6797 | +{ |
---|
| 6798 | + int ret; |
---|
| 6799 | + |
---|
| 6800 | + if (*level == 0 || !allow_down) { |
---|
| 6801 | + ret = tree_move_next_or_upnext(path, level, root_level); |
---|
| 6802 | + } else { |
---|
| 6803 | + ret = tree_move_down(path, level); |
---|
| 6804 | + } |
---|
| 6805 | + if (ret >= 0) { |
---|
| 6806 | + if (*level == 0) |
---|
| 6807 | + btrfs_item_key_to_cpu(path->nodes[*level], key, |
---|
| 6808 | + path->slots[*level]); |
---|
| 6809 | + else |
---|
| 6810 | + btrfs_node_key_to_cpu(path->nodes[*level], key, |
---|
| 6811 | + path->slots[*level]); |
---|
| 6812 | + } |
---|
| 6813 | + return ret; |
---|
| 6814 | +} |
---|
| 6815 | + |
---|
| 6816 | +static int tree_compare_item(struct btrfs_path *left_path, |
---|
| 6817 | + struct btrfs_path *right_path, |
---|
| 6818 | + char *tmp_buf) |
---|
| 6819 | +{ |
---|
| 6820 | + int cmp; |
---|
| 6821 | + int len1, len2; |
---|
| 6822 | + unsigned long off1, off2; |
---|
| 6823 | + |
---|
| 6824 | + len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]); |
---|
| 6825 | + len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]); |
---|
| 6826 | + if (len1 != len2) |
---|
| 6827 | + return 1; |
---|
| 6828 | + |
---|
| 6829 | + off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]); |
---|
| 6830 | + off2 = btrfs_item_ptr_offset(right_path->nodes[0], |
---|
| 6831 | + right_path->slots[0]); |
---|
| 6832 | + |
---|
| 6833 | + read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1); |
---|
| 6834 | + |
---|
| 6835 | + cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1); |
---|
| 6836 | + if (cmp) |
---|
| 6837 | + return 1; |
---|
| 6838 | + return 0; |
---|
| 6839 | +} |
---|
| 6840 | + |
---|
| 6841 | +/* |
---|
| 6842 | + * This function compares two trees and calls the provided callback for |
---|
| 6843 | + * every changed/new/deleted item it finds. |
---|
| 6844 | + * If shared tree blocks are encountered, whole subtrees are skipped, making |
---|
| 6845 | + * the compare pretty fast on snapshotted subvolumes. |
---|
| 6846 | + * |
---|
| 6847 | + * This currently works on commit roots only. As commit roots are read only, |
---|
| 6848 | + * we don't do any locking. The commit roots are protected with transactions. |
---|
| 6849 | + * Transactions are ended and rejoined when a commit is tried in between. |
---|
| 6850 | + * |
---|
| 6851 | + * This function checks for modifications done to the trees while comparing. |
---|
| 6852 | + * If it detects a change, it aborts immediately. |
---|
| 6853 | + */ |
---|
| 6854 | +static int btrfs_compare_trees(struct btrfs_root *left_root, |
---|
| 6855 | + struct btrfs_root *right_root, void *ctx) |
---|
| 6856 | +{ |
---|
| 6857 | + struct btrfs_fs_info *fs_info = left_root->fs_info; |
---|
| 6858 | + int ret; |
---|
| 6859 | + int cmp; |
---|
| 6860 | + struct btrfs_path *left_path = NULL; |
---|
| 6861 | + struct btrfs_path *right_path = NULL; |
---|
| 6862 | + struct btrfs_key left_key; |
---|
| 6863 | + struct btrfs_key right_key; |
---|
| 6864 | + char *tmp_buf = NULL; |
---|
| 6865 | + int left_root_level; |
---|
| 6866 | + int right_root_level; |
---|
| 6867 | + int left_level; |
---|
| 6868 | + int right_level; |
---|
| 6869 | + int left_end_reached; |
---|
| 6870 | + int right_end_reached; |
---|
| 6871 | + int advance_left; |
---|
| 6872 | + int advance_right; |
---|
| 6873 | + u64 left_blockptr; |
---|
| 6874 | + u64 right_blockptr; |
---|
| 6875 | + u64 left_gen; |
---|
| 6876 | + u64 right_gen; |
---|
| 6877 | + |
---|
| 6878 | + left_path = btrfs_alloc_path(); |
---|
| 6879 | + if (!left_path) { |
---|
| 6880 | + ret = -ENOMEM; |
---|
| 6881 | + goto out; |
---|
| 6882 | + } |
---|
| 6883 | + right_path = btrfs_alloc_path(); |
---|
| 6884 | + if (!right_path) { |
---|
| 6885 | + ret = -ENOMEM; |
---|
| 6886 | + goto out; |
---|
| 6887 | + } |
---|
| 6888 | + |
---|
| 6889 | + tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); |
---|
| 6890 | + if (!tmp_buf) { |
---|
| 6891 | + ret = -ENOMEM; |
---|
| 6892 | + goto out; |
---|
| 6893 | + } |
---|
| 6894 | + |
---|
| 6895 | + left_path->search_commit_root = 1; |
---|
| 6896 | + left_path->skip_locking = 1; |
---|
| 6897 | + right_path->search_commit_root = 1; |
---|
| 6898 | + right_path->skip_locking = 1; |
---|
| 6899 | + |
---|
| 6900 | + /* |
---|
| 6901 | + * Strategy: Go to the first items of both trees. Then do |
---|
| 6902 | + * |
---|
| 6903 | + * If both trees are at level 0 |
---|
| 6904 | + * Compare keys of current items |
---|
| 6905 | + * If left < right treat left item as new, advance left tree |
---|
| 6906 | + * and repeat |
---|
| 6907 | + * If left > right treat right item as deleted, advance right tree |
---|
| 6908 | + * and repeat |
---|
| 6909 | + * If left == right do deep compare of items, treat as changed if |
---|
| 6910 | + * needed, advance both trees and repeat |
---|
| 6911 | + * If both trees are at the same level but not at level 0 |
---|
| 6912 | + * Compare keys of current nodes/leafs |
---|
| 6913 | + * If left < right advance left tree and repeat |
---|
| 6914 | + * If left > right advance right tree and repeat |
---|
| 6915 | + * If left == right compare blockptrs of the next nodes/leafs |
---|
| 6916 | + * If they match advance both trees but stay at the same level |
---|
| 6917 | + * and repeat |
---|
| 6918 | + * If they don't match advance both trees while allowing to go |
---|
| 6919 | + * deeper and repeat |
---|
| 6920 | + * If tree levels are different |
---|
| 6921 | + * Advance the tree that needs it and repeat |
---|
| 6922 | + * |
---|
| 6923 | + * Advancing a tree means: |
---|
| 6924 | + * If we are at level 0, try to go to the next slot. If that's not |
---|
| 6925 | + * possible, go one level up and repeat. Stop when we found a level |
---|
| 6926 | + * where we could go to the next slot. We may at this point be on a |
---|
| 6927 | + * node or a leaf. |
---|
| 6928 | + * |
---|
| 6929 | + * If we are not at level 0 and not on shared tree blocks, go one |
---|
| 6930 | + * level deeper. |
---|
| 6931 | + * |
---|
| 6932 | + * If we are not at level 0 and on shared tree blocks, go one slot to |
---|
| 6933 | + * the right if possible or go up and right. |
---|
| 6934 | + */ |
---|
| 6935 | + |
---|
| 6936 | + down_read(&fs_info->commit_root_sem); |
---|
| 6937 | + left_level = btrfs_header_level(left_root->commit_root); |
---|
| 6938 | + left_root_level = left_level; |
---|
| 6939 | + left_path->nodes[left_level] = |
---|
| 6940 | + btrfs_clone_extent_buffer(left_root->commit_root); |
---|
| 6941 | + if (!left_path->nodes[left_level]) { |
---|
| 6942 | + up_read(&fs_info->commit_root_sem); |
---|
| 6943 | + ret = -ENOMEM; |
---|
| 6944 | + goto out; |
---|
| 6945 | + } |
---|
| 6946 | + |
---|
| 6947 | + right_level = btrfs_header_level(right_root->commit_root); |
---|
| 6948 | + right_root_level = right_level; |
---|
| 6949 | + right_path->nodes[right_level] = |
---|
| 6950 | + btrfs_clone_extent_buffer(right_root->commit_root); |
---|
| 6951 | + if (!right_path->nodes[right_level]) { |
---|
| 6952 | + up_read(&fs_info->commit_root_sem); |
---|
| 6953 | + ret = -ENOMEM; |
---|
| 6954 | + goto out; |
---|
| 6955 | + } |
---|
| 6956 | + up_read(&fs_info->commit_root_sem); |
---|
| 6957 | + |
---|
| 6958 | + if (left_level == 0) |
---|
| 6959 | + btrfs_item_key_to_cpu(left_path->nodes[left_level], |
---|
| 6960 | + &left_key, left_path->slots[left_level]); |
---|
| 6961 | + else |
---|
| 6962 | + btrfs_node_key_to_cpu(left_path->nodes[left_level], |
---|
| 6963 | + &left_key, left_path->slots[left_level]); |
---|
| 6964 | + if (right_level == 0) |
---|
| 6965 | + btrfs_item_key_to_cpu(right_path->nodes[right_level], |
---|
| 6966 | + &right_key, right_path->slots[right_level]); |
---|
| 6967 | + else |
---|
| 6968 | + btrfs_node_key_to_cpu(right_path->nodes[right_level], |
---|
| 6969 | + &right_key, right_path->slots[right_level]); |
---|
| 6970 | + |
---|
| 6971 | + left_end_reached = right_end_reached = 0; |
---|
| 6972 | + advance_left = advance_right = 0; |
---|
| 6973 | + |
---|
| 6974 | + while (1) { |
---|
| 6975 | + cond_resched(); |
---|
| 6976 | + if (advance_left && !left_end_reached) { |
---|
| 6977 | + ret = tree_advance(left_path, &left_level, |
---|
| 6978 | + left_root_level, |
---|
| 6979 | + advance_left != ADVANCE_ONLY_NEXT, |
---|
| 6980 | + &left_key); |
---|
| 6981 | + if (ret == -1) |
---|
| 6982 | + left_end_reached = ADVANCE; |
---|
| 6983 | + else if (ret < 0) |
---|
| 6984 | + goto out; |
---|
| 6985 | + advance_left = 0; |
---|
| 6986 | + } |
---|
| 6987 | + if (advance_right && !right_end_reached) { |
---|
| 6988 | + ret = tree_advance(right_path, &right_level, |
---|
| 6989 | + right_root_level, |
---|
| 6990 | + advance_right != ADVANCE_ONLY_NEXT, |
---|
| 6991 | + &right_key); |
---|
| 6992 | + if (ret == -1) |
---|
| 6993 | + right_end_reached = ADVANCE; |
---|
| 6994 | + else if (ret < 0) |
---|
| 6995 | + goto out; |
---|
| 6996 | + advance_right = 0; |
---|
| 6997 | + } |
---|
| 6998 | + |
---|
| 6999 | + if (left_end_reached && right_end_reached) { |
---|
| 7000 | + ret = 0; |
---|
| 7001 | + goto out; |
---|
| 7002 | + } else if (left_end_reached) { |
---|
| 7003 | + if (right_level == 0) { |
---|
| 7004 | + ret = changed_cb(left_path, right_path, |
---|
| 7005 | + &right_key, |
---|
| 7006 | + BTRFS_COMPARE_TREE_DELETED, |
---|
| 7007 | + ctx); |
---|
| 7008 | + if (ret < 0) |
---|
| 7009 | + goto out; |
---|
| 7010 | + } |
---|
| 7011 | + advance_right = ADVANCE; |
---|
| 7012 | + continue; |
---|
| 7013 | + } else if (right_end_reached) { |
---|
| 7014 | + if (left_level == 0) { |
---|
| 7015 | + ret = changed_cb(left_path, right_path, |
---|
| 7016 | + &left_key, |
---|
| 7017 | + BTRFS_COMPARE_TREE_NEW, |
---|
| 7018 | + ctx); |
---|
| 7019 | + if (ret < 0) |
---|
| 7020 | + goto out; |
---|
| 7021 | + } |
---|
| 7022 | + advance_left = ADVANCE; |
---|
| 7023 | + continue; |
---|
| 7024 | + } |
---|
| 7025 | + |
---|
| 7026 | + if (left_level == 0 && right_level == 0) { |
---|
| 7027 | + cmp = btrfs_comp_cpu_keys(&left_key, &right_key); |
---|
| 7028 | + if (cmp < 0) { |
---|
| 7029 | + ret = changed_cb(left_path, right_path, |
---|
| 7030 | + &left_key, |
---|
| 7031 | + BTRFS_COMPARE_TREE_NEW, |
---|
| 7032 | + ctx); |
---|
| 7033 | + if (ret < 0) |
---|
| 7034 | + goto out; |
---|
| 7035 | + advance_left = ADVANCE; |
---|
| 7036 | + } else if (cmp > 0) { |
---|
| 7037 | + ret = changed_cb(left_path, right_path, |
---|
| 7038 | + &right_key, |
---|
| 7039 | + BTRFS_COMPARE_TREE_DELETED, |
---|
| 7040 | + ctx); |
---|
| 7041 | + if (ret < 0) |
---|
| 7042 | + goto out; |
---|
| 7043 | + advance_right = ADVANCE; |
---|
| 7044 | + } else { |
---|
| 7045 | + enum btrfs_compare_tree_result result; |
---|
| 7046 | + |
---|
| 7047 | + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); |
---|
| 7048 | + ret = tree_compare_item(left_path, right_path, |
---|
| 7049 | + tmp_buf); |
---|
| 7050 | + if (ret) |
---|
| 7051 | + result = BTRFS_COMPARE_TREE_CHANGED; |
---|
| 7052 | + else |
---|
| 7053 | + result = BTRFS_COMPARE_TREE_SAME; |
---|
| 7054 | + ret = changed_cb(left_path, right_path, |
---|
| 7055 | + &left_key, result, ctx); |
---|
| 7056 | + if (ret < 0) |
---|
| 7057 | + goto out; |
---|
| 7058 | + advance_left = ADVANCE; |
---|
| 7059 | + advance_right = ADVANCE; |
---|
| 7060 | + } |
---|
| 7061 | + } else if (left_level == right_level) { |
---|
| 7062 | + cmp = btrfs_comp_cpu_keys(&left_key, &right_key); |
---|
| 7063 | + if (cmp < 0) { |
---|
| 7064 | + advance_left = ADVANCE; |
---|
| 7065 | + } else if (cmp > 0) { |
---|
| 7066 | + advance_right = ADVANCE; |
---|
| 7067 | + } else { |
---|
| 7068 | + left_blockptr = btrfs_node_blockptr( |
---|
| 7069 | + left_path->nodes[left_level], |
---|
| 7070 | + left_path->slots[left_level]); |
---|
| 7071 | + right_blockptr = btrfs_node_blockptr( |
---|
| 7072 | + right_path->nodes[right_level], |
---|
| 7073 | + right_path->slots[right_level]); |
---|
| 7074 | + left_gen = btrfs_node_ptr_generation( |
---|
| 7075 | + left_path->nodes[left_level], |
---|
| 7076 | + left_path->slots[left_level]); |
---|
| 7077 | + right_gen = btrfs_node_ptr_generation( |
---|
| 7078 | + right_path->nodes[right_level], |
---|
| 7079 | + right_path->slots[right_level]); |
---|
| 7080 | + if (left_blockptr == right_blockptr && |
---|
| 7081 | + left_gen == right_gen) { |
---|
| 7082 | + /* |
---|
| 7083 | + * As we're on a shared block, don't |
---|
| 7084 | + * allow to go deeper. |
---|
| 7085 | + */ |
---|
| 7086 | + advance_left = ADVANCE_ONLY_NEXT; |
---|
| 7087 | + advance_right = ADVANCE_ONLY_NEXT; |
---|
| 7088 | + } else { |
---|
| 7089 | + advance_left = ADVANCE; |
---|
| 7090 | + advance_right = ADVANCE; |
---|
| 7091 | + } |
---|
| 7092 | + } |
---|
| 7093 | + } else if (left_level < right_level) { |
---|
| 7094 | + advance_right = ADVANCE; |
---|
| 7095 | + } else { |
---|
| 7096 | + advance_left = ADVANCE; |
---|
| 7097 | + } |
---|
| 7098 | + } |
---|
| 7099 | + |
---|
| 7100 | +out: |
---|
| 7101 | + btrfs_free_path(left_path); |
---|
| 7102 | + btrfs_free_path(right_path); |
---|
| 7103 | + kvfree(tmp_buf); |
---|
| 7104 | + return ret; |
---|
| 7105 | +} |
---|
| 7106 | + |
---|
6657 | 7107 | static int send_subvol(struct send_ctx *sctx) |
---|
6658 | 7108 | { |
---|
6659 | 7109 | int ret; |
---|
.. | .. |
---|
6669 | 7119 | goto out; |
---|
6670 | 7120 | |
---|
6671 | 7121 | if (sctx->parent_root) { |
---|
6672 | | - ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, |
---|
6673 | | - changed_cb, sctx); |
---|
| 7122 | + ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx); |
---|
6674 | 7123 | if (ret < 0) |
---|
6675 | 7124 | goto out; |
---|
6676 | 7125 | ret = finish_inode_if_needed(sctx, 1); |
---|
.. | .. |
---|
6779 | 7228 | spin_unlock(&root->root_item_lock); |
---|
6780 | 7229 | } |
---|
6781 | 7230 | |
---|
| 7231 | +static void dedupe_in_progress_warn(const struct btrfs_root *root) |
---|
| 7232 | +{ |
---|
| 7233 | + btrfs_warn_rl(root->fs_info, |
---|
| 7234 | +"cannot use root %llu for send while deduplications on it are in progress (%d in progress)", |
---|
| 7235 | + root->root_key.objectid, root->dedupe_in_progress); |
---|
| 7236 | +} |
---|
| 7237 | + |
---|
6782 | 7238 | long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) |
---|
6783 | 7239 | { |
---|
6784 | 7240 | int ret = 0; |
---|
6785 | 7241 | struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root; |
---|
6786 | 7242 | struct btrfs_fs_info *fs_info = send_root->fs_info; |
---|
6787 | 7243 | struct btrfs_root *clone_root; |
---|
6788 | | - struct btrfs_key key; |
---|
6789 | 7244 | struct send_ctx *sctx = NULL; |
---|
6790 | 7245 | u32 i; |
---|
6791 | 7246 | u64 *clone_sources_tmp = NULL; |
---|
6792 | 7247 | int clone_sources_to_rollback = 0; |
---|
6793 | | - unsigned alloc_size; |
---|
| 7248 | + size_t alloc_size; |
---|
6794 | 7249 | int sort_clone_roots = 0; |
---|
6795 | | - int index; |
---|
6796 | 7250 | |
---|
6797 | 7251 | if (!capable(CAP_SYS_ADMIN)) |
---|
6798 | 7252 | return -EPERM; |
---|
.. | .. |
---|
6802 | 7256 | * making it RW. This also protects against deletion. |
---|
6803 | 7257 | */ |
---|
6804 | 7258 | spin_lock(&send_root->root_item_lock); |
---|
| 7259 | + if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) { |
---|
| 7260 | + dedupe_in_progress_warn(send_root); |
---|
| 7261 | + spin_unlock(&send_root->root_item_lock); |
---|
| 7262 | + return -EAGAIN; |
---|
| 7263 | + } |
---|
6805 | 7264 | send_root->send_in_progress++; |
---|
6806 | 7265 | spin_unlock(&send_root->root_item_lock); |
---|
6807 | 7266 | |
---|
.. | .. |
---|
6817 | 7276 | /* |
---|
6818 | 7277 | * Check that we don't overflow at later allocations, we request |
---|
6819 | 7278 | * clone_sources_count + 1 items, and compare to unsigned long inside |
---|
6820 | | - * access_ok. |
---|
| 7279 | + * access_ok. Also set an upper limit for allocation size so this can't |
---|
| 7280 | + * easily exhaust memory. Max number of clone sources is about 200K. |
---|
6821 | 7281 | */ |
---|
6822 | | - if (arg->clone_sources_count > |
---|
6823 | | - ULONG_MAX / sizeof(struct clone_root) - 1) { |
---|
| 7282 | + if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) { |
---|
6824 | 7283 | ret = -EINVAL; |
---|
6825 | | - goto out; |
---|
6826 | | - } |
---|
6827 | | - |
---|
6828 | | - if (!access_ok(VERIFY_READ, arg->clone_sources, |
---|
6829 | | - sizeof(*arg->clone_sources) * |
---|
6830 | | - arg->clone_sources_count)) { |
---|
6831 | | - ret = -EFAULT; |
---|
6832 | 7284 | goto out; |
---|
6833 | 7285 | } |
---|
6834 | 7286 | |
---|
.. | .. |
---|
6875 | 7327 | goto out; |
---|
6876 | 7328 | } |
---|
6877 | 7329 | |
---|
6878 | | - sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL); |
---|
6879 | | - if (!sctx->read_buf) { |
---|
6880 | | - ret = -ENOMEM; |
---|
6881 | | - goto out; |
---|
6882 | | - } |
---|
6883 | | - |
---|
6884 | 7330 | sctx->pending_dir_moves = RB_ROOT; |
---|
6885 | 7331 | sctx->waiting_dir_moves = RB_ROOT; |
---|
6886 | 7332 | sctx->orphan_dirs = RB_ROOT; |
---|
6887 | 7333 | |
---|
6888 | | - alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); |
---|
6889 | | - |
---|
6890 | | - sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL); |
---|
| 7334 | + sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), |
---|
| 7335 | + arg->clone_sources_count + 1, |
---|
| 7336 | + GFP_KERNEL); |
---|
6891 | 7337 | if (!sctx->clone_roots) { |
---|
6892 | 7338 | ret = -ENOMEM; |
---|
6893 | 7339 | goto out; |
---|
6894 | 7340 | } |
---|
6895 | 7341 | |
---|
6896 | | - alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); |
---|
| 7342 | + alloc_size = array_size(sizeof(*arg->clone_sources), |
---|
| 7343 | + arg->clone_sources_count); |
---|
6897 | 7344 | |
---|
6898 | 7345 | if (arg->clone_sources_count) { |
---|
6899 | 7346 | clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL); |
---|
.. | .. |
---|
6910 | 7357 | } |
---|
6911 | 7358 | |
---|
6912 | 7359 | for (i = 0; i < arg->clone_sources_count; i++) { |
---|
6913 | | - key.objectid = clone_sources_tmp[i]; |
---|
6914 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
---|
6915 | | - key.offset = (u64)-1; |
---|
6916 | | - |
---|
6917 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
---|
6918 | | - |
---|
6919 | | - clone_root = btrfs_read_fs_root_no_name(fs_info, &key); |
---|
| 7360 | + clone_root = btrfs_get_fs_root(fs_info, |
---|
| 7361 | + clone_sources_tmp[i], true); |
---|
6920 | 7362 | if (IS_ERR(clone_root)) { |
---|
6921 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
6922 | 7363 | ret = PTR_ERR(clone_root); |
---|
6923 | 7364 | goto out; |
---|
6924 | 7365 | } |
---|
.. | .. |
---|
6926 | 7367 | if (!btrfs_root_readonly(clone_root) || |
---|
6927 | 7368 | btrfs_root_dead(clone_root)) { |
---|
6928 | 7369 | spin_unlock(&clone_root->root_item_lock); |
---|
6929 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
| 7370 | + btrfs_put_root(clone_root); |
---|
6930 | 7371 | ret = -EPERM; |
---|
| 7372 | + goto out; |
---|
| 7373 | + } |
---|
| 7374 | + if (clone_root->dedupe_in_progress) { |
---|
| 7375 | + dedupe_in_progress_warn(clone_root); |
---|
| 7376 | + spin_unlock(&clone_root->root_item_lock); |
---|
| 7377 | + btrfs_put_root(clone_root); |
---|
| 7378 | + ret = -EAGAIN; |
---|
6931 | 7379 | goto out; |
---|
6932 | 7380 | } |
---|
6933 | 7381 | clone_root->send_in_progress++; |
---|
6934 | 7382 | spin_unlock(&clone_root->root_item_lock); |
---|
6935 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
6936 | 7383 | |
---|
6937 | 7384 | sctx->clone_roots[i].root = clone_root; |
---|
6938 | 7385 | clone_sources_to_rollback = i + 1; |
---|
.. | .. |
---|
6942 | 7389 | } |
---|
6943 | 7390 | |
---|
6944 | 7391 | if (arg->parent_root) { |
---|
6945 | | - key.objectid = arg->parent_root; |
---|
6946 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
---|
6947 | | - key.offset = (u64)-1; |
---|
6948 | | - |
---|
6949 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
---|
6950 | | - |
---|
6951 | | - sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); |
---|
| 7392 | + sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root, |
---|
| 7393 | + true); |
---|
6952 | 7394 | if (IS_ERR(sctx->parent_root)) { |
---|
6953 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
6954 | 7395 | ret = PTR_ERR(sctx->parent_root); |
---|
6955 | 7396 | goto out; |
---|
6956 | 7397 | } |
---|
.. | .. |
---|
6960 | 7401 | if (!btrfs_root_readonly(sctx->parent_root) || |
---|
6961 | 7402 | btrfs_root_dead(sctx->parent_root)) { |
---|
6962 | 7403 | spin_unlock(&sctx->parent_root->root_item_lock); |
---|
6963 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
6964 | 7404 | ret = -EPERM; |
---|
6965 | 7405 | goto out; |
---|
6966 | 7406 | } |
---|
| 7407 | + if (sctx->parent_root->dedupe_in_progress) { |
---|
| 7408 | + dedupe_in_progress_warn(sctx->parent_root); |
---|
| 7409 | + spin_unlock(&sctx->parent_root->root_item_lock); |
---|
| 7410 | + ret = -EAGAIN; |
---|
| 7411 | + goto out; |
---|
| 7412 | + } |
---|
6967 | 7413 | spin_unlock(&sctx->parent_root->root_item_lock); |
---|
6968 | | - |
---|
6969 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
6970 | 7414 | } |
---|
6971 | 7415 | |
---|
6972 | 7416 | /* |
---|
.. | .. |
---|
6974 | 7418 | * is behind the current send position. This is checked while searching |
---|
6975 | 7419 | * for possible clone sources. |
---|
6976 | 7420 | */ |
---|
6977 | | - sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; |
---|
| 7421 | + sctx->clone_roots[sctx->clone_roots_cnt++].root = |
---|
| 7422 | + btrfs_grab_root(sctx->send_root); |
---|
6978 | 7423 | |
---|
6979 | 7424 | /* We do a bsearch later */ |
---|
6980 | 7425 | sort(sctx->clone_roots, sctx->clone_roots_cnt, |
---|
.. | .. |
---|
6990 | 7435 | if (ret) |
---|
6991 | 7436 | goto out; |
---|
6992 | 7437 | |
---|
| 7438 | + mutex_lock(&fs_info->balance_mutex); |
---|
| 7439 | + if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { |
---|
| 7440 | + mutex_unlock(&fs_info->balance_mutex); |
---|
| 7441 | + btrfs_warn_rl(fs_info, |
---|
| 7442 | + "cannot run send because a balance operation is in progress"); |
---|
| 7443 | + ret = -EAGAIN; |
---|
| 7444 | + goto out; |
---|
| 7445 | + } |
---|
| 7446 | + fs_info->send_in_progress++; |
---|
| 7447 | + mutex_unlock(&fs_info->balance_mutex); |
---|
| 7448 | + |
---|
6993 | 7449 | current->journal_info = BTRFS_SEND_TRANS_STUB; |
---|
6994 | 7450 | ret = send_subvol(sctx); |
---|
6995 | 7451 | current->journal_info = NULL; |
---|
| 7452 | + mutex_lock(&fs_info->balance_mutex); |
---|
| 7453 | + fs_info->send_in_progress--; |
---|
| 7454 | + mutex_unlock(&fs_info->balance_mutex); |
---|
6996 | 7455 | if (ret < 0) |
---|
6997 | 7456 | goto out; |
---|
6998 | 7457 | |
---|
.. | .. |
---|
7045 | 7504 | } |
---|
7046 | 7505 | |
---|
7047 | 7506 | if (sort_clone_roots) { |
---|
7048 | | - for (i = 0; i < sctx->clone_roots_cnt; i++) |
---|
| 7507 | + for (i = 0; i < sctx->clone_roots_cnt; i++) { |
---|
7049 | 7508 | btrfs_root_dec_send_in_progress( |
---|
7050 | 7509 | sctx->clone_roots[i].root); |
---|
| 7510 | + btrfs_put_root(sctx->clone_roots[i].root); |
---|
| 7511 | + } |
---|
7051 | 7512 | } else { |
---|
7052 | | - for (i = 0; sctx && i < clone_sources_to_rollback; i++) |
---|
| 7513 | + for (i = 0; sctx && i < clone_sources_to_rollback; i++) { |
---|
7053 | 7514 | btrfs_root_dec_send_in_progress( |
---|
7054 | 7515 | sctx->clone_roots[i].root); |
---|
| 7516 | + btrfs_put_root(sctx->clone_roots[i].root); |
---|
| 7517 | + } |
---|
7055 | 7518 | |
---|
7056 | 7519 | btrfs_root_dec_send_in_progress(send_root); |
---|
7057 | 7520 | } |
---|
7058 | | - if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) |
---|
| 7521 | + if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) { |
---|
7059 | 7522 | btrfs_root_dec_send_in_progress(sctx->parent_root); |
---|
| 7523 | + btrfs_put_root(sctx->parent_root); |
---|
| 7524 | + } |
---|
7060 | 7525 | |
---|
7061 | 7526 | kvfree(clone_sources_tmp); |
---|
7062 | 7527 | |
---|
.. | .. |
---|
7066 | 7531 | |
---|
7067 | 7532 | kvfree(sctx->clone_roots); |
---|
7068 | 7533 | kvfree(sctx->send_buf); |
---|
7069 | | - kvfree(sctx->read_buf); |
---|
7070 | 7534 | |
---|
7071 | 7535 | name_cache_free(sctx); |
---|
7072 | 7536 | |
---|