| .. | .. |
|---|
| 122 | 122 | |
|---|
| 123 | 123 | struct file_ra_state ra; |
|---|
| 124 | 124 | |
|---|
| 125 | | - char *read_buf; |
|---|
| 126 | | - |
|---|
| 127 | 125 | /* |
|---|
| 128 | 126 | * We process inodes by their increasing order, so if before an |
|---|
| 129 | 127 | * incremental send we reverse the parent/child relationship of |
|---|
| .. | .. |
|---|
| 268 | 266 | int need_later_update; |
|---|
| 269 | 267 | int name_len; |
|---|
| 270 | 268 | char name[]; |
|---|
| 269 | +}; |
|---|
| 270 | + |
|---|
| 271 | +#define ADVANCE 1 |
|---|
| 272 | +#define ADVANCE_ONLY_NEXT -1 |
|---|
| 273 | + |
|---|
| 274 | +enum btrfs_compare_tree_result { |
|---|
| 275 | + BTRFS_COMPARE_TREE_NEW, |
|---|
| 276 | + BTRFS_COMPARE_TREE_DELETED, |
|---|
| 277 | + BTRFS_COMPARE_TREE_CHANGED, |
|---|
| 278 | + BTRFS_COMPARE_TREE_SAME, |
|---|
| 271 | 279 | }; |
|---|
| 272 | 280 | |
|---|
| 273 | 281 | __cold |
|---|
| .. | .. |
|---|
| 570 | 578 | return -EOVERFLOW; |
|---|
| 571 | 579 | |
|---|
| 572 | 580 | hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); |
|---|
| 573 | | - hdr->tlv_type = cpu_to_le16(attr); |
|---|
| 574 | | - hdr->tlv_len = cpu_to_le16(len); |
|---|
| 581 | + put_unaligned_le16(attr, &hdr->tlv_type); |
|---|
| 582 | + put_unaligned_le16(len, &hdr->tlv_len); |
|---|
| 575 | 583 | memcpy(hdr + 1, data, len); |
|---|
| 576 | 584 | sctx->send_size += total_len; |
|---|
| 577 | 585 | |
|---|
| .. | .. |
|---|
| 681 | 689 | |
|---|
| 682 | 690 | sctx->send_size += sizeof(*hdr); |
|---|
| 683 | 691 | hdr = (struct btrfs_cmd_header *)sctx->send_buf; |
|---|
| 684 | | - hdr->cmd = cpu_to_le16(cmd); |
|---|
| 692 | + put_unaligned_le16(cmd, &hdr->cmd); |
|---|
| 685 | 693 | |
|---|
| 686 | 694 | return 0; |
|---|
| 687 | 695 | } |
|---|
| .. | .. |
|---|
| 693 | 701 | u32 crc; |
|---|
| 694 | 702 | |
|---|
| 695 | 703 | hdr = (struct btrfs_cmd_header *)sctx->send_buf; |
|---|
| 696 | | - hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); |
|---|
| 697 | | - hdr->crc = 0; |
|---|
| 704 | + put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len); |
|---|
| 705 | + put_unaligned_le32(0, &hdr->crc); |
|---|
| 698 | 706 | |
|---|
| 699 | | - crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
|---|
| 700 | | - hdr->crc = cpu_to_le32(crc); |
|---|
| 707 | + crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
|---|
| 708 | + put_unaligned_le32(crc, &hdr->crc); |
|---|
| 701 | 709 | |
|---|
| 702 | 710 | ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, |
|---|
| 703 | 711 | &sctx->send_off); |
|---|
| 704 | 712 | |
|---|
| 705 | 713 | sctx->total_send_size += sctx->send_size; |
|---|
| 706 | | - sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; |
|---|
| 714 | + sctx->cmd_send_size[get_unaligned_le16(&hdr->cmd)] += sctx->send_size; |
|---|
| 707 | 715 | sctx->send_size = 0; |
|---|
| 708 | 716 | |
|---|
| 709 | 717 | return ret; |
|---|
| .. | .. |
|---|
| 1170 | 1178 | struct backref_ctx { |
|---|
| 1171 | 1179 | struct send_ctx *sctx; |
|---|
| 1172 | 1180 | |
|---|
| 1173 | | - struct btrfs_path *path; |
|---|
| 1174 | 1181 | /* number of total found references */ |
|---|
| 1175 | 1182 | u64 found; |
|---|
| 1176 | 1183 | |
|---|
| .. | .. |
|---|
| 1196 | 1203 | u64 root = (u64)(uintptr_t)key; |
|---|
| 1197 | 1204 | struct clone_root *cr = (struct clone_root *)elt; |
|---|
| 1198 | 1205 | |
|---|
| 1199 | | - if (root < cr->root->objectid) |
|---|
| 1206 | + if (root < cr->root->root_key.objectid) |
|---|
| 1200 | 1207 | return -1; |
|---|
| 1201 | | - if (root > cr->root->objectid) |
|---|
| 1208 | + if (root > cr->root->root_key.objectid) |
|---|
| 1202 | 1209 | return 1; |
|---|
| 1203 | 1210 | return 0; |
|---|
| 1204 | 1211 | } |
|---|
| .. | .. |
|---|
| 1208 | 1215 | struct clone_root *cr1 = (struct clone_root *)e1; |
|---|
| 1209 | 1216 | struct clone_root *cr2 = (struct clone_root *)e2; |
|---|
| 1210 | 1217 | |
|---|
| 1211 | | - if (cr1->root->objectid < cr2->root->objectid) |
|---|
| 1218 | + if (cr1->root->root_key.objectid < cr2->root->root_key.objectid) |
|---|
| 1212 | 1219 | return -1; |
|---|
| 1213 | | - if (cr1->root->objectid > cr2->root->objectid) |
|---|
| 1220 | + if (cr1->root->root_key.objectid > cr2->root->root_key.objectid) |
|---|
| 1214 | 1221 | return 1; |
|---|
| 1215 | 1222 | return 0; |
|---|
| 1216 | 1223 | } |
|---|
| .. | .. |
|---|
| 1223 | 1230 | { |
|---|
| 1224 | 1231 | struct backref_ctx *bctx = ctx_; |
|---|
| 1225 | 1232 | struct clone_root *found; |
|---|
| 1226 | | - int ret; |
|---|
| 1227 | | - u64 i_size; |
|---|
| 1228 | 1233 | |
|---|
| 1229 | 1234 | /* First check if the root is in the list of accepted clone sources */ |
|---|
| 1230 | 1235 | found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, |
|---|
| .. | .. |
|---|
| 1241 | 1246 | } |
|---|
| 1242 | 1247 | |
|---|
| 1243 | 1248 | /* |
|---|
| 1244 | | - * There are inodes that have extents that lie behind its i_size. Don't |
|---|
| 1245 | | - * accept clones from these extents. |
|---|
| 1246 | | - */ |
|---|
| 1247 | | - ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, |
|---|
| 1248 | | - NULL, NULL, NULL); |
|---|
| 1249 | | - btrfs_release_path(bctx->path); |
|---|
| 1250 | | - if (ret < 0) |
|---|
| 1251 | | - return ret; |
|---|
| 1252 | | - |
|---|
| 1253 | | - if (offset + bctx->data_offset + bctx->extent_len > i_size) |
|---|
| 1254 | | - return 0; |
|---|
| 1255 | | - |
|---|
| 1256 | | - /* |
|---|
| 1257 | 1249 | * Make sure we don't consider clones from send_root that are |
|---|
| 1258 | 1250 | * behind the current inode/offset. |
|---|
| 1259 | 1251 | */ |
|---|
| 1260 | 1252 | if (found->root == bctx->sctx->send_root) { |
|---|
| 1261 | 1253 | /* |
|---|
| 1262 | | - * TODO for the moment we don't accept clones from the inode |
|---|
| 1263 | | - * that is currently send. We may change this when |
|---|
| 1264 | | - * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same |
|---|
| 1265 | | - * file. |
|---|
| 1254 | + * If the source inode was not yet processed we can't issue a |
|---|
| 1255 | + * clone operation, as the source extent does not exist yet at |
|---|
| 1256 | + * the destination of the stream. |
|---|
| 1266 | 1257 | */ |
|---|
| 1267 | | - if (ino >= bctx->cur_objectid) |
|---|
| 1258 | + if (ino > bctx->cur_objectid) |
|---|
| 1259 | + return 0; |
|---|
| 1260 | + /* |
|---|
| 1261 | + * We clone from the inode currently being sent as long as the |
|---|
| 1262 | + * source extent is already processed, otherwise we could try |
|---|
| 1263 | + * to clone from an extent that does not exist yet at the |
|---|
| 1264 | + * destination of the stream. |
|---|
| 1265 | + */ |
|---|
| 1266 | + if (ino == bctx->cur_objectid && |
|---|
| 1267 | + offset + bctx->extent_len > |
|---|
| 1268 | + bctx->sctx->cur_inode_next_write_offset) |
|---|
| 1268 | 1269 | return 0; |
|---|
| 1269 | 1270 | } |
|---|
| 1270 | 1271 | |
|---|
| .. | .. |
|---|
| 1329 | 1330 | ret = -ENOMEM; |
|---|
| 1330 | 1331 | goto out; |
|---|
| 1331 | 1332 | } |
|---|
| 1332 | | - |
|---|
| 1333 | | - backref_ctx->path = tmp_path; |
|---|
| 1334 | 1333 | |
|---|
| 1335 | 1334 | if (data_offset >= ino_size) { |
|---|
| 1336 | 1335 | /* |
|---|
| .. | .. |
|---|
| 1718 | 1717 | |
|---|
| 1719 | 1718 | di = btrfs_lookup_dir_item(NULL, root, path, |
|---|
| 1720 | 1719 | dir, name, name_len, 0); |
|---|
| 1721 | | - if (!di) { |
|---|
| 1722 | | - ret = -ENOENT; |
|---|
| 1723 | | - goto out; |
|---|
| 1724 | | - } |
|---|
| 1725 | | - if (IS_ERR(di)) { |
|---|
| 1726 | | - ret = PTR_ERR(di); |
|---|
| 1720 | + if (IS_ERR_OR_NULL(di)) { |
|---|
| 1721 | + ret = di ? PTR_ERR(di) : -ENOENT; |
|---|
| 1727 | 1722 | goto out; |
|---|
| 1728 | 1723 | } |
|---|
| 1729 | 1724 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
|---|
| .. | .. |
|---|
| 2267 | 2262 | * inodes "orphan" name instead of the real name and stop. Same with new inodes |
|---|
| 2268 | 2263 | * that were not created yet and overwritten inodes/refs. |
|---|
| 2269 | 2264 | * |
|---|
| 2270 | | - * When do we have have orphan inodes: |
|---|
| 2265 | + * When do we have orphan inodes: |
|---|
| 2271 | 2266 | * 1. When an inode is freshly created and thus no valid refs are available yet |
|---|
| 2272 | 2267 | * 2. When a directory lost all its refs (deleted) but still has dir items |
|---|
| 2273 | 2268 | * inside which were not processed yet (pending for move/delete). If anyone |
|---|
| .. | .. |
|---|
| 2371 | 2366 | return -ENOMEM; |
|---|
| 2372 | 2367 | } |
|---|
| 2373 | 2368 | |
|---|
| 2374 | | - key.objectid = send_root->objectid; |
|---|
| 2369 | + key.objectid = send_root->root_key.objectid; |
|---|
| 2375 | 2370 | key.type = BTRFS_ROOT_BACKREF_KEY; |
|---|
| 2376 | 2371 | key.offset = 0; |
|---|
| 2377 | 2372 | |
|---|
| .. | .. |
|---|
| 2387 | 2382 | leaf = path->nodes[0]; |
|---|
| 2388 | 2383 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
|---|
| 2389 | 2384 | if (key.type != BTRFS_ROOT_BACKREF_KEY || |
|---|
| 2390 | | - key.objectid != send_root->objectid) { |
|---|
| 2385 | + key.objectid != send_root->root_key.objectid) { |
|---|
| 2391 | 2386 | ret = -ENOENT; |
|---|
| 2392 | 2387 | goto out; |
|---|
| 2393 | 2388 | } |
|---|
| .. | .. |
|---|
| 3957 | 3952 | goto out; |
|---|
| 3958 | 3953 | } |
|---|
| 3959 | 3954 | |
|---|
| 3955 | + /* |
|---|
| 3956 | + * Before doing any rename and link operations, do a first pass on the |
|---|
| 3957 | + * new references to orphanize any unprocessed inodes that may have a |
|---|
| 3958 | + * reference that conflicts with one of the new references of the current |
|---|
| 3959 | + * inode. This needs to happen first because a new reference may conflict |
|---|
| 3960 | + * with the old reference of a parent directory, so we must make sure |
|---|
| 3961 | + * that the path used for link and rename commands doesn't use an |
|---|
| 3962 | + * orphanized name when an ancestor was not yet orphanized. |
|---|
| 3963 | + * |
|---|
| 3964 | + * Example: |
|---|
| 3965 | + * |
|---|
| 3966 | + * Parent snapshot: |
|---|
| 3967 | + * |
|---|
| 3968 | + * . (ino 256) |
|---|
| 3969 | + * |----- testdir/ (ino 259) |
|---|
| 3970 | + * | |----- a (ino 257) |
|---|
| 3971 | + * | |
|---|
| 3972 | + * |----- b (ino 258) |
|---|
| 3973 | + * |
|---|
| 3974 | + * Send snapshot: |
|---|
| 3975 | + * |
|---|
| 3976 | + * . (ino 256) |
|---|
| 3977 | + * |----- testdir_2/ (ino 259) |
|---|
| 3978 | + * | |----- a (ino 260) |
|---|
| 3979 | + * | |
|---|
| 3980 | + * |----- testdir (ino 257) |
|---|
| 3981 | + * |----- b (ino 257) |
|---|
| 3982 | + * |----- b2 (ino 258) |
|---|
| 3983 | + * |
|---|
| 3984 | + * Processing the new reference for inode 257 with name "b" may happen |
|---|
| 3985 | + * before processing the new reference with name "testdir". If so, we |
|---|
| 3986 | + * must make sure that by the time we send a link command to create the |
|---|
| 3987 | + * hard link "b", inode 259 was already orphanized, since the generated |
|---|
| 3988 | + * path in "valid_path" already contains the orphanized name for 259. |
|---|
| 3989 | + * We are processing inode 257, so only later when processing 259 we do |
|---|
| 3990 | + * the rename operation to change its temporary (orphanized) name to |
|---|
| 3991 | + * "testdir_2". |
|---|
| 3992 | + */ |
|---|
| 3960 | 3993 | list_for_each_entry(cur, &sctx->new_refs, list) { |
|---|
| 3961 | | - /* |
|---|
| 3962 | | - * We may have refs where the parent directory does not exist |
|---|
| 3963 | | - * yet. This happens if the parent directories inum is higher |
|---|
| 3964 | | - * the the current inum. To handle this case, we create the |
|---|
| 3965 | | - * parent directory out of order. But we need to check if this |
|---|
| 3966 | | - * did already happen before due to other refs in the same dir. |
|---|
| 3967 | | - */ |
|---|
| 3968 | 3994 | ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); |
|---|
| 3969 | 3995 | if (ret < 0) |
|---|
| 3970 | 3996 | goto out; |
|---|
| 3971 | | - if (ret == inode_state_will_create) { |
|---|
| 3972 | | - ret = 0; |
|---|
| 3973 | | - /* |
|---|
| 3974 | | - * First check if any of the current inodes refs did |
|---|
| 3975 | | - * already create the dir. |
|---|
| 3976 | | - */ |
|---|
| 3977 | | - list_for_each_entry(cur2, &sctx->new_refs, list) { |
|---|
| 3978 | | - if (cur == cur2) |
|---|
| 3979 | | - break; |
|---|
| 3980 | | - if (cur2->dir == cur->dir) { |
|---|
| 3981 | | - ret = 1; |
|---|
| 3982 | | - break; |
|---|
| 3983 | | - } |
|---|
| 3984 | | - } |
|---|
| 3985 | | - |
|---|
| 3986 | | - /* |
|---|
| 3987 | | - * If that did not happen, check if a previous inode |
|---|
| 3988 | | - * did already create the dir. |
|---|
| 3989 | | - */ |
|---|
| 3990 | | - if (!ret) |
|---|
| 3991 | | - ret = did_create_dir(sctx, cur->dir); |
|---|
| 3992 | | - if (ret < 0) |
|---|
| 3993 | | - goto out; |
|---|
| 3994 | | - if (!ret) { |
|---|
| 3995 | | - ret = send_create_inode(sctx, cur->dir); |
|---|
| 3996 | | - if (ret < 0) |
|---|
| 3997 | | - goto out; |
|---|
| 3998 | | - } |
|---|
| 3999 | | - } |
|---|
| 3997 | + if (ret == inode_state_will_create) |
|---|
| 3998 | + continue; |
|---|
| 4000 | 3999 | |
|---|
| 4001 | 4000 | /* |
|---|
| 4002 | | - * Check if this new ref would overwrite the first ref of |
|---|
| 4003 | | - * another unprocessed inode. If yes, orphanize the |
|---|
| 4004 | | - * overwritten inode. If we find an overwritten ref that is |
|---|
| 4005 | | - * not the first ref, simply unlink it. |
|---|
| 4001 | + * Check if this new ref would overwrite the first ref of another |
|---|
| 4002 | + * unprocessed inode. If yes, orphanize the overwritten inode. |
|---|
| 4003 | + * If we find an overwritten ref that is not the first ref, |
|---|
| 4004 | + * simply unlink it. |
|---|
| 4006 | 4005 | */ |
|---|
| 4007 | 4006 | ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, |
|---|
| 4008 | 4007 | cur->name, cur->name_len, |
|---|
| .. | .. |
|---|
| 4093 | 4092 | goto out; |
|---|
| 4094 | 4093 | } |
|---|
| 4095 | 4094 | ret = send_unlink(sctx, cur->full_path); |
|---|
| 4095 | + if (ret < 0) |
|---|
| 4096 | + goto out; |
|---|
| 4097 | + } |
|---|
| 4098 | + } |
|---|
| 4099 | + |
|---|
| 4100 | + } |
|---|
| 4101 | + |
|---|
| 4102 | + list_for_each_entry(cur, &sctx->new_refs, list) { |
|---|
| 4103 | + /* |
|---|
| 4104 | + * We may have refs where the parent directory does not exist |
|---|
| 4105 | + * yet. This happens if the parent directory's inum is higher |
|---|
| 4106 | + * than the current inum. To handle this case, we create the |
|---|
| 4107 | + * parent directory out of order. But we need to check if this |
|---|
| 4108 | + * did already happen before due to other refs in the same dir. |
|---|
| 4109 | + */ |
|---|
| 4110 | + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); |
|---|
| 4111 | + if (ret < 0) |
|---|
| 4112 | + goto out; |
|---|
| 4113 | + if (ret == inode_state_will_create) { |
|---|
| 4114 | + ret = 0; |
|---|
| 4115 | + /* |
|---|
| 4116 | + * First check if any of the current inodes refs did |
|---|
| 4117 | + * already create the dir. |
|---|
| 4118 | + */ |
|---|
| 4119 | + list_for_each_entry(cur2, &sctx->new_refs, list) { |
|---|
| 4120 | + if (cur == cur2) |
|---|
| 4121 | + break; |
|---|
| 4122 | + if (cur2->dir == cur->dir) { |
|---|
| 4123 | + ret = 1; |
|---|
| 4124 | + break; |
|---|
| 4125 | + } |
|---|
| 4126 | + } |
|---|
| 4127 | + |
|---|
| 4128 | + /* |
|---|
| 4129 | + * If that did not happen, check if a previous inode |
|---|
| 4130 | + * did already create the dir. |
|---|
| 4131 | + */ |
|---|
| 4132 | + if (!ret) |
|---|
| 4133 | + ret = did_create_dir(sctx, cur->dir); |
|---|
| 4134 | + if (ret < 0) |
|---|
| 4135 | + goto out; |
|---|
| 4136 | + if (!ret) { |
|---|
| 4137 | + ret = send_create_inode(sctx, cur->dir); |
|---|
| 4096 | 4138 | if (ret < 0) |
|---|
| 4097 | 4139 | goto out; |
|---|
| 4098 | 4140 | } |
|---|
| .. | .. |
|---|
| 4893 | 4935 | return ret; |
|---|
| 4894 | 4936 | } |
|---|
| 4895 | 4937 | |
|---|
| 4896 | | -static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) |
|---|
| 4938 | +static inline u64 max_send_read_size(const struct send_ctx *sctx) |
|---|
| 4939 | +{ |
|---|
| 4940 | + return sctx->send_max_size - SZ_16K; |
|---|
| 4941 | +} |
|---|
| 4942 | + |
|---|
| 4943 | +static int put_data_header(struct send_ctx *sctx, u32 len) |
|---|
| 4944 | +{ |
|---|
| 4945 | + struct btrfs_tlv_header *hdr; |
|---|
| 4946 | + |
|---|
| 4947 | + if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len) |
|---|
| 4948 | + return -EOVERFLOW; |
|---|
| 4949 | + hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size); |
|---|
| 4950 | + put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type); |
|---|
| 4951 | + put_unaligned_le16(len, &hdr->tlv_len); |
|---|
| 4952 | + sctx->send_size += sizeof(*hdr); |
|---|
| 4953 | + return 0; |
|---|
| 4954 | +} |
|---|
| 4955 | + |
|---|
| 4956 | +static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) |
|---|
| 4897 | 4957 | { |
|---|
| 4898 | 4958 | struct btrfs_root *root = sctx->send_root; |
|---|
| 4899 | 4959 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 4900 | 4960 | struct inode *inode; |
|---|
| 4901 | 4961 | struct page *page; |
|---|
| 4902 | 4962 | char *addr; |
|---|
| 4903 | | - struct btrfs_key key; |
|---|
| 4904 | 4963 | pgoff_t index = offset >> PAGE_SHIFT; |
|---|
| 4905 | 4964 | pgoff_t last_index; |
|---|
| 4906 | | - unsigned pg_offset = offset & ~PAGE_MASK; |
|---|
| 4907 | | - ssize_t ret = 0; |
|---|
| 4965 | + unsigned pg_offset = offset_in_page(offset); |
|---|
| 4966 | + int ret; |
|---|
| 4908 | 4967 | |
|---|
| 4909 | | - key.objectid = sctx->cur_ino; |
|---|
| 4910 | | - key.type = BTRFS_INODE_ITEM_KEY; |
|---|
| 4911 | | - key.offset = 0; |
|---|
| 4968 | + ret = put_data_header(sctx, len); |
|---|
| 4969 | + if (ret) |
|---|
| 4970 | + return ret; |
|---|
| 4912 | 4971 | |
|---|
| 4913 | | - inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
|---|
| 4972 | + inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); |
|---|
| 4914 | 4973 | if (IS_ERR(inode)) |
|---|
| 4915 | 4974 | return PTR_ERR(inode); |
|---|
| 4916 | | - |
|---|
| 4917 | | - if (offset + len > i_size_read(inode)) { |
|---|
| 4918 | | - if (offset > i_size_read(inode)) |
|---|
| 4919 | | - len = 0; |
|---|
| 4920 | | - else |
|---|
| 4921 | | - len = offset - i_size_read(inode); |
|---|
| 4922 | | - } |
|---|
| 4923 | | - if (len == 0) |
|---|
| 4924 | | - goto out; |
|---|
| 4925 | 4975 | |
|---|
| 4926 | 4976 | last_index = (offset + len - 1) >> PAGE_SHIFT; |
|---|
| 4927 | 4977 | |
|---|
| .. | .. |
|---|
| 4967 | 5017 | } |
|---|
| 4968 | 5018 | |
|---|
| 4969 | 5019 | addr = kmap(page); |
|---|
| 4970 | | - memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); |
|---|
| 5020 | + memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset, |
|---|
| 5021 | + cur_len); |
|---|
| 4971 | 5022 | kunmap(page); |
|---|
| 4972 | 5023 | unlock_page(page); |
|---|
| 4973 | 5024 | put_page(page); |
|---|
| 4974 | 5025 | index++; |
|---|
| 4975 | 5026 | pg_offset = 0; |
|---|
| 4976 | 5027 | len -= cur_len; |
|---|
| 4977 | | - ret += cur_len; |
|---|
| 5028 | + sctx->send_size += cur_len; |
|---|
| 4978 | 5029 | } |
|---|
| 4979 | | -out: |
|---|
| 4980 | 5030 | iput(inode); |
|---|
| 4981 | 5031 | return ret; |
|---|
| 4982 | 5032 | } |
|---|
| .. | .. |
|---|
| 4990 | 5040 | struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; |
|---|
| 4991 | 5041 | int ret = 0; |
|---|
| 4992 | 5042 | struct fs_path *p; |
|---|
| 4993 | | - ssize_t num_read = 0; |
|---|
| 4994 | 5043 | |
|---|
| 4995 | 5044 | p = fs_path_alloc(); |
|---|
| 4996 | 5045 | if (!p) |
|---|
| 4997 | 5046 | return -ENOMEM; |
|---|
| 4998 | 5047 | |
|---|
| 4999 | 5048 | btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); |
|---|
| 5000 | | - |
|---|
| 5001 | | - num_read = fill_read_buf(sctx, offset, len); |
|---|
| 5002 | | - if (num_read <= 0) { |
|---|
| 5003 | | - if (num_read < 0) |
|---|
| 5004 | | - ret = num_read; |
|---|
| 5005 | | - goto out; |
|---|
| 5006 | | - } |
|---|
| 5007 | 5049 | |
|---|
| 5008 | 5050 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
|---|
| 5009 | 5051 | if (ret < 0) |
|---|
| .. | .. |
|---|
| 5015 | 5057 | |
|---|
| 5016 | 5058 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
|---|
| 5017 | 5059 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
|---|
| 5018 | | - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); |
|---|
| 5060 | + ret = put_file_data(sctx, offset, len); |
|---|
| 5061 | + if (ret < 0) |
|---|
| 5062 | + goto out; |
|---|
| 5019 | 5063 | |
|---|
| 5020 | 5064 | ret = send_cmd(sctx); |
|---|
| 5021 | 5065 | |
|---|
| 5022 | 5066 | tlv_put_failure: |
|---|
| 5023 | 5067 | out: |
|---|
| 5024 | 5068 | fs_path_free(p); |
|---|
| 5025 | | - if (ret < 0) |
|---|
| 5026 | | - return ret; |
|---|
| 5027 | | - return num_read; |
|---|
| 5069 | + return ret; |
|---|
| 5028 | 5070 | } |
|---|
| 5029 | 5071 | |
|---|
| 5030 | 5072 | /* |
|---|
| .. | .. |
|---|
| 5040 | 5082 | |
|---|
| 5041 | 5083 | btrfs_debug(sctx->send_root->fs_info, |
|---|
| 5042 | 5084 | "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu", |
|---|
| 5043 | | - offset, len, clone_root->root->objectid, clone_root->ino, |
|---|
| 5044 | | - clone_root->offset); |
|---|
| 5085 | + offset, len, clone_root->root->root_key.objectid, |
|---|
| 5086 | + clone_root->ino, clone_root->offset); |
|---|
| 5045 | 5087 | |
|---|
| 5046 | 5088 | p = fs_path_alloc(); |
|---|
| 5047 | 5089 | if (!p) |
|---|
| .. | .. |
|---|
| 5136 | 5178 | static int send_hole(struct send_ctx *sctx, u64 end) |
|---|
| 5137 | 5179 | { |
|---|
| 5138 | 5180 | struct fs_path *p = NULL; |
|---|
| 5181 | + u64 read_size = max_send_read_size(sctx); |
|---|
| 5139 | 5182 | u64 offset = sctx->cur_inode_last_extent; |
|---|
| 5140 | | - u64 len; |
|---|
| 5141 | 5183 | int ret = 0; |
|---|
| 5142 | 5184 | |
|---|
| 5143 | 5185 | /* |
|---|
| .. | .. |
|---|
| 5164 | 5206 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); |
|---|
| 5165 | 5207 | if (ret < 0) |
|---|
| 5166 | 5208 | goto tlv_put_failure; |
|---|
| 5167 | | - memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); |
|---|
| 5168 | 5209 | while (offset < end) { |
|---|
| 5169 | | - len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); |
|---|
| 5210 | + u64 len = min(end - offset, read_size); |
|---|
| 5170 | 5211 | |
|---|
| 5171 | 5212 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
|---|
| 5172 | 5213 | if (ret < 0) |
|---|
| 5173 | 5214 | break; |
|---|
| 5174 | 5215 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
|---|
| 5175 | 5216 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
|---|
| 5176 | | - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); |
|---|
| 5217 | + ret = put_data_header(sctx, len); |
|---|
| 5218 | + if (ret < 0) |
|---|
| 5219 | + break; |
|---|
| 5220 | + memset(sctx->send_buf + sctx->send_size, 0, len); |
|---|
| 5221 | + sctx->send_size += len; |
|---|
| 5177 | 5222 | ret = send_cmd(sctx); |
|---|
| 5178 | 5223 | if (ret < 0) |
|---|
| 5179 | 5224 | break; |
|---|
| .. | .. |
|---|
| 5189 | 5234 | const u64 offset, |
|---|
| 5190 | 5235 | const u64 len) |
|---|
| 5191 | 5236 | { |
|---|
| 5237 | + u64 read_size = max_send_read_size(sctx); |
|---|
| 5192 | 5238 | u64 sent = 0; |
|---|
| 5193 | 5239 | |
|---|
| 5194 | 5240 | if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) |
|---|
| 5195 | 5241 | return send_update_extent(sctx, offset, len); |
|---|
| 5196 | 5242 | |
|---|
| 5197 | 5243 | while (sent < len) { |
|---|
| 5198 | | - u64 size = len - sent; |
|---|
| 5244 | + u64 size = min(len - sent, read_size); |
|---|
| 5199 | 5245 | int ret; |
|---|
| 5200 | 5246 | |
|---|
| 5201 | | - if (size > BTRFS_SEND_READ_SIZE) |
|---|
| 5202 | | - size = BTRFS_SEND_READ_SIZE; |
|---|
| 5203 | 5247 | ret = send_write(sctx, offset + sent, size); |
|---|
| 5204 | 5248 | if (ret < 0) |
|---|
| 5205 | 5249 | return ret; |
|---|
| 5206 | | - if (!ret) |
|---|
| 5207 | | - break; |
|---|
| 5208 | | - sent += ret; |
|---|
| 5250 | + sent += size; |
|---|
| 5209 | 5251 | } |
|---|
| 5210 | 5252 | return 0; |
|---|
| 5211 | 5253 | } |
|---|
| .. | .. |
|---|
| 5278 | 5320 | struct btrfs_path *path; |
|---|
| 5279 | 5321 | struct btrfs_key key; |
|---|
| 5280 | 5322 | int ret; |
|---|
| 5323 | + u64 clone_src_i_size = 0; |
|---|
| 5281 | 5324 | |
|---|
| 5282 | 5325 | /* |
|---|
| 5283 | 5326 | * Prevent cloning from a zero offset with a length matching the sector |
|---|
| .. | .. |
|---|
| 5301 | 5344 | path = alloc_path_for_send(); |
|---|
| 5302 | 5345 | if (!path) |
|---|
| 5303 | 5346 | return -ENOMEM; |
|---|
| 5347 | + |
|---|
| 5348 | + /* |
|---|
| 5349 | + * There are inodes that have extents that lie beyond their i_size. Don't |
|---|
| 5350 | + * accept clones from these extents. |
|---|
| 5351 | + */ |
|---|
| 5352 | + ret = __get_inode_info(clone_root->root, path, clone_root->ino, |
|---|
| 5353 | + &clone_src_i_size, NULL, NULL, NULL, NULL, NULL); |
|---|
| 5354 | + btrfs_release_path(path); |
|---|
| 5355 | + if (ret < 0) |
|---|
| 5356 | + goto out; |
|---|
| 5304 | 5357 | |
|---|
| 5305 | 5358 | /* |
|---|
| 5306 | 5359 | * We can't send a clone operation for the entire range if we find |
|---|
| .. | .. |
|---|
| 5344 | 5397 | u8 type; |
|---|
| 5345 | 5398 | u64 ext_len; |
|---|
| 5346 | 5399 | u64 clone_len; |
|---|
| 5400 | + u64 clone_data_offset; |
|---|
| 5401 | + bool crossed_src_i_size = false; |
|---|
| 5347 | 5402 | |
|---|
| 5348 | 5403 | if (slot >= btrfs_header_nritems(leaf)) { |
|---|
| 5349 | 5404 | ret = btrfs_next_leaf(clone_root->root, path); |
|---|
| .. | .. |
|---|
| 5397 | 5452 | if (key.offset >= clone_root->offset + len) |
|---|
| 5398 | 5453 | break; |
|---|
| 5399 | 5454 | |
|---|
| 5455 | + if (key.offset >= clone_src_i_size) |
|---|
| 5456 | + break; |
|---|
| 5457 | + |
|---|
| 5458 | + if (key.offset + ext_len > clone_src_i_size) { |
|---|
| 5459 | + ext_len = clone_src_i_size - key.offset; |
|---|
| 5460 | + crossed_src_i_size = true; |
|---|
| 5461 | + } |
|---|
| 5462 | + |
|---|
| 5463 | + clone_data_offset = btrfs_file_extent_offset(leaf, ei); |
|---|
| 5464 | + if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { |
|---|
| 5465 | + clone_root->offset = key.offset; |
|---|
| 5466 | + if (clone_data_offset < data_offset && |
|---|
| 5467 | + clone_data_offset + ext_len > data_offset) { |
|---|
| 5468 | + u64 extent_offset; |
|---|
| 5469 | + |
|---|
| 5470 | + extent_offset = data_offset - clone_data_offset; |
|---|
| 5471 | + ext_len -= extent_offset; |
|---|
| 5472 | + clone_data_offset += extent_offset; |
|---|
| 5473 | + clone_root->offset += extent_offset; |
|---|
| 5474 | + } |
|---|
| 5475 | + } |
|---|
| 5476 | + |
|---|
| 5400 | 5477 | clone_len = min_t(u64, ext_len, len); |
|---|
| 5401 | 5478 | |
|---|
| 5402 | 5479 | if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && |
|---|
| 5403 | | - btrfs_file_extent_offset(leaf, ei) == data_offset) |
|---|
| 5404 | | - ret = send_clone(sctx, offset, clone_len, clone_root); |
|---|
| 5405 | | - else |
|---|
| 5480 | + clone_data_offset == data_offset) { |
|---|
| 5481 | + const u64 src_end = clone_root->offset + clone_len; |
|---|
| 5482 | + const u64 sectorsize = SZ_64K; |
|---|
| 5483 | + |
|---|
| 5484 | + /* |
|---|
| 5485 | + * We can't clone the last block, when its size is not |
|---|
| 5486 | + * sector size aligned, into the middle of a file. If we |
|---|
| 5487 | + * do so, the receiver will get a failure (-EINVAL) when |
|---|
| 5488 | + * trying to clone or will silently corrupt the data in |
|---|
| 5489 | + * the destination file if it's on a kernel without the |
|---|
| 5490 | + * fix introduced by commit ac765f83f1397646 |
|---|
| 5491 | + * ("Btrfs: fix data corruption due to cloning of eof |
|---|
| 5492 | + * block"). |
|---|
| 5493 | + * |
|---|
| 5494 | + * So issue a clone of the aligned down range plus a |
|---|
| 5495 | + * regular write for the eof block, if we hit that case. |
|---|
| 5496 | + * |
|---|
| 5497 | + * Also, we use the maximum possible sector size, 64K, |
|---|
| 5498 | + * because we don't know what's the sector size of the |
|---|
| 5499 | + * filesystem that receives the stream, so we have to |
|---|
| 5500 | + * assume the largest possible sector size. |
|---|
| 5501 | + */ |
|---|
| 5502 | + if (src_end == clone_src_i_size && |
|---|
| 5503 | + !IS_ALIGNED(src_end, sectorsize) && |
|---|
| 5504 | + offset + clone_len < sctx->cur_inode_size) { |
|---|
| 5505 | + u64 slen; |
|---|
| 5506 | + |
|---|
| 5507 | + slen = ALIGN_DOWN(src_end - clone_root->offset, |
|---|
| 5508 | + sectorsize); |
|---|
| 5509 | + if (slen > 0) { |
|---|
| 5510 | + ret = send_clone(sctx, offset, slen, |
|---|
| 5511 | + clone_root); |
|---|
| 5512 | + if (ret < 0) |
|---|
| 5513 | + goto out; |
|---|
| 5514 | + } |
|---|
| 5515 | + ret = send_extent_data(sctx, offset + slen, |
|---|
| 5516 | + clone_len - slen); |
|---|
| 5517 | + } else { |
|---|
| 5518 | + ret = send_clone(sctx, offset, clone_len, |
|---|
| 5519 | + clone_root); |
|---|
| 5520 | + } |
|---|
| 5521 | + } else if (crossed_src_i_size && clone_len < len) { |
|---|
| 5522 | + /* |
|---|
| 5523 | + * If we are at i_size of the clone source inode and we |
|---|
| 5524 | + * can not clone from it, terminate the loop. This is |
|---|
| 5525 | + * to avoid sending two write operations, one with a |
|---|
| 5526 | + * length matching clone_len and the final one after |
|---|
| 5527 | + * this loop with a length of len - clone_len. |
|---|
| 5528 | + * |
|---|
| 5529 | + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED |
|---|
| 5530 | + * was passed to the send ioctl), this helps avoid |
|---|
| 5531 | + * sending an encoded write for an offset that is not |
|---|
| 5532 | + * sector size aligned, in case the i_size of the source |
|---|
| 5533 | + * inode is not sector size aligned. That will make the |
|---|
| 5534 | + * receiver fallback to decompression of the data and |
|---|
| 5535 | + * writing it using regular buffered IO, therefore while |
|---|
| 5536 | + * not incorrect, it's not optimal due to decompression and |
|---|
| 5537 | + * possible re-compression at the receiver. |
|---|
| 5538 | + */ |
|---|
| 5539 | + break; |
|---|
| 5540 | + } else { |
|---|
| 5406 | 5541 | ret = send_extent_data(sctx, offset, clone_len); |
|---|
| 5542 | + } |
|---|
| 5407 | 5543 | |
|---|
| 5408 | 5544 | if (ret < 0) |
|---|
| 5409 | 5545 | goto out; |
|---|
| .. | .. |
|---|
| 5413 | 5549 | break; |
|---|
| 5414 | 5550 | offset += clone_len; |
|---|
| 5415 | 5551 | clone_root->offset += clone_len; |
|---|
| 5552 | + |
|---|
| 5553 | + /* |
|---|
| 5554 | + * If we are cloning from the file we are currently processing, |
|---|
| 5555 | + * and using the send root as the clone root, we must stop once |
|---|
| 5556 | + * the current clone offset reaches the current eof of the file |
|---|
| 5557 | + * at the receiver, otherwise we would issue an invalid clone |
|---|
| 5558 | + * operation (source range going beyond eof) and cause the |
|---|
| 5559 | + * receiver to fail. So if we reach the current eof, bail out |
|---|
| 5560 | + * and fallback to a regular write. |
|---|
| 5561 | + */ |
|---|
| 5562 | + if (clone_root->root == sctx->send_root && |
|---|
| 5563 | + clone_root->ino == sctx->cur_ino && |
|---|
| 5564 | + clone_root->offset >= sctx->cur_inode_next_write_offset) |
|---|
| 5565 | + break; |
|---|
| 5566 | + |
|---|
| 5416 | 5567 | data_offset += clone_len; |
|---|
| 5417 | 5568 | next: |
|---|
| 5418 | 5569 | path->slots[0]++; |
|---|
| .. | .. |
|---|
| 5433 | 5584 | struct clone_root *clone_root) |
|---|
| 5434 | 5585 | { |
|---|
| 5435 | 5586 | int ret = 0; |
|---|
| 5436 | | - struct btrfs_file_extent_item *ei; |
|---|
| 5437 | 5587 | u64 offset = key->offset; |
|---|
| 5438 | | - u64 len; |
|---|
| 5439 | | - u8 type; |
|---|
| 5588 | + u64 end; |
|---|
| 5440 | 5589 | u64 bs = sctx->send_root->fs_info->sb->s_blocksize; |
|---|
| 5441 | 5590 | |
|---|
| 5442 | | - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 5443 | | - struct btrfs_file_extent_item); |
|---|
| 5444 | | - type = btrfs_file_extent_type(path->nodes[0], ei); |
|---|
| 5445 | | - if (type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 5446 | | - len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); |
|---|
| 5447 | | - /* |
|---|
| 5448 | | - * it is possible the inline item won't cover the whole page, |
|---|
| 5449 | | - * but there may be items after this page. Make |
|---|
| 5450 | | - * sure to send the whole thing |
|---|
| 5451 | | - */ |
|---|
| 5452 | | - len = PAGE_ALIGN(len); |
|---|
| 5453 | | - } else { |
|---|
| 5454 | | - len = btrfs_file_extent_num_bytes(path->nodes[0], ei); |
|---|
| 5455 | | - } |
|---|
| 5591 | + end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); |
|---|
| 5592 | + if (offset >= end) |
|---|
| 5593 | + return 0; |
|---|
| 5456 | 5594 | |
|---|
| 5457 | | - if (offset >= sctx->cur_inode_size) { |
|---|
| 5458 | | - ret = 0; |
|---|
| 5459 | | - goto out; |
|---|
| 5460 | | - } |
|---|
| 5461 | | - if (offset + len > sctx->cur_inode_size) |
|---|
| 5462 | | - len = sctx->cur_inode_size - offset; |
|---|
| 5463 | | - if (len == 0) { |
|---|
| 5464 | | - ret = 0; |
|---|
| 5465 | | - goto out; |
|---|
| 5466 | | - } |
|---|
| 5467 | | - |
|---|
| 5468 | | - if (clone_root && IS_ALIGNED(offset + len, bs)) { |
|---|
| 5595 | + if (clone_root && IS_ALIGNED(end, bs)) { |
|---|
| 5596 | + struct btrfs_file_extent_item *ei; |
|---|
| 5469 | 5597 | u64 disk_byte; |
|---|
| 5470 | 5598 | u64 data_offset; |
|---|
| 5471 | 5599 | |
|---|
| 5600 | + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 5601 | + struct btrfs_file_extent_item); |
|---|
| 5472 | 5602 | disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); |
|---|
| 5473 | 5603 | data_offset = btrfs_file_extent_offset(path->nodes[0], ei); |
|---|
| 5474 | 5604 | ret = clone_range(sctx, clone_root, disk_byte, data_offset, |
|---|
| 5475 | | - offset, len); |
|---|
| 5605 | + offset, end - offset); |
|---|
| 5476 | 5606 | } else { |
|---|
| 5477 | | - ret = send_extent_data(sctx, offset, len); |
|---|
| 5607 | + ret = send_extent_data(sctx, offset, end - offset); |
|---|
| 5478 | 5608 | } |
|---|
| 5479 | | - sctx->cur_inode_next_write_offset = offset + len; |
|---|
| 5480 | | -out: |
|---|
| 5609 | + sctx->cur_inode_next_write_offset = end; |
|---|
| 5481 | 5610 | return ret; |
|---|
| 5482 | 5611 | } |
|---|
| 5483 | 5612 | |
|---|
| .. | .. |
|---|
| 5675 | 5804 | { |
|---|
| 5676 | 5805 | struct btrfs_path *path; |
|---|
| 5677 | 5806 | struct btrfs_root *root = sctx->send_root; |
|---|
| 5678 | | - struct btrfs_file_extent_item *fi; |
|---|
| 5679 | 5807 | struct btrfs_key key; |
|---|
| 5680 | | - u64 extent_end; |
|---|
| 5681 | | - u8 type; |
|---|
| 5682 | 5808 | int ret; |
|---|
| 5683 | 5809 | |
|---|
| 5684 | 5810 | path = alloc_path_for_send(); |
|---|
| .. | .. |
|---|
| 5698 | 5824 | if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) |
|---|
| 5699 | 5825 | goto out; |
|---|
| 5700 | 5826 | |
|---|
| 5701 | | - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 5702 | | - struct btrfs_file_extent_item); |
|---|
| 5703 | | - type = btrfs_file_extent_type(path->nodes[0], fi); |
|---|
| 5704 | | - if (type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 5705 | | - u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); |
|---|
| 5706 | | - extent_end = ALIGN(key.offset + size, |
|---|
| 5707 | | - sctx->send_root->fs_info->sectorsize); |
|---|
| 5708 | | - } else { |
|---|
| 5709 | | - extent_end = key.offset + |
|---|
| 5710 | | - btrfs_file_extent_num_bytes(path->nodes[0], fi); |
|---|
| 5711 | | - } |
|---|
| 5712 | | - sctx->cur_inode_last_extent = extent_end; |
|---|
| 5827 | + sctx->cur_inode_last_extent = btrfs_file_extent_end(path); |
|---|
| 5713 | 5828 | out: |
|---|
| 5714 | 5829 | btrfs_free_path(path); |
|---|
| 5715 | 5830 | return ret; |
|---|
| .. | .. |
|---|
| 5763 | 5878 | break; |
|---|
| 5764 | 5879 | |
|---|
| 5765 | 5880 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
|---|
| 5766 | | - if (btrfs_file_extent_type(leaf, fi) == |
|---|
| 5767 | | - BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 5768 | | - u64 size = btrfs_file_extent_ram_bytes(leaf, fi); |
|---|
| 5769 | | - |
|---|
| 5770 | | - extent_end = ALIGN(key.offset + size, |
|---|
| 5771 | | - root->fs_info->sectorsize); |
|---|
| 5772 | | - } else { |
|---|
| 5773 | | - extent_end = key.offset + |
|---|
| 5774 | | - btrfs_file_extent_num_bytes(leaf, fi); |
|---|
| 5775 | | - } |
|---|
| 5881 | + extent_end = btrfs_file_extent_end(path); |
|---|
| 5776 | 5882 | if (extent_end <= start) |
|---|
| 5777 | 5883 | goto next; |
|---|
| 5778 | 5884 | if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) { |
|---|
| .. | .. |
|---|
| 5793 | 5899 | static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, |
|---|
| 5794 | 5900 | struct btrfs_key *key) |
|---|
| 5795 | 5901 | { |
|---|
| 5796 | | - struct btrfs_file_extent_item *fi; |
|---|
| 5797 | | - u64 extent_end; |
|---|
| 5798 | | - u8 type; |
|---|
| 5799 | 5902 | int ret = 0; |
|---|
| 5800 | 5903 | |
|---|
| 5801 | 5904 | if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) |
|---|
| .. | .. |
|---|
| 5805 | 5908 | ret = get_last_extent(sctx, key->offset - 1); |
|---|
| 5806 | 5909 | if (ret) |
|---|
| 5807 | 5910 | return ret; |
|---|
| 5808 | | - } |
|---|
| 5809 | | - |
|---|
| 5810 | | - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 5811 | | - struct btrfs_file_extent_item); |
|---|
| 5812 | | - type = btrfs_file_extent_type(path->nodes[0], fi); |
|---|
| 5813 | | - if (type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 5814 | | - u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); |
|---|
| 5815 | | - extent_end = ALIGN(key->offset + size, |
|---|
| 5816 | | - sctx->send_root->fs_info->sectorsize); |
|---|
| 5817 | | - } else { |
|---|
| 5818 | | - extent_end = key->offset + |
|---|
| 5819 | | - btrfs_file_extent_num_bytes(path->nodes[0], fi); |
|---|
| 5820 | 5911 | } |
|---|
| 5821 | 5912 | |
|---|
| 5822 | 5913 | if (path->slots[0] == 0 && |
|---|
| .. | .. |
|---|
| 5844 | 5935 | else |
|---|
| 5845 | 5936 | ret = 0; |
|---|
| 5846 | 5937 | } |
|---|
| 5847 | | - sctx->cur_inode_last_extent = extent_end; |
|---|
| 5938 | + sctx->cur_inode_last_extent = btrfs_file_extent_end(path); |
|---|
| 5848 | 5939 | return ret; |
|---|
| 5849 | 5940 | } |
|---|
| 5850 | 5941 | |
|---|
| .. | .. |
|---|
| 6654 | 6745 | return ret; |
|---|
| 6655 | 6746 | } |
|---|
| 6656 | 6747 | |
|---|
| 6748 | +static int tree_move_down(struct btrfs_path *path, int *level) |
|---|
| 6749 | +{ |
|---|
| 6750 | + struct extent_buffer *eb; |
|---|
| 6751 | + |
|---|
| 6752 | + BUG_ON(*level == 0); |
|---|
| 6753 | + eb = btrfs_read_node_slot(path->nodes[*level], path->slots[*level]); |
|---|
| 6754 | + if (IS_ERR(eb)) |
|---|
| 6755 | + return PTR_ERR(eb); |
|---|
| 6756 | + |
|---|
| 6757 | + path->nodes[*level - 1] = eb; |
|---|
| 6758 | + path->slots[*level - 1] = 0; |
|---|
| 6759 | + (*level)--; |
|---|
| 6760 | + return 0; |
|---|
| 6761 | +} |
|---|
| 6762 | + |
|---|
| 6763 | +static int tree_move_next_or_upnext(struct btrfs_path *path, |
|---|
| 6764 | + int *level, int root_level) |
|---|
| 6765 | +{ |
|---|
| 6766 | + int ret = 0; |
|---|
| 6767 | + int nritems; |
|---|
| 6768 | + nritems = btrfs_header_nritems(path->nodes[*level]); |
|---|
| 6769 | + |
|---|
| 6770 | + path->slots[*level]++; |
|---|
| 6771 | + |
|---|
| 6772 | + while (path->slots[*level] >= nritems) { |
|---|
| 6773 | + if (*level == root_level) |
|---|
| 6774 | + return -1; |
|---|
| 6775 | + |
|---|
| 6776 | + /* move upnext */ |
|---|
| 6777 | + path->slots[*level] = 0; |
|---|
| 6778 | + free_extent_buffer(path->nodes[*level]); |
|---|
| 6779 | + path->nodes[*level] = NULL; |
|---|
| 6780 | + (*level)++; |
|---|
| 6781 | + path->slots[*level]++; |
|---|
| 6782 | + |
|---|
| 6783 | + nritems = btrfs_header_nritems(path->nodes[*level]); |
|---|
| 6784 | + ret = 1; |
|---|
| 6785 | + } |
|---|
| 6786 | + return ret; |
|---|
| 6787 | +} |
|---|
| 6788 | + |
|---|
| 6789 | +/* |
|---|
| 6790 | + * Returns 1 if it had to move up and next. 0 is returned if it moved only next |
|---|
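| 6791 | + * or down. A negative value is returned when the end of the tree is reached (-1) or when reading a child node fails. |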
|---|
| 6792 | + */ |
|---|
| 6793 | +static int tree_advance(struct btrfs_path *path, |
|---|
| 6794 | + int *level, int root_level, |
|---|
| 6795 | + int allow_down, |
|---|
| 6796 | + struct btrfs_key *key) |
|---|
| 6797 | +{ |
|---|
| 6798 | + int ret; |
|---|
| 6799 | + |
|---|
| 6800 | + if (*level == 0 || !allow_down) { |
|---|
| 6801 | + ret = tree_move_next_or_upnext(path, level, root_level); |
|---|
| 6802 | + } else { |
|---|
| 6803 | + ret = tree_move_down(path, level); |
|---|
| 6804 | + } |
|---|
| 6805 | + if (ret >= 0) { |
|---|
| 6806 | + if (*level == 0) |
|---|
| 6807 | + btrfs_item_key_to_cpu(path->nodes[*level], key, |
|---|
| 6808 | + path->slots[*level]); |
|---|
| 6809 | + else |
|---|
| 6810 | + btrfs_node_key_to_cpu(path->nodes[*level], key, |
|---|
| 6811 | + path->slots[*level]); |
|---|
| 6812 | + } |
|---|
| 6813 | + return ret; |
|---|
| 6814 | +} |
|---|
| 6815 | + |
|---|
| 6816 | +static int tree_compare_item(struct btrfs_path *left_path, |
|---|
| 6817 | + struct btrfs_path *right_path, |
|---|
| 6818 | + char *tmp_buf) |
|---|
| 6819 | +{ |
|---|
| 6820 | + int cmp; |
|---|
| 6821 | + int len1, len2; |
|---|
| 6822 | + unsigned long off1, off2; |
|---|
| 6823 | + |
|---|
| 6824 | + len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]); |
|---|
| 6825 | + len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]); |
|---|
| 6826 | + if (len1 != len2) |
|---|
| 6827 | + return 1; |
|---|
| 6828 | + |
|---|
| 6829 | + off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]); |
|---|
| 6830 | + off2 = btrfs_item_ptr_offset(right_path->nodes[0], |
|---|
| 6831 | + right_path->slots[0]); |
|---|
| 6832 | + |
|---|
| 6833 | + read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1); |
|---|
| 6834 | + |
|---|
| 6835 | + cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1); |
|---|
| 6836 | + if (cmp) |
|---|
| 6837 | + return 1; |
|---|
| 6838 | + return 0; |
|---|
| 6839 | +} |
|---|
| 6840 | + |
|---|
| 6841 | +/* |
|---|
| 6842 | + * This function compares two trees and calls the provided callback for |
|---|
| 6843 | + * every changed/new/deleted item it finds. |
|---|
| 6844 | + * If shared tree blocks are encountered, whole subtrees are skipped, making |
|---|
| 6845 | + * the compare pretty fast on snapshotted subvolumes. |
|---|
| 6846 | + * |
|---|
| 6847 | + * This currently works on commit roots only. As commit roots are read only, |
|---|
| 6848 | + * we don't do any locking. The commit roots are protected with transactions. |
|---|
| 6849 | + * Transactions are ended and rejoined when a commit is tried in between. |
|---|
| 6850 | + * |
|---|
| 6851 | + * This function checks for modifications done to the trees while comparing. |
|---|
| 6852 | + * If it detects a change, it aborts immediately. |
|---|
| 6853 | + */ |
|---|
| 6854 | +static int btrfs_compare_trees(struct btrfs_root *left_root, |
|---|
| 6855 | + struct btrfs_root *right_root, void *ctx) |
|---|
| 6856 | +{ |
|---|
| 6857 | + struct btrfs_fs_info *fs_info = left_root->fs_info; |
|---|
| 6858 | + int ret; |
|---|
| 6859 | + int cmp; |
|---|
| 6860 | + struct btrfs_path *left_path = NULL; |
|---|
| 6861 | + struct btrfs_path *right_path = NULL; |
|---|
| 6862 | + struct btrfs_key left_key; |
|---|
| 6863 | + struct btrfs_key right_key; |
|---|
| 6864 | + char *tmp_buf = NULL; |
|---|
| 6865 | + int left_root_level; |
|---|
| 6866 | + int right_root_level; |
|---|
| 6867 | + int left_level; |
|---|
| 6868 | + int right_level; |
|---|
| 6869 | + int left_end_reached; |
|---|
| 6870 | + int right_end_reached; |
|---|
| 6871 | + int advance_left; |
|---|
| 6872 | + int advance_right; |
|---|
| 6873 | + u64 left_blockptr; |
|---|
| 6874 | + u64 right_blockptr; |
|---|
| 6875 | + u64 left_gen; |
|---|
| 6876 | + u64 right_gen; |
|---|
| 6877 | + |
|---|
| 6878 | + left_path = btrfs_alloc_path(); |
|---|
| 6879 | + if (!left_path) { |
|---|
| 6880 | + ret = -ENOMEM; |
|---|
| 6881 | + goto out; |
|---|
| 6882 | + } |
|---|
| 6883 | + right_path = btrfs_alloc_path(); |
|---|
| 6884 | + if (!right_path) { |
|---|
| 6885 | + ret = -ENOMEM; |
|---|
| 6886 | + goto out; |
|---|
| 6887 | + } |
|---|
| 6888 | + |
|---|
| 6889 | + tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); |
|---|
| 6890 | + if (!tmp_buf) { |
|---|
| 6891 | + ret = -ENOMEM; |
|---|
| 6892 | + goto out; |
|---|
| 6893 | + } |
|---|
| 6894 | + |
|---|
| 6895 | + left_path->search_commit_root = 1; |
|---|
| 6896 | + left_path->skip_locking = 1; |
|---|
| 6897 | + right_path->search_commit_root = 1; |
|---|
| 6898 | + right_path->skip_locking = 1; |
|---|
| 6899 | + |
|---|
| 6900 | + /* |
|---|
| 6901 | + * Strategy: Go to the first items of both trees. Then do |
|---|
| 6902 | + * |
|---|
| 6903 | + * If both trees are at level 0 |
|---|
| 6904 | + * Compare keys of current items |
|---|
| 6905 | + * If left < right treat left item as new, advance left tree |
|---|
| 6906 | + * and repeat |
|---|
| 6907 | + * If left > right treat right item as deleted, advance right tree |
|---|
| 6908 | + * and repeat |
|---|
| 6909 | + * If left == right do deep compare of items, treat as changed if |
|---|
| 6910 | + * needed, advance both trees and repeat |
|---|
| 6911 | + * If both trees are at the same level but not at level 0 |
|---|
| 6912 | + * Compare keys of current nodes/leafs |
|---|
| 6913 | + * If left < right advance left tree and repeat |
|---|
| 6914 | + * If left > right advance right tree and repeat |
|---|
| 6915 | + * If left == right compare blockptrs of the next nodes/leafs |
|---|
| 6916 | + * If they match advance both trees but stay at the same level |
|---|
| 6917 | + * and repeat |
|---|
| 6918 | + * If they don't match advance both trees while allowing to go |
|---|
| 6919 | + * deeper and repeat |
|---|
| 6920 | + * If tree levels are different |
|---|
| 6921 | + * Advance the tree that needs it and repeat |
|---|
| 6922 | + * |
|---|
| 6923 | + * Advancing a tree means: |
|---|
| 6924 | + * If we are at level 0, try to go to the next slot. If that's not |
|---|
| 6925 | + * possible, go one level up and repeat. Stop when we found a level |
|---|
| 6926 | + * where we could go to the next slot. We may at this point be on a |
|---|
| 6927 | + * node or a leaf. |
|---|
| 6928 | + * |
|---|
| 6929 | + * If we are not at level 0 and not on shared tree blocks, go one |
|---|
| 6930 | + * level deeper. |
|---|
| 6931 | + * |
|---|
| 6932 | + * If we are not at level 0 and on shared tree blocks, go one slot to |
|---|
| 6933 | + * the right if possible or go up and right. |
|---|
| 6934 | + */ |
|---|
| 6935 | + |
|---|
| 6936 | + down_read(&fs_info->commit_root_sem); |
|---|
| 6937 | + left_level = btrfs_header_level(left_root->commit_root); |
|---|
| 6938 | + left_root_level = left_level; |
|---|
| 6939 | + left_path->nodes[left_level] = |
|---|
| 6940 | + btrfs_clone_extent_buffer(left_root->commit_root); |
|---|
| 6941 | + if (!left_path->nodes[left_level]) { |
|---|
| 6942 | + up_read(&fs_info->commit_root_sem); |
|---|
| 6943 | + ret = -ENOMEM; |
|---|
| 6944 | + goto out; |
|---|
| 6945 | + } |
|---|
| 6946 | + |
|---|
| 6947 | + right_level = btrfs_header_level(right_root->commit_root); |
|---|
| 6948 | + right_root_level = right_level; |
|---|
| 6949 | + right_path->nodes[right_level] = |
|---|
| 6950 | + btrfs_clone_extent_buffer(right_root->commit_root); |
|---|
| 6951 | + if (!right_path->nodes[right_level]) { |
|---|
| 6952 | + up_read(&fs_info->commit_root_sem); |
|---|
| 6953 | + ret = -ENOMEM; |
|---|
| 6954 | + goto out; |
|---|
| 6955 | + } |
|---|
| 6956 | + up_read(&fs_info->commit_root_sem); |
|---|
| 6957 | + |
|---|
| 6958 | + if (left_level == 0) |
|---|
| 6959 | + btrfs_item_key_to_cpu(left_path->nodes[left_level], |
|---|
| 6960 | + &left_key, left_path->slots[left_level]); |
|---|
| 6961 | + else |
|---|
| 6962 | + btrfs_node_key_to_cpu(left_path->nodes[left_level], |
|---|
| 6963 | + &left_key, left_path->slots[left_level]); |
|---|
| 6964 | + if (right_level == 0) |
|---|
| 6965 | + btrfs_item_key_to_cpu(right_path->nodes[right_level], |
|---|
| 6966 | + &right_key, right_path->slots[right_level]); |
|---|
| 6967 | + else |
|---|
| 6968 | + btrfs_node_key_to_cpu(right_path->nodes[right_level], |
|---|
| 6969 | + &right_key, right_path->slots[right_level]); |
|---|
| 6970 | + |
|---|
| 6971 | + left_end_reached = right_end_reached = 0; |
|---|
| 6972 | + advance_left = advance_right = 0; |
|---|
| 6973 | + |
|---|
| 6974 | + while (1) { |
|---|
| 6975 | + cond_resched(); |
|---|
| 6976 | + if (advance_left && !left_end_reached) { |
|---|
| 6977 | + ret = tree_advance(left_path, &left_level, |
|---|
| 6978 | + left_root_level, |
|---|
| 6979 | + advance_left != ADVANCE_ONLY_NEXT, |
|---|
| 6980 | + &left_key); |
|---|
| 6981 | + if (ret == -1) |
|---|
| 6982 | + left_end_reached = ADVANCE; |
|---|
| 6983 | + else if (ret < 0) |
|---|
| 6984 | + goto out; |
|---|
| 6985 | + advance_left = 0; |
|---|
| 6986 | + } |
|---|
| 6987 | + if (advance_right && !right_end_reached) { |
|---|
| 6988 | + ret = tree_advance(right_path, &right_level, |
|---|
| 6989 | + right_root_level, |
|---|
| 6990 | + advance_right != ADVANCE_ONLY_NEXT, |
|---|
| 6991 | + &right_key); |
|---|
| 6992 | + if (ret == -1) |
|---|
| 6993 | + right_end_reached = ADVANCE; |
|---|
| 6994 | + else if (ret < 0) |
|---|
| 6995 | + goto out; |
|---|
| 6996 | + advance_right = 0; |
|---|
| 6997 | + } |
|---|
| 6998 | + |
|---|
| 6999 | + if (left_end_reached && right_end_reached) { |
|---|
| 7000 | + ret = 0; |
|---|
| 7001 | + goto out; |
|---|
| 7002 | + } else if (left_end_reached) { |
|---|
| 7003 | + if (right_level == 0) { |
|---|
| 7004 | + ret = changed_cb(left_path, right_path, |
|---|
| 7005 | + &right_key, |
|---|
| 7006 | + BTRFS_COMPARE_TREE_DELETED, |
|---|
| 7007 | + ctx); |
|---|
| 7008 | + if (ret < 0) |
|---|
| 7009 | + goto out; |
|---|
| 7010 | + } |
|---|
| 7011 | + advance_right = ADVANCE; |
|---|
| 7012 | + continue; |
|---|
| 7013 | + } else if (right_end_reached) { |
|---|
| 7014 | + if (left_level == 0) { |
|---|
| 7015 | + ret = changed_cb(left_path, right_path, |
|---|
| 7016 | + &left_key, |
|---|
| 7017 | + BTRFS_COMPARE_TREE_NEW, |
|---|
| 7018 | + ctx); |
|---|
| 7019 | + if (ret < 0) |
|---|
| 7020 | + goto out; |
|---|
| 7021 | + } |
|---|
| 7022 | + advance_left = ADVANCE; |
|---|
| 7023 | + continue; |
|---|
| 7024 | + } |
|---|
| 7025 | + |
|---|
| 7026 | + if (left_level == 0 && right_level == 0) { |
|---|
| 7027 | + cmp = btrfs_comp_cpu_keys(&left_key, &right_key); |
|---|
| 7028 | + if (cmp < 0) { |
|---|
| 7029 | + ret = changed_cb(left_path, right_path, |
|---|
| 7030 | + &left_key, |
|---|
| 7031 | + BTRFS_COMPARE_TREE_NEW, |
|---|
| 7032 | + ctx); |
|---|
| 7033 | + if (ret < 0) |
|---|
| 7034 | + goto out; |
|---|
| 7035 | + advance_left = ADVANCE; |
|---|
| 7036 | + } else if (cmp > 0) { |
|---|
| 7037 | + ret = changed_cb(left_path, right_path, |
|---|
| 7038 | + &right_key, |
|---|
| 7039 | + BTRFS_COMPARE_TREE_DELETED, |
|---|
| 7040 | + ctx); |
|---|
| 7041 | + if (ret < 0) |
|---|
| 7042 | + goto out; |
|---|
| 7043 | + advance_right = ADVANCE; |
|---|
| 7044 | + } else { |
|---|
| 7045 | + enum btrfs_compare_tree_result result; |
|---|
| 7046 | + |
|---|
| 7047 | + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); |
|---|
| 7048 | + ret = tree_compare_item(left_path, right_path, |
|---|
| 7049 | + tmp_buf); |
|---|
| 7050 | + if (ret) |
|---|
| 7051 | + result = BTRFS_COMPARE_TREE_CHANGED; |
|---|
| 7052 | + else |
|---|
| 7053 | + result = BTRFS_COMPARE_TREE_SAME; |
|---|
| 7054 | + ret = changed_cb(left_path, right_path, |
|---|
| 7055 | + &left_key, result, ctx); |
|---|
| 7056 | + if (ret < 0) |
|---|
| 7057 | + goto out; |
|---|
| 7058 | + advance_left = ADVANCE; |
|---|
| 7059 | + advance_right = ADVANCE; |
|---|
| 7060 | + } |
|---|
| 7061 | + } else if (left_level == right_level) { |
|---|
| 7062 | + cmp = btrfs_comp_cpu_keys(&left_key, &right_key); |
|---|
| 7063 | + if (cmp < 0) { |
|---|
| 7064 | + advance_left = ADVANCE; |
|---|
| 7065 | + } else if (cmp > 0) { |
|---|
| 7066 | + advance_right = ADVANCE; |
|---|
| 7067 | + } else { |
|---|
| 7068 | + left_blockptr = btrfs_node_blockptr( |
|---|
| 7069 | + left_path->nodes[left_level], |
|---|
| 7070 | + left_path->slots[left_level]); |
|---|
| 7071 | + right_blockptr = btrfs_node_blockptr( |
|---|
| 7072 | + right_path->nodes[right_level], |
|---|
| 7073 | + right_path->slots[right_level]); |
|---|
| 7074 | + left_gen = btrfs_node_ptr_generation( |
|---|
| 7075 | + left_path->nodes[left_level], |
|---|
| 7076 | + left_path->slots[left_level]); |
|---|
| 7077 | + right_gen = btrfs_node_ptr_generation( |
|---|
| 7078 | + right_path->nodes[right_level], |
|---|
| 7079 | + right_path->slots[right_level]); |
|---|
| 7080 | + if (left_blockptr == right_blockptr && |
|---|
| 7081 | + left_gen == right_gen) { |
|---|
| 7082 | + /* |
|---|
| 7083 | + * As we're on a shared block, don't |
|---|
| 7084 | + * allow to go deeper. |
|---|
| 7085 | + */ |
|---|
| 7086 | + advance_left = ADVANCE_ONLY_NEXT; |
|---|
| 7087 | + advance_right = ADVANCE_ONLY_NEXT; |
|---|
| 7088 | + } else { |
|---|
| 7089 | + advance_left = ADVANCE; |
|---|
| 7090 | + advance_right = ADVANCE; |
|---|
| 7091 | + } |
|---|
| 7092 | + } |
|---|
| 7093 | + } else if (left_level < right_level) { |
|---|
| 7094 | + advance_right = ADVANCE; |
|---|
| 7095 | + } else { |
|---|
| 7096 | + advance_left = ADVANCE; |
|---|
| 7097 | + } |
|---|
| 7098 | + } |
|---|
| 7099 | + |
|---|
| 7100 | +out: |
|---|
| 7101 | + btrfs_free_path(left_path); |
|---|
| 7102 | + btrfs_free_path(right_path); |
|---|
| 7103 | + kvfree(tmp_buf); |
|---|
| 7104 | + return ret; |
|---|
| 7105 | +} |
|---|
| 7106 | + |
|---|
| 6657 | 7107 | static int send_subvol(struct send_ctx *sctx) |
|---|
| 6658 | 7108 | { |
|---|
| 6659 | 7109 | int ret; |
|---|
| .. | .. |
|---|
| 6669 | 7119 | goto out; |
|---|
| 6670 | 7120 | |
|---|
| 6671 | 7121 | if (sctx->parent_root) { |
|---|
| 6672 | | - ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, |
|---|
| 6673 | | - changed_cb, sctx); |
|---|
| 7122 | + ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx); |
|---|
| 6674 | 7123 | if (ret < 0) |
|---|
| 6675 | 7124 | goto out; |
|---|
| 6676 | 7125 | ret = finish_inode_if_needed(sctx, 1); |
|---|
| .. | .. |
|---|
| 6779 | 7228 | spin_unlock(&root->root_item_lock); |
|---|
| 6780 | 7229 | } |
|---|
| 6781 | 7230 | |
|---|
| 7231 | +static void dedupe_in_progress_warn(const struct btrfs_root *root) |
|---|
| 7232 | +{ |
|---|
| 7233 | + btrfs_warn_rl(root->fs_info, |
|---|
| 7234 | +"cannot use root %llu for send while deduplications on it are in progress (%d in progress)", |
|---|
| 7235 | + root->root_key.objectid, root->dedupe_in_progress); |
|---|
| 7236 | +} |
|---|
| 7237 | + |
|---|
| 6782 | 7238 | long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) |
|---|
| 6783 | 7239 | { |
|---|
| 6784 | 7240 | int ret = 0; |
|---|
| 6785 | 7241 | struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root; |
|---|
| 6786 | 7242 | struct btrfs_fs_info *fs_info = send_root->fs_info; |
|---|
| 6787 | 7243 | struct btrfs_root *clone_root; |
|---|
| 6788 | | - struct btrfs_key key; |
|---|
| 6789 | 7244 | struct send_ctx *sctx = NULL; |
|---|
| 6790 | 7245 | u32 i; |
|---|
| 6791 | 7246 | u64 *clone_sources_tmp = NULL; |
|---|
| 6792 | 7247 | int clone_sources_to_rollback = 0; |
|---|
| 6793 | | - unsigned alloc_size; |
|---|
| 7248 | + size_t alloc_size; |
|---|
| 6794 | 7249 | int sort_clone_roots = 0; |
|---|
| 6795 | | - int index; |
|---|
| 6796 | 7250 | |
|---|
| 6797 | 7251 | if (!capable(CAP_SYS_ADMIN)) |
|---|
| 6798 | 7252 | return -EPERM; |
|---|
| .. | .. |
|---|
| 6802 | 7256 | * making it RW. This also protects against deletion. |
|---|
| 6803 | 7257 | */ |
|---|
| 6804 | 7258 | spin_lock(&send_root->root_item_lock); |
|---|
| 7259 | + if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) { |
|---|
| 7260 | + dedupe_in_progress_warn(send_root); |
|---|
| 7261 | + spin_unlock(&send_root->root_item_lock); |
|---|
| 7262 | + return -EAGAIN; |
|---|
| 7263 | + } |
|---|
| 6805 | 7264 | send_root->send_in_progress++; |
|---|
| 6806 | 7265 | spin_unlock(&send_root->root_item_lock); |
|---|
| 6807 | 7266 | |
|---|
| .. | .. |
|---|
| 6822 | 7281 | if (arg->clone_sources_count > |
|---|
| 6823 | 7282 | ULONG_MAX / sizeof(struct clone_root) - 1) { |
|---|
| 6824 | 7283 | ret = -EINVAL; |
|---|
| 6825 | | - goto out; |
|---|
| 6826 | | - } |
|---|
| 6827 | | - |
|---|
| 6828 | | - if (!access_ok(VERIFY_READ, arg->clone_sources, |
|---|
| 6829 | | - sizeof(*arg->clone_sources) * |
|---|
| 6830 | | - arg->clone_sources_count)) { |
|---|
| 6831 | | - ret = -EFAULT; |
|---|
| 6832 | 7284 | goto out; |
|---|
| 6833 | 7285 | } |
|---|
| 6834 | 7286 | |
|---|
| .. | .. |
|---|
| 6875 | 7327 | goto out; |
|---|
| 6876 | 7328 | } |
|---|
| 6877 | 7329 | |
|---|
| 6878 | | - sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL); |
|---|
| 6879 | | - if (!sctx->read_buf) { |
|---|
| 6880 | | - ret = -ENOMEM; |
|---|
| 6881 | | - goto out; |
|---|
| 6882 | | - } |
|---|
| 6883 | | - |
|---|
| 6884 | 7330 | sctx->pending_dir_moves = RB_ROOT; |
|---|
| 6885 | 7331 | sctx->waiting_dir_moves = RB_ROOT; |
|---|
| 6886 | 7332 | sctx->orphan_dirs = RB_ROOT; |
|---|
| 6887 | 7333 | |
|---|
| 6888 | | - alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); |
|---|
| 6889 | | - |
|---|
| 6890 | | - sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL); |
|---|
| 7334 | + sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), |
|---|
| 7335 | + arg->clone_sources_count + 1, |
|---|
| 7336 | + GFP_KERNEL); |
|---|
| 6891 | 7337 | if (!sctx->clone_roots) { |
|---|
| 6892 | 7338 | ret = -ENOMEM; |
|---|
| 6893 | 7339 | goto out; |
|---|
| 6894 | 7340 | } |
|---|
| 6895 | 7341 | |
|---|
| 6896 | | - alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); |
|---|
| 7342 | + alloc_size = array_size(sizeof(*arg->clone_sources), |
|---|
| 7343 | + arg->clone_sources_count); |
|---|
| 6897 | 7344 | |
|---|
| 6898 | 7345 | if (arg->clone_sources_count) { |
|---|
| 6899 | 7346 | clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL); |
|---|
| .. | .. |
|---|
| 6910 | 7357 | } |
|---|
| 6911 | 7358 | |
|---|
| 6912 | 7359 | for (i = 0; i < arg->clone_sources_count; i++) { |
|---|
| 6913 | | - key.objectid = clone_sources_tmp[i]; |
|---|
| 6914 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 6915 | | - key.offset = (u64)-1; |
|---|
| 6916 | | - |
|---|
| 6917 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
|---|
| 6918 | | - |
|---|
| 6919 | | - clone_root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 7360 | + clone_root = btrfs_get_fs_root(fs_info, |
|---|
| 7361 | + clone_sources_tmp[i], true); |
|---|
| 6920 | 7362 | if (IS_ERR(clone_root)) { |
|---|
| 6921 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 6922 | 7363 | ret = PTR_ERR(clone_root); |
|---|
| 6923 | 7364 | goto out; |
|---|
| 6924 | 7365 | } |
|---|
| .. | .. |
|---|
| 6926 | 7367 | if (!btrfs_root_readonly(clone_root) || |
|---|
| 6927 | 7368 | btrfs_root_dead(clone_root)) { |
|---|
| 6928 | 7369 | spin_unlock(&clone_root->root_item_lock); |
|---|
| 6929 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 7370 | + btrfs_put_root(clone_root); |
|---|
| 6930 | 7371 | ret = -EPERM; |
|---|
| 7372 | + goto out; |
|---|
| 7373 | + } |
|---|
| 7374 | + if (clone_root->dedupe_in_progress) { |
|---|
| 7375 | + dedupe_in_progress_warn(clone_root); |
|---|
| 7376 | + spin_unlock(&clone_root->root_item_lock); |
|---|
| 7377 | + btrfs_put_root(clone_root); |
|---|
| 7378 | + ret = -EAGAIN; |
|---|
| 6931 | 7379 | goto out; |
|---|
| 6932 | 7380 | } |
|---|
| 6933 | 7381 | clone_root->send_in_progress++; |
|---|
| 6934 | 7382 | spin_unlock(&clone_root->root_item_lock); |
|---|
| 6935 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 6936 | 7383 | |
|---|
| 6937 | 7384 | sctx->clone_roots[i].root = clone_root; |
|---|
| 6938 | 7385 | clone_sources_to_rollback = i + 1; |
|---|
| .. | .. |
|---|
| 6942 | 7389 | } |
|---|
| 6943 | 7390 | |
|---|
| 6944 | 7391 | if (arg->parent_root) { |
|---|
| 6945 | | - key.objectid = arg->parent_root; |
|---|
| 6946 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 6947 | | - key.offset = (u64)-1; |
|---|
| 6948 | | - |
|---|
| 6949 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
|---|
| 6950 | | - |
|---|
| 6951 | | - sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 7392 | + sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root, |
|---|
| 7393 | + true); |
|---|
| 6952 | 7394 | if (IS_ERR(sctx->parent_root)) { |
|---|
| 6953 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 6954 | 7395 | ret = PTR_ERR(sctx->parent_root); |
|---|
| 6955 | 7396 | goto out; |
|---|
| 6956 | 7397 | } |
|---|
| .. | .. |
|---|
| 6960 | 7401 | if (!btrfs_root_readonly(sctx->parent_root) || |
|---|
| 6961 | 7402 | btrfs_root_dead(sctx->parent_root)) { |
|---|
| 6962 | 7403 | spin_unlock(&sctx->parent_root->root_item_lock); |
|---|
| 6963 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 6964 | 7404 | ret = -EPERM; |
|---|
| 6965 | 7405 | goto out; |
|---|
| 6966 | 7406 | } |
|---|
| 7407 | + if (sctx->parent_root->dedupe_in_progress) { |
|---|
| 7408 | + dedupe_in_progress_warn(sctx->parent_root); |
|---|
| 7409 | + spin_unlock(&sctx->parent_root->root_item_lock); |
|---|
| 7410 | + ret = -EAGAIN; |
|---|
| 7411 | + goto out; |
|---|
| 7412 | + } |
|---|
| 6967 | 7413 | spin_unlock(&sctx->parent_root->root_item_lock); |
|---|
| 6968 | | - |
|---|
| 6969 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 6970 | 7414 | } |
|---|
| 6971 | 7415 | |
|---|
| 6972 | 7416 | /* |
|---|
| .. | .. |
|---|
| 6974 | 7418 | * is behind the current send position. This is checked while searching |
|---|
| 6975 | 7419 | * for possible clone sources. |
|---|
| 6976 | 7420 | */ |
|---|
| 6977 | | - sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; |
|---|
| 7421 | + sctx->clone_roots[sctx->clone_roots_cnt++].root = |
|---|
| 7422 | + btrfs_grab_root(sctx->send_root); |
|---|
| 6978 | 7423 | |
|---|
| 6979 | 7424 | /* We do a bsearch later */ |
|---|
| 6980 | 7425 | sort(sctx->clone_roots, sctx->clone_roots_cnt, |
|---|
| .. | .. |
|---|
| 6990 | 7435 | if (ret) |
|---|
| 6991 | 7436 | goto out; |
|---|
| 6992 | 7437 | |
|---|
| 7438 | + mutex_lock(&fs_info->balance_mutex); |
|---|
| 7439 | + if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { |
|---|
| 7440 | + mutex_unlock(&fs_info->balance_mutex); |
|---|
| 7441 | + btrfs_warn_rl(fs_info, |
|---|
| 7442 | + "cannot run send because a balance operation is in progress"); |
|---|
| 7443 | + ret = -EAGAIN; |
|---|
| 7444 | + goto out; |
|---|
| 7445 | + } |
|---|
| 7446 | + fs_info->send_in_progress++; |
|---|
| 7447 | + mutex_unlock(&fs_info->balance_mutex); |
|---|
| 7448 | + |
|---|
| 6993 | 7449 | current->journal_info = BTRFS_SEND_TRANS_STUB; |
|---|
| 6994 | 7450 | ret = send_subvol(sctx); |
|---|
| 6995 | 7451 | current->journal_info = NULL; |
|---|
| 7452 | + mutex_lock(&fs_info->balance_mutex); |
|---|
| 7453 | + fs_info->send_in_progress--; |
|---|
| 7454 | + mutex_unlock(&fs_info->balance_mutex); |
|---|
| 6996 | 7455 | if (ret < 0) |
|---|
| 6997 | 7456 | goto out; |
|---|
| 6998 | 7457 | |
|---|
| .. | .. |
|---|
| 7045 | 7504 | } |
|---|
| 7046 | 7505 | |
|---|
| 7047 | 7506 | if (sort_clone_roots) { |
|---|
| 7048 | | - for (i = 0; i < sctx->clone_roots_cnt; i++) |
|---|
| 7507 | + for (i = 0; i < sctx->clone_roots_cnt; i++) { |
|---|
| 7049 | 7508 | btrfs_root_dec_send_in_progress( |
|---|
| 7050 | 7509 | sctx->clone_roots[i].root); |
|---|
| 7510 | + btrfs_put_root(sctx->clone_roots[i].root); |
|---|
| 7511 | + } |
|---|
| 7051 | 7512 | } else { |
|---|
| 7052 | | - for (i = 0; sctx && i < clone_sources_to_rollback; i++) |
|---|
| 7513 | + for (i = 0; sctx && i < clone_sources_to_rollback; i++) { |
|---|
| 7053 | 7514 | btrfs_root_dec_send_in_progress( |
|---|
| 7054 | 7515 | sctx->clone_roots[i].root); |
|---|
| 7516 | + btrfs_put_root(sctx->clone_roots[i].root); |
|---|
| 7517 | + } |
|---|
| 7055 | 7518 | |
|---|
| 7056 | 7519 | btrfs_root_dec_send_in_progress(send_root); |
|---|
| 7057 | 7520 | } |
|---|
| 7058 | | - if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) |
|---|
| 7521 | + if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) { |
|---|
| 7059 | 7522 | btrfs_root_dec_send_in_progress(sctx->parent_root); |
|---|
| 7523 | + btrfs_put_root(sctx->parent_root); |
|---|
| 7524 | + } |
|---|
| 7060 | 7525 | |
|---|
| 7061 | 7526 | kvfree(clone_sources_tmp); |
|---|
| 7062 | 7527 | |
|---|
| .. | .. |
|---|
| 7066 | 7531 | |
|---|
| 7067 | 7532 | kvfree(sctx->clone_roots); |
|---|
| 7068 | 7533 | kvfree(sctx->send_buf); |
|---|
| 7069 | | - kvfree(sctx->read_buf); |
|---|
| 7070 | 7534 | |
|---|
| 7071 | 7535 | name_cache_free(sctx); |
|---|
| 7072 | 7536 | |
|---|