| .. | .. |
|---|
| 7 | 7 | #include <linux/sched/mm.h> |
|---|
| 8 | 8 | #include <linux/bio.h> |
|---|
| 9 | 9 | #include <linux/slab.h> |
|---|
| 10 | | -#include <linux/buffer_head.h> |
|---|
| 11 | 10 | #include <linux/blkdev.h> |
|---|
| 12 | 11 | #include <linux/ratelimit.h> |
|---|
| 13 | 12 | #include <linux/kthread.h> |
|---|
| .. | .. |
|---|
| 15 | 14 | #include <linux/semaphore.h> |
|---|
| 16 | 15 | #include <linux/uuid.h> |
|---|
| 17 | 16 | #include <linux/list_sort.h> |
|---|
| 17 | +#include <linux/namei.h> |
|---|
| 18 | +#include "misc.h" |
|---|
| 18 | 19 | #include "ctree.h" |
|---|
| 19 | 20 | #include "extent_map.h" |
|---|
| 20 | 21 | #include "disk-io.h" |
|---|
| .. | .. |
|---|
| 25 | 26 | #include "async-thread.h" |
|---|
| 26 | 27 | #include "check-integrity.h" |
|---|
| 27 | 28 | #include "rcu-string.h" |
|---|
| 28 | | -#include "math.h" |
|---|
| 29 | 29 | #include "dev-replace.h" |
|---|
| 30 | 30 | #include "sysfs.h" |
|---|
| 31 | 31 | #include "tree-checker.h" |
|---|
| 32 | +#include "space-info.h" |
|---|
| 33 | +#include "block-group.h" |
|---|
| 34 | +#include "discard.h" |
|---|
| 32 | 35 | |
|---|
| 33 | 36 | const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { |
|---|
| 34 | 37 | [BTRFS_RAID_RAID10] = { |
|---|
| .. | .. |
|---|
| 39 | 42 | .tolerated_failures = 1, |
|---|
| 40 | 43 | .devs_increment = 2, |
|---|
| 41 | 44 | .ncopies = 2, |
|---|
| 45 | + .nparity = 0, |
|---|
| 42 | 46 | .raid_name = "raid10", |
|---|
| 43 | 47 | .bg_flag = BTRFS_BLOCK_GROUP_RAID10, |
|---|
| 44 | 48 | .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, |
|---|
| .. | .. |
|---|
| 51 | 55 | .tolerated_failures = 1, |
|---|
| 52 | 56 | .devs_increment = 2, |
|---|
| 53 | 57 | .ncopies = 2, |
|---|
| 58 | + .nparity = 0, |
|---|
| 54 | 59 | .raid_name = "raid1", |
|---|
| 55 | 60 | .bg_flag = BTRFS_BLOCK_GROUP_RAID1, |
|---|
| 56 | 61 | .mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET, |
|---|
| 62 | + }, |
|---|
| 63 | + [BTRFS_RAID_RAID1C3] = { |
|---|
| 64 | + .sub_stripes = 1, |
|---|
| 65 | + .dev_stripes = 1, |
|---|
| 66 | + .devs_max = 3, |
|---|
| 67 | + .devs_min = 3, |
|---|
| 68 | + .tolerated_failures = 2, |
|---|
| 69 | + .devs_increment = 3, |
|---|
| 70 | + .ncopies = 3, |
|---|
| 71 | + .nparity = 0, |
|---|
| 72 | + .raid_name = "raid1c3", |
|---|
| 73 | + .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3, |
|---|
| 74 | + .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET, |
|---|
| 75 | + }, |
|---|
| 76 | + [BTRFS_RAID_RAID1C4] = { |
|---|
| 77 | + .sub_stripes = 1, |
|---|
| 78 | + .dev_stripes = 1, |
|---|
| 79 | + .devs_max = 4, |
|---|
| 80 | + .devs_min = 4, |
|---|
| 81 | + .tolerated_failures = 3, |
|---|
| 82 | + .devs_increment = 4, |
|---|
| 83 | + .ncopies = 4, |
|---|
| 84 | + .nparity = 0, |
|---|
| 85 | + .raid_name = "raid1c4", |
|---|
| 86 | + .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4, |
|---|
| 87 | + .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, |
|---|
| 57 | 88 | }, |
|---|
| 58 | 89 | [BTRFS_RAID_DUP] = { |
|---|
| 59 | 90 | .sub_stripes = 1, |
|---|
| .. | .. |
|---|
| 63 | 94 | .tolerated_failures = 0, |
|---|
| 64 | 95 | .devs_increment = 1, |
|---|
| 65 | 96 | .ncopies = 2, |
|---|
| 97 | + .nparity = 0, |
|---|
| 66 | 98 | .raid_name = "dup", |
|---|
| 67 | 99 | .bg_flag = BTRFS_BLOCK_GROUP_DUP, |
|---|
| 68 | 100 | .mindev_error = 0, |
|---|
| .. | .. |
|---|
| 75 | 107 | .tolerated_failures = 0, |
|---|
| 76 | 108 | .devs_increment = 1, |
|---|
| 77 | 109 | .ncopies = 1, |
|---|
| 110 | + .nparity = 0, |
|---|
| 78 | 111 | .raid_name = "raid0", |
|---|
| 79 | 112 | .bg_flag = BTRFS_BLOCK_GROUP_RAID0, |
|---|
| 80 | 113 | .mindev_error = 0, |
|---|
| .. | .. |
|---|
| 87 | 120 | .tolerated_failures = 0, |
|---|
| 88 | 121 | .devs_increment = 1, |
|---|
| 89 | 122 | .ncopies = 1, |
|---|
| 123 | + .nparity = 0, |
|---|
| 90 | 124 | .raid_name = "single", |
|---|
| 91 | 125 | .bg_flag = 0, |
|---|
| 92 | 126 | .mindev_error = 0, |
|---|
| .. | .. |
|---|
| 99 | 133 | .tolerated_failures = 1, |
|---|
| 100 | 134 | .devs_increment = 1, |
|---|
| 101 | 135 | .ncopies = 1, |
|---|
| 136 | + .nparity = 1, |
|---|
| 102 | 137 | .raid_name = "raid5", |
|---|
| 103 | 138 | .bg_flag = BTRFS_BLOCK_GROUP_RAID5, |
|---|
| 104 | 139 | .mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, |
|---|
| .. | .. |
|---|
| 111 | 146 | .tolerated_failures = 2, |
|---|
| 112 | 147 | .devs_increment = 1, |
|---|
| 113 | 148 | .ncopies = 1, |
|---|
| 149 | + .nparity = 2, |
|---|
| 114 | 150 | .raid_name = "raid6", |
|---|
| 115 | 151 | .bg_flag = BTRFS_BLOCK_GROUP_RAID6, |
|---|
| 116 | 152 | .mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, |
|---|
| 117 | 153 | }, |
|---|
| 118 | 154 | }; |
|---|
| 119 | 155 | |
|---|
| 120 | | -const char *get_raid_name(enum btrfs_raid_types type) |
|---|
| 156 | +const char *btrfs_bg_type_to_raid_name(u64 flags) |
|---|
| 121 | 157 | { |
|---|
| 122 | | - if (type >= BTRFS_NR_RAID_TYPES) |
|---|
| 158 | + const int index = btrfs_bg_flags_to_raid_index(flags); |
|---|
| 159 | + |
|---|
| 160 | + if (index >= BTRFS_NR_RAID_TYPES) |
|---|
| 123 | 161 | return NULL; |
|---|
| 124 | 162 | |
|---|
| 125 | | - return btrfs_raid_array[type].raid_name; |
|---|
| 163 | + return btrfs_raid_array[index].raid_name; |
|---|
| 126 | 164 | } |
|---|
| 127 | 165 | |
|---|
| 128 | | -static int init_first_rw_device(struct btrfs_trans_handle *trans, |
|---|
| 129 | | - struct btrfs_fs_info *fs_info); |
|---|
| 166 | +/* |
|---|
| 167 | + * Fill @buf with textual description of @bg_flags, no more than @size_buf |
|---|
| 168 | + * bytes including terminating null byte. |
|---|
| 169 | + */ |
|---|
| 170 | +void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf) |
|---|
| 171 | +{ |
|---|
| 172 | + int i; |
|---|
| 173 | + int ret; |
|---|
| 174 | + char *bp = buf; |
|---|
| 175 | + u64 flags = bg_flags; |
|---|
| 176 | + u32 size_bp = size_buf; |
|---|
| 177 | + |
|---|
| 178 | + if (!flags) { |
|---|
| 179 | + strcpy(bp, "NONE"); |
|---|
| 180 | + return; |
|---|
| 181 | + } |
|---|
| 182 | + |
|---|
| 183 | +#define DESCRIBE_FLAG(flag, desc) \ |
|---|
| 184 | + do { \ |
|---|
| 185 | + if (flags & (flag)) { \ |
|---|
| 186 | + ret = snprintf(bp, size_bp, "%s|", (desc)); \ |
|---|
| 187 | + if (ret < 0 || ret >= size_bp) \ |
|---|
| 188 | + goto out_overflow; \ |
|---|
| 189 | + size_bp -= ret; \ |
|---|
| 190 | + bp += ret; \ |
|---|
| 191 | + flags &= ~(flag); \ |
|---|
| 192 | + } \ |
|---|
| 193 | + } while (0) |
|---|
| 194 | + |
|---|
| 195 | + DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data"); |
|---|
| 196 | + DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system"); |
|---|
| 197 | + DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata"); |
|---|
| 198 | + |
|---|
| 199 | + DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single"); |
|---|
| 200 | + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
|---|
| 201 | + DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag, |
|---|
| 202 | + btrfs_raid_array[i].raid_name); |
|---|
| 203 | +#undef DESCRIBE_FLAG |
|---|
| 204 | + |
|---|
| 205 | + if (flags) { |
|---|
| 206 | + ret = snprintf(bp, size_bp, "0x%llx|", flags); |
|---|
| 207 | + size_bp -= ret; |
|---|
| 208 | + } |
|---|
| 209 | + |
|---|
| 210 | + if (size_bp < size_buf) |
|---|
| 211 | + buf[size_buf - size_bp - 1] = '\0'; /* remove last | */ |
|---|
| 212 | + |
|---|
| 213 | + /* |
|---|
| 214 | + * The text is trimmed, it's up to the caller to provide sufficiently |
|---|
| 215 | + * large buffer |
|---|
| 216 | + */ |
|---|
| 217 | +out_overflow:; |
|---|
| 218 | +} |
|---|
| 219 | + |
|---|
| 220 | +static int init_first_rw_device(struct btrfs_trans_handle *trans); |
|---|
| 130 | 221 | static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info); |
|---|
| 131 | | -static void __btrfs_reset_dev_stats(struct btrfs_device *dev); |
|---|
| 132 | 222 | static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); |
|---|
| 133 | 223 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); |
|---|
| 134 | 224 | static int __btrfs_map_block(struct btrfs_fs_info *fs_info, |
|---|
| .. | .. |
|---|
| 153 | 243 | * the mutex can be very coarse and can cover long-running operations |
|---|
| 154 | 244 | * |
|---|
| 155 | 245 | * protects: updates to fs_devices counters like missing devices, rw devices, |
|---|
| 156 | | - * seeding, structure cloning, openning/closing devices at mount/umount time |
|---|
| 246 | + * seeding, structure cloning, opening/closing devices at mount/umount time |
|---|
| 157 | 247 | * |
|---|
| 158 | 248 | * global::fs_devs - add, remove, updates to the global list |
|---|
| 159 | 249 | * |
|---|
| .. | .. |
|---|
| 183 | 273 | * chunk_mutex |
|---|
| 184 | 274 | * ----------- |
|---|
| 185 | 275 | * protects chunks, adding or removing during allocation, trim or when a new |
|---|
| 186 | | - * device is added/removed |
|---|
| 276 | + * device is added/removed. Additionally it also protects post_commit_list of |
|---|
| 277 | + * individual devices, since they can be added to the transaction's |
|---|
| 278 | + * post_commit_list only with chunk_mutex held. |
|---|
| 187 | 279 | * |
|---|
| 188 | 280 | * cleaner_mutex |
|---|
| 189 | 281 | * ------------- |
|---|
| .. | .. |
|---|
| 195 | 287 | * ============ |
|---|
| 196 | 288 | * |
|---|
| 197 | 289 | * uuid_mutex |
|---|
| 198 | | - * volume_mutex |
|---|
| 199 | | - * device_list_mutex |
|---|
| 200 | | - * chunk_mutex |
|---|
| 201 | | - * balance_mutex |
|---|
| 290 | + * device_list_mutex |
|---|
| 291 | + * chunk_mutex |
|---|
| 292 | + * balance_mutex |
|---|
| 202 | 293 | * |
|---|
| 203 | 294 | * |
|---|
| 204 | | - * Exclusive operations, BTRFS_FS_EXCL_OP |
|---|
| 205 | | - * ====================================== |
|---|
| 295 | + * Exclusive operations |
|---|
| 296 | + * ==================== |
|---|
| 206 | 297 | * |
|---|
| 207 | 298 | * Maintains the exclusivity of the following operations that apply to the |
|---|
| 208 | 299 | * whole filesystem and cannot run in parallel. |
|---|
| .. | .. |
|---|
| 228 | 319 | * - system power-cycle and filesystem mounted as read-only |
|---|
| 229 | 320 | * - filesystem or device errors leading to forced read-only |
|---|
| 230 | 321 | * |
|---|
| 231 | | - * BTRFS_FS_EXCL_OP flag is set and cleared using atomic operations. |
|---|
| 232 | | - * During the course of Paused state, the BTRFS_FS_EXCL_OP remains set. |
|---|
| 322 | + * The status of exclusive operation is set and cleared atomically. |
|---|
| 323 | + * During the course of Paused state, fs_info::exclusive_operation remains set. |
|---|
| 233 | 324 | * A device operation in Paused or Running state can be canceled or resumed |
|---|
| 234 | 325 | * either by ioctl (Balance only) or when remounted as read-write. |
|---|
| 235 | | - * BTRFS_FS_EXCL_OP flag is cleared when the device operation is canceled or |
|---|
| 326 | + * The exclusive status is cleared when the device operation is canceled or |
|---|
| 236 | 327 | * completed. |
|---|
| 237 | 328 | */ |
|---|
| 238 | 329 | |
|---|
| 239 | 330 | DEFINE_MUTEX(uuid_mutex); |
|---|
| 240 | 331 | static LIST_HEAD(fs_uuids); |
|---|
| 241 | | -struct list_head *btrfs_get_fs_uuids(void) |
|---|
| 332 | +struct list_head * __attribute_const__ btrfs_get_fs_uuids(void) |
|---|
| 242 | 333 | { |
|---|
| 243 | 334 | return &fs_uuids; |
|---|
| 244 | 335 | } |
|---|
| 245 | 336 | |
|---|
| 246 | 337 | /* |
|---|
| 247 | 338 | * alloc_fs_devices - allocate struct btrfs_fs_devices |
|---|
| 248 | | - * @fsid: if not NULL, copy the uuid to fs_devices::fsid |
|---|
| 339 | + * @fsid: if not NULL, copy the UUID to fs_devices::fsid |
|---|
| 340 | + * @metadata_fsid: if not NULL, copy the UUID to fs_devices::metadata_fsid |
|---|
| 249 | 341 | * |
|---|
| 250 | 342 | * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR(). |
|---|
| 251 | 343 | * The returned struct is not linked onto any lists and can be destroyed with |
|---|
| 252 | 344 | * kfree() right away. |
|---|
| 253 | 345 | */ |
|---|
| 254 | | -static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) |
|---|
| 346 | +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid, |
|---|
| 347 | + const u8 *metadata_fsid) |
|---|
| 255 | 348 | { |
|---|
| 256 | 349 | struct btrfs_fs_devices *fs_devs; |
|---|
| 257 | 350 | |
|---|
| .. | .. |
|---|
| 262 | 355 | mutex_init(&fs_devs->device_list_mutex); |
|---|
| 263 | 356 | |
|---|
| 264 | 357 | INIT_LIST_HEAD(&fs_devs->devices); |
|---|
| 265 | | - INIT_LIST_HEAD(&fs_devs->resized_devices); |
|---|
| 266 | 358 | INIT_LIST_HEAD(&fs_devs->alloc_list); |
|---|
| 267 | 359 | INIT_LIST_HEAD(&fs_devs->fs_list); |
|---|
| 360 | + INIT_LIST_HEAD(&fs_devs->seed_list); |
|---|
| 268 | 361 | if (fsid) |
|---|
| 269 | 362 | memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); |
|---|
| 363 | + |
|---|
| 364 | + if (metadata_fsid) |
|---|
| 365 | + memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE); |
|---|
| 366 | + else if (fsid) |
|---|
| 367 | + memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE); |
|---|
| 270 | 368 | |
|---|
| 271 | 369 | return fs_devs; |
|---|
| 272 | 370 | } |
|---|
| 273 | 371 | |
|---|
| 274 | 372 | void btrfs_free_device(struct btrfs_device *device) |
|---|
| 275 | 373 | { |
|---|
| 374 | + WARN_ON(!list_empty(&device->post_commit_list)); |
|---|
| 276 | 375 | rcu_string_free(device->name); |
|---|
| 376 | + extent_io_tree_release(&device->alloc_state); |
|---|
| 277 | 377 | bio_put(device->flush_bio); |
|---|
| 278 | 378 | kfree(device); |
|---|
| 279 | 379 | } |
|---|
| .. | .. |
|---|
| 289 | 389 | btrfs_free_device(device); |
|---|
| 290 | 390 | } |
|---|
| 291 | 391 | kfree(fs_devices); |
|---|
| 292 | | -} |
|---|
| 293 | | - |
|---|
| 294 | | -static void btrfs_kobject_uevent(struct block_device *bdev, |
|---|
| 295 | | - enum kobject_action action) |
|---|
| 296 | | -{ |
|---|
| 297 | | - int ret; |
|---|
| 298 | | - |
|---|
| 299 | | - ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); |
|---|
| 300 | | - if (ret) |
|---|
| 301 | | - pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n", |
|---|
| 302 | | - action, |
|---|
| 303 | | - kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), |
|---|
| 304 | | - &disk_to_dev(bdev->bd_disk)->kobj); |
|---|
| 305 | 392 | } |
|---|
| 306 | 393 | |
|---|
| 307 | 394 | void __exit btrfs_cleanup_fs_uuids(void) |
|---|
| .. | .. |
|---|
| 321 | 408 | * Returned struct is not linked onto any lists and must be destroyed using |
|---|
| 322 | 409 | * btrfs_free_device. |
|---|
| 323 | 410 | */ |
|---|
| 324 | | -static struct btrfs_device *__alloc_device(void) |
|---|
| 411 | +static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info) |
|---|
| 325 | 412 | { |
|---|
| 326 | 413 | struct btrfs_device *dev; |
|---|
| 327 | 414 | |
|---|
| .. | .. |
|---|
| 341 | 428 | |
|---|
| 342 | 429 | INIT_LIST_HEAD(&dev->dev_list); |
|---|
| 343 | 430 | INIT_LIST_HEAD(&dev->dev_alloc_list); |
|---|
| 344 | | - INIT_LIST_HEAD(&dev->resized_list); |
|---|
| 345 | | - |
|---|
| 346 | | - spin_lock_init(&dev->io_lock); |
|---|
| 431 | + INIT_LIST_HEAD(&dev->post_commit_list); |
|---|
| 347 | 432 | |
|---|
| 348 | 433 | atomic_set(&dev->reada_in_flight, 0); |
|---|
| 349 | 434 | atomic_set(&dev->dev_stats_ccnt, 0); |
|---|
| 350 | 435 | btrfs_device_data_ordered_init(dev); |
|---|
| 351 | 436 | INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); |
|---|
| 352 | 437 | INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); |
|---|
| 438 | + extent_io_tree_init(fs_info, &dev->alloc_state, |
|---|
| 439 | + IO_TREE_DEVICE_ALLOC_STATE, NULL); |
|---|
| 353 | 440 | |
|---|
| 354 | 441 | return dev; |
|---|
| 355 | 442 | } |
|---|
| 356 | 443 | |
|---|
| 357 | | -static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) |
|---|
| 444 | +static noinline struct btrfs_fs_devices *find_fsid( |
|---|
| 445 | + const u8 *fsid, const u8 *metadata_fsid) |
|---|
| 358 | 446 | { |
|---|
| 359 | 447 | struct btrfs_fs_devices *fs_devices; |
|---|
| 360 | 448 | |
|---|
| 449 | + ASSERT(fsid); |
|---|
| 450 | + |
|---|
| 451 | + /* Handle non-split brain cases */ |
|---|
| 361 | 452 | list_for_each_entry(fs_devices, &fs_uuids, fs_list) { |
|---|
| 362 | | - if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) |
|---|
| 363 | | - return fs_devices; |
|---|
| 453 | + if (metadata_fsid) { |
|---|
| 454 | + if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0 |
|---|
| 455 | + && memcmp(metadata_fsid, fs_devices->metadata_uuid, |
|---|
| 456 | + BTRFS_FSID_SIZE) == 0) |
|---|
| 457 | + return fs_devices; |
|---|
| 458 | + } else { |
|---|
| 459 | + if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) |
|---|
| 460 | + return fs_devices; |
|---|
| 461 | + } |
|---|
| 364 | 462 | } |
|---|
| 365 | 463 | return NULL; |
|---|
| 366 | 464 | } |
|---|
| 367 | 465 | |
|---|
| 466 | +static struct btrfs_fs_devices *find_fsid_with_metadata_uuid( |
|---|
| 467 | + struct btrfs_super_block *disk_super) |
|---|
| 468 | +{ |
|---|
| 469 | + |
|---|
| 470 | + struct btrfs_fs_devices *fs_devices; |
|---|
| 471 | + |
|---|
| 472 | + /* |
|---|
| 473 | + * Handle scanned device having completed its fsid change but |
|---|
| 474 | + * belonging to a fs_devices that was created by first scanning |
|---|
| 475 | + * a device which didn't have its fsid/metadata_uuid changed |
|---|
| 476 | + * at all and the CHANGING_FSID_V2 flag set. |
|---|
| 477 | + */ |
|---|
| 478 | + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { |
|---|
| 479 | + if (fs_devices->fsid_change && |
|---|
| 480 | + memcmp(disk_super->metadata_uuid, fs_devices->fsid, |
|---|
| 481 | + BTRFS_FSID_SIZE) == 0 && |
|---|
| 482 | + memcmp(fs_devices->fsid, fs_devices->metadata_uuid, |
|---|
| 483 | + BTRFS_FSID_SIZE) == 0) { |
|---|
| 484 | + return fs_devices; |
|---|
| 485 | + } |
|---|
| 486 | + } |
|---|
| 487 | + /* |
|---|
| 488 | + * Handle scanned device having completed its fsid change but |
|---|
| 489 | + * belonging to a fs_devices that was created by a device that |
|---|
| 490 | + * has an outdated pair of fsid/metadata_uuid and |
|---|
| 491 | + * CHANGING_FSID_V2 flag set. |
|---|
| 492 | + */ |
|---|
| 493 | + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { |
|---|
| 494 | + if (fs_devices->fsid_change && |
|---|
| 495 | + memcmp(fs_devices->metadata_uuid, |
|---|
| 496 | + fs_devices->fsid, BTRFS_FSID_SIZE) != 0 && |
|---|
| 497 | + memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid, |
|---|
| 498 | + BTRFS_FSID_SIZE) == 0) { |
|---|
| 499 | + return fs_devices; |
|---|
| 500 | + } |
|---|
| 501 | + } |
|---|
| 502 | + |
|---|
| 503 | + return find_fsid(disk_super->fsid, disk_super->metadata_uuid); |
|---|
| 504 | +} |
|---|
| 505 | + |
|---|
| 506 | + |
|---|
| 368 | 507 | static int |
|---|
| 369 | 508 | btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, |
|---|
| 370 | 509 | int flush, struct block_device **bdev, |
|---|
| 371 | | - struct buffer_head **bh) |
|---|
| 510 | + struct btrfs_super_block **disk_super) |
|---|
| 372 | 511 | { |
|---|
| 373 | 512 | int ret; |
|---|
| 374 | 513 | |
|---|
| .. | .. |
|---|
| 387 | 526 | goto error; |
|---|
| 388 | 527 | } |
|---|
| 389 | 528 | invalidate_bdev(*bdev); |
|---|
| 390 | | - *bh = btrfs_read_dev_super(*bdev); |
|---|
| 391 | | - if (IS_ERR(*bh)) { |
|---|
| 392 | | - ret = PTR_ERR(*bh); |
|---|
| 529 | + *disk_super = btrfs_read_dev_super(*bdev); |
|---|
| 530 | + if (IS_ERR(*disk_super)) { |
|---|
| 531 | + ret = PTR_ERR(*disk_super); |
|---|
| 393 | 532 | blkdev_put(*bdev, flags); |
|---|
| 394 | 533 | goto error; |
|---|
| 395 | 534 | } |
|---|
| .. | .. |
|---|
| 398 | 537 | |
|---|
| 399 | 538 | error: |
|---|
| 400 | 539 | *bdev = NULL; |
|---|
| 401 | | - *bh = NULL; |
|---|
| 402 | 540 | return ret; |
|---|
| 403 | 541 | } |
|---|
| 404 | 542 | |
|---|
| 405 | | -static void requeue_list(struct btrfs_pending_bios *pending_bios, |
|---|
| 406 | | - struct bio *head, struct bio *tail) |
|---|
| 407 | | -{ |
|---|
| 408 | | - |
|---|
| 409 | | - struct bio *old_head; |
|---|
| 410 | | - |
|---|
| 411 | | - old_head = pending_bios->head; |
|---|
| 412 | | - pending_bios->head = head; |
|---|
| 413 | | - if (pending_bios->tail) |
|---|
| 414 | | - tail->bi_next = old_head; |
|---|
| 415 | | - else |
|---|
| 416 | | - pending_bios->tail = tail; |
|---|
| 417 | | -} |
|---|
| 418 | | - |
|---|
| 419 | 543 | /* |
|---|
| 420 | | - * we try to collect pending bios for a device so we don't get a large |
|---|
| 421 | | - * number of procs sending bios down to the same device. This greatly |
|---|
| 422 | | - * improves the schedulers ability to collect and merge the bios. |
|---|
| 544 | + * Check if the device in the path matches the device in the given struct device. |
|---|
| 423 | 545 | * |
|---|
| 424 | | - * But, it also turns into a long list of bios to process and that is sure |
|---|
| 425 | | - * to eventually make the worker thread block. The solution here is to |
|---|
| 426 | | - * make some progress and then put this work struct back at the end of |
|---|
| 427 | | - * the list if the block device is congested. This way, multiple devices |
|---|
| 428 | | - * can make progress from a single worker thread. |
|---|
| 546 | + * Returns: |
|---|
| 547 | + * true If it is the same device. |
|---|
| 548 | + * false If it is not the same device or on error. |
|---|
| 429 | 549 | */ |
|---|
| 430 | | -static noinline void run_scheduled_bios(struct btrfs_device *device) |
|---|
| 550 | +static bool device_matched(const struct btrfs_device *device, const char *path) |
|---|
| 431 | 551 | { |
|---|
| 432 | | - struct btrfs_fs_info *fs_info = device->fs_info; |
|---|
| 433 | | - struct bio *pending; |
|---|
| 434 | | - struct backing_dev_info *bdi; |
|---|
| 435 | | - struct btrfs_pending_bios *pending_bios; |
|---|
| 436 | | - struct bio *tail; |
|---|
| 437 | | - struct bio *cur; |
|---|
| 438 | | - int again = 0; |
|---|
| 439 | | - unsigned long num_run; |
|---|
| 440 | | - unsigned long batch_run = 0; |
|---|
| 441 | | - unsigned long last_waited = 0; |
|---|
| 442 | | - int force_reg = 0; |
|---|
| 443 | | - int sync_pending = 0; |
|---|
| 444 | | - struct blk_plug plug; |
|---|
| 552 | + char *device_name; |
|---|
| 553 | + struct block_device *bdev_old; |
|---|
| 554 | + struct block_device *bdev_new; |
|---|
| 445 | 555 | |
|---|
| 446 | 556 | /* |
|---|
| 447 | | - * this function runs all the bios we've collected for |
|---|
| 448 | | - * a particular device. We don't want to wander off to |
|---|
| 449 | | - * another device without first sending all of these down. |
|---|
| 450 | | - * So, setup a plug here and finish it off before we return |
|---|
| 557 | + * If we are looking for a device with the matching dev_t, then skip |
|---|
| 558 | + * device without a name (a missing device). |
|---|
| 451 | 559 | */ |
|---|
| 452 | | - blk_start_plug(&plug); |
|---|
| 560 | + if (!device->name) |
|---|
| 561 | + return false; |
|---|
| 453 | 562 | |
|---|
| 454 | | - bdi = device->bdev->bd_bdi; |
|---|
| 563 | + device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); |
|---|
| 564 | + if (!device_name) |
|---|
| 565 | + return false; |
|---|
| 455 | 566 | |
|---|
| 456 | | -loop: |
|---|
| 457 | | - spin_lock(&device->io_lock); |
|---|
| 567 | + rcu_read_lock(); |
|---|
| 568 | + scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name)); |
|---|
| 569 | + rcu_read_unlock(); |
|---|
| 458 | 570 | |
|---|
| 459 | | -loop_lock: |
|---|
| 460 | | - num_run = 0; |
|---|
| 571 | + bdev_old = lookup_bdev(device_name); |
|---|
| 572 | + kfree(device_name); |
|---|
| 573 | + if (IS_ERR(bdev_old)) |
|---|
| 574 | + return false; |
|---|
| 461 | 575 | |
|---|
| 462 | | - /* take all the bios off the list at once and process them |
|---|
| 463 | | - * later on (without the lock held). But, remember the |
|---|
| 464 | | - * tail and other pointers so the bios can be properly reinserted |
|---|
| 465 | | - * into the list if we hit congestion |
|---|
| 466 | | - */ |
|---|
| 467 | | - if (!force_reg && device->pending_sync_bios.head) { |
|---|
| 468 | | - pending_bios = &device->pending_sync_bios; |
|---|
| 469 | | - force_reg = 1; |
|---|
| 470 | | - } else { |
|---|
| 471 | | - pending_bios = &device->pending_bios; |
|---|
| 472 | | - force_reg = 0; |
|---|
| 473 | | - } |
|---|
| 576 | + bdev_new = lookup_bdev(path); |
|---|
| 577 | + if (IS_ERR(bdev_new)) |
|---|
| 578 | + return false; |
|---|
| 474 | 579 | |
|---|
| 475 | | - pending = pending_bios->head; |
|---|
| 476 | | - tail = pending_bios->tail; |
|---|
| 477 | | - WARN_ON(pending && !tail); |
|---|
| 580 | + if (bdev_old == bdev_new) |
|---|
| 581 | + return true; |
|---|
| 478 | 582 | |
|---|
| 479 | | - /* |
|---|
| 480 | | - * if pending was null this time around, no bios need processing |
|---|
| 481 | | - * at all and we can stop. Otherwise it'll loop back up again |
|---|
| 482 | | - * and do an additional check so no bios are missed. |
|---|
| 483 | | - * |
|---|
| 484 | | - * device->running_pending is used to synchronize with the |
|---|
| 485 | | - * schedule_bio code. |
|---|
| 486 | | - */ |
|---|
| 487 | | - if (device->pending_sync_bios.head == NULL && |
|---|
| 488 | | - device->pending_bios.head == NULL) { |
|---|
| 489 | | - again = 0; |
|---|
| 490 | | - device->running_pending = 0; |
|---|
| 491 | | - } else { |
|---|
| 492 | | - again = 1; |
|---|
| 493 | | - device->running_pending = 1; |
|---|
| 494 | | - } |
|---|
| 495 | | - |
|---|
| 496 | | - pending_bios->head = NULL; |
|---|
| 497 | | - pending_bios->tail = NULL; |
|---|
| 498 | | - |
|---|
| 499 | | - spin_unlock(&device->io_lock); |
|---|
| 500 | | - |
|---|
| 501 | | - while (pending) { |
|---|
| 502 | | - |
|---|
| 503 | | - rmb(); |
|---|
| 504 | | - /* we want to work on both lists, but do more bios on the |
|---|
| 505 | | - * sync list than the regular list |
|---|
| 506 | | - */ |
|---|
| 507 | | - if ((num_run > 32 && |
|---|
| 508 | | - pending_bios != &device->pending_sync_bios && |
|---|
| 509 | | - device->pending_sync_bios.head) || |
|---|
| 510 | | - (num_run > 64 && pending_bios == &device->pending_sync_bios && |
|---|
| 511 | | - device->pending_bios.head)) { |
|---|
| 512 | | - spin_lock(&device->io_lock); |
|---|
| 513 | | - requeue_list(pending_bios, pending, tail); |
|---|
| 514 | | - goto loop_lock; |
|---|
| 515 | | - } |
|---|
| 516 | | - |
|---|
| 517 | | - cur = pending; |
|---|
| 518 | | - pending = pending->bi_next; |
|---|
| 519 | | - cur->bi_next = NULL; |
|---|
| 520 | | - |
|---|
| 521 | | - BUG_ON(atomic_read(&cur->__bi_cnt) == 0); |
|---|
| 522 | | - |
|---|
| 523 | | - /* |
|---|
| 524 | | - * if we're doing the sync list, record that our |
|---|
| 525 | | - * plug has some sync requests on it |
|---|
| 526 | | - * |
|---|
| 527 | | - * If we're doing the regular list and there are |
|---|
| 528 | | - * sync requests sitting around, unplug before |
|---|
| 529 | | - * we add more |
|---|
| 530 | | - */ |
|---|
| 531 | | - if (pending_bios == &device->pending_sync_bios) { |
|---|
| 532 | | - sync_pending = 1; |
|---|
| 533 | | - } else if (sync_pending) { |
|---|
| 534 | | - blk_finish_plug(&plug); |
|---|
| 535 | | - blk_start_plug(&plug); |
|---|
| 536 | | - sync_pending = 0; |
|---|
| 537 | | - } |
|---|
| 538 | | - |
|---|
| 539 | | - btrfsic_submit_bio(cur); |
|---|
| 540 | | - num_run++; |
|---|
| 541 | | - batch_run++; |
|---|
| 542 | | - |
|---|
| 543 | | - cond_resched(); |
|---|
| 544 | | - |
|---|
| 545 | | - /* |
|---|
| 546 | | - * we made progress, there is more work to do and the bdi |
|---|
| 547 | | - * is now congested. Back off and let other work structs |
|---|
| 548 | | - * run instead |
|---|
| 549 | | - */ |
|---|
| 550 | | - if (pending && bdi_write_congested(bdi) && batch_run > 8 && |
|---|
| 551 | | - fs_info->fs_devices->open_devices > 1) { |
|---|
| 552 | | - struct io_context *ioc; |
|---|
| 553 | | - |
|---|
| 554 | | - ioc = current->io_context; |
|---|
| 555 | | - |
|---|
| 556 | | - /* |
|---|
| 557 | | - * the main goal here is that we don't want to |
|---|
| 558 | | - * block if we're going to be able to submit |
|---|
| 559 | | - * more requests without blocking. |
|---|
| 560 | | - * |
|---|
| 561 | | - * This code does two great things, it pokes into |
|---|
| 562 | | - * the elevator code from a filesystem _and_ |
|---|
| 563 | | - * it makes assumptions about how batching works. |
|---|
| 564 | | - */ |
|---|
| 565 | | - if (ioc && ioc->nr_batch_requests > 0 && |
|---|
| 566 | | - time_before(jiffies, ioc->last_waited + HZ/50UL) && |
|---|
| 567 | | - (last_waited == 0 || |
|---|
| 568 | | - ioc->last_waited == last_waited)) { |
|---|
| 569 | | - /* |
|---|
| 570 | | - * we want to go through our batch of |
|---|
| 571 | | - * requests and stop. So, we copy out |
|---|
| 572 | | - * the ioc->last_waited time and test |
|---|
| 573 | | - * against it before looping |
|---|
| 574 | | - */ |
|---|
| 575 | | - last_waited = ioc->last_waited; |
|---|
| 576 | | - cond_resched(); |
|---|
| 577 | | - continue; |
|---|
| 578 | | - } |
|---|
| 579 | | - spin_lock(&device->io_lock); |
|---|
| 580 | | - requeue_list(pending_bios, pending, tail); |
|---|
| 581 | | - device->running_pending = 1; |
|---|
| 582 | | - |
|---|
| 583 | | - spin_unlock(&device->io_lock); |
|---|
| 584 | | - btrfs_queue_work(fs_info->submit_workers, |
|---|
| 585 | | - &device->work); |
|---|
| 586 | | - goto done; |
|---|
| 587 | | - } |
|---|
| 588 | | - } |
|---|
| 589 | | - |
|---|
| 590 | | - cond_resched(); |
|---|
| 591 | | - if (again) |
|---|
| 592 | | - goto loop; |
|---|
| 593 | | - |
|---|
| 594 | | - spin_lock(&device->io_lock); |
|---|
| 595 | | - if (device->pending_bios.head || device->pending_sync_bios.head) |
|---|
| 596 | | - goto loop_lock; |
|---|
| 597 | | - spin_unlock(&device->io_lock); |
|---|
| 598 | | - |
|---|
| 599 | | -done: |
|---|
| 600 | | - blk_finish_plug(&plug); |
|---|
| 601 | | -} |
|---|
| 602 | | - |
|---|
| 603 | | -static void pending_bios_fn(struct btrfs_work *work) |
|---|
| 604 | | -{ |
|---|
| 605 | | - struct btrfs_device *device; |
|---|
| 606 | | - |
|---|
| 607 | | - device = container_of(work, struct btrfs_device, work); |
|---|
| 608 | | - run_scheduled_bios(device); |
|---|
| 583 | + return false; |
|---|
| 609 | 584 | } |
|---|
| 610 | 585 | |
|---|
| 611 | 586 | /* |
|---|
| .. | .. |
|---|
| 615 | 590 | * matching this path only. |
|---|
| 616 | 591 | * skip_dev: Optional. Will skip this device when searching for the stale |
|---|
| 617 | 592 | * devices. |
|---|
| 593 | + * Return: 0 for success or if @path is NULL. |
|---|
| 594 | + * -EBUSY if @path is a mounted device. |
|---|
| 595 | + * -ENOENT if @path does not match any device in the list. |
|---|
| 618 | 596 | */ |
|---|
| 619 | | -static void btrfs_free_stale_devices(const char *path, |
|---|
| 597 | +static int btrfs_free_stale_devices(const char *path, |
|---|
| 620 | 598 | struct btrfs_device *skip_device) |
|---|
| 621 | 599 | { |
|---|
| 622 | 600 | struct btrfs_fs_devices *fs_devices, *tmp_fs_devices; |
|---|
| 623 | 601 | struct btrfs_device *device, *tmp_device; |
|---|
| 602 | + int ret = 0; |
|---|
| 603 | + |
|---|
| 604 | + lockdep_assert_held(&uuid_mutex); |
|---|
| 605 | + |
|---|
| 606 | + if (path) |
|---|
| 607 | + ret = -ENOENT; |
|---|
| 624 | 608 | |
|---|
| 625 | 609 | list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { |
|---|
| 626 | | - mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 627 | | - if (fs_devices->opened) { |
|---|
| 628 | | - mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 629 | | - continue; |
|---|
| 630 | | - } |
|---|
| 631 | 610 | |
|---|
| 611 | + mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 632 | 612 | list_for_each_entry_safe(device, tmp_device, |
|---|
| 633 | 613 | &fs_devices->devices, dev_list) { |
|---|
| 634 | | - int not_found = 0; |
|---|
| 635 | | - |
|---|
| 636 | 614 | if (skip_device && skip_device == device) |
|---|
| 637 | 615 | continue; |
|---|
| 638 | | - if (path && !device->name) |
|---|
| 616 | + if (path && !device_matched(device, path)) |
|---|
| 639 | 617 | continue; |
|---|
| 640 | | - |
|---|
| 641 | | - rcu_read_lock(); |
|---|
| 642 | | - if (path) |
|---|
| 643 | | - not_found = strcmp(rcu_str_deref(device->name), |
|---|
| 644 | | - path); |
|---|
| 645 | | - rcu_read_unlock(); |
|---|
| 646 | | - if (not_found) |
|---|
| 647 | | - continue; |
|---|
| 618 | + if (fs_devices->opened) { |
|---|
| 619 | + /* for an already deleted device return 0 */ |
|---|
| 620 | + if (path && ret != 0) |
|---|
| 621 | + ret = -EBUSY; |
|---|
| 622 | + break; |
|---|
| 623 | + } |
|---|
| 648 | 624 | |
|---|
| 649 | 625 | /* delete the stale device */ |
|---|
| 650 | 626 | fs_devices->num_devices--; |
|---|
| 651 | 627 | list_del(&device->dev_list); |
|---|
| 652 | 628 | btrfs_free_device(device); |
|---|
| 653 | 629 | |
|---|
| 654 | | - if (fs_devices->num_devices == 0) |
|---|
| 655 | | - break; |
|---|
| 630 | + ret = 0; |
|---|
| 656 | 631 | } |
|---|
| 657 | 632 | mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 633 | + |
|---|
| 658 | 634 | if (fs_devices->num_devices == 0) { |
|---|
| 659 | 635 | btrfs_sysfs_remove_fsid(fs_devices); |
|---|
| 660 | 636 | list_del(&fs_devices->fs_list); |
|---|
| 661 | 637 | free_fs_devices(fs_devices); |
|---|
| 662 | 638 | } |
|---|
| 663 | 639 | } |
|---|
| 640 | + |
|---|
| 641 | + return ret; |
|---|
| 664 | 642 | } |
|---|
| 665 | 643 | |
|---|
| 666 | 644 | /* |
|---|
| .. | .. |
|---|
| 674 | 652 | { |
|---|
| 675 | 653 | struct request_queue *q; |
|---|
| 676 | 654 | struct block_device *bdev; |
|---|
| 677 | | - struct buffer_head *bh; |
|---|
| 678 | 655 | struct btrfs_super_block *disk_super; |
|---|
| 679 | 656 | u64 devid; |
|---|
| 680 | 657 | int ret; |
|---|
| .. | .. |
|---|
| 685 | 662 | return -EINVAL; |
|---|
| 686 | 663 | |
|---|
| 687 | 664 | ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, |
|---|
| 688 | | - &bdev, &bh); |
|---|
| 665 | + &bdev, &disk_super); |
|---|
| 689 | 666 | if (ret) |
|---|
| 690 | 667 | return ret; |
|---|
| 691 | 668 | |
|---|
| 692 | | - disk_super = (struct btrfs_super_block *)bh->b_data; |
|---|
| 693 | 669 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
|---|
| 694 | 670 | if (devid != device->devid) |
|---|
| 695 | | - goto error_brelse; |
|---|
| 671 | + goto error_free_page; |
|---|
| 696 | 672 | |
|---|
| 697 | 673 | if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE)) |
|---|
| 698 | | - goto error_brelse; |
|---|
| 674 | + goto error_free_page; |
|---|
| 699 | 675 | |
|---|
| 700 | 676 | device->generation = btrfs_super_generation(disk_super); |
|---|
| 701 | 677 | |
|---|
| 702 | 678 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { |
|---|
| 679 | + if (btrfs_super_incompat_flags(disk_super) & |
|---|
| 680 | + BTRFS_FEATURE_INCOMPAT_METADATA_UUID) { |
|---|
| 681 | + pr_err( |
|---|
| 682 | + "BTRFS: Invalid seeding and uuid-changed device detected\n"); |
|---|
| 683 | + goto error_free_page; |
|---|
| 684 | + } |
|---|
| 685 | + |
|---|
| 703 | 686 | clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); |
|---|
| 704 | | - fs_devices->seeding = 1; |
|---|
| 687 | + fs_devices->seeding = true; |
|---|
| 705 | 688 | } else { |
|---|
| 706 | 689 | if (bdev_read_only(bdev)) |
|---|
| 707 | 690 | clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); |
|---|
| .. | .. |
|---|
| 711 | 694 | |
|---|
| 712 | 695 | q = bdev_get_queue(bdev); |
|---|
| 713 | 696 | if (!blk_queue_nonrot(q)) |
|---|
| 714 | | - fs_devices->rotating = 1; |
|---|
| 697 | + fs_devices->rotating = true; |
|---|
| 715 | 698 | |
|---|
| 716 | 699 | device->bdev = bdev; |
|---|
| 717 | 700 | clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); |
|---|
| .. | .. |
|---|
| 723 | 706 | fs_devices->rw_devices++; |
|---|
| 724 | 707 | list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list); |
|---|
| 725 | 708 | } |
|---|
| 726 | | - brelse(bh); |
|---|
| 709 | + btrfs_release_disk_super(disk_super); |
|---|
| 727 | 710 | |
|---|
| 728 | 711 | return 0; |
|---|
| 729 | 712 | |
|---|
| 730 | | -error_brelse: |
|---|
| 731 | | - brelse(bh); |
|---|
| 713 | +error_free_page: |
|---|
| 714 | + btrfs_release_disk_super(disk_super); |
|---|
| 732 | 715 | blkdev_put(bdev, flags); |
|---|
| 733 | 716 | |
|---|
| 734 | 717 | return -EINVAL; |
|---|
| 735 | 718 | } |
|---|
| 736 | 719 | |
|---|
| 720 | +/* |
|---|
| 721 | + * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices |
|---|
| 722 | + * being created with a disk that has already completed its fsid change. Such |
|---|
| 723 | + * disk can belong to an fs which has its FSID changed or to one which doesn't. |
|---|
| 724 | + * Handle both cases here. |
|---|
| 725 | + */ |
|---|
| 726 | +static struct btrfs_fs_devices *find_fsid_inprogress( |
|---|
| 727 | + struct btrfs_super_block *disk_super) |
|---|
| 728 | +{ |
|---|
| 729 | + struct btrfs_fs_devices *fs_devices; |
|---|
| 730 | + |
|---|
| 731 | + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { |
|---|
| 732 | + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, |
|---|
| 733 | + BTRFS_FSID_SIZE) != 0 && |
|---|
| 734 | + memcmp(fs_devices->metadata_uuid, disk_super->fsid, |
|---|
| 735 | + BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) { |
|---|
| 736 | + return fs_devices; |
|---|
| 737 | + } |
|---|
| 738 | + } |
|---|
| 739 | + |
|---|
| 740 | + return find_fsid(disk_super->fsid, NULL); |
|---|
| 741 | +} |
|---|
| 742 | + |
|---|
| 743 | + |
|---|
| 744 | +static struct btrfs_fs_devices *find_fsid_changed( |
|---|
| 745 | + struct btrfs_super_block *disk_super) |
|---|
| 746 | +{ |
|---|
| 747 | + struct btrfs_fs_devices *fs_devices; |
|---|
| 748 | + |
|---|
| 749 | + /* |
|---|
| 750 | + * Handles the case where scanned device is part of an fs that had |
|---|
| 751 | + * multiple successful changes of FSID but currently device didn't |
|---|
| 752 | + * observe it. Meaning our fsid will be different than theirs. We need |
|---|
| 753 | + * to handle two subcases : |
|---|
| 754 | + * 1 - The fs still continues to have different METADATA/FSID uuids. |
|---|
| 755 | + * 2 - The fs is switched back to its original FSID (METADATA/FSID |
|---|
| 756 | + * are equal). |
|---|
| 757 | + */ |
|---|
| 758 | + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { |
|---|
| 759 | + /* Changed UUIDs */ |
|---|
| 760 | + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, |
|---|
| 761 | + BTRFS_FSID_SIZE) != 0 && |
|---|
| 762 | + memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid, |
|---|
| 763 | + BTRFS_FSID_SIZE) == 0 && |
|---|
| 764 | + memcmp(fs_devices->fsid, disk_super->fsid, |
|---|
| 765 | + BTRFS_FSID_SIZE) != 0) |
|---|
| 766 | + return fs_devices; |
|---|
| 767 | + |
|---|
| 768 | + /* Unchanged UUIDs */ |
|---|
| 769 | + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, |
|---|
| 770 | + BTRFS_FSID_SIZE) == 0 && |
|---|
| 771 | + memcmp(fs_devices->fsid, disk_super->metadata_uuid, |
|---|
| 772 | + BTRFS_FSID_SIZE) == 0) |
|---|
| 773 | + return fs_devices; |
|---|
| 774 | + } |
|---|
| 775 | + |
|---|
| 776 | + return NULL; |
|---|
| 777 | +} |
|---|
| 778 | + |
|---|
| 779 | +static struct btrfs_fs_devices *find_fsid_reverted_metadata( |
|---|
| 780 | + struct btrfs_super_block *disk_super) |
|---|
| 781 | +{ |
|---|
| 782 | + struct btrfs_fs_devices *fs_devices; |
|---|
| 783 | + |
|---|
| 784 | + /* |
|---|
| 785 | + * Handle the case where the scanned device is part of an fs whose last |
|---|
| 786 | + * metadata UUID change reverted it to the original FSID. At the same |
|---|
| 787 | + * time fs_devices was first created by another constituent device |
|---|
| 788 | + * which didn't fully observe the operation. This results in a |
|---|
| 789 | + * btrfs_fs_devices created with metadata/fsid different AND |
|---|
| 790 | + * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the |
|---|
| 791 | + * fs_devices equal to the FSID of the disk. |
|---|
| 792 | + */ |
|---|
| 793 | + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { |
|---|
| 794 | + if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid, |
|---|
| 795 | + BTRFS_FSID_SIZE) != 0 && |
|---|
| 796 | + memcmp(fs_devices->metadata_uuid, disk_super->fsid, |
|---|
| 797 | + BTRFS_FSID_SIZE) == 0 && |
|---|
| 798 | + fs_devices->fsid_change) |
|---|
| 799 | + return fs_devices; |
|---|
| 800 | + } |
|---|
| 801 | + |
|---|
| 802 | + return NULL; |
|---|
| 803 | +} |
|---|
| 737 | 804 | /* |
|---|
| 738 | 805 | * Add new device to list of registered devices |
|---|
| 739 | 806 | * |
|---|
| .. | .. |
|---|
| 746 | 813 | bool *new_device_added) |
|---|
| 747 | 814 | { |
|---|
| 748 | 815 | struct btrfs_device *device; |
|---|
| 749 | | - struct btrfs_fs_devices *fs_devices; |
|---|
| 816 | + struct btrfs_fs_devices *fs_devices = NULL; |
|---|
| 750 | 817 | struct rcu_string *name; |
|---|
| 751 | 818 | u64 found_transid = btrfs_super_generation(disk_super); |
|---|
| 752 | 819 | u64 devid = btrfs_stack_device_id(&disk_super->dev_item); |
|---|
| 820 | + bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) & |
|---|
| 821 | + BTRFS_FEATURE_INCOMPAT_METADATA_UUID); |
|---|
| 822 | + bool fsid_change_in_progress = (btrfs_super_flags(disk_super) & |
|---|
| 823 | + BTRFS_SUPER_FLAG_CHANGING_FSID_V2); |
|---|
| 753 | 824 | |
|---|
| 754 | | - fs_devices = find_fsid(disk_super->fsid); |
|---|
| 825 | + if (fsid_change_in_progress) { |
|---|
| 826 | + if (!has_metadata_uuid) |
|---|
| 827 | + fs_devices = find_fsid_inprogress(disk_super); |
|---|
| 828 | + else |
|---|
| 829 | + fs_devices = find_fsid_changed(disk_super); |
|---|
| 830 | + } else if (has_metadata_uuid) { |
|---|
| 831 | + fs_devices = find_fsid_with_metadata_uuid(disk_super); |
|---|
| 832 | + } else { |
|---|
| 833 | + fs_devices = find_fsid_reverted_metadata(disk_super); |
|---|
| 834 | + if (!fs_devices) |
|---|
| 835 | + fs_devices = find_fsid(disk_super->fsid, NULL); |
|---|
| 836 | + } |
|---|
| 837 | + |
|---|
| 838 | + |
|---|
| 755 | 839 | if (!fs_devices) { |
|---|
| 756 | | - fs_devices = alloc_fs_devices(disk_super->fsid); |
|---|
| 840 | + if (has_metadata_uuid) |
|---|
| 841 | + fs_devices = alloc_fs_devices(disk_super->fsid, |
|---|
| 842 | + disk_super->metadata_uuid); |
|---|
| 843 | + else |
|---|
| 844 | + fs_devices = alloc_fs_devices(disk_super->fsid, NULL); |
|---|
| 845 | + |
|---|
| 757 | 846 | if (IS_ERR(fs_devices)) |
|---|
| 758 | 847 | return ERR_CAST(fs_devices); |
|---|
| 848 | + |
|---|
| 849 | + fs_devices->fsid_change = fsid_change_in_progress; |
|---|
| 759 | 850 | |
|---|
| 760 | 851 | mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 761 | 852 | list_add(&fs_devices->fs_list, &fs_uuids); |
|---|
| .. | .. |
|---|
| 765 | 856 | mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 766 | 857 | device = btrfs_find_device(fs_devices, devid, |
|---|
| 767 | 858 | disk_super->dev_item.uuid, NULL, false); |
|---|
| 859 | + |
|---|
| 860 | + /* |
|---|
| 861 | + * If this disk has been pulled into an fs devices created by |
|---|
| 862 | + * a device which had the CHANGING_FSID_V2 flag then replace the |
|---|
| 863 | + * metadata_uuid/fsid values of the fs_devices. |
|---|
| 864 | + */ |
|---|
| 865 | + if (fs_devices->fsid_change && |
|---|
| 866 | + found_transid > fs_devices->latest_generation) { |
|---|
| 867 | + memcpy(fs_devices->fsid, disk_super->fsid, |
|---|
| 868 | + BTRFS_FSID_SIZE); |
|---|
| 869 | + |
|---|
| 870 | + if (has_metadata_uuid) |
|---|
| 871 | + memcpy(fs_devices->metadata_uuid, |
|---|
| 872 | + disk_super->metadata_uuid, |
|---|
| 873 | + BTRFS_FSID_SIZE); |
|---|
| 874 | + else |
|---|
| 875 | + memcpy(fs_devices->metadata_uuid, |
|---|
| 876 | + disk_super->fsid, BTRFS_FSID_SIZE); |
|---|
| 877 | + |
|---|
| 878 | + fs_devices->fsid_change = false; |
|---|
| 879 | + } |
|---|
| 768 | 880 | } |
|---|
| 769 | 881 | |
|---|
| 770 | 882 | if (!device) { |
|---|
| .. | .. |
|---|
| 796 | 908 | *new_device_added = true; |
|---|
| 797 | 909 | |
|---|
| 798 | 910 | if (disk_super->label[0]) |
|---|
| 799 | | - pr_info("BTRFS: device label %s devid %llu transid %llu %s\n", |
|---|
| 800 | | - disk_super->label, devid, found_transid, path); |
|---|
| 911 | + pr_info( |
|---|
| 912 | + "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n", |
|---|
| 913 | + disk_super->label, devid, found_transid, path, |
|---|
| 914 | + current->comm, task_pid_nr(current)); |
|---|
| 801 | 915 | else |
|---|
| 802 | | - pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n", |
|---|
| 803 | | - disk_super->fsid, devid, found_transid, path); |
|---|
| 916 | + pr_info( |
|---|
| 917 | + "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n", |
|---|
| 918 | + disk_super->fsid, devid, found_transid, path, |
|---|
| 919 | + current->comm, task_pid_nr(current)); |
|---|
| 804 | 920 | |
|---|
| 805 | 921 | } else if (!device->name || strcmp(device->name->str, path)) { |
|---|
| 806 | 922 | /* |
|---|
| .. | .. |
|---|
| 897 | 1013 | * it back. We need it to pick the disk with largest generation |
|---|
| 898 | 1014 | * (as above). |
|---|
| 899 | 1015 | */ |
|---|
| 900 | | - if (!fs_devices->opened) |
|---|
| 1016 | + if (!fs_devices->opened) { |
|---|
| 901 | 1017 | device->generation = found_transid; |
|---|
| 1018 | + fs_devices->latest_generation = max_t(u64, found_transid, |
|---|
| 1019 | + fs_devices->latest_generation); |
|---|
| 1020 | + } |
|---|
| 902 | 1021 | |
|---|
| 903 | 1022 | fs_devices->total_devices = btrfs_super_num_devices(disk_super); |
|---|
| 904 | 1023 | |
|---|
| .. | .. |
|---|
| 911 | 1030 | struct btrfs_fs_devices *fs_devices; |
|---|
| 912 | 1031 | struct btrfs_device *device; |
|---|
| 913 | 1032 | struct btrfs_device *orig_dev; |
|---|
| 1033 | + int ret = 0; |
|---|
| 914 | 1034 | |
|---|
| 915 | | - fs_devices = alloc_fs_devices(orig->fsid); |
|---|
| 1035 | + lockdep_assert_held(&uuid_mutex); |
|---|
| 1036 | + |
|---|
| 1037 | + fs_devices = alloc_fs_devices(orig->fsid, NULL); |
|---|
| 916 | 1038 | if (IS_ERR(fs_devices)) |
|---|
| 917 | 1039 | return fs_devices; |
|---|
| 918 | 1040 | |
|---|
| 919 | | - mutex_lock(&orig->device_list_mutex); |
|---|
| 920 | 1041 | fs_devices->total_devices = orig->total_devices; |
|---|
| 921 | 1042 | |
|---|
| 922 | | - /* We have held the volume lock, it is safe to get the devices. */ |
|---|
| 923 | 1043 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { |
|---|
| 924 | 1044 | struct rcu_string *name; |
|---|
| 925 | 1045 | |
|---|
| 926 | 1046 | device = btrfs_alloc_device(NULL, &orig_dev->devid, |
|---|
| 927 | 1047 | orig_dev->uuid); |
|---|
| 928 | | - if (IS_ERR(device)) |
|---|
| 1048 | + if (IS_ERR(device)) { |
|---|
| 1049 | + ret = PTR_ERR(device); |
|---|
| 929 | 1050 | goto error; |
|---|
| 1051 | + } |
|---|
| 930 | 1052 | |
|---|
| 931 | 1053 | /* |
|---|
| 932 | 1054 | * This is ok to do without rcu read locked because we hold the |
|---|
| .. | .. |
|---|
| 937 | 1059 | GFP_KERNEL); |
|---|
| 938 | 1060 | if (!name) { |
|---|
| 939 | 1061 | btrfs_free_device(device); |
|---|
| 1062 | + ret = -ENOMEM; |
|---|
| 940 | 1063 | goto error; |
|---|
| 941 | 1064 | } |
|---|
| 942 | 1065 | rcu_assign_pointer(device->name, name); |
|---|
| .. | .. |
|---|
| 946 | 1069 | device->fs_devices = fs_devices; |
|---|
| 947 | 1070 | fs_devices->num_devices++; |
|---|
| 948 | 1071 | } |
|---|
| 949 | | - mutex_unlock(&orig->device_list_mutex); |
|---|
| 950 | 1072 | return fs_devices; |
|---|
| 951 | 1073 | error: |
|---|
| 952 | | - mutex_unlock(&orig->device_list_mutex); |
|---|
| 953 | 1074 | free_fs_devices(fs_devices); |
|---|
| 954 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1075 | + return ERR_PTR(ret); |
|---|
| 955 | 1076 | } |
|---|
| 956 | 1077 | |
|---|
| 957 | | -/* |
|---|
| 958 | | - * After we have read the system tree and know devids belonging to |
|---|
| 959 | | - * this filesystem, remove the device which does not belong there. |
|---|
| 960 | | - */ |
|---|
| 961 | | -void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step) |
|---|
| 1078 | +static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, |
|---|
| 1079 | + int step, struct btrfs_device **latest_dev) |
|---|
| 962 | 1080 | { |
|---|
| 963 | 1081 | struct btrfs_device *device, *next; |
|---|
| 964 | | - struct btrfs_device *latest_dev = NULL; |
|---|
| 965 | 1082 | |
|---|
| 966 | | - mutex_lock(&uuid_mutex); |
|---|
| 967 | | -again: |
|---|
| 968 | 1083 | /* This is the initialized path, it is safe to release the devices. */ |
|---|
| 969 | 1084 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
|---|
| 970 | | - if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, |
|---|
| 971 | | - &device->dev_state)) { |
|---|
| 1085 | + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)) { |
|---|
| 972 | 1086 | if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, |
|---|
| 973 | | - &device->dev_state) && |
|---|
| 1087 | + &device->dev_state) && |
|---|
| 974 | 1088 | !test_bit(BTRFS_DEV_STATE_MISSING, |
|---|
| 975 | 1089 | &device->dev_state) && |
|---|
| 976 | | - (!latest_dev || |
|---|
| 977 | | - device->generation > latest_dev->generation)) { |
|---|
| 978 | | - latest_dev = device; |
|---|
| 1090 | + (!*latest_dev || |
|---|
| 1091 | + device->generation > (*latest_dev)->generation)) { |
|---|
| 1092 | + *latest_dev = device; |
|---|
| 979 | 1093 | } |
|---|
| 980 | 1094 | continue; |
|---|
| 981 | 1095 | } |
|---|
| .. | .. |
|---|
| 1002 | 1116 | btrfs_free_device(device); |
|---|
| 1003 | 1117 | } |
|---|
| 1004 | 1118 | |
|---|
| 1005 | | - if (fs_devices->seed) { |
|---|
| 1006 | | - fs_devices = fs_devices->seed; |
|---|
| 1007 | | - goto again; |
|---|
| 1008 | | - } |
|---|
| 1119 | +} |
|---|
| 1120 | + |
|---|
| 1121 | +/* |
|---|
| 1122 | + * After we have read the system tree and know devids belonging to this |
|---|
| 1123 | + * filesystem, remove the device which does not belong there. |
|---|
| 1124 | + */ |
|---|
| 1125 | +void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step) |
|---|
| 1126 | +{ |
|---|
| 1127 | + struct btrfs_device *latest_dev = NULL; |
|---|
| 1128 | + struct btrfs_fs_devices *seed_dev; |
|---|
| 1129 | + |
|---|
| 1130 | + mutex_lock(&uuid_mutex); |
|---|
| 1131 | + __btrfs_free_extra_devids(fs_devices, step, &latest_dev); |
|---|
| 1132 | + |
|---|
| 1133 | + list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list) |
|---|
| 1134 | + __btrfs_free_extra_devids(seed_dev, step, &latest_dev); |
|---|
| 1009 | 1135 | |
|---|
| 1010 | 1136 | fs_devices->latest_bdev = latest_dev->bdev; |
|---|
| 1011 | 1137 | |
|---|
| 1012 | 1138 | mutex_unlock(&uuid_mutex); |
|---|
| 1013 | | -} |
|---|
| 1014 | | - |
|---|
| 1015 | | -static void free_device_rcu(struct rcu_head *head) |
|---|
| 1016 | | -{ |
|---|
| 1017 | | - struct btrfs_device *device; |
|---|
| 1018 | | - |
|---|
| 1019 | | - device = container_of(head, struct btrfs_device, rcu); |
|---|
| 1020 | | - btrfs_free_device(device); |
|---|
| 1021 | 1139 | } |
|---|
| 1022 | 1140 | |
|---|
| 1023 | 1141 | static void btrfs_close_bdev(struct btrfs_device *device) |
|---|
| .. | .. |
|---|
| 1036 | 1154 | static void btrfs_close_one_device(struct btrfs_device *device) |
|---|
| 1037 | 1155 | { |
|---|
| 1038 | 1156 | struct btrfs_fs_devices *fs_devices = device->fs_devices; |
|---|
| 1039 | | - struct btrfs_device *new_device; |
|---|
| 1040 | | - struct rcu_string *name; |
|---|
| 1041 | | - |
|---|
| 1042 | | - if (device->bdev) |
|---|
| 1043 | | - fs_devices->open_devices--; |
|---|
| 1044 | 1157 | |
|---|
| 1045 | 1158 | if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && |
|---|
| 1046 | 1159 | device->devid != BTRFS_DEV_REPLACE_DEVID) { |
|---|
| .. | .. |
|---|
| 1057 | 1170 | } |
|---|
| 1058 | 1171 | |
|---|
| 1059 | 1172 | btrfs_close_bdev(device); |
|---|
| 1060 | | - |
|---|
| 1061 | | - new_device = btrfs_alloc_device(NULL, &device->devid, |
|---|
| 1062 | | - device->uuid); |
|---|
| 1063 | | - BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ |
|---|
| 1064 | | - |
|---|
| 1065 | | - /* Safe because we are under uuid_mutex */ |
|---|
| 1066 | | - if (device->name) { |
|---|
| 1067 | | - name = rcu_string_strdup(device->name->str, GFP_NOFS); |
|---|
| 1068 | | - BUG_ON(!name); /* -ENOMEM */ |
|---|
| 1069 | | - rcu_assign_pointer(new_device->name, name); |
|---|
| 1173 | + if (device->bdev) { |
|---|
| 1174 | + fs_devices->open_devices--; |
|---|
| 1175 | + device->bdev = NULL; |
|---|
| 1070 | 1176 | } |
|---|
| 1177 | + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); |
|---|
| 1071 | 1178 | |
|---|
| 1072 | | - list_replace_rcu(&device->dev_list, &new_device->dev_list); |
|---|
| 1073 | | - new_device->fs_devices = device->fs_devices; |
|---|
| 1179 | + device->fs_info = NULL; |
|---|
| 1180 | + atomic_set(&device->dev_stats_ccnt, 0); |
|---|
| 1181 | + extent_io_tree_release(&device->alloc_state); |
|---|
| 1074 | 1182 | |
|---|
| 1075 | | - call_rcu(&device->rcu, free_device_rcu); |
|---|
| 1183 | + /* |
|---|
| 1184 | + * Reset the flush error record. We might have a transient flush error |
|---|
| 1185 | + * in this mount, and if so we aborted the current transaction and set |
|---|
| 1186 | + * the fs to an error state, guaranteeing no super blocks can be further |
|---|
| 1187 | + * committed. However that error might be transient and if we unmount the |
|---|
| 1188 | + * filesystem and mount it again, we should allow the mount to succeed |
|---|
| 1189 | + * (btrfs_check_rw_degradable() should not fail) - if after mounting the |
|---|
| 1190 | + * filesystem again we still get flush errors, then we will again abort |
|---|
| 1191 | + * any transaction and set the error state, guaranteeing no commits of |
|---|
| 1192 | + * unsafe super blocks. |
|---|
| 1193 | + */ |
|---|
| 1194 | + device->last_flush_error = 0; |
|---|
| 1195 | + |
|---|
| 1196 | + /* Verify the device is back in a pristine state */ |
|---|
| 1197 | + ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)); |
|---|
| 1198 | + ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); |
|---|
| 1199 | + ASSERT(list_empty(&device->dev_alloc_list)); |
|---|
| 1200 | + ASSERT(list_empty(&device->post_commit_list)); |
|---|
| 1201 | + ASSERT(atomic_read(&device->reada_in_flight) == 0); |
|---|
| 1076 | 1202 | } |
|---|
| 1077 | 1203 | |
|---|
| 1078 | | -static int close_fs_devices(struct btrfs_fs_devices *fs_devices) |
|---|
| 1204 | +static void close_fs_devices(struct btrfs_fs_devices *fs_devices) |
|---|
| 1079 | 1205 | { |
|---|
| 1080 | 1206 | struct btrfs_device *device, *tmp; |
|---|
| 1081 | 1207 | |
|---|
| 1082 | | - if (--fs_devices->opened > 0) |
|---|
| 1083 | | - return 0; |
|---|
| 1208 | + lockdep_assert_held(&uuid_mutex); |
|---|
| 1084 | 1209 | |
|---|
| 1085 | | - mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 1086 | | - list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { |
|---|
| 1210 | + if (--fs_devices->opened > 0) |
|---|
| 1211 | + return; |
|---|
| 1212 | + |
|---|
| 1213 | + list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) |
|---|
| 1087 | 1214 | btrfs_close_one_device(device); |
|---|
| 1088 | | - } |
|---|
| 1089 | | - mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 1090 | 1215 | |
|---|
| 1091 | 1216 | WARN_ON(fs_devices->open_devices); |
|---|
| 1092 | 1217 | WARN_ON(fs_devices->rw_devices); |
|---|
| 1093 | 1218 | fs_devices->opened = 0; |
|---|
| 1094 | | - fs_devices->seeding = 0; |
|---|
| 1095 | | - |
|---|
| 1096 | | - return 0; |
|---|
| 1219 | + fs_devices->seeding = false; |
|---|
| 1220 | + fs_devices->fs_info = NULL; |
|---|
| 1097 | 1221 | } |
|---|
| 1098 | 1222 | |
|---|
| 1099 | | -int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) |
|---|
| 1223 | +void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) |
|---|
| 1100 | 1224 | { |
|---|
| 1101 | | - struct btrfs_fs_devices *seed_devices = NULL; |
|---|
| 1102 | | - int ret; |
|---|
| 1225 | + LIST_HEAD(list); |
|---|
| 1226 | + struct btrfs_fs_devices *tmp; |
|---|
| 1103 | 1227 | |
|---|
| 1104 | 1228 | mutex_lock(&uuid_mutex); |
|---|
| 1105 | | - ret = close_fs_devices(fs_devices); |
|---|
| 1106 | | - if (!fs_devices->opened) { |
|---|
| 1107 | | - seed_devices = fs_devices->seed; |
|---|
| 1108 | | - fs_devices->seed = NULL; |
|---|
| 1109 | | - } |
|---|
| 1110 | | - mutex_unlock(&uuid_mutex); |
|---|
| 1229 | + close_fs_devices(fs_devices); |
|---|
| 1230 | + if (!fs_devices->opened) |
|---|
| 1231 | + list_splice_init(&fs_devices->seed_list, &list); |
|---|
| 1111 | 1232 | |
|---|
| 1112 | | - while (seed_devices) { |
|---|
| 1113 | | - fs_devices = seed_devices; |
|---|
| 1114 | | - seed_devices = fs_devices->seed; |
|---|
| 1233 | + list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) { |
|---|
| 1115 | 1234 | close_fs_devices(fs_devices); |
|---|
| 1235 | + list_del(&fs_devices->seed_list); |
|---|
| 1116 | 1236 | free_fs_devices(fs_devices); |
|---|
| 1117 | 1237 | } |
|---|
| 1118 | | - return ret; |
|---|
| 1238 | + mutex_unlock(&uuid_mutex); |
|---|
| 1119 | 1239 | } |
|---|
| 1120 | 1240 | |
|---|
| 1121 | 1241 | static int open_fs_devices(struct btrfs_fs_devices *fs_devices, |
|---|
| .. | .. |
|---|
| 1123 | 1243 | { |
|---|
| 1124 | 1244 | struct btrfs_device *device; |
|---|
| 1125 | 1245 | struct btrfs_device *latest_dev = NULL; |
|---|
| 1126 | | - int ret = 0; |
|---|
| 1246 | + struct btrfs_device *tmp_device; |
|---|
| 1127 | 1247 | |
|---|
| 1128 | 1248 | flags |= FMODE_EXCL; |
|---|
| 1129 | 1249 | |
|---|
| 1130 | | - list_for_each_entry(device, &fs_devices->devices, dev_list) { |
|---|
| 1131 | | - /* Just open everything we can; ignore failures here */ |
|---|
| 1132 | | - if (btrfs_open_one_device(fs_devices, device, flags, holder)) |
|---|
| 1133 | | - continue; |
|---|
| 1250 | + list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, |
|---|
| 1251 | + dev_list) { |
|---|
| 1252 | + int ret; |
|---|
| 1134 | 1253 | |
|---|
| 1135 | | - if (!latest_dev || |
|---|
| 1136 | | - device->generation > latest_dev->generation) |
|---|
| 1254 | + ret = btrfs_open_one_device(fs_devices, device, flags, holder); |
|---|
| 1255 | + if (ret == 0 && |
|---|
| 1256 | + (!latest_dev || device->generation > latest_dev->generation)) { |
|---|
| 1137 | 1257 | latest_dev = device; |
|---|
| 1258 | + } else if (ret == -ENODATA) { |
|---|
| 1259 | + fs_devices->num_devices--; |
|---|
| 1260 | + list_del(&device->dev_list); |
|---|
| 1261 | + btrfs_free_device(device); |
|---|
| 1262 | + } |
|---|
| 1138 | 1263 | } |
|---|
| 1139 | | - if (fs_devices->open_devices == 0) { |
|---|
| 1140 | | - ret = -EINVAL; |
|---|
| 1141 | | - goto out; |
|---|
| 1142 | | - } |
|---|
| 1264 | + if (fs_devices->open_devices == 0) |
|---|
| 1265 | + return -EINVAL; |
|---|
| 1266 | + |
|---|
| 1143 | 1267 | fs_devices->opened = 1; |
|---|
| 1144 | 1268 | fs_devices->latest_bdev = latest_dev->bdev; |
|---|
| 1145 | 1269 | fs_devices->total_rw_bytes = 0; |
|---|
| 1146 | | -out: |
|---|
| 1147 | | - return ret; |
|---|
| 1270 | + fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR; |
|---|
| 1271 | + |
|---|
| 1272 | + return 0; |
|---|
| 1148 | 1273 | } |
|---|
| 1149 | 1274 | |
|---|
| 1150 | 1275 | static int devid_cmp(void *priv, struct list_head *a, struct list_head *b) |
|---|
| .. | .. |
|---|
| 1186 | 1311 | return ret; |
|---|
| 1187 | 1312 | } |
|---|
| 1188 | 1313 | |
|---|
| 1189 | | -static void btrfs_release_disk_super(struct page *page) |
|---|
| 1314 | +void btrfs_release_disk_super(struct btrfs_super_block *super) |
|---|
| 1190 | 1315 | { |
|---|
| 1191 | | - kunmap(page); |
|---|
| 1316 | + struct page *page = virt_to_page(super); |
|---|
| 1317 | + |
|---|
| 1192 | 1318 | put_page(page); |
|---|
| 1193 | 1319 | } |
|---|
| 1194 | 1320 | |
|---|
| 1195 | | -static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, |
|---|
| 1196 | | - struct page **page, |
|---|
| 1197 | | - struct btrfs_super_block **disk_super) |
|---|
| 1321 | +static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev, |
|---|
| 1322 | + u64 bytenr) |
|---|
| 1198 | 1323 | { |
|---|
| 1324 | + struct btrfs_super_block *disk_super; |
|---|
| 1325 | + struct page *page; |
|---|
| 1199 | 1326 | void *p; |
|---|
| 1200 | 1327 | pgoff_t index; |
|---|
| 1201 | 1328 | |
|---|
| 1202 | 1329 | /* make sure our super fits in the device */ |
|---|
| 1203 | 1330 | if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) |
|---|
| 1204 | | - return 1; |
|---|
| 1331 | + return ERR_PTR(-EINVAL); |
|---|
| 1205 | 1332 | |
|---|
| 1206 | 1333 | /* make sure our super fits in the page */ |
|---|
| 1207 | | - if (sizeof(**disk_super) > PAGE_SIZE) |
|---|
| 1208 | | - return 1; |
|---|
| 1334 | + if (sizeof(*disk_super) > PAGE_SIZE) |
|---|
| 1335 | + return ERR_PTR(-EINVAL); |
|---|
| 1209 | 1336 | |
|---|
| 1210 | 1337 | /* make sure our super doesn't straddle pages on disk */ |
|---|
| 1211 | 1338 | index = bytenr >> PAGE_SHIFT; |
|---|
| 1212 | | - if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index) |
|---|
| 1213 | | - return 1; |
|---|
| 1339 | + if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index) |
|---|
| 1340 | + return ERR_PTR(-EINVAL); |
|---|
| 1214 | 1341 | |
|---|
| 1215 | 1342 | /* pull in the page with our super */ |
|---|
| 1216 | | - *page = read_cache_page_gfp(bdev->bd_inode->i_mapping, |
|---|
| 1217 | | - index, GFP_KERNEL); |
|---|
| 1343 | + page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL); |
|---|
| 1218 | 1344 | |
|---|
| 1219 | | - if (IS_ERR_OR_NULL(*page)) |
|---|
| 1220 | | - return 1; |
|---|
| 1345 | + if (IS_ERR(page)) |
|---|
| 1346 | + return ERR_CAST(page); |
|---|
| 1221 | 1347 | |
|---|
| 1222 | | - p = kmap(*page); |
|---|
| 1348 | + p = page_address(page); |
|---|
| 1223 | 1349 | |
|---|
| 1224 | 1350 | /* align our pointer to the offset of the super block */ |
|---|
| 1225 | | - *disk_super = p + (bytenr & ~PAGE_MASK); |
|---|
| 1351 | + disk_super = p + offset_in_page(bytenr); |
|---|
| 1226 | 1352 | |
|---|
| 1227 | | - if (btrfs_super_bytenr(*disk_super) != bytenr || |
|---|
| 1228 | | - btrfs_super_magic(*disk_super) != BTRFS_MAGIC) { |
|---|
| 1229 | | - btrfs_release_disk_super(*page); |
|---|
| 1230 | | - return 1; |
|---|
| 1353 | + if (btrfs_super_bytenr(disk_super) != bytenr || |
|---|
| 1354 | + btrfs_super_magic(disk_super) != BTRFS_MAGIC) { |
|---|
| 1355 | + btrfs_release_disk_super(p); |
|---|
| 1356 | + return ERR_PTR(-EINVAL); |
|---|
| 1231 | 1357 | } |
|---|
| 1232 | 1358 | |
|---|
| 1233 | | - if ((*disk_super)->label[0] && |
|---|
| 1234 | | - (*disk_super)->label[BTRFS_LABEL_SIZE - 1]) |
|---|
| 1235 | | - (*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0'; |
|---|
| 1359 | + if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1]) |
|---|
| 1360 | + disk_super->label[BTRFS_LABEL_SIZE - 1] = 0; |
|---|
| 1236 | 1361 | |
|---|
| 1237 | | - return 0; |
|---|
| 1362 | + return disk_super; |
|---|
| 1363 | +} |
|---|
| 1364 | + |
|---|
| 1365 | +int btrfs_forget_devices(const char *path) |
|---|
| 1366 | +{ |
|---|
| 1367 | + int ret; |
|---|
| 1368 | + |
|---|
| 1369 | + mutex_lock(&uuid_mutex); |
|---|
| 1370 | + ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL); |
|---|
| 1371 | + mutex_unlock(&uuid_mutex); |
|---|
| 1372 | + |
|---|
| 1373 | + return ret; |
|---|
| 1238 | 1374 | } |
|---|
| 1239 | 1375 | |
|---|
| 1240 | 1376 | /* |
|---|
| .. | .. |
|---|
| 1249 | 1385 | bool new_device_added = false; |
|---|
| 1250 | 1386 | struct btrfs_device *device = NULL; |
|---|
| 1251 | 1387 | struct block_device *bdev; |
|---|
| 1252 | | - struct page *page; |
|---|
| 1253 | 1388 | u64 bytenr; |
|---|
| 1254 | 1389 | |
|---|
| 1255 | 1390 | lockdep_assert_held(&uuid_mutex); |
|---|
| .. | .. |
|---|
| 1267 | 1402 | if (IS_ERR(bdev)) |
|---|
| 1268 | 1403 | return ERR_CAST(bdev); |
|---|
| 1269 | 1404 | |
|---|
| 1270 | | - if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) { |
|---|
| 1271 | | - device = ERR_PTR(-EINVAL); |
|---|
| 1405 | + disk_super = btrfs_read_disk_super(bdev, bytenr); |
|---|
| 1406 | + if (IS_ERR(disk_super)) { |
|---|
| 1407 | + device = ERR_CAST(disk_super); |
|---|
| 1272 | 1408 | goto error_bdev_put; |
|---|
| 1273 | 1409 | } |
|---|
| 1274 | 1410 | |
|---|
| .. | .. |
|---|
| 1278 | 1414 | btrfs_free_stale_devices(path, device); |
|---|
| 1279 | 1415 | } |
|---|
| 1280 | 1416 | |
|---|
| 1281 | | - btrfs_release_disk_super(page); |
|---|
| 1417 | + btrfs_release_disk_super(disk_super); |
|---|
| 1282 | 1418 | |
|---|
| 1283 | 1419 | error_bdev_put: |
|---|
| 1284 | 1420 | blkdev_put(bdev, flags); |
|---|
| .. | .. |
|---|
| 1286 | 1422 | return device; |
|---|
| 1287 | 1423 | } |
|---|
| 1288 | 1424 | |
|---|
| 1289 | | -static int contains_pending_extent(struct btrfs_transaction *transaction, |
|---|
| 1290 | | - struct btrfs_device *device, |
|---|
| 1291 | | - u64 *start, u64 len) |
|---|
| 1425 | +/* |
|---|
| 1426 | + * Try to find a chunk that intersects [start, start + len] range and when one |
|---|
| 1427 | + * such is found, record the end of it in *start |
|---|
| 1428 | + */ |
|---|
| 1429 | +static bool contains_pending_extent(struct btrfs_device *device, u64 *start, |
|---|
| 1430 | + u64 len) |
|---|
| 1292 | 1431 | { |
|---|
| 1293 | | - struct btrfs_fs_info *fs_info = device->fs_info; |
|---|
| 1294 | | - struct extent_map *em; |
|---|
| 1295 | | - struct list_head *search_list = &fs_info->pinned_chunks; |
|---|
| 1296 | | - int ret = 0; |
|---|
| 1297 | | - u64 physical_start = *start; |
|---|
| 1432 | + u64 physical_start, physical_end; |
|---|
| 1298 | 1433 | |
|---|
| 1299 | | - if (transaction) |
|---|
| 1300 | | - search_list = &transaction->pending_chunks; |
|---|
| 1301 | | -again: |
|---|
| 1302 | | - list_for_each_entry(em, search_list, list) { |
|---|
| 1303 | | - struct map_lookup *map; |
|---|
| 1304 | | - int i; |
|---|
| 1434 | + lockdep_assert_held(&device->fs_info->chunk_mutex); |
|---|
| 1305 | 1435 | |
|---|
| 1306 | | - map = em->map_lookup; |
|---|
| 1307 | | - for (i = 0; i < map->num_stripes; i++) { |
|---|
| 1308 | | - u64 end; |
|---|
| 1436 | + if (!find_first_extent_bit(&device->alloc_state, *start, |
|---|
| 1437 | + &physical_start, &physical_end, |
|---|
| 1438 | + CHUNK_ALLOCATED, NULL)) { |
|---|
| 1309 | 1439 | |
|---|
| 1310 | | - if (map->stripes[i].dev != device) |
|---|
| 1311 | | - continue; |
|---|
| 1312 | | - if (map->stripes[i].physical >= physical_start + len || |
|---|
| 1313 | | - map->stripes[i].physical + em->orig_block_len <= |
|---|
| 1314 | | - physical_start) |
|---|
| 1315 | | - continue; |
|---|
| 1316 | | - /* |
|---|
| 1317 | | - * Make sure that while processing the pinned list we do |
|---|
| 1318 | | - * not override our *start with a lower value, because |
|---|
| 1319 | | - * we can have pinned chunks that fall within this |
|---|
| 1320 | | - * device hole and that have lower physical addresses |
|---|
| 1321 | | - * than the pending chunks we processed before. If we |
|---|
| 1322 | | - * do not take this special care we can end up getting |
|---|
| 1323 | | - * 2 pending chunks that start at the same physical |
|---|
| 1324 | | - * device offsets because the end offset of a pinned |
|---|
| 1325 | | - * chunk can be equal to the start offset of some |
|---|
| 1326 | | - * pending chunk. |
|---|
| 1327 | | - */ |
|---|
| 1328 | | - end = map->stripes[i].physical + em->orig_block_len; |
|---|
| 1329 | | - if (end > *start) { |
|---|
| 1330 | | - *start = end; |
|---|
| 1331 | | - ret = 1; |
|---|
| 1332 | | - } |
|---|
| 1440 | + if (in_range(physical_start, *start, len) || |
|---|
| 1441 | + in_range(*start, physical_start, |
|---|
| 1442 | + physical_end - physical_start)) { |
|---|
| 1443 | + *start = physical_end + 1; |
|---|
| 1444 | + return true; |
|---|
| 1333 | 1445 | } |
|---|
| 1334 | 1446 | } |
|---|
| 1335 | | - if (search_list != &fs_info->pinned_chunks) { |
|---|
| 1336 | | - search_list = &fs_info->pinned_chunks; |
|---|
| 1337 | | - goto again; |
|---|
| 1338 | | - } |
|---|
| 1339 | | - |
|---|
| 1340 | | - return ret; |
|---|
| 1447 | + return false; |
|---|
| 1341 | 1448 | } |
|---|
| 1342 | 1449 | |
|---|
| 1450 | +static u64 dev_extent_search_start(struct btrfs_device *device, u64 start) |
|---|
| 1451 | +{ |
|---|
| 1452 | + switch (device->fs_devices->chunk_alloc_policy) { |
|---|
| 1453 | + case BTRFS_CHUNK_ALLOC_REGULAR: |
|---|
| 1454 | + /* |
|---|
| 1455 | + * We don't want to overwrite the superblock on the drive nor |
|---|
| 1456 | + * any area used by the boot loader (grub for example), so we |
|---|
| 1457 | + * make sure to start at an offset of at least 1MB. |
|---|
| 1458 | + */ |
|---|
| 1459 | + return max_t(u64, start, SZ_1M); |
|---|
| 1460 | + default: |
|---|
| 1461 | + BUG(); |
|---|
| 1462 | + } |
|---|
| 1463 | +} |
|---|
| 1464 | + |
|---|
| 1465 | +/** |
|---|
| 1466 | + * dev_extent_hole_check - check if specified hole is suitable for allocation |
|---|
| 1467 | + * @device: the device which we have the hole |
|---|
| 1468 | + * @hole_start: starting position of the hole |
|---|
| 1469 | + * @hole_size: the size of the hole |
|---|
| 1470 | + * @num_bytes: the size of the free space that we need |
|---|
| 1471 | + * |
|---|
| 1472 | + * This function may modify @hole_start and @hole_size to reflect the suitable |
|---|
| 1473 | + * position for allocation. Returns 1 if hole position is updated, 0 otherwise. |
|---|
| 1474 | + */ |
|---|
| 1475 | +static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start, |
|---|
| 1476 | + u64 *hole_size, u64 num_bytes) |
|---|
| 1477 | +{ |
|---|
| 1478 | + bool changed = false; |
|---|
| 1479 | + u64 hole_end = *hole_start + *hole_size; |
|---|
| 1480 | + |
|---|
| 1481 | + /* |
|---|
| 1482 | + * Check before we set max_hole_start, otherwise we could end up |
|---|
| 1483 | + * sending back this offset anyway. |
|---|
| 1484 | + */ |
|---|
| 1485 | + if (contains_pending_extent(device, hole_start, *hole_size)) { |
|---|
| 1486 | + if (hole_end >= *hole_start) |
|---|
| 1487 | + *hole_size = hole_end - *hole_start; |
|---|
| 1488 | + else |
|---|
| 1489 | + *hole_size = 0; |
|---|
| 1490 | + changed = true; |
|---|
| 1491 | + } |
|---|
| 1492 | + |
|---|
| 1493 | + switch (device->fs_devices->chunk_alloc_policy) { |
|---|
| 1494 | + case BTRFS_CHUNK_ALLOC_REGULAR: |
|---|
| 1495 | + /* No extra check */ |
|---|
| 1496 | + break; |
|---|
| 1497 | + default: |
|---|
| 1498 | + BUG(); |
|---|
| 1499 | + } |
|---|
| 1500 | + |
|---|
| 1501 | + return changed; |
|---|
| 1502 | +} |
|---|
| 1343 | 1503 | |
|---|
| 1344 | 1504 | /* |
|---|
| 1345 | 1505 | * find_free_dev_extent_start - find free space in the specified device |
|---|
| .. | .. |
|---|
| 1361 | 1521 | * @len is used to store the size of the free space that we find. |
|---|
| 1362 | 1522 | * But if we don't find suitable free space, it is used to store the size of |
|---|
| 1363 | 1523 | * the max free space. |
|---|
| 1524 | + * |
|---|
| 1525 | + * NOTE: This function will search *commit* root of device tree, and does extra |
|---|
| 1526 | + * check to ensure dev extents are not double allocated. |
|---|
| 1527 | + * This makes the function safe to allocate dev extents but may not report |
|---|
| 1528 | + * correct usable device space, as device extent freed in current transaction |
|---|
| 1529 | + * is not reported as available. |
|---|
| 1364 | 1530 | */ |
|---|
| 1365 | | -int find_free_dev_extent_start(struct btrfs_transaction *transaction, |
|---|
| 1366 | | - struct btrfs_device *device, u64 num_bytes, |
|---|
| 1367 | | - u64 search_start, u64 *start, u64 *len) |
|---|
| 1531 | +static int find_free_dev_extent_start(struct btrfs_device *device, |
|---|
| 1532 | + u64 num_bytes, u64 search_start, u64 *start, |
|---|
| 1533 | + u64 *len) |
|---|
| 1368 | 1534 | { |
|---|
| 1369 | 1535 | struct btrfs_fs_info *fs_info = device->fs_info; |
|---|
| 1370 | 1536 | struct btrfs_root *root = fs_info->dev_root; |
|---|
| .. | .. |
|---|
| 1380 | 1546 | int slot; |
|---|
| 1381 | 1547 | struct extent_buffer *l; |
|---|
| 1382 | 1548 | |
|---|
| 1383 | | - /* |
|---|
| 1384 | | - * We don't want to overwrite the superblock on the drive nor any area |
|---|
| 1385 | | - * used by the boot loader (grub for example), so we make sure to start |
|---|
| 1386 | | - * at an offset of at least 1MB. |
|---|
| 1387 | | - */ |
|---|
| 1388 | | - search_start = max_t(u64, search_start, SZ_1M); |
|---|
| 1549 | + search_start = dev_extent_search_start(device, search_start); |
|---|
| 1389 | 1550 | |
|---|
| 1390 | 1551 | path = btrfs_alloc_path(); |
|---|
| 1391 | 1552 | if (!path) |
|---|
| .. | .. |
|---|
| 1443 | 1604 | |
|---|
| 1444 | 1605 | if (key.offset > search_start) { |
|---|
| 1445 | 1606 | hole_size = key.offset - search_start; |
|---|
| 1446 | | - |
|---|
| 1447 | | - /* |
|---|
| 1448 | | - * Have to check before we set max_hole_start, otherwise |
|---|
| 1449 | | - * we could end up sending back this offset anyway. |
|---|
| 1450 | | - */ |
|---|
| 1451 | | - if (contains_pending_extent(transaction, device, |
|---|
| 1452 | | - &search_start, |
|---|
| 1453 | | - hole_size)) { |
|---|
| 1454 | | - if (key.offset >= search_start) { |
|---|
| 1455 | | - hole_size = key.offset - search_start; |
|---|
| 1456 | | - } else { |
|---|
| 1457 | | - WARN_ON_ONCE(1); |
|---|
| 1458 | | - hole_size = 0; |
|---|
| 1459 | | - } |
|---|
| 1460 | | - } |
|---|
| 1607 | + dev_extent_hole_check(device, &search_start, &hole_size, |
|---|
| 1608 | + num_bytes); |
|---|
| 1461 | 1609 | |
|---|
| 1462 | 1610 | if (hole_size > max_hole_size) { |
|---|
| 1463 | 1611 | max_hole_start = search_start; |
|---|
| .. | .. |
|---|
| 1496 | 1644 | */ |
|---|
| 1497 | 1645 | if (search_end > search_start) { |
|---|
| 1498 | 1646 | hole_size = search_end - search_start; |
|---|
| 1499 | | - |
|---|
| 1500 | | - if (contains_pending_extent(transaction, device, &search_start, |
|---|
| 1501 | | - hole_size)) { |
|---|
| 1647 | + if (dev_extent_hole_check(device, &search_start, &hole_size, |
|---|
| 1648 | + num_bytes)) { |
|---|
| 1502 | 1649 | btrfs_release_path(path); |
|---|
| 1503 | 1650 | goto again; |
|---|
| 1504 | 1651 | } |
|---|
| .. | .. |
|---|
| 1523 | 1670 | return ret; |
|---|
| 1524 | 1671 | } |
|---|
| 1525 | 1672 | |
|---|
| 1526 | | -int find_free_dev_extent(struct btrfs_trans_handle *trans, |
|---|
| 1527 | | - struct btrfs_device *device, u64 num_bytes, |
|---|
| 1673 | +int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
|---|
| 1528 | 1674 | u64 *start, u64 *len) |
|---|
| 1529 | 1675 | { |
|---|
| 1530 | 1676 | /* FIXME use last free of some kind */ |
|---|
| 1531 | | - return find_free_dev_extent_start(trans->transaction, device, |
|---|
| 1532 | | - num_bytes, 0, start, len); |
|---|
| 1677 | + return find_free_dev_extent_start(device, num_bytes, 0, start, len); |
|---|
| 1533 | 1678 | } |
|---|
| 1534 | 1679 | |
|---|
| 1535 | 1680 | static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, |
|---|
| .. | .. |
|---|
| 1640 | 1785 | struct rb_node *n; |
|---|
| 1641 | 1786 | u64 ret = 0; |
|---|
| 1642 | 1787 | |
|---|
| 1643 | | - em_tree = &fs_info->mapping_tree.map_tree; |
|---|
| 1788 | + em_tree = &fs_info->mapping_tree; |
|---|
| 1644 | 1789 | read_lock(&em_tree->lock); |
|---|
| 1645 | | - n = rb_last(&em_tree->map); |
|---|
| 1790 | + n = rb_last(&em_tree->map.rb_root); |
|---|
| 1646 | 1791 | if (n) { |
|---|
| 1647 | 1792 | em = rb_entry(n, struct extent_map, rb_node); |
|---|
| 1648 | 1793 | ret = em->start + em->len; |
|---|
| .. | .. |
|---|
| 1672 | 1817 | if (ret < 0) |
|---|
| 1673 | 1818 | goto error; |
|---|
| 1674 | 1819 | |
|---|
| 1675 | | - BUG_ON(ret == 0); /* Corruption */ |
|---|
| 1820 | + if (ret == 0) { |
|---|
| 1821 | + /* Corruption */ |
|---|
| 1822 | + btrfs_err(fs_info, "corrupted chunk tree devid -1 matched"); |
|---|
| 1823 | + ret = -EUCLEAN; |
|---|
| 1824 | + goto error; |
|---|
| 1825 | + } |
|---|
| 1676 | 1826 | |
|---|
| 1677 | 1827 | ret = btrfs_previous_item(fs_info->chunk_root, path, |
|---|
| 1678 | 1828 | BTRFS_DEV_ITEMS_OBJECTID, |
|---|
| .. | .. |
|---|
| 1738 | 1888 | ptr = btrfs_device_uuid(dev_item); |
|---|
| 1739 | 1889 | write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); |
|---|
| 1740 | 1890 | ptr = btrfs_device_fsid(dev_item); |
|---|
| 1741 | | - write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE); |
|---|
| 1891 | + write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid, |
|---|
| 1892 | + ptr, BTRFS_FSID_SIZE); |
|---|
| 1742 | 1893 | btrfs_mark_buffer_dirty(leaf); |
|---|
| 1743 | 1894 | |
|---|
| 1744 | 1895 | ret = 0; |
|---|
| .. | .. |
|---|
| 1750 | 1901 | /* |
|---|
| 1751 | 1902 | * Function to update ctime/mtime for a given device path. |
|---|
| 1752 | 1903 | * Mainly used for ctime/mtime based probe like libblkid. |
|---|
| 1904 | + * |
|---|
| 1905 | + * We don't care about errors here, this is just to be kind to userspace. |
|---|
| 1753 | 1906 | */ |
|---|
| 1754 | | -static void update_dev_time(const char *path_name) |
|---|
| 1907 | +static void update_dev_time(const char *device_path) |
|---|
| 1755 | 1908 | { |
|---|
| 1756 | | - struct file *filp; |
|---|
| 1909 | + struct path path; |
|---|
| 1910 | + struct timespec64 now; |
|---|
| 1911 | + int ret; |
|---|
| 1757 | 1912 | |
|---|
| 1758 | | - filp = filp_open(path_name, O_RDWR, 0); |
|---|
| 1759 | | - if (IS_ERR(filp)) |
|---|
| 1913 | + ret = kern_path(device_path, LOOKUP_FOLLOW, &path); |
|---|
| 1914 | + if (ret) |
|---|
| 1760 | 1915 | return; |
|---|
| 1761 | | - file_update_time(filp); |
|---|
| 1762 | | - filp_close(filp, NULL); |
|---|
| 1916 | + |
|---|
| 1917 | + now = current_time(d_inode(path.dentry)); |
|---|
| 1918 | + inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME); |
|---|
| 1919 | + path_put(&path); |
|---|
| 1763 | 1920 | } |
|---|
| 1764 | 1921 | |
|---|
| 1765 | | -static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info, |
|---|
| 1766 | | - struct btrfs_device *device) |
|---|
| 1922 | +static int btrfs_rm_dev_item(struct btrfs_device *device) |
|---|
| 1767 | 1923 | { |
|---|
| 1768 | | - struct btrfs_root *root = fs_info->chunk_root; |
|---|
| 1924 | + struct btrfs_root *root = device->fs_info->chunk_root; |
|---|
| 1769 | 1925 | int ret; |
|---|
| 1770 | 1926 | struct btrfs_path *path; |
|---|
| 1771 | 1927 | struct btrfs_key key; |
|---|
| .. | .. |
|---|
| 1862 | 2018 | * where this function is called, there should always be another device (or |
|---|
| 1863 | 2019 | * this_dev) which is active. |
|---|
| 1864 | 2020 | */ |
|---|
| 1865 | | -void btrfs_assign_next_active_device(struct btrfs_device *device, |
|---|
| 1866 | | - struct btrfs_device *this_dev) |
|---|
| 2021 | +void __cold btrfs_assign_next_active_device(struct btrfs_device *device, |
|---|
| 2022 | + struct btrfs_device *next_device) |
|---|
| 1867 | 2023 | { |
|---|
| 1868 | 2024 | struct btrfs_fs_info *fs_info = device->fs_info; |
|---|
| 1869 | | - struct btrfs_device *next_device; |
|---|
| 1870 | 2025 | |
|---|
| 1871 | | - if (this_dev) |
|---|
| 1872 | | - next_device = this_dev; |
|---|
| 1873 | | - else |
|---|
| 2026 | + if (!next_device) |
|---|
| 1874 | 2027 | next_device = btrfs_find_next_active_device(fs_info->fs_devices, |
|---|
| 1875 | | - device); |
|---|
| 2028 | + device); |
|---|
| 1876 | 2029 | ASSERT(next_device); |
|---|
| 1877 | 2030 | |
|---|
| 1878 | 2031 | if (fs_info->sb->s_bdev && |
|---|
| .. | .. |
|---|
| 1883 | 2036 | fs_info->fs_devices->latest_bdev = next_device->bdev; |
|---|
| 1884 | 2037 | } |
|---|
| 1885 | 2038 | |
|---|
| 2039 | +/* |
|---|
| 2040 | + * Return btrfs_fs_devices::num_devices excluding the device that's being |
|---|
| 2041 | + * currently replaced. |
|---|
| 2042 | + */ |
|---|
| 2043 | +static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) |
|---|
| 2044 | +{ |
|---|
| 2045 | + u64 num_devices = fs_info->fs_devices->num_devices; |
|---|
| 2046 | + |
|---|
| 2047 | + down_read(&fs_info->dev_replace.rwsem); |
|---|
| 2048 | + if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { |
|---|
| 2049 | + ASSERT(num_devices > 1); |
|---|
| 2050 | + num_devices--; |
|---|
| 2051 | + } |
|---|
| 2052 | + up_read(&fs_info->dev_replace.rwsem); |
|---|
| 2053 | + |
|---|
| 2054 | + return num_devices; |
|---|
| 2055 | +} |
|---|
| 2056 | + |
|---|
| 2057 | +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, |
|---|
| 2058 | + struct block_device *bdev, |
|---|
| 2059 | + const char *device_path) |
|---|
| 2060 | +{ |
|---|
| 2061 | + struct btrfs_super_block *disk_super; |
|---|
| 2062 | + int copy_num; |
|---|
| 2063 | + |
|---|
| 2064 | + if (!bdev) |
|---|
| 2065 | + return; |
|---|
| 2066 | + |
|---|
| 2067 | + for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) { |
|---|
| 2068 | + struct page *page; |
|---|
| 2069 | + int ret; |
|---|
| 2070 | + |
|---|
| 2071 | + disk_super = btrfs_read_dev_one_super(bdev, copy_num); |
|---|
| 2072 | + if (IS_ERR(disk_super)) |
|---|
| 2073 | + continue; |
|---|
| 2074 | + |
|---|
| 2075 | + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); |
|---|
| 2076 | + |
|---|
| 2077 | + page = virt_to_page(disk_super); |
|---|
| 2078 | + set_page_dirty(page); |
|---|
| 2079 | + lock_page(page); |
|---|
| 2080 | + /* write_one_page() unlocks the page */ |
|---|
| 2081 | + ret = write_one_page(page); |
|---|
| 2082 | + if (ret) |
|---|
| 2083 | + btrfs_warn(fs_info, |
|---|
| 2084 | + "error clearing superblock number %d (%d)", |
|---|
| 2085 | + copy_num, ret); |
|---|
| 2086 | + btrfs_release_disk_super(disk_super); |
|---|
| 2087 | + |
|---|
| 2088 | + } |
|---|
| 2089 | + |
|---|
| 2090 | + /* Notify udev that device has changed */ |
|---|
| 2091 | + btrfs_kobject_uevent(bdev, KOBJ_CHANGE); |
|---|
| 2092 | + |
|---|
| 2093 | + /* Update ctime/mtime for device path for libblkid */ |
|---|
| 2094 | + update_dev_time(device_path); |
|---|
| 2095 | +} |
|---|
| 2096 | + |
|---|
| 1886 | 2097 | int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, |
|---|
| 1887 | | - u64 devid) |
|---|
| 2098 | + u64 devid) |
|---|
| 1888 | 2099 | { |
|---|
| 1889 | 2100 | struct btrfs_device *device; |
|---|
| 1890 | 2101 | struct btrfs_fs_devices *cur_devices; |
|---|
| .. | .. |
|---|
| 1892 | 2103 | u64 num_devices; |
|---|
| 1893 | 2104 | int ret = 0; |
|---|
| 1894 | 2105 | |
|---|
| 1895 | | - mutex_lock(&uuid_mutex); |
|---|
| 1896 | | - |
|---|
| 1897 | | - num_devices = fs_devices->num_devices; |
|---|
| 1898 | | - btrfs_dev_replace_read_lock(&fs_info->dev_replace); |
|---|
| 1899 | | - if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { |
|---|
| 1900 | | - WARN_ON(num_devices < 1); |
|---|
| 1901 | | - num_devices--; |
|---|
| 1902 | | - } |
|---|
| 1903 | | - btrfs_dev_replace_read_unlock(&fs_info->dev_replace); |
|---|
| 2106 | + /* |
|---|
| 2107 | + * The device list in fs_devices is accessed without locks (neither |
|---|
| 2108 | + * uuid_mutex nor device_list_mutex) as it won't change on a mounted |
|---|
| 2109 | + * filesystem and another device rm cannot run. |
|---|
| 2110 | + */ |
|---|
| 2111 | + num_devices = btrfs_num_devices(fs_info); |
|---|
| 1904 | 2112 | |
|---|
| 1905 | 2113 | ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); |
|---|
| 1906 | 2114 | if (ret) |
|---|
| 1907 | 2115 | goto out; |
|---|
| 1908 | 2116 | |
|---|
| 1909 | | - ret = btrfs_find_device_by_devspec(fs_info, devid, device_path, |
|---|
| 1910 | | - &device); |
|---|
| 1911 | | - if (ret) |
|---|
| 2117 | + device = btrfs_find_device_by_devspec(fs_info, devid, device_path); |
|---|
| 2118 | + |
|---|
| 2119 | + if (IS_ERR(device)) { |
|---|
| 2120 | + if (PTR_ERR(device) == -ENOENT && |
|---|
| 2121 | + device_path && strcmp(device_path, "missing") == 0) |
|---|
| 2122 | + ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; |
|---|
| 2123 | + else |
|---|
| 2124 | + ret = PTR_ERR(device); |
|---|
| 1912 | 2125 | goto out; |
|---|
| 2126 | + } |
|---|
| 2127 | + |
|---|
| 2128 | + if (btrfs_pinned_by_swapfile(fs_info, device)) { |
|---|
| 2129 | + btrfs_warn_in_rcu(fs_info, |
|---|
| 2130 | + "cannot remove device %s (devid %llu) due to active swapfile", |
|---|
| 2131 | + rcu_str_deref(device->name), device->devid); |
|---|
| 2132 | + ret = -ETXTBSY; |
|---|
| 2133 | + goto out; |
|---|
| 2134 | + } |
|---|
| 1913 | 2135 | |
|---|
| 1914 | 2136 | if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { |
|---|
| 1915 | 2137 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; |
|---|
| .. | .. |
|---|
| 1929 | 2151 | mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 1930 | 2152 | } |
|---|
| 1931 | 2153 | |
|---|
| 1932 | | - mutex_unlock(&uuid_mutex); |
|---|
| 1933 | 2154 | ret = btrfs_shrink_device(device, 0); |
|---|
| 1934 | | - mutex_lock(&uuid_mutex); |
|---|
| 2155 | + if (!ret) |
|---|
| 2156 | + btrfs_reada_remove_dev(device); |
|---|
| 1935 | 2157 | if (ret) |
|---|
| 1936 | 2158 | goto error_undo; |
|---|
| 1937 | 2159 | |
|---|
| .. | .. |
|---|
| 1940 | 2162 | * counter although write_all_supers() is not locked out. This |
|---|
| 1941 | 2163 | * could give a filesystem state which requires a degraded mount. |
|---|
| 1942 | 2164 | */ |
|---|
| 1943 | | - ret = btrfs_rm_dev_item(fs_info, device); |
|---|
| 2165 | + ret = btrfs_rm_dev_item(device); |
|---|
| 1944 | 2166 | if (ret) |
|---|
| 1945 | 2167 | goto error_undo; |
|---|
| 1946 | 2168 | |
|---|
| 1947 | 2169 | clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); |
|---|
| 1948 | | - btrfs_scrub_cancel_dev(fs_info, device); |
|---|
| 2170 | + btrfs_scrub_cancel_dev(device); |
|---|
| 1949 | 2171 | |
|---|
| 1950 | 2172 | /* |
|---|
| 1951 | 2173 | * the device list mutex makes sure that we don't change |
|---|
| .. | .. |
|---|
| 1980 | 2202 | if (device->bdev) { |
|---|
| 1981 | 2203 | cur_devices->open_devices--; |
|---|
| 1982 | 2204 | /* remove sysfs entry */ |
|---|
| 1983 | | - btrfs_sysfs_rm_device_link(fs_devices, device); |
|---|
| 2205 | + btrfs_sysfs_remove_device(device); |
|---|
| 1984 | 2206 | } |
|---|
| 1985 | 2207 | |
|---|
| 1986 | 2208 | num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1; |
|---|
| .. | .. |
|---|
| 1993 | 2215 | * supers and free the device. |
|---|
| 1994 | 2216 | */ |
|---|
| 1995 | 2217 | if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) |
|---|
| 1996 | | - btrfs_scratch_superblocks(device->bdev, device->name->str); |
|---|
| 2218 | + btrfs_scratch_superblocks(fs_info, device->bdev, |
|---|
| 2219 | + device->name->str); |
|---|
| 1997 | 2220 | |
|---|
| 1998 | 2221 | btrfs_close_bdev(device); |
|---|
| 1999 | | - call_rcu(&device->rcu, free_device_rcu); |
|---|
| 2222 | + synchronize_rcu(); |
|---|
| 2223 | + btrfs_free_device(device); |
|---|
| 2000 | 2224 | |
|---|
| 2001 | 2225 | if (cur_devices->open_devices == 0) { |
|---|
| 2002 | | - while (fs_devices) { |
|---|
| 2003 | | - if (fs_devices->seed == cur_devices) { |
|---|
| 2004 | | - fs_devices->seed = cur_devices->seed; |
|---|
| 2005 | | - break; |
|---|
| 2006 | | - } |
|---|
| 2007 | | - fs_devices = fs_devices->seed; |
|---|
| 2008 | | - } |
|---|
| 2009 | | - cur_devices->seed = NULL; |
|---|
| 2226 | + list_del_init(&cur_devices->seed_list); |
|---|
| 2010 | 2227 | close_fs_devices(cur_devices); |
|---|
| 2011 | 2228 | free_fs_devices(cur_devices); |
|---|
| 2012 | 2229 | } |
|---|
| 2013 | 2230 | |
|---|
| 2014 | 2231 | out: |
|---|
| 2015 | | - mutex_unlock(&uuid_mutex); |
|---|
| 2016 | 2232 | return ret; |
|---|
| 2017 | 2233 | |
|---|
| 2018 | 2234 | error_undo: |
|---|
| 2235 | + btrfs_reada_undo_remove_dev(device); |
|---|
| 2019 | 2236 | if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { |
|---|
| 2020 | 2237 | mutex_lock(&fs_info->chunk_mutex); |
|---|
| 2021 | 2238 | list_add(&device->dev_alloc_list, |
|---|
| .. | .. |
|---|
| 2053 | 2270 | fs_devices->open_devices--; |
|---|
| 2054 | 2271 | } |
|---|
| 2055 | 2272 | |
|---|
| 2056 | | -void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, |
|---|
| 2057 | | - struct btrfs_device *srcdev) |
|---|
| 2273 | +void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) |
|---|
| 2058 | 2274 | { |
|---|
| 2059 | 2275 | struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; |
|---|
| 2060 | 2276 | |
|---|
| 2061 | | - if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) { |
|---|
| 2062 | | - /* zero out the old super if it is writable */ |
|---|
| 2063 | | - btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); |
|---|
| 2064 | | - } |
|---|
| 2277 | + mutex_lock(&uuid_mutex); |
|---|
| 2065 | 2278 | |
|---|
| 2066 | 2279 | btrfs_close_bdev(srcdev); |
|---|
| 2067 | | - call_rcu(&srcdev->rcu, free_device_rcu); |
|---|
| 2280 | + synchronize_rcu(); |
|---|
| 2281 | + btrfs_free_device(srcdev); |
|---|
| 2068 | 2282 | |
|---|
| 2069 | 2283 | /* if this is no devs we rather delete the fs_devices */ |
|---|
| 2070 | 2284 | if (!fs_devices->num_devices) { |
|---|
| 2071 | | - struct btrfs_fs_devices *tmp_fs_devices; |
|---|
| 2072 | | - |
|---|
| 2073 | 2285 | /* |
|---|
| 2074 | 2286 | * On a mounted FS, num_devices can't be zero unless it's a |
|---|
| 2075 | 2287 | * seed. In case of a seed device being replaced, the replace |
|---|
| .. | .. |
|---|
| 2078 | 2290 | */ |
|---|
| 2079 | 2291 | ASSERT(fs_devices->seeding); |
|---|
| 2080 | 2292 | |
|---|
| 2081 | | - tmp_fs_devices = fs_info->fs_devices; |
|---|
| 2082 | | - while (tmp_fs_devices) { |
|---|
| 2083 | | - if (tmp_fs_devices->seed == fs_devices) { |
|---|
| 2084 | | - tmp_fs_devices->seed = fs_devices->seed; |
|---|
| 2085 | | - break; |
|---|
| 2086 | | - } |
|---|
| 2087 | | - tmp_fs_devices = tmp_fs_devices->seed; |
|---|
| 2088 | | - } |
|---|
| 2089 | | - fs_devices->seed = NULL; |
|---|
| 2293 | + list_del_init(&fs_devices->seed_list); |
|---|
| 2090 | 2294 | close_fs_devices(fs_devices); |
|---|
| 2091 | 2295 | free_fs_devices(fs_devices); |
|---|
| 2092 | 2296 | } |
|---|
| 2297 | + mutex_unlock(&uuid_mutex); |
|---|
| 2093 | 2298 | } |
|---|
| 2094 | 2299 | |
|---|
| 2095 | 2300 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) |
|---|
| 2096 | 2301 | { |
|---|
| 2097 | 2302 | struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices; |
|---|
| 2098 | 2303 | |
|---|
| 2099 | | - WARN_ON(!tgtdev); |
|---|
| 2100 | 2304 | mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 2101 | 2305 | |
|---|
| 2102 | | - btrfs_sysfs_rm_device_link(fs_devices, tgtdev); |
|---|
| 2306 | + btrfs_sysfs_remove_device(tgtdev); |
|---|
| 2103 | 2307 | |
|---|
| 2104 | 2308 | if (tgtdev->bdev) |
|---|
| 2105 | 2309 | fs_devices->open_devices--; |
|---|
| .. | .. |
|---|
| 2119 | 2323 | * is already out of device list, so we don't have to hold |
|---|
| 2120 | 2324 | * the device_list_mutex lock. |
|---|
| 2121 | 2325 | */ |
|---|
| 2122 | | - btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); |
|---|
| 2326 | + btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev, |
|---|
| 2327 | + tgtdev->name->str); |
|---|
| 2123 | 2328 | |
|---|
| 2124 | 2329 | btrfs_close_bdev(tgtdev); |
|---|
| 2125 | | - call_rcu(&tgtdev->rcu, free_device_rcu); |
|---|
| 2330 | + synchronize_rcu(); |
|---|
| 2331 | + btrfs_free_device(tgtdev); |
|---|
| 2126 | 2332 | } |
|---|
| 2127 | 2333 | |
|---|
| 2128 | | -static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info, |
|---|
| 2129 | | - const char *device_path, |
|---|
| 2130 | | - struct btrfs_device **device) |
|---|
| 2334 | +static struct btrfs_device *btrfs_find_device_by_path( |
|---|
| 2335 | + struct btrfs_fs_info *fs_info, const char *device_path) |
|---|
| 2131 | 2336 | { |
|---|
| 2132 | 2337 | int ret = 0; |
|---|
| 2133 | 2338 | struct btrfs_super_block *disk_super; |
|---|
| 2134 | 2339 | u64 devid; |
|---|
| 2135 | 2340 | u8 *dev_uuid; |
|---|
| 2136 | 2341 | struct block_device *bdev; |
|---|
| 2137 | | - struct buffer_head *bh; |
|---|
| 2342 | + struct btrfs_device *device; |
|---|
| 2138 | 2343 | |
|---|
| 2139 | | - *device = NULL; |
|---|
| 2140 | 2344 | ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ, |
|---|
| 2141 | | - fs_info->bdev_holder, 0, &bdev, &bh); |
|---|
| 2345 | + fs_info->bdev_holder, 0, &bdev, &disk_super); |
|---|
| 2142 | 2346 | if (ret) |
|---|
| 2143 | | - return ret; |
|---|
| 2144 | | - disk_super = (struct btrfs_super_block *)bh->b_data; |
|---|
| 2347 | + return ERR_PTR(ret); |
|---|
| 2348 | + |
|---|
| 2145 | 2349 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
|---|
| 2146 | 2350 | dev_uuid = disk_super->dev_item.uuid; |
|---|
| 2147 | | - *device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, |
|---|
| 2148 | | - disk_super->fsid, true); |
|---|
| 2149 | | - brelse(bh); |
|---|
| 2150 | | - if (!*device) |
|---|
| 2151 | | - ret = -ENOENT; |
|---|
| 2351 | + if (btrfs_fs_incompat(fs_info, METADATA_UUID)) |
|---|
| 2352 | + device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, |
|---|
| 2353 | + disk_super->metadata_uuid, true); |
|---|
| 2354 | + else |
|---|
| 2355 | + device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, |
|---|
| 2356 | + disk_super->fsid, true); |
|---|
| 2357 | + |
|---|
| 2358 | + btrfs_release_disk_super(disk_super); |
|---|
| 2359 | + if (!device) |
|---|
| 2360 | + device = ERR_PTR(-ENOENT); |
|---|
| 2152 | 2361 | blkdev_put(bdev, FMODE_READ); |
|---|
| 2153 | | - return ret; |
|---|
| 2154 | | -} |
|---|
| 2155 | | - |
|---|
| 2156 | | -int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, |
|---|
| 2157 | | - const char *device_path, |
|---|
| 2158 | | - struct btrfs_device **device) |
|---|
| 2159 | | -{ |
|---|
| 2160 | | - *device = NULL; |
|---|
| 2161 | | - if (strcmp(device_path, "missing") == 0) { |
|---|
| 2162 | | - struct list_head *devices; |
|---|
| 2163 | | - struct btrfs_device *tmp; |
|---|
| 2164 | | - |
|---|
| 2165 | | - devices = &fs_info->fs_devices->devices; |
|---|
| 2166 | | - list_for_each_entry(tmp, devices, dev_list) { |
|---|
| 2167 | | - if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, |
|---|
| 2168 | | - &tmp->dev_state) && !tmp->bdev) { |
|---|
| 2169 | | - *device = tmp; |
|---|
| 2170 | | - break; |
|---|
| 2171 | | - } |
|---|
| 2172 | | - } |
|---|
| 2173 | | - |
|---|
| 2174 | | - if (!*device) |
|---|
| 2175 | | - return BTRFS_ERROR_DEV_MISSING_NOT_FOUND; |
|---|
| 2176 | | - |
|---|
| 2177 | | - return 0; |
|---|
| 2178 | | - } else { |
|---|
| 2179 | | - return btrfs_find_device_by_path(fs_info, device_path, device); |
|---|
| 2180 | | - } |
|---|
| 2362 | + return device; |
|---|
| 2181 | 2363 | } |
|---|
| 2182 | 2364 | |
|---|
| 2183 | 2365 | /* |
|---|
| 2184 | 2366 | * Lookup a device given by device id, or the path if the id is 0. |
|---|
| 2185 | 2367 | */ |
|---|
| 2186 | | -int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, |
|---|
| 2187 | | - const char *devpath, |
|---|
| 2188 | | - struct btrfs_device **device) |
|---|
| 2368 | +struct btrfs_device *btrfs_find_device_by_devspec( |
|---|
| 2369 | + struct btrfs_fs_info *fs_info, u64 devid, |
|---|
| 2370 | + const char *device_path) |
|---|
| 2189 | 2371 | { |
|---|
| 2190 | | - int ret; |
|---|
| 2372 | + struct btrfs_device *device; |
|---|
| 2191 | 2373 | |
|---|
| 2192 | 2374 | if (devid) { |
|---|
| 2193 | | - ret = 0; |
|---|
| 2194 | | - *device = btrfs_find_device(fs_info->fs_devices, devid, |
|---|
| 2195 | | - NULL, NULL, true); |
|---|
| 2196 | | - if (!*device) |
|---|
| 2197 | | - ret = -ENOENT; |
|---|
| 2198 | | - } else { |
|---|
| 2199 | | - if (!devpath || !devpath[0]) |
|---|
| 2200 | | - return -EINVAL; |
|---|
| 2201 | | - |
|---|
| 2202 | | - ret = btrfs_find_device_missing_or_by_path(fs_info, devpath, |
|---|
| 2203 | | - device); |
|---|
| 2375 | + device = btrfs_find_device(fs_info->fs_devices, devid, NULL, |
|---|
| 2376 | + NULL, true); |
|---|
| 2377 | + if (!device) |
|---|
| 2378 | + return ERR_PTR(-ENOENT); |
|---|
| 2379 | + return device; |
|---|
| 2204 | 2380 | } |
|---|
| 2205 | | - return ret; |
|---|
| 2381 | + |
|---|
| 2382 | + if (!device_path || !device_path[0]) |
|---|
| 2383 | + return ERR_PTR(-EINVAL); |
|---|
| 2384 | + |
|---|
| 2385 | + if (strcmp(device_path, "missing") == 0) { |
|---|
| 2386 | + /* Find first missing device */ |
|---|
| 2387 | + list_for_each_entry(device, &fs_info->fs_devices->devices, |
|---|
| 2388 | + dev_list) { |
|---|
| 2389 | + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, |
|---|
| 2390 | + &device->dev_state) && !device->bdev) |
|---|
| 2391 | + return device; |
|---|
| 2392 | + } |
|---|
| 2393 | + return ERR_PTR(-ENOENT); |
|---|
| 2394 | + } |
|---|
| 2395 | + |
|---|
| 2396 | + return btrfs_find_device_by_path(fs_info, device_path); |
|---|
| 2206 | 2397 | } |
|---|
| 2207 | 2398 | |
|---|
| 2208 | 2399 | /* |
|---|
| .. | .. |
|---|
| 2221 | 2412 | if (!fs_devices->seeding) |
|---|
| 2222 | 2413 | return -EINVAL; |
|---|
| 2223 | 2414 | |
|---|
| 2224 | | - seed_devices = alloc_fs_devices(NULL); |
|---|
| 2415 | + /* |
|---|
| 2416 | + * Private copy of the seed devices, anchored at |
|---|
| 2417 | + * fs_info->fs_devices->seed_list |
|---|
| 2418 | + */ |
|---|
| 2419 | + seed_devices = alloc_fs_devices(NULL, NULL); |
|---|
| 2225 | 2420 | if (IS_ERR(seed_devices)) |
|---|
| 2226 | 2421 | return PTR_ERR(seed_devices); |
|---|
| 2227 | 2422 | |
|---|
| 2423 | + /* |
|---|
| 2424 | + * It's necessary to retain a copy of the original seed fs_devices in |
|---|
| 2425 | + * fs_uuids so that filesystems which have been seeded can successfully |
|---|
| 2426 | + * reference the seed device from open_seed_devices. This also supports |
|---|
| 2427 | + * multiple seed filesystems. |
|---|
| 2428 | + */ |
|---|
| 2228 | 2429 | old_devices = clone_fs_devices(fs_devices); |
|---|
| 2229 | 2430 | if (IS_ERR(old_devices)) { |
|---|
| 2230 | 2431 | kfree(seed_devices); |
|---|
| .. | .. |
|---|
| 2245 | 2446 | list_for_each_entry(device, &seed_devices->devices, dev_list) |
|---|
| 2246 | 2447 | device->fs_devices = seed_devices; |
|---|
| 2247 | 2448 | |
|---|
| 2248 | | - mutex_lock(&fs_info->chunk_mutex); |
|---|
| 2249 | | - list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); |
|---|
| 2250 | | - mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 2251 | | - |
|---|
| 2252 | | - fs_devices->seeding = 0; |
|---|
| 2449 | + fs_devices->seeding = false; |
|---|
| 2253 | 2450 | fs_devices->num_devices = 0; |
|---|
| 2254 | 2451 | fs_devices->open_devices = 0; |
|---|
| 2255 | 2452 | fs_devices->missing_devices = 0; |
|---|
| 2256 | | - fs_devices->rotating = 0; |
|---|
| 2257 | | - fs_devices->seed = seed_devices; |
|---|
| 2453 | + fs_devices->rotating = false; |
|---|
| 2454 | + list_add(&seed_devices->seed_list, &fs_devices->seed_list); |
|---|
| 2258 | 2455 | |
|---|
| 2259 | 2456 | generate_random_uuid(fs_devices->fsid); |
|---|
| 2260 | | - memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); |
|---|
| 2457 | + memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE); |
|---|
| 2261 | 2458 | memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); |
|---|
| 2262 | 2459 | mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 2263 | 2460 | |
|---|
| .. | .. |
|---|
| 2271 | 2468 | /* |
|---|
| 2272 | 2469 | * Store the expected generation for seed devices in device items. |
|---|
| 2273 | 2470 | */ |
|---|
| 2274 | | -static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, |
|---|
| 2275 | | - struct btrfs_fs_info *fs_info) |
|---|
| 2471 | +static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) |
|---|
| 2276 | 2472 | { |
|---|
| 2473 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 2277 | 2474 | struct btrfs_root *root = fs_info->chunk_root; |
|---|
| 2278 | 2475 | struct btrfs_path *path; |
|---|
| 2279 | 2476 | struct extent_buffer *leaf; |
|---|
| .. | .. |
|---|
| 2357 | 2554 | u64 orig_super_num_devices; |
|---|
| 2358 | 2555 | int seeding_dev = 0; |
|---|
| 2359 | 2556 | int ret = 0; |
|---|
| 2360 | | - bool unlocked = false; |
|---|
| 2557 | + bool locked = false; |
|---|
| 2361 | 2558 | |
|---|
| 2362 | 2559 | if (sb_rdonly(sb) && !fs_devices->seeding) |
|---|
| 2363 | 2560 | return -EROFS; |
|---|
| .. | .. |
|---|
| 2371 | 2568 | seeding_dev = 1; |
|---|
| 2372 | 2569 | down_write(&sb->s_umount); |
|---|
| 2373 | 2570 | mutex_lock(&uuid_mutex); |
|---|
| 2571 | + locked = true; |
|---|
| 2374 | 2572 | } |
|---|
| 2375 | 2573 | |
|---|
| 2376 | | - filemap_write_and_wait(bdev->bd_inode->i_mapping); |
|---|
| 2574 | + sync_blockdev(bdev); |
|---|
| 2377 | 2575 | |
|---|
| 2378 | | - mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 2379 | | - list_for_each_entry(device, &fs_devices->devices, dev_list) { |
|---|
| 2576 | + rcu_read_lock(); |
|---|
| 2577 | + list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { |
|---|
| 2380 | 2578 | if (device->bdev == bdev) { |
|---|
| 2381 | 2579 | ret = -EEXIST; |
|---|
| 2382 | | - mutex_unlock( |
|---|
| 2383 | | - &fs_devices->device_list_mutex); |
|---|
| 2580 | + rcu_read_unlock(); |
|---|
| 2384 | 2581 | goto error; |
|---|
| 2385 | 2582 | } |
|---|
| 2386 | 2583 | } |
|---|
| 2387 | | - mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 2584 | + rcu_read_unlock(); |
|---|
| 2388 | 2585 | |
|---|
| 2389 | 2586 | device = btrfs_alloc_device(fs_info, NULL, NULL); |
|---|
| 2390 | 2587 | if (IS_ERR(device)) { |
|---|
| .. | .. |
|---|
| 2448 | 2645 | atomic64_add(device->total_bytes, &fs_info->free_chunk_space); |
|---|
| 2449 | 2646 | |
|---|
| 2450 | 2647 | if (!blk_queue_nonrot(q)) |
|---|
| 2451 | | - fs_devices->rotating = 1; |
|---|
| 2648 | + fs_devices->rotating = true; |
|---|
| 2452 | 2649 | |
|---|
| 2453 | 2650 | orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy); |
|---|
| 2454 | 2651 | btrfs_set_super_total_bytes(fs_info->super_copy, |
|---|
| .. | .. |
|---|
| 2468 | 2665 | mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 2469 | 2666 | |
|---|
| 2470 | 2667 | /* Add sysfs device entry */ |
|---|
| 2471 | | - btrfs_sysfs_add_device_link(fs_devices, device); |
|---|
| 2668 | + btrfs_sysfs_add_device(device); |
|---|
| 2472 | 2669 | |
|---|
| 2473 | 2670 | mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 2474 | 2671 | |
|---|
| 2475 | 2672 | if (seeding_dev) { |
|---|
| 2476 | 2673 | mutex_lock(&fs_info->chunk_mutex); |
|---|
| 2477 | | - ret = init_first_rw_device(trans, fs_info); |
|---|
| 2674 | + ret = init_first_rw_device(trans); |
|---|
| 2478 | 2675 | mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 2479 | 2676 | if (ret) { |
|---|
| 2480 | 2677 | btrfs_abort_transaction(trans, ret); |
|---|
| .. | .. |
|---|
| 2489 | 2686 | } |
|---|
| 2490 | 2687 | |
|---|
| 2491 | 2688 | if (seeding_dev) { |
|---|
| 2492 | | - char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; |
|---|
| 2493 | | - |
|---|
| 2494 | | - ret = btrfs_finish_sprout(trans, fs_info); |
|---|
| 2689 | + ret = btrfs_finish_sprout(trans); |
|---|
| 2495 | 2690 | if (ret) { |
|---|
| 2496 | 2691 | btrfs_abort_transaction(trans, ret); |
|---|
| 2497 | 2692 | goto error_sysfs; |
|---|
| 2498 | 2693 | } |
|---|
| 2499 | 2694 | |
|---|
| 2500 | | - /* Sprouting would change fsid of the mounted root, |
|---|
| 2501 | | - * so rename the fsid on the sysfs |
|---|
| 2695 | + /* |
|---|
| 2696 | + * fs_devices now represents the newly sprouted filesystem and |
|---|
| 2697 | + * its fsid has been changed by btrfs_prepare_sprout() |
|---|
| 2502 | 2698 | */ |
|---|
| 2503 | | - snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", |
|---|
| 2504 | | - fs_info->fsid); |
|---|
| 2505 | | - if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf)) |
|---|
| 2506 | | - btrfs_warn(fs_info, |
|---|
| 2507 | | - "sysfs: failed to create fsid for sprout"); |
|---|
| 2699 | + btrfs_sysfs_update_sprout_fsid(fs_devices); |
|---|
| 2508 | 2700 | } |
|---|
| 2509 | 2701 | |
|---|
| 2510 | 2702 | ret = btrfs_commit_transaction(trans); |
|---|
| .. | .. |
|---|
| 2512 | 2704 | if (seeding_dev) { |
|---|
| 2513 | 2705 | mutex_unlock(&uuid_mutex); |
|---|
| 2514 | 2706 | up_write(&sb->s_umount); |
|---|
| 2515 | | - unlocked = true; |
|---|
| 2707 | + locked = false; |
|---|
| 2516 | 2708 | |
|---|
| 2517 | 2709 | if (ret) /* transaction commit */ |
|---|
| 2518 | 2710 | return ret; |
|---|
| .. | .. |
|---|
| 2532 | 2724 | ret = btrfs_commit_transaction(trans); |
|---|
| 2533 | 2725 | } |
|---|
| 2534 | 2726 | |
|---|
| 2535 | | - /* Update ctime/mtime for libblkid */ |
|---|
| 2727 | + /* |
|---|
| 2728 | + * Now that we have written a new super block to this device, check all |
|---|
| 2729 | + * other fs_devices lists to see if device_path alienates any other |
|---|
| 2730 | + * scanned device. |
|---|
| 2731 | + * We can ignore the return value as it typically returns -EINVAL and |
|---|
| 2732 | + * only succeeds if the device was an alien. |
|---|
| 2733 | + */ |
|---|
| 2734 | + btrfs_forget_devices(device_path); |
|---|
| 2735 | + |
|---|
| 2736 | + /* Update ctime/mtime for blkid or udev */ |
|---|
| 2536 | 2737 | update_dev_time(device_path); |
|---|
| 2738 | + |
|---|
| 2537 | 2739 | return ret; |
|---|
| 2538 | 2740 | |
|---|
| 2539 | 2741 | error_sysfs: |
|---|
| 2540 | | - btrfs_sysfs_rm_device_link(fs_devices, device); |
|---|
| 2742 | + btrfs_sysfs_remove_device(device); |
|---|
| 2541 | 2743 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
|---|
| 2542 | 2744 | mutex_lock(&fs_info->chunk_mutex); |
|---|
| 2543 | 2745 | list_del_rcu(&device->dev_list); |
|---|
| .. | .. |
|---|
| 2563 | 2765 | btrfs_free_device(device); |
|---|
| 2564 | 2766 | error: |
|---|
| 2565 | 2767 | blkdev_put(bdev, FMODE_EXCL); |
|---|
| 2566 | | - if (seeding_dev && !unlocked) { |
|---|
| 2768 | + if (locked) { |
|---|
| 2567 | 2769 | mutex_unlock(&uuid_mutex); |
|---|
| 2568 | 2770 | up_write(&sb->s_umount); |
|---|
| 2569 | 2771 | } |
|---|
| .. | .. |
|---|
| 2621 | 2823 | { |
|---|
| 2622 | 2824 | struct btrfs_fs_info *fs_info = device->fs_info; |
|---|
| 2623 | 2825 | struct btrfs_super_block *super_copy = fs_info->super_copy; |
|---|
| 2624 | | - struct btrfs_fs_devices *fs_devices; |
|---|
| 2625 | 2826 | u64 old_total; |
|---|
| 2626 | 2827 | u64 diff; |
|---|
| 2627 | 2828 | |
|---|
| .. | .. |
|---|
| 2640 | 2841 | return -EINVAL; |
|---|
| 2641 | 2842 | } |
|---|
| 2642 | 2843 | |
|---|
| 2643 | | - fs_devices = fs_info->fs_devices; |
|---|
| 2644 | | - |
|---|
| 2645 | 2844 | btrfs_set_super_total_bytes(super_copy, |
|---|
| 2646 | 2845 | round_down(old_total + diff, fs_info->sectorsize)); |
|---|
| 2647 | 2846 | device->fs_devices->total_rw_bytes += diff; |
|---|
| .. | .. |
|---|
| 2649 | 2848 | btrfs_device_set_total_bytes(device, new_size); |
|---|
| 2650 | 2849 | btrfs_device_set_disk_total_bytes(device, new_size); |
|---|
| 2651 | 2850 | btrfs_clear_space_info_full(device->fs_info); |
|---|
| 2652 | | - if (list_empty(&device->resized_list)) |
|---|
| 2653 | | - list_add_tail(&device->resized_list, |
|---|
| 2654 | | - &fs_devices->resized_devices); |
|---|
| 2851 | + if (list_empty(&device->post_commit_list)) |
|---|
| 2852 | + list_add_tail(&device->post_commit_list, |
|---|
| 2853 | + &trans->transaction->dev_update_list); |
|---|
| 2655 | 2854 | mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 2656 | 2855 | |
|---|
| 2657 | 2856 | return btrfs_update_device(trans, device); |
|---|
| .. | .. |
|---|
| 2739 | 2938 | return ret; |
|---|
| 2740 | 2939 | } |
|---|
| 2741 | 2940 | |
|---|
| 2742 | | -static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info, |
|---|
| 2743 | | - u64 logical, u64 length) |
|---|
| 2941 | +/* |
|---|
| 2942 | + * btrfs_get_chunk_map() - Find the mapping containing the given logical extent. |
|---|
| 2943 | + * @logical: Logical block offset in bytes. |
|---|
| 2944 | + * @length: Length of extent in bytes. |
|---|
| 2945 | + * |
|---|
| 2946 | + * Return: Chunk mapping or ERR_PTR. |
|---|
| 2947 | + */ |
|---|
| 2948 | +struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, |
|---|
| 2949 | + u64 logical, u64 length) |
|---|
| 2744 | 2950 | { |
|---|
| 2745 | 2951 | struct extent_map_tree *em_tree; |
|---|
| 2746 | 2952 | struct extent_map *em; |
|---|
| 2747 | 2953 | |
|---|
| 2748 | | - em_tree = &fs_info->mapping_tree.map_tree; |
|---|
| 2954 | + em_tree = &fs_info->mapping_tree; |
|---|
| 2749 | 2955 | read_lock(&em_tree->lock); |
|---|
| 2750 | 2956 | em = lookup_extent_mapping(em_tree, logical, length); |
|---|
| 2751 | 2957 | read_unlock(&em_tree->lock); |
|---|
| .. | .. |
|---|
| 2777 | 2983 | int i, ret = 0; |
|---|
| 2778 | 2984 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 2779 | 2985 | |
|---|
| 2780 | | - em = get_chunk_map(fs_info, chunk_offset, 1); |
|---|
| 2986 | + em = btrfs_get_chunk_map(fs_info, chunk_offset, 1); |
|---|
| 2781 | 2987 | if (IS_ERR(em)) { |
|---|
| 2782 | 2988 | /* |
|---|
| 2783 | 2989 | * This is a logic error, but we don't want to just rely on the |
|---|
| .. | .. |
|---|
| 2818 | 3024 | mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 2819 | 3025 | } |
|---|
| 2820 | 3026 | |
|---|
| 2821 | | - if (map->stripes[i].dev) { |
|---|
| 2822 | | - ret = btrfs_update_device(trans, map->stripes[i].dev); |
|---|
| 2823 | | - if (ret) { |
|---|
| 2824 | | - mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 2825 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 2826 | | - goto out; |
|---|
| 2827 | | - } |
|---|
| 3027 | + ret = btrfs_update_device(trans, device); |
|---|
| 3028 | + if (ret) { |
|---|
| 3029 | + mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 3030 | + btrfs_abort_transaction(trans, ret); |
|---|
| 3031 | + goto out; |
|---|
| 2828 | 3032 | } |
|---|
| 2829 | 3033 | } |
|---|
| 2830 | 3034 | mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| .. | .. |
|---|
| 2861 | 3065 | { |
|---|
| 2862 | 3066 | struct btrfs_root *root = fs_info->chunk_root; |
|---|
| 2863 | 3067 | struct btrfs_trans_handle *trans; |
|---|
| 3068 | + struct btrfs_block_group *block_group; |
|---|
| 2864 | 3069 | int ret; |
|---|
| 2865 | 3070 | |
|---|
| 2866 | 3071 | /* |
|---|
| .. | .. |
|---|
| 2877 | 3082 | */ |
|---|
| 2878 | 3083 | lockdep_assert_held(&fs_info->delete_unused_bgs_mutex); |
|---|
| 2879 | 3084 | |
|---|
| 2880 | | - ret = btrfs_can_relocate(fs_info, chunk_offset); |
|---|
| 2881 | | - if (ret) |
|---|
| 2882 | | - return -ENOSPC; |
|---|
| 2883 | | - |
|---|
| 2884 | 3085 | /* step one, relocate all the extents inside this chunk */ |
|---|
| 2885 | 3086 | btrfs_scrub_pause(fs_info); |
|---|
| 2886 | 3087 | ret = btrfs_relocate_block_group(fs_info, chunk_offset); |
|---|
| .. | .. |
|---|
| 2888 | 3089 | if (ret) |
|---|
| 2889 | 3090 | return ret; |
|---|
| 2890 | 3091 | |
|---|
| 2891 | | - /* |
|---|
| 2892 | | - * We add the kobjects here (and after forcing data chunk creation) |
|---|
| 2893 | | - * since relocation is the only place we'll create chunks of a new |
|---|
| 2894 | | - * type at runtime. The only place where we'll remove the last |
|---|
| 2895 | | - * chunk of a type is the call immediately below this one. Even |
|---|
| 2896 | | - * so, we're protected against races with the cleaner thread since |
|---|
| 2897 | | - * we're covered by the delete_unused_bgs_mutex. |
|---|
| 2898 | | - */ |
|---|
| 2899 | | - btrfs_add_raid_kobjects(fs_info); |
|---|
| 3092 | + block_group = btrfs_lookup_block_group(fs_info, chunk_offset); |
|---|
| 3093 | + if (!block_group) |
|---|
| 3094 | + return -ENOENT; |
|---|
| 3095 | + btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group); |
|---|
| 3096 | + btrfs_put_block_group(block_group); |
|---|
| 2900 | 3097 | |
|---|
| 2901 | 3098 | trans = btrfs_start_trans_remove_block_group(root->fs_info, |
|---|
| 2902 | 3099 | chunk_offset); |
|---|
| .. | .. |
|---|
| 2997 | 3194 | static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, |
|---|
| 2998 | 3195 | u64 chunk_offset) |
|---|
| 2999 | 3196 | { |
|---|
| 3000 | | - struct btrfs_block_group_cache *cache; |
|---|
| 3197 | + struct btrfs_block_group *cache; |
|---|
| 3001 | 3198 | u64 bytes_used; |
|---|
| 3002 | 3199 | u64 chunk_type; |
|---|
| 3003 | 3200 | |
|---|
| .. | .. |
|---|
| 3006 | 3203 | chunk_type = cache->flags; |
|---|
| 3007 | 3204 | btrfs_put_block_group(cache); |
|---|
| 3008 | 3205 | |
|---|
| 3009 | | - if (chunk_type & BTRFS_BLOCK_GROUP_DATA) { |
|---|
| 3010 | | - spin_lock(&fs_info->data_sinfo->lock); |
|---|
| 3011 | | - bytes_used = fs_info->data_sinfo->bytes_used; |
|---|
| 3012 | | - spin_unlock(&fs_info->data_sinfo->lock); |
|---|
| 3206 | + if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA)) |
|---|
| 3207 | + return 0; |
|---|
| 3013 | 3208 | |
|---|
| 3014 | | - if (!bytes_used) { |
|---|
| 3015 | | - struct btrfs_trans_handle *trans; |
|---|
| 3016 | | - int ret; |
|---|
| 3209 | + spin_lock(&fs_info->data_sinfo->lock); |
|---|
| 3210 | + bytes_used = fs_info->data_sinfo->bytes_used; |
|---|
| 3211 | + spin_unlock(&fs_info->data_sinfo->lock); |
|---|
| 3017 | 3212 | |
|---|
| 3018 | | - trans = btrfs_join_transaction(fs_info->tree_root); |
|---|
| 3019 | | - if (IS_ERR(trans)) |
|---|
| 3020 | | - return PTR_ERR(trans); |
|---|
| 3213 | + if (!bytes_used) { |
|---|
| 3214 | + struct btrfs_trans_handle *trans; |
|---|
| 3215 | + int ret; |
|---|
| 3021 | 3216 | |
|---|
| 3022 | | - ret = btrfs_force_chunk_alloc(trans, |
|---|
| 3023 | | - BTRFS_BLOCK_GROUP_DATA); |
|---|
| 3024 | | - btrfs_end_transaction(trans); |
|---|
| 3025 | | - if (ret < 0) |
|---|
| 3026 | | - return ret; |
|---|
| 3217 | + trans = btrfs_join_transaction(fs_info->tree_root); |
|---|
| 3218 | + if (IS_ERR(trans)) |
|---|
| 3219 | + return PTR_ERR(trans); |
|---|
| 3027 | 3220 | |
|---|
| 3028 | | - btrfs_add_raid_kobjects(fs_info); |
|---|
| 3029 | | - |
|---|
| 3030 | | - return 1; |
|---|
| 3031 | | - } |
|---|
| 3221 | + ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA); |
|---|
| 3222 | + btrfs_end_transaction(trans); |
|---|
| 3223 | + if (ret < 0) |
|---|
| 3224 | + return ret; |
|---|
| 3225 | + return 1; |
|---|
| 3032 | 3226 | } |
|---|
| 3227 | + |
|---|
| 3033 | 3228 | return 0; |
|---|
| 3034 | 3229 | } |
|---|
| 3035 | 3230 | |
|---|
| .. | .. |
|---|
| 3099 | 3294 | if (!path) |
|---|
| 3100 | 3295 | return -ENOMEM; |
|---|
| 3101 | 3296 | |
|---|
| 3102 | | - trans = btrfs_start_transaction(root, 0); |
|---|
| 3297 | + trans = btrfs_start_transaction_fallback_global_rsv(root, 0); |
|---|
| 3103 | 3298 | if (IS_ERR(trans)) { |
|---|
| 3104 | 3299 | btrfs_free_path(path); |
|---|
| 3105 | 3300 | return PTR_ERR(trans); |
|---|
| .. | .. |
|---|
| 3208 | 3403 | static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, |
|---|
| 3209 | 3404 | struct btrfs_balance_args *bargs) |
|---|
| 3210 | 3405 | { |
|---|
| 3211 | | - struct btrfs_block_group_cache *cache; |
|---|
| 3406 | + struct btrfs_block_group *cache; |
|---|
| 3212 | 3407 | u64 chunk_used; |
|---|
| 3213 | 3408 | u64 user_thresh_min; |
|---|
| 3214 | 3409 | u64 user_thresh_max; |
|---|
| 3215 | 3410 | int ret = 1; |
|---|
| 3216 | 3411 | |
|---|
| 3217 | 3412 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); |
|---|
| 3218 | | - chunk_used = btrfs_block_group_used(&cache->item); |
|---|
| 3413 | + chunk_used = cache->used; |
|---|
| 3219 | 3414 | |
|---|
| 3220 | 3415 | if (bargs->usage_min == 0) |
|---|
| 3221 | 3416 | user_thresh_min = 0; |
|---|
| 3222 | 3417 | else |
|---|
| 3223 | | - user_thresh_min = div_factor_fine(cache->key.offset, |
|---|
| 3224 | | - bargs->usage_min); |
|---|
| 3418 | + user_thresh_min = div_factor_fine(cache->length, |
|---|
| 3419 | + bargs->usage_min); |
|---|
| 3225 | 3420 | |
|---|
| 3226 | 3421 | if (bargs->usage_max == 0) |
|---|
| 3227 | 3422 | user_thresh_max = 1; |
|---|
| 3228 | 3423 | else if (bargs->usage_max > 100) |
|---|
| 3229 | | - user_thresh_max = cache->key.offset; |
|---|
| 3424 | + user_thresh_max = cache->length; |
|---|
| 3230 | 3425 | else |
|---|
| 3231 | | - user_thresh_max = div_factor_fine(cache->key.offset, |
|---|
| 3232 | | - bargs->usage_max); |
|---|
| 3426 | + user_thresh_max = div_factor_fine(cache->length, |
|---|
| 3427 | + bargs->usage_max); |
|---|
| 3233 | 3428 | |
|---|
| 3234 | 3429 | if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max) |
|---|
| 3235 | 3430 | ret = 0; |
|---|
| .. | .. |
|---|
| 3241 | 3436 | static int chunk_usage_filter(struct btrfs_fs_info *fs_info, |
|---|
| 3242 | 3437 | u64 chunk_offset, struct btrfs_balance_args *bargs) |
|---|
| 3243 | 3438 | { |
|---|
| 3244 | | - struct btrfs_block_group_cache *cache; |
|---|
| 3439 | + struct btrfs_block_group *cache; |
|---|
| 3245 | 3440 | u64 chunk_used, user_thresh; |
|---|
| 3246 | 3441 | int ret = 1; |
|---|
| 3247 | 3442 | |
|---|
| 3248 | 3443 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); |
|---|
| 3249 | | - chunk_used = btrfs_block_group_used(&cache->item); |
|---|
| 3444 | + chunk_used = cache->used; |
|---|
| 3250 | 3445 | |
|---|
| 3251 | 3446 | if (bargs->usage_min == 0) |
|---|
| 3252 | 3447 | user_thresh = 1; |
|---|
| 3253 | 3448 | else if (bargs->usage > 100) |
|---|
| 3254 | | - user_thresh = cache->key.offset; |
|---|
| 3449 | + user_thresh = cache->length; |
|---|
| 3255 | 3450 | else |
|---|
| 3256 | | - user_thresh = div_factor_fine(cache->key.offset, |
|---|
| 3257 | | - bargs->usage); |
|---|
| 3451 | + user_thresh = div_factor_fine(cache->length, bargs->usage); |
|---|
| 3258 | 3452 | |
|---|
| 3259 | 3453 | if (chunk_used < user_thresh) |
|---|
| 3260 | 3454 | ret = 0; |
|---|
| .. | .. |
|---|
| 3280 | 3474 | return 1; |
|---|
| 3281 | 3475 | } |
|---|
| 3282 | 3476 | |
|---|
| 3477 | +static u64 calc_data_stripes(u64 type, int num_stripes) |
|---|
| 3478 | +{ |
|---|
| 3479 | + const int index = btrfs_bg_flags_to_raid_index(type); |
|---|
| 3480 | + const int ncopies = btrfs_raid_array[index].ncopies; |
|---|
| 3481 | + const int nparity = btrfs_raid_array[index].nparity; |
|---|
| 3482 | + |
|---|
| 3483 | + if (nparity) |
|---|
| 3484 | + return num_stripes - nparity; |
|---|
| 3485 | + else |
|---|
| 3486 | + return num_stripes / ncopies; |
|---|
| 3487 | +} |
|---|
| 3488 | + |
|---|
| 3283 | 3489 | /* [pstart, pend) */ |
|---|
| 3284 | 3490 | static int chunk_drange_filter(struct extent_buffer *leaf, |
|---|
| 3285 | 3491 | struct btrfs_chunk *chunk, |
|---|
| .. | .. |
|---|
| 3289 | 3495 | int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); |
|---|
| 3290 | 3496 | u64 stripe_offset; |
|---|
| 3291 | 3497 | u64 stripe_length; |
|---|
| 3498 | + u64 type; |
|---|
| 3292 | 3499 | int factor; |
|---|
| 3293 | 3500 | int i; |
|---|
| 3294 | 3501 | |
|---|
| 3295 | 3502 | if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID)) |
|---|
| 3296 | 3503 | return 0; |
|---|
| 3297 | 3504 | |
|---|
| 3298 | | - if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP | |
|---|
| 3299 | | - BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) { |
|---|
| 3300 | | - factor = num_stripes / 2; |
|---|
| 3301 | | - } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) { |
|---|
| 3302 | | - factor = num_stripes - 1; |
|---|
| 3303 | | - } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) { |
|---|
| 3304 | | - factor = num_stripes - 2; |
|---|
| 3305 | | - } else { |
|---|
| 3306 | | - factor = num_stripes; |
|---|
| 3307 | | - } |
|---|
| 3505 | + type = btrfs_chunk_type(leaf, chunk); |
|---|
| 3506 | + factor = calc_data_stripes(type, num_stripes); |
|---|
| 3308 | 3507 | |
|---|
| 3309 | 3508 | for (i = 0; i < num_stripes; i++) { |
|---|
| 3310 | 3509 | stripe = btrfs_stripe_nr(chunk, i); |
|---|
| .. | .. |
|---|
| 3365 | 3564 | return 0; |
|---|
| 3366 | 3565 | } |
|---|
| 3367 | 3566 | |
|---|
| 3368 | | -static int should_balance_chunk(struct btrfs_fs_info *fs_info, |
|---|
| 3369 | | - struct extent_buffer *leaf, |
|---|
| 3567 | +static int should_balance_chunk(struct extent_buffer *leaf, |
|---|
| 3370 | 3568 | struct btrfs_chunk *chunk, u64 chunk_offset) |
|---|
| 3371 | 3569 | { |
|---|
| 3570 | + struct btrfs_fs_info *fs_info = leaf->fs_info; |
|---|
| 3372 | 3571 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
|---|
| 3373 | 3572 | struct btrfs_balance_args *bargs = NULL; |
|---|
| 3374 | 3573 | u64 chunk_type = btrfs_chunk_type(leaf, chunk); |
|---|
| .. | .. |
|---|
| 3458 | 3657 | { |
|---|
| 3459 | 3658 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
|---|
| 3460 | 3659 | struct btrfs_root *chunk_root = fs_info->chunk_root; |
|---|
| 3461 | | - struct btrfs_root *dev_root = fs_info->dev_root; |
|---|
| 3462 | | - struct list_head *devices; |
|---|
| 3463 | | - struct btrfs_device *device; |
|---|
| 3464 | | - u64 old_size; |
|---|
| 3465 | | - u64 size_to_free; |
|---|
| 3466 | 3660 | u64 chunk_type; |
|---|
| 3467 | 3661 | struct btrfs_chunk *chunk; |
|---|
| 3468 | 3662 | struct btrfs_path *path = NULL; |
|---|
| 3469 | 3663 | struct btrfs_key key; |
|---|
| 3470 | 3664 | struct btrfs_key found_key; |
|---|
| 3471 | | - struct btrfs_trans_handle *trans; |
|---|
| 3472 | 3665 | struct extent_buffer *leaf; |
|---|
| 3473 | 3666 | int slot; |
|---|
| 3474 | 3667 | int ret; |
|---|
| .. | .. |
|---|
| 3483 | 3676 | u32 count_sys = 0; |
|---|
| 3484 | 3677 | int chunk_reserved = 0; |
|---|
| 3485 | 3678 | |
|---|
| 3486 | | - /* step one make some room on all the devices */ |
|---|
| 3487 | | - devices = &fs_info->fs_devices->devices; |
|---|
| 3488 | | - list_for_each_entry(device, devices, dev_list) { |
|---|
| 3489 | | - old_size = btrfs_device_get_total_bytes(device); |
|---|
| 3490 | | - size_to_free = div_factor(old_size, 1); |
|---|
| 3491 | | - size_to_free = min_t(u64, size_to_free, SZ_1M); |
|---|
| 3492 | | - if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) || |
|---|
| 3493 | | - btrfs_device_get_total_bytes(device) - |
|---|
| 3494 | | - btrfs_device_get_bytes_used(device) > size_to_free || |
|---|
| 3495 | | - test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) |
|---|
| 3496 | | - continue; |
|---|
| 3497 | | - |
|---|
| 3498 | | - ret = btrfs_shrink_device(device, old_size - size_to_free); |
|---|
| 3499 | | - if (ret == -ENOSPC) |
|---|
| 3500 | | - break; |
|---|
| 3501 | | - if (ret) { |
|---|
| 3502 | | - /* btrfs_shrink_device never returns ret > 0 */ |
|---|
| 3503 | | - WARN_ON(ret > 0); |
|---|
| 3504 | | - goto error; |
|---|
| 3505 | | - } |
|---|
| 3506 | | - |
|---|
| 3507 | | - trans = btrfs_start_transaction(dev_root, 0); |
|---|
| 3508 | | - if (IS_ERR(trans)) { |
|---|
| 3509 | | - ret = PTR_ERR(trans); |
|---|
| 3510 | | - btrfs_info_in_rcu(fs_info, |
|---|
| 3511 | | - "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu", |
|---|
| 3512 | | - rcu_str_deref(device->name), ret, |
|---|
| 3513 | | - old_size, old_size - size_to_free); |
|---|
| 3514 | | - goto error; |
|---|
| 3515 | | - } |
|---|
| 3516 | | - |
|---|
| 3517 | | - ret = btrfs_grow_device(trans, device, old_size); |
|---|
| 3518 | | - if (ret) { |
|---|
| 3519 | | - btrfs_end_transaction(trans); |
|---|
| 3520 | | - /* btrfs_grow_device never returns ret > 0 */ |
|---|
| 3521 | | - WARN_ON(ret > 0); |
|---|
| 3522 | | - btrfs_info_in_rcu(fs_info, |
|---|
| 3523 | | - "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu", |
|---|
| 3524 | | - rcu_str_deref(device->name), ret, |
|---|
| 3525 | | - old_size, old_size - size_to_free); |
|---|
| 3526 | | - goto error; |
|---|
| 3527 | | - } |
|---|
| 3528 | | - |
|---|
| 3529 | | - btrfs_end_transaction(trans); |
|---|
| 3530 | | - } |
|---|
| 3531 | | - |
|---|
| 3532 | | - /* step two, relocate all the chunks */ |
|---|
| 3533 | 3679 | path = btrfs_alloc_path(); |
|---|
| 3534 | 3680 | if (!path) { |
|---|
| 3535 | 3681 | ret = -ENOMEM; |
|---|
| .. | .. |
|---|
| 3601 | 3747 | spin_unlock(&fs_info->balance_lock); |
|---|
| 3602 | 3748 | } |
|---|
| 3603 | 3749 | |
|---|
| 3604 | | - ret = should_balance_chunk(fs_info, leaf, chunk, |
|---|
| 3605 | | - found_key.offset); |
|---|
| 3750 | + ret = should_balance_chunk(leaf, chunk, found_key.offset); |
|---|
| 3606 | 3751 | |
|---|
| 3607 | 3752 | btrfs_release_path(path); |
|---|
| 3608 | 3753 | if (!ret) { |
|---|
| .. | .. |
|---|
| 3659 | 3804 | |
|---|
| 3660 | 3805 | ret = btrfs_relocate_chunk(fs_info, found_key.offset); |
|---|
| 3661 | 3806 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); |
|---|
| 3662 | | - if (ret && ret != -ENOSPC) |
|---|
| 3663 | | - goto error; |
|---|
| 3664 | 3807 | if (ret == -ENOSPC) { |
|---|
| 3665 | 3808 | enospc_errors++; |
|---|
| 3809 | + } else if (ret == -ETXTBSY) { |
|---|
| 3810 | + btrfs_info(fs_info, |
|---|
| 3811 | + "skipping relocation of block group %llu due to active swapfile", |
|---|
| 3812 | + found_key.offset); |
|---|
| 3813 | + ret = 0; |
|---|
| 3814 | + } else if (ret) { |
|---|
| 3815 | + goto error; |
|---|
| 3666 | 3816 | } else { |
|---|
| 3667 | 3817 | spin_lock(&fs_info->balance_lock); |
|---|
| 3668 | 3818 | bctl->stat.completed++; |
|---|
| .. | .. |
|---|
| 3711 | 3861 | if (flags == 0) |
|---|
| 3712 | 3862 | return !extended; /* "0" is valid for usual profiles */ |
|---|
| 3713 | 3863 | |
|---|
| 3714 | | - /* true if exactly one bit set */ |
|---|
| 3715 | | - return (flags & (flags - 1)) == 0; |
|---|
| 3864 | + return has_single_bit_set(flags); |
|---|
| 3716 | 3865 | } |
|---|
| 3717 | 3866 | |
|---|
| 3718 | 3867 | static inline int balance_need_close(struct btrfs_fs_info *fs_info) |
|---|
| .. | .. |
|---|
| 3723 | 3872 | atomic_read(&fs_info->balance_cancel_req) == 0); |
|---|
| 3724 | 3873 | } |
|---|
| 3725 | 3874 | |
|---|
| 3726 | | -/* Non-zero return value signifies invalidity */ |
|---|
| 3727 | | -static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg, |
|---|
| 3728 | | - u64 allowed) |
|---|
| 3875 | +/* |
|---|
| 3876 | + * Validate target profile against allowed profiles and return true if it's OK. |
|---|
| 3877 | + * Otherwise print the error message and return false. |
|---|
| 3878 | + */ |
|---|
| 3879 | +static inline int validate_convert_profile(struct btrfs_fs_info *fs_info, |
|---|
| 3880 | + const struct btrfs_balance_args *bargs, |
|---|
| 3881 | + u64 allowed, const char *type) |
|---|
| 3729 | 3882 | { |
|---|
| 3730 | | - return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) && |
|---|
| 3731 | | - (!alloc_profile_is_valid(bctl_arg->target, 1) || |
|---|
| 3732 | | - (bctl_arg->target & ~allowed))); |
|---|
| 3883 | + if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) |
|---|
| 3884 | + return true; |
|---|
| 3885 | + |
|---|
| 3886 | + /* Profile is valid and does not have bits outside of the allowed set */ |
|---|
| 3887 | + if (alloc_profile_is_valid(bargs->target, 1) && |
|---|
| 3888 | + (bargs->target & ~allowed) == 0) |
|---|
| 3889 | + return true; |
|---|
| 3890 | + |
|---|
| 3891 | + btrfs_err(fs_info, "balance: invalid convert %s profile %s", |
|---|
| 3892 | + type, btrfs_bg_type_to_raid_name(bargs->target)); |
|---|
| 3893 | + return false; |
|---|
| 3894 | +} |
|---|
| 3895 | + |
|---|
| 3896 | +/* |
|---|
| 3897 | + * Fill @buf with textual description of balance filter flags @bargs, up to |
|---|
| 3898 | + * @size_buf including the terminating null. The output may be trimmed if it |
|---|
| 3899 | + * does not fit into the provided buffer. |
|---|
| 3900 | + */ |
|---|
| 3901 | +static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf, |
|---|
| 3902 | + u32 size_buf) |
|---|
| 3903 | +{ |
|---|
| 3904 | + int ret; |
|---|
| 3905 | + u32 size_bp = size_buf; |
|---|
| 3906 | + char *bp = buf; |
|---|
| 3907 | + u64 flags = bargs->flags; |
|---|
| 3908 | + char tmp_buf[128] = {'\0'}; |
|---|
| 3909 | + |
|---|
| 3910 | + if (!flags) |
|---|
| 3911 | + return; |
|---|
| 3912 | + |
|---|
| 3913 | +#define CHECK_APPEND_NOARG(a) \ |
|---|
| 3914 | + do { \ |
|---|
| 3915 | + ret = snprintf(bp, size_bp, (a)); \ |
|---|
| 3916 | + if (ret < 0 || ret >= size_bp) \ |
|---|
| 3917 | + goto out_overflow; \ |
|---|
| 3918 | + size_bp -= ret; \ |
|---|
| 3919 | + bp += ret; \ |
|---|
| 3920 | + } while (0) |
|---|
| 3921 | + |
|---|
| 3922 | +#define CHECK_APPEND_1ARG(a, v1) \ |
|---|
| 3923 | + do { \ |
|---|
| 3924 | + ret = snprintf(bp, size_bp, (a), (v1)); \ |
|---|
| 3925 | + if (ret < 0 || ret >= size_bp) \ |
|---|
| 3926 | + goto out_overflow; \ |
|---|
| 3927 | + size_bp -= ret; \ |
|---|
| 3928 | + bp += ret; \ |
|---|
| 3929 | + } while (0) |
|---|
| 3930 | + |
|---|
| 3931 | +#define CHECK_APPEND_2ARG(a, v1, v2) \ |
|---|
| 3932 | + do { \ |
|---|
| 3933 | + ret = snprintf(bp, size_bp, (a), (v1), (v2)); \ |
|---|
| 3934 | + if (ret < 0 || ret >= size_bp) \ |
|---|
| 3935 | + goto out_overflow; \ |
|---|
| 3936 | + size_bp -= ret; \ |
|---|
| 3937 | + bp += ret; \ |
|---|
| 3938 | + } while (0) |
|---|
| 3939 | + |
|---|
| 3940 | + if (flags & BTRFS_BALANCE_ARGS_CONVERT) |
|---|
| 3941 | + CHECK_APPEND_1ARG("convert=%s,", |
|---|
| 3942 | + btrfs_bg_type_to_raid_name(bargs->target)); |
|---|
| 3943 | + |
|---|
| 3944 | + if (flags & BTRFS_BALANCE_ARGS_SOFT) |
|---|
| 3945 | + CHECK_APPEND_NOARG("soft,"); |
|---|
| 3946 | + |
|---|
| 3947 | + if (flags & BTRFS_BALANCE_ARGS_PROFILES) { |
|---|
| 3948 | + btrfs_describe_block_groups(bargs->profiles, tmp_buf, |
|---|
| 3949 | + sizeof(tmp_buf)); |
|---|
| 3950 | + CHECK_APPEND_1ARG("profiles=%s,", tmp_buf); |
|---|
| 3951 | + } |
|---|
| 3952 | + |
|---|
| 3953 | + if (flags & BTRFS_BALANCE_ARGS_USAGE) |
|---|
| 3954 | + CHECK_APPEND_1ARG("usage=%llu,", bargs->usage); |
|---|
| 3955 | + |
|---|
| 3956 | + if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) |
|---|
| 3957 | + CHECK_APPEND_2ARG("usage=%u..%u,", |
|---|
| 3958 | + bargs->usage_min, bargs->usage_max); |
|---|
| 3959 | + |
|---|
| 3960 | + if (flags & BTRFS_BALANCE_ARGS_DEVID) |
|---|
| 3961 | + CHECK_APPEND_1ARG("devid=%llu,", bargs->devid); |
|---|
| 3962 | + |
|---|
| 3963 | + if (flags & BTRFS_BALANCE_ARGS_DRANGE) |
|---|
| 3964 | + CHECK_APPEND_2ARG("drange=%llu..%llu,", |
|---|
| 3965 | + bargs->pstart, bargs->pend); |
|---|
| 3966 | + |
|---|
| 3967 | + if (flags & BTRFS_BALANCE_ARGS_VRANGE) |
|---|
| 3968 | + CHECK_APPEND_2ARG("vrange=%llu..%llu,", |
|---|
| 3969 | + bargs->vstart, bargs->vend); |
|---|
| 3970 | + |
|---|
| 3971 | + if (flags & BTRFS_BALANCE_ARGS_LIMIT) |
|---|
| 3972 | + CHECK_APPEND_1ARG("limit=%llu,", bargs->limit); |
|---|
| 3973 | + |
|---|
| 3974 | + if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE) |
|---|
| 3975 | + CHECK_APPEND_2ARG("limit=%u..%u,", |
|---|
| 3976 | + bargs->limit_min, bargs->limit_max); |
|---|
| 3977 | + |
|---|
| 3978 | + if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) |
|---|
| 3979 | + CHECK_APPEND_2ARG("stripes=%u..%u,", |
|---|
| 3980 | + bargs->stripes_min, bargs->stripes_max); |
|---|
| 3981 | + |
|---|
| 3982 | +#undef CHECK_APPEND_2ARG |
|---|
| 3983 | +#undef CHECK_APPEND_1ARG |
|---|
| 3984 | +#undef CHECK_APPEND_NOARG |
|---|
| 3985 | + |
|---|
| 3986 | +out_overflow: |
|---|
| 3987 | + |
|---|
| 3988 | + if (size_bp < size_buf) |
|---|
| 3989 | + buf[size_buf - size_bp - 1] = '\0'; /* remove last , */ |
|---|
| 3990 | + else |
|---|
| 3991 | + buf[0] = '\0'; |
|---|
| 3992 | +} |
|---|
| 3993 | + |
|---|
| 3994 | +static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info) |
|---|
| 3995 | +{ |
|---|
| 3996 | + u32 size_buf = 1024; |
|---|
| 3997 | + char tmp_buf[192] = {'\0'}; |
|---|
| 3998 | + char *buf; |
|---|
| 3999 | + char *bp; |
|---|
| 4000 | + u32 size_bp = size_buf; |
|---|
| 4001 | + int ret; |
|---|
| 4002 | + struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
|---|
| 4003 | + |
|---|
| 4004 | + buf = kzalloc(size_buf, GFP_KERNEL); |
|---|
| 4005 | + if (!buf) |
|---|
| 4006 | + return; |
|---|
| 4007 | + |
|---|
| 4008 | + bp = buf; |
|---|
| 4009 | + |
|---|
| 4010 | +#define CHECK_APPEND_1ARG(a, v1) \ |
|---|
| 4011 | + do { \ |
|---|
| 4012 | + ret = snprintf(bp, size_bp, (a), (v1)); \ |
|---|
| 4013 | + if (ret < 0 || ret >= size_bp) \ |
|---|
| 4014 | + goto out_overflow; \ |
|---|
| 4015 | + size_bp -= ret; \ |
|---|
| 4016 | + bp += ret; \ |
|---|
| 4017 | + } while (0) |
|---|
| 4018 | + |
|---|
| 4019 | + if (bctl->flags & BTRFS_BALANCE_FORCE) |
|---|
| 4020 | + CHECK_APPEND_1ARG("%s", "-f "); |
|---|
| 4021 | + |
|---|
| 4022 | + if (bctl->flags & BTRFS_BALANCE_DATA) { |
|---|
| 4023 | + describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf)); |
|---|
| 4024 | + CHECK_APPEND_1ARG("-d%s ", tmp_buf); |
|---|
| 4025 | + } |
|---|
| 4026 | + |
|---|
| 4027 | + if (bctl->flags & BTRFS_BALANCE_METADATA) { |
|---|
| 4028 | + describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf)); |
|---|
| 4029 | + CHECK_APPEND_1ARG("-m%s ", tmp_buf); |
|---|
| 4030 | + } |
|---|
| 4031 | + |
|---|
| 4032 | + if (bctl->flags & BTRFS_BALANCE_SYSTEM) { |
|---|
| 4033 | + describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf)); |
|---|
| 4034 | + CHECK_APPEND_1ARG("-s%s ", tmp_buf); |
|---|
| 4035 | + } |
|---|
| 4036 | + |
|---|
| 4037 | +#undef CHECK_APPEND_1ARG |
|---|
| 4038 | + |
|---|
| 4039 | +out_overflow: |
|---|
| 4040 | + |
|---|
| 4041 | + if (size_bp < size_buf) |
|---|
| 4042 | + buf[size_buf - size_bp - 1] = '\0'; /* remove last " " */ |
|---|
| 4043 | + btrfs_info(fs_info, "balance: %s %s", |
|---|
| 4044 | + (bctl->flags & BTRFS_BALANCE_RESUME) ? |
|---|
| 4045 | + "resume" : "start", buf); |
|---|
| 4046 | + |
|---|
| 4047 | + kfree(buf); |
|---|
| 3733 | 4048 | } |
|---|
| 3734 | 4049 | |
|---|
| 3735 | 4050 | /* |
|---|
| .. | .. |
|---|
| 3745 | 4060 | int ret; |
|---|
| 3746 | 4061 | u64 num_devices; |
|---|
| 3747 | 4062 | unsigned seq; |
|---|
| 3748 | | - bool reducing_integrity; |
|---|
| 4063 | + bool reducing_redundancy; |
|---|
| 4064 | + int i; |
|---|
| 3749 | 4065 | |
|---|
| 3750 | 4066 | if (btrfs_fs_closing(fs_info) || |
|---|
| 3751 | 4067 | atomic_read(&fs_info->balance_pause_req) || |
|---|
| 3752 | | - atomic_read(&fs_info->balance_cancel_req)) { |
|---|
| 4068 | + btrfs_should_cancel_balance(fs_info)) { |
|---|
| 3753 | 4069 | ret = -EINVAL; |
|---|
| 3754 | 4070 | goto out; |
|---|
| 3755 | 4071 | } |
|---|
| .. | .. |
|---|
| 3774 | 4090 | } |
|---|
| 3775 | 4091 | } |
|---|
| 3776 | 4092 | |
|---|
| 3777 | | - num_devices = fs_info->fs_devices->num_devices; |
|---|
| 3778 | | - btrfs_dev_replace_read_lock(&fs_info->dev_replace); |
|---|
| 3779 | | - if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { |
|---|
| 3780 | | - BUG_ON(num_devices < 1); |
|---|
| 3781 | | - num_devices--; |
|---|
| 3782 | | - } |
|---|
| 3783 | | - btrfs_dev_replace_read_unlock(&fs_info->dev_replace); |
|---|
| 3784 | | - allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP; |
|---|
| 3785 | | - if (num_devices > 1) |
|---|
| 3786 | | - allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); |
|---|
| 3787 | | - if (num_devices > 2) |
|---|
| 3788 | | - allowed |= BTRFS_BLOCK_GROUP_RAID5; |
|---|
| 3789 | | - if (num_devices > 3) |
|---|
| 3790 | | - allowed |= (BTRFS_BLOCK_GROUP_RAID10 | |
|---|
| 3791 | | - BTRFS_BLOCK_GROUP_RAID6); |
|---|
| 3792 | | - if (validate_convert_profile(&bctl->data, allowed)) { |
|---|
| 3793 | | - int index = btrfs_bg_flags_to_raid_index(bctl->data.target); |
|---|
| 4093 | + /* |
|---|
| 4094 | + * rw_devices will not change at the moment, device add/delete/replace |
|---|
| 4095 | + * are exclusive |
|---|
| 4096 | + */ |
|---|
| 4097 | + num_devices = fs_info->fs_devices->rw_devices; |
|---|
| 3794 | 4098 | |
|---|
| 3795 | | - btrfs_err(fs_info, |
|---|
| 3796 | | - "balance: invalid convert data profile %s", |
|---|
| 3797 | | - get_raid_name(index)); |
|---|
| 3798 | | - ret = -EINVAL; |
|---|
| 3799 | | - goto out; |
|---|
| 3800 | | - } |
|---|
| 3801 | | - if (validate_convert_profile(&bctl->meta, allowed)) { |
|---|
| 3802 | | - int index = btrfs_bg_flags_to_raid_index(bctl->meta.target); |
|---|
| 4099 | + /* |
|---|
| 4100 | + * SINGLE profile on-disk has no profile bit, but in-memory we have a |
|---|
| 4101 | + * special bit for it, to make it easier to distinguish. Thus we need |
|---|
| 4102 | + * to set it manually, or balance would refuse the profile. |
|---|
| 4103 | + */ |
|---|
| 4104 | + allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; |
|---|
| 4105 | + for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) |
|---|
| 4106 | + if (num_devices >= btrfs_raid_array[i].devs_min) |
|---|
| 4107 | + allowed |= btrfs_raid_array[i].bg_flag; |
|---|
| 3803 | 4108 | |
|---|
| 3804 | | - btrfs_err(fs_info, |
|---|
| 3805 | | - "balance: invalid convert metadata profile %s", |
|---|
| 3806 | | - get_raid_name(index)); |
|---|
| 3807 | | - ret = -EINVAL; |
|---|
| 3808 | | - goto out; |
|---|
| 3809 | | - } |
|---|
| 3810 | | - if (validate_convert_profile(&bctl->sys, allowed)) { |
|---|
| 3811 | | - int index = btrfs_bg_flags_to_raid_index(bctl->sys.target); |
|---|
| 3812 | | - |
|---|
| 3813 | | - btrfs_err(fs_info, |
|---|
| 3814 | | - "balance: invalid convert system profile %s", |
|---|
| 3815 | | - get_raid_name(index)); |
|---|
| 4109 | + if (!validate_convert_profile(fs_info, &bctl->data, allowed, "data") || |
|---|
| 4110 | + !validate_convert_profile(fs_info, &bctl->meta, allowed, "metadata") || |
|---|
| 4111 | + !validate_convert_profile(fs_info, &bctl->sys, allowed, "system")) { |
|---|
| 3816 | 4112 | ret = -EINVAL; |
|---|
| 3817 | 4113 | goto out; |
|---|
| 3818 | 4114 | } |
|---|
| 3819 | 4115 | |
|---|
| 3820 | | - /* allow to reduce meta or sys integrity only if force set */ |
|---|
| 3821 | | - allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
|---|
| 3822 | | - BTRFS_BLOCK_GROUP_RAID10 | |
|---|
| 3823 | | - BTRFS_BLOCK_GROUP_RAID5 | |
|---|
| 3824 | | - BTRFS_BLOCK_GROUP_RAID6; |
|---|
| 4116 | + /* |
|---|
| 4117 | + * Allow to reduce metadata or system integrity only if force set for |
|---|
| 4118 | + * profiles with redundancy (copies, parity) |
|---|
| 4119 | + */ |
|---|
| 4120 | + allowed = 0; |
|---|
| 4121 | + for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) { |
|---|
| 4122 | + if (btrfs_raid_array[i].ncopies >= 2 || |
|---|
| 4123 | + btrfs_raid_array[i].tolerated_failures >= 1) |
|---|
| 4124 | + allowed |= btrfs_raid_array[i].bg_flag; |
|---|
| 4125 | + } |
|---|
| 3825 | 4126 | do { |
|---|
| 3826 | 4127 | seq = read_seqbegin(&fs_info->profiles_lock); |
|---|
| 3827 | 4128 | |
|---|
| .. | .. |
|---|
| 3831 | 4132 | ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && |
|---|
| 3832 | 4133 | (fs_info->avail_metadata_alloc_bits & allowed) && |
|---|
| 3833 | 4134 | !(bctl->meta.target & allowed))) |
|---|
| 3834 | | - reducing_integrity = true; |
|---|
| 4135 | + reducing_redundancy = true; |
|---|
| 3835 | 4136 | else |
|---|
| 3836 | | - reducing_integrity = false; |
|---|
| 4137 | + reducing_redundancy = false; |
|---|
| 3837 | 4138 | |
|---|
| 3838 | 4139 | /* if we're not converting, the target field is uninitialized */ |
|---|
| 3839 | 4140 | meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? |
|---|
| .. | .. |
|---|
| 3842 | 4143 | bctl->data.target : fs_info->avail_data_alloc_bits; |
|---|
| 3843 | 4144 | } while (read_seqretry(&fs_info->profiles_lock, seq)); |
|---|
| 3844 | 4145 | |
|---|
| 3845 | | - if (reducing_integrity) { |
|---|
| 4146 | + if (reducing_redundancy) { |
|---|
| 3846 | 4147 | if (bctl->flags & BTRFS_BALANCE_FORCE) { |
|---|
| 3847 | 4148 | btrfs_info(fs_info, |
|---|
| 3848 | | - "balance: force reducing metadata integrity"); |
|---|
| 4149 | + "balance: force reducing metadata redundancy"); |
|---|
| 3849 | 4150 | } else { |
|---|
| 3850 | 4151 | btrfs_err(fs_info, |
|---|
| 3851 | | - "balance: reduces metadata integrity, use --force if you want this"); |
|---|
| 4152 | + "balance: reduces metadata redundancy, use --force if you want this"); |
|---|
| 3852 | 4153 | ret = -EINVAL; |
|---|
| 3853 | 4154 | goto out; |
|---|
| 3854 | 4155 | } |
|---|
| .. | .. |
|---|
| 3856 | 4157 | |
|---|
| 3857 | 4158 | if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < |
|---|
| 3858 | 4159 | btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { |
|---|
| 3859 | | - int meta_index = btrfs_bg_flags_to_raid_index(meta_target); |
|---|
| 3860 | | - int data_index = btrfs_bg_flags_to_raid_index(data_target); |
|---|
| 3861 | | - |
|---|
| 3862 | 4160 | btrfs_warn(fs_info, |
|---|
| 3863 | 4161 | "balance: metadata profile %s has lower redundancy than data profile %s", |
|---|
| 3864 | | - get_raid_name(meta_index), get_raid_name(data_index)); |
|---|
| 4162 | + btrfs_bg_type_to_raid_name(meta_target), |
|---|
| 4163 | + btrfs_bg_type_to_raid_name(data_target)); |
|---|
| 4164 | + } |
|---|
| 4165 | + |
|---|
| 4166 | + if (fs_info->send_in_progress) { |
|---|
| 4167 | + btrfs_warn_rl(fs_info, |
|---|
| 4168 | +"cannot run balance while send operations are in progress (%d in progress)", |
|---|
| 4169 | + fs_info->send_in_progress); |
|---|
| 4170 | + ret = -EAGAIN; |
|---|
| 4171 | + goto out; |
|---|
| 3865 | 4172 | } |
|---|
| 3866 | 4173 | |
|---|
| 3867 | 4174 | ret = insert_balance_item(fs_info, bctl); |
|---|
| .. | .. |
|---|
| 3883 | 4190 | |
|---|
| 3884 | 4191 | ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)); |
|---|
| 3885 | 4192 | set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags); |
|---|
| 4193 | + describe_balance_start_or_resume(fs_info); |
|---|
| 3886 | 4194 | mutex_unlock(&fs_info->balance_mutex); |
|---|
| 3887 | 4195 | |
|---|
| 3888 | 4196 | ret = __btrfs_balance(fs_info); |
|---|
| 3889 | 4197 | |
|---|
| 3890 | 4198 | mutex_lock(&fs_info->balance_mutex); |
|---|
| 4199 | + if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) |
|---|
| 4200 | + btrfs_info(fs_info, "balance: paused"); |
|---|
| 4201 | + /* |
|---|
| 4202 | + * Balance can be canceled by: |
|---|
| 4203 | + * |
|---|
| 4204 | + * - Regular cancel request |
|---|
| 4205 | + * Then ret == -ECANCELED and balance_cancel_req > 0 |
|---|
| 4206 | + * |
|---|
| 4207 | + * - Fatal signal to "btrfs" process |
|---|
| 4208 | + * Either the signal caught by wait_reserve_ticket() and callers |
|---|
| 4209 | + * got -EINTR, or caught by btrfs_should_cancel_balance() and |
|---|
| 4210 | + * got -ECANCELED. |
|---|
| 4211 | + * Either way, in this case balance_cancel_req = 0, and |
|---|
| 4212 | + * ret == -EINTR or ret == -ECANCELED. |
|---|
| 4213 | + * |
|---|
| 4214 | + * So here we only check the return value to catch canceled balance. |
|---|
| 4215 | + */ |
|---|
| 4216 | + else if (ret == -ECANCELED || ret == -EINTR) |
|---|
| 4217 | + btrfs_info(fs_info, "balance: canceled"); |
|---|
| 4218 | + else |
|---|
| 4219 | + btrfs_info(fs_info, "balance: ended with status: %d", ret); |
|---|
| 4220 | + |
|---|
| 3891 | 4221 | clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags); |
|---|
| 3892 | 4222 | |
|---|
| 3893 | 4223 | if (bargs) { |
|---|
| .. | .. |
|---|
| 3898 | 4228 | if ((ret && ret != -ECANCELED && ret != -ENOSPC) || |
|---|
| 3899 | 4229 | balance_need_close(fs_info)) { |
|---|
| 3900 | 4230 | reset_balance_state(fs_info); |
|---|
| 3901 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 4231 | + btrfs_exclop_finish(fs_info); |
|---|
| 3902 | 4232 | } |
|---|
| 3903 | 4233 | |
|---|
| 3904 | 4234 | wake_up(&fs_info->balance_wait_q); |
|---|
| .. | .. |
|---|
| 3909 | 4239 | reset_balance_state(fs_info); |
|---|
| 3910 | 4240 | else |
|---|
| 3911 | 4241 | kfree(bctl); |
|---|
| 3912 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 4242 | + btrfs_exclop_finish(fs_info); |
|---|
| 3913 | 4243 | |
|---|
| 3914 | 4244 | return ret; |
|---|
| 3915 | 4245 | } |
|---|
| .. | .. |
|---|
| 3919 | 4249 | struct btrfs_fs_info *fs_info = data; |
|---|
| 3920 | 4250 | int ret = 0; |
|---|
| 3921 | 4251 | |
|---|
| 4252 | + sb_start_write(fs_info->sb); |
|---|
| 3922 | 4253 | mutex_lock(&fs_info->balance_mutex); |
|---|
| 3923 | | - if (fs_info->balance_ctl) { |
|---|
| 3924 | | - btrfs_info(fs_info, "balance: resuming"); |
|---|
| 4254 | + if (fs_info->balance_ctl) |
|---|
| 3925 | 4255 | ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); |
|---|
| 3926 | | - } |
|---|
| 3927 | 4256 | mutex_unlock(&fs_info->balance_mutex); |
|---|
| 4257 | + sb_end_write(fs_info->sb); |
|---|
| 3928 | 4258 | |
|---|
| 3929 | 4259 | return ret; |
|---|
| 3930 | 4260 | } |
|---|
| .. | .. |
|---|
| 4013 | 4343 | * is in a paused state and must have fs_info::balance_ctl properly |
|---|
| 4014 | 4344 | * set up. |
|---|
| 4015 | 4345 | */ |
|---|
| 4016 | | - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) |
|---|
| 4346 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) |
|---|
| 4017 | 4347 | btrfs_warn(fs_info, |
|---|
| 4018 | 4348 | "balance: cannot set exclusive op status, resume manually"); |
|---|
| 4019 | 4349 | |
|---|
| .. | .. |
|---|
| 4097 | 4427 | |
|---|
| 4098 | 4428 | if (fs_info->balance_ctl) { |
|---|
| 4099 | 4429 | reset_balance_state(fs_info); |
|---|
| 4100 | | - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); |
|---|
| 4430 | + btrfs_exclop_finish(fs_info); |
|---|
| 4101 | 4431 | btrfs_info(fs_info, "balance: canceled"); |
|---|
| 4102 | 4432 | } |
|---|
| 4103 | 4433 | } |
|---|
| .. | .. |
|---|
| 4109 | 4439 | return 0; |
|---|
| 4110 | 4440 | } |
|---|
| 4111 | 4441 | |
|---|
| 4112 | | -static int btrfs_uuid_scan_kthread(void *data) |
|---|
| 4442 | +int btrfs_uuid_scan_kthread(void *data) |
|---|
| 4113 | 4443 | { |
|---|
| 4114 | 4444 | struct btrfs_fs_info *fs_info = data; |
|---|
| 4115 | 4445 | struct btrfs_root *root = fs_info->tree_root; |
|---|
| .. | .. |
|---|
| 4121 | 4451 | struct btrfs_root_item root_item; |
|---|
| 4122 | 4452 | u32 item_size; |
|---|
| 4123 | 4453 | struct btrfs_trans_handle *trans = NULL; |
|---|
| 4454 | + bool closing = false; |
|---|
| 4124 | 4455 | |
|---|
| 4125 | 4456 | path = btrfs_alloc_path(); |
|---|
| 4126 | 4457 | if (!path) { |
|---|
| .. | .. |
|---|
| 4133 | 4464 | key.offset = 0; |
|---|
| 4134 | 4465 | |
|---|
| 4135 | 4466 | while (1) { |
|---|
| 4467 | + if (btrfs_fs_closing(fs_info)) { |
|---|
| 4468 | + closing = true; |
|---|
| 4469 | + break; |
|---|
| 4470 | + } |
|---|
| 4136 | 4471 | ret = btrfs_search_forward(root, &key, path, |
|---|
| 4137 | 4472 | BTRFS_OLDEST_GENERATION); |
|---|
| 4138 | 4473 | if (ret) { |
|---|
| .. | .. |
|---|
| 4233 | 4568 | btrfs_end_transaction(trans); |
|---|
| 4234 | 4569 | if (ret) |
|---|
| 4235 | 4570 | btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret); |
|---|
| 4236 | | - else |
|---|
| 4571 | + else if (!closing) |
|---|
| 4237 | 4572 | set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); |
|---|
| 4238 | 4573 | up(&fs_info->uuid_tree_rescan_sem); |
|---|
| 4239 | 4574 | return 0; |
|---|
| 4240 | | -} |
|---|
| 4241 | | - |
|---|
| 4242 | | -/* |
|---|
| 4243 | | - * Callback for btrfs_uuid_tree_iterate(). |
|---|
| 4244 | | - * returns: |
|---|
| 4245 | | - * 0 check succeeded, the entry is not outdated. |
|---|
| 4246 | | - * < 0 if an error occurred. |
|---|
| 4247 | | - * > 0 if the check failed, which means the caller shall remove the entry. |
|---|
| 4248 | | - */ |
|---|
| 4249 | | -static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, |
|---|
| 4250 | | - u8 *uuid, u8 type, u64 subid) |
|---|
| 4251 | | -{ |
|---|
| 4252 | | - struct btrfs_key key; |
|---|
| 4253 | | - int ret = 0; |
|---|
| 4254 | | - struct btrfs_root *subvol_root; |
|---|
| 4255 | | - |
|---|
| 4256 | | - if (type != BTRFS_UUID_KEY_SUBVOL && |
|---|
| 4257 | | - type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) |
|---|
| 4258 | | - goto out; |
|---|
| 4259 | | - |
|---|
| 4260 | | - key.objectid = subid; |
|---|
| 4261 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 4262 | | - key.offset = (u64)-1; |
|---|
| 4263 | | - subvol_root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 4264 | | - if (IS_ERR(subvol_root)) { |
|---|
| 4265 | | - ret = PTR_ERR(subvol_root); |
|---|
| 4266 | | - if (ret == -ENOENT) |
|---|
| 4267 | | - ret = 1; |
|---|
| 4268 | | - goto out; |
|---|
| 4269 | | - } |
|---|
| 4270 | | - |
|---|
| 4271 | | - switch (type) { |
|---|
| 4272 | | - case BTRFS_UUID_KEY_SUBVOL: |
|---|
| 4273 | | - if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE)) |
|---|
| 4274 | | - ret = 1; |
|---|
| 4275 | | - break; |
|---|
| 4276 | | - case BTRFS_UUID_KEY_RECEIVED_SUBVOL: |
|---|
| 4277 | | - if (memcmp(uuid, subvol_root->root_item.received_uuid, |
|---|
| 4278 | | - BTRFS_UUID_SIZE)) |
|---|
| 4279 | | - ret = 1; |
|---|
| 4280 | | - break; |
|---|
| 4281 | | - } |
|---|
| 4282 | | - |
|---|
| 4283 | | -out: |
|---|
| 4284 | | - return ret; |
|---|
| 4285 | | -} |
|---|
| 4286 | | - |
|---|
| 4287 | | -static int btrfs_uuid_rescan_kthread(void *data) |
|---|
| 4288 | | -{ |
|---|
| 4289 | | - struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data; |
|---|
| 4290 | | - int ret; |
|---|
| 4291 | | - |
|---|
| 4292 | | - /* |
|---|
| 4293 | | - * 1st step is to iterate through the existing UUID tree and |
|---|
| 4294 | | - * to delete all entries that contain outdated data. |
|---|
| 4295 | | - * 2nd step is to add all missing entries to the UUID tree. |
|---|
| 4296 | | - */ |
|---|
| 4297 | | - ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); |
|---|
| 4298 | | - if (ret < 0) { |
|---|
| 4299 | | - btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret); |
|---|
| 4300 | | - up(&fs_info->uuid_tree_rescan_sem); |
|---|
| 4301 | | - return ret; |
|---|
| 4302 | | - } |
|---|
| 4303 | | - return btrfs_uuid_scan_kthread(data); |
|---|
| 4304 | 4575 | } |
|---|
| 4305 | 4576 | |
|---|
| 4306 | 4577 | int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) |
|---|
| .. | .. |
|---|
| 4319 | 4590 | if (IS_ERR(trans)) |
|---|
| 4320 | 4591 | return PTR_ERR(trans); |
|---|
| 4321 | 4592 | |
|---|
| 4322 | | - uuid_root = btrfs_create_tree(trans, fs_info, |
|---|
| 4323 | | - BTRFS_UUID_TREE_OBJECTID); |
|---|
| 4593 | + uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID); |
|---|
| 4324 | 4594 | if (IS_ERR(uuid_root)) { |
|---|
| 4325 | 4595 | ret = PTR_ERR(uuid_root); |
|---|
| 4326 | 4596 | btrfs_abort_transaction(trans, ret); |
|---|
| .. | .. |
|---|
| 4346 | 4616 | return 0; |
|---|
| 4347 | 4617 | } |
|---|
| 4348 | 4618 | |
|---|
| 4349 | | -int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) |
|---|
| 4350 | | -{ |
|---|
| 4351 | | - struct task_struct *task; |
|---|
| 4352 | | - |
|---|
| 4353 | | - down(&fs_info->uuid_tree_rescan_sem); |
|---|
| 4354 | | - task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); |
|---|
| 4355 | | - if (IS_ERR(task)) { |
|---|
| 4356 | | - /* fs_info->update_uuid_tree_gen remains 0 in all error case */ |
|---|
| 4357 | | - btrfs_warn(fs_info, "failed to start uuid_rescan task"); |
|---|
| 4358 | | - up(&fs_info->uuid_tree_rescan_sem); |
|---|
| 4359 | | - return PTR_ERR(task); |
|---|
| 4360 | | - } |
|---|
| 4361 | | - |
|---|
| 4362 | | - return 0; |
|---|
| 4363 | | -} |
|---|
| 4364 | | - |
|---|
| 4365 | 4619 | /* |
|---|
| 4366 | 4620 | * shrinking a device means finding all of the device extents past |
|---|
| 4367 | 4621 | * the new size, and then following the back refs to the chunks. |
|---|
| .. | .. |
|---|
| 4380 | 4634 | int slot; |
|---|
| 4381 | 4635 | int failed = 0; |
|---|
| 4382 | 4636 | bool retried = false; |
|---|
| 4383 | | - bool checked_pending_chunks = false; |
|---|
| 4384 | 4637 | struct extent_buffer *l; |
|---|
| 4385 | 4638 | struct btrfs_key key; |
|---|
| 4386 | 4639 | struct btrfs_super_block *super_copy = fs_info->super_copy; |
|---|
| 4387 | 4640 | u64 old_total = btrfs_super_total_bytes(super_copy); |
|---|
| 4388 | 4641 | u64 old_size = btrfs_device_get_total_bytes(device); |
|---|
| 4389 | 4642 | u64 diff; |
|---|
| 4643 | + u64 start; |
|---|
| 4390 | 4644 | |
|---|
| 4391 | 4645 | new_size = round_down(new_size, fs_info->sectorsize); |
|---|
| 4646 | + start = new_size; |
|---|
| 4392 | 4647 | diff = round_down(old_size - new_size, fs_info->sectorsize); |
|---|
| 4393 | 4648 | |
|---|
| 4394 | 4649 | if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) |
|---|
| .. | .. |
|---|
| 4400 | 4655 | |
|---|
| 4401 | 4656 | path->reada = READA_BACK; |
|---|
| 4402 | 4657 | |
|---|
| 4658 | + trans = btrfs_start_transaction(root, 0); |
|---|
| 4659 | + if (IS_ERR(trans)) { |
|---|
| 4660 | + btrfs_free_path(path); |
|---|
| 4661 | + return PTR_ERR(trans); |
|---|
| 4662 | + } |
|---|
| 4663 | + |
|---|
| 4403 | 4664 | mutex_lock(&fs_info->chunk_mutex); |
|---|
| 4404 | 4665 | |
|---|
| 4405 | 4666 | btrfs_device_set_total_bytes(device, new_size); |
|---|
| .. | .. |
|---|
| 4407 | 4668 | device->fs_devices->total_rw_bytes -= diff; |
|---|
| 4408 | 4669 | atomic64_sub(diff, &fs_info->free_chunk_space); |
|---|
| 4409 | 4670 | } |
|---|
| 4410 | | - mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 4671 | + |
|---|
| 4672 | + /* |
|---|
| 4673 | + * Once the device's size has been set to the new size, ensure all |
|---|
| 4674 | + * in-memory chunks are synced to disk so that the loop below sees them |
|---|
| 4675 | + * and relocates them accordingly. |
|---|
| 4676 | + */ |
|---|
| 4677 | + if (contains_pending_extent(device, &start, diff)) { |
|---|
| 4678 | + mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 4679 | + ret = btrfs_commit_transaction(trans); |
|---|
| 4680 | + if (ret) |
|---|
| 4681 | + goto done; |
|---|
| 4682 | + } else { |
|---|
| 4683 | + mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 4684 | + btrfs_end_transaction(trans); |
|---|
| 4685 | + } |
|---|
| 4411 | 4686 | |
|---|
| 4412 | 4687 | again: |
|---|
| 4413 | 4688 | key.objectid = device->devid; |
|---|
| .. | .. |
|---|
| 4469 | 4744 | |
|---|
| 4470 | 4745 | ret = btrfs_relocate_chunk(fs_info, chunk_offset); |
|---|
| 4471 | 4746 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); |
|---|
| 4472 | | - if (ret && ret != -ENOSPC) |
|---|
| 4473 | | - goto done; |
|---|
| 4474 | | - if (ret == -ENOSPC) |
|---|
| 4747 | + if (ret == -ENOSPC) { |
|---|
| 4475 | 4748 | failed++; |
|---|
| 4749 | + } else if (ret) { |
|---|
| 4750 | + if (ret == -ETXTBSY) { |
|---|
| 4751 | + btrfs_warn(fs_info, |
|---|
| 4752 | + "could not shrink block group %llu due to active swapfile", |
|---|
| 4753 | + chunk_offset); |
|---|
| 4754 | + } |
|---|
| 4755 | + goto done; |
|---|
| 4756 | + } |
|---|
| 4476 | 4757 | } while (key.offset-- > 0); |
|---|
| 4477 | 4758 | |
|---|
| 4478 | 4759 | if (failed && !retried) { |
|---|
| .. | .. |
|---|
| 4492 | 4773 | } |
|---|
| 4493 | 4774 | |
|---|
| 4494 | 4775 | mutex_lock(&fs_info->chunk_mutex); |
|---|
| 4495 | | - |
|---|
| 4496 | | - /* |
|---|
| 4497 | | - * We checked in the above loop all device extents that were already in |
|---|
| 4498 | | - * the device tree. However before we have updated the device's |
|---|
| 4499 | | - * total_bytes to the new size, we might have had chunk allocations that |
|---|
| 4500 | | - * have not complete yet (new block groups attached to transaction |
|---|
| 4501 | | - * handles), and therefore their device extents were not yet in the |
|---|
| 4502 | | - * device tree and we missed them in the loop above. So if we have any |
|---|
| 4503 | | - * pending chunk using a device extent that overlaps the device range |
|---|
| 4504 | | - * that we can not use anymore, commit the current transaction and |
|---|
| 4505 | | - * repeat the search on the device tree - this way we guarantee we will |
|---|
| 4506 | | - * not have chunks using device extents that end beyond 'new_size'. |
|---|
| 4507 | | - */ |
|---|
| 4508 | | - if (!checked_pending_chunks) { |
|---|
| 4509 | | - u64 start = new_size; |
|---|
| 4510 | | - u64 len = old_size - new_size; |
|---|
| 4511 | | - |
|---|
| 4512 | | - if (contains_pending_extent(trans->transaction, device, |
|---|
| 4513 | | - &start, len)) { |
|---|
| 4514 | | - mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 4515 | | - checked_pending_chunks = true; |
|---|
| 4516 | | - failed = 0; |
|---|
| 4517 | | - retried = false; |
|---|
| 4518 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 4519 | | - if (ret) |
|---|
| 4520 | | - goto done; |
|---|
| 4521 | | - goto again; |
|---|
| 4522 | | - } |
|---|
| 4523 | | - } |
|---|
| 4776 | + /* Clear all state bits beyond the shrunk device size */ |
|---|
| 4777 | + clear_extent_bits(&device->alloc_state, new_size, (u64)-1, |
|---|
| 4778 | + CHUNK_STATE_MASK); |
|---|
| 4524 | 4779 | |
|---|
| 4525 | 4780 | btrfs_device_set_disk_total_bytes(device, new_size); |
|---|
| 4526 | | - if (list_empty(&device->resized_list)) |
|---|
| 4527 | | - list_add_tail(&device->resized_list, |
|---|
| 4528 | | - &fs_info->fs_devices->resized_devices); |
|---|
| 4781 | + if (list_empty(&device->post_commit_list)) |
|---|
| 4782 | + list_add_tail(&device->post_commit_list, |
|---|
| 4783 | + &trans->transaction->dev_update_list); |
|---|
| 4529 | 4784 | |
|---|
| 4530 | 4785 | WARN_ON(diff > old_total); |
|---|
| 4531 | 4786 | btrfs_set_super_total_bytes(super_copy, |
|---|
| .. | .. |
|---|
| 4609 | 4864 | btrfs_set_fs_incompat(info, RAID56); |
|---|
| 4610 | 4865 | } |
|---|
| 4611 | 4866 | |
|---|
| 4612 | | -static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
|---|
| 4613 | | - u64 start, u64 type) |
|---|
| 4867 | +static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type) |
|---|
| 4614 | 4868 | { |
|---|
| 4615 | | - struct btrfs_fs_info *info = trans->fs_info; |
|---|
| 4616 | | - struct btrfs_fs_devices *fs_devices = info->fs_devices; |
|---|
| 4617 | | - struct btrfs_device *device; |
|---|
| 4618 | | - struct map_lookup *map = NULL; |
|---|
| 4619 | | - struct extent_map_tree *em_tree; |
|---|
| 4620 | | - struct extent_map *em; |
|---|
| 4621 | | - struct btrfs_device_info *devices_info = NULL; |
|---|
| 4622 | | - u64 total_avail; |
|---|
| 4623 | | - int num_stripes; /* total number of stripes to allocate */ |
|---|
| 4624 | | - int data_stripes; /* number of stripes that count for |
|---|
| 4625 | | - block group size */ |
|---|
| 4626 | | - int sub_stripes; /* sub_stripes info for map */ |
|---|
| 4627 | | - int dev_stripes; /* stripes per dev */ |
|---|
| 4628 | | - int devs_max; /* max devs to use */ |
|---|
| 4629 | | - int devs_min; /* min devs needed */ |
|---|
| 4630 | | - int devs_increment; /* ndevs has to be a multiple of this */ |
|---|
| 4631 | | - int ncopies; /* how many copies to data has */ |
|---|
| 4632 | | - int ret; |
|---|
| 4869 | + if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4))) |
|---|
| 4870 | + return; |
|---|
| 4871 | + |
|---|
| 4872 | + btrfs_set_fs_incompat(info, RAID1C34); |
|---|
| 4873 | +} |
|---|
| 4874 | + |
|---|
| 4875 | +/* |
|---|
| 4876 | + * Structure used internally for __btrfs_alloc_chunk() function. |
|---|
| 4877 | + * Wraps needed parameters. |
|---|
| 4878 | + */ |
|---|
| 4879 | +struct alloc_chunk_ctl { |
|---|
| 4880 | + u64 start; |
|---|
| 4881 | + u64 type; |
|---|
| 4882 | + /* Total number of stripes to allocate */ |
|---|
| 4883 | + int num_stripes; |
|---|
| 4884 | + /* sub_stripes info for map */ |
|---|
| 4885 | + int sub_stripes; |
|---|
| 4886 | + /* Stripes per device */ |
|---|
| 4887 | + int dev_stripes; |
|---|
| 4888 | + /* Maximum number of devices to use */ |
|---|
| 4889 | + int devs_max; |
|---|
| 4890 | + /* Minimum number of devices to use */ |
|---|
| 4891 | + int devs_min; |
|---|
| 4892 | + /* ndevs has to be a multiple of this */ |
|---|
| 4893 | + int devs_increment; |
|---|
| 4894 | + /* Number of copies */ |
|---|
| 4895 | + int ncopies; |
|---|
| 4896 | + /* Number of stripes worth of bytes to store parity information */ |
|---|
| 4897 | + int nparity; |
|---|
| 4633 | 4898 | u64 max_stripe_size; |
|---|
| 4634 | 4899 | u64 max_chunk_size; |
|---|
| 4900 | + u64 dev_extent_min; |
|---|
| 4635 | 4901 | u64 stripe_size; |
|---|
| 4636 | | - u64 num_bytes; |
|---|
| 4902 | + u64 chunk_size; |
|---|
| 4637 | 4903 | int ndevs; |
|---|
| 4638 | | - int i; |
|---|
| 4639 | | - int j; |
|---|
| 4640 | | - int index; |
|---|
| 4904 | +}; |
|---|
| 4641 | 4905 | |
|---|
| 4642 | | - BUG_ON(!alloc_profile_is_valid(type, 0)); |
|---|
| 4643 | | - |
|---|
| 4644 | | - if (list_empty(&fs_devices->alloc_list)) { |
|---|
| 4645 | | - if (btrfs_test_opt(info, ENOSPC_DEBUG)) |
|---|
| 4646 | | - btrfs_debug(info, "%s: no writable device", __func__); |
|---|
| 4647 | | - return -ENOSPC; |
|---|
| 4648 | | - } |
|---|
| 4649 | | - |
|---|
| 4650 | | - index = btrfs_bg_flags_to_raid_index(type); |
|---|
| 4651 | | - |
|---|
| 4652 | | - sub_stripes = btrfs_raid_array[index].sub_stripes; |
|---|
| 4653 | | - dev_stripes = btrfs_raid_array[index].dev_stripes; |
|---|
| 4654 | | - devs_max = btrfs_raid_array[index].devs_max; |
|---|
| 4655 | | - devs_min = btrfs_raid_array[index].devs_min; |
|---|
| 4656 | | - devs_increment = btrfs_raid_array[index].devs_increment; |
|---|
| 4657 | | - ncopies = btrfs_raid_array[index].ncopies; |
|---|
| 4906 | +static void init_alloc_chunk_ctl_policy_regular( |
|---|
| 4907 | + struct btrfs_fs_devices *fs_devices, |
|---|
| 4908 | + struct alloc_chunk_ctl *ctl) |
|---|
| 4909 | +{ |
|---|
| 4910 | + u64 type = ctl->type; |
|---|
| 4658 | 4911 | |
|---|
| 4659 | 4912 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
|---|
| 4660 | | - max_stripe_size = SZ_1G; |
|---|
| 4661 | | - max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; |
|---|
| 4662 | | - if (!devs_max) |
|---|
| 4663 | | - devs_max = BTRFS_MAX_DEVS(info); |
|---|
| 4913 | + ctl->max_stripe_size = SZ_1G; |
|---|
| 4914 | + ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; |
|---|
| 4664 | 4915 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { |
|---|
| 4665 | | - /* for larger filesystems, use larger metadata chunks */ |
|---|
| 4916 | + /* For larger filesystems, use larger metadata chunks */ |
|---|
| 4666 | 4917 | if (fs_devices->total_rw_bytes > 50ULL * SZ_1G) |
|---|
| 4667 | | - max_stripe_size = SZ_1G; |
|---|
| 4918 | + ctl->max_stripe_size = SZ_1G; |
|---|
| 4668 | 4919 | else |
|---|
| 4669 | | - max_stripe_size = SZ_256M; |
|---|
| 4670 | | - max_chunk_size = max_stripe_size; |
|---|
| 4671 | | - if (!devs_max) |
|---|
| 4672 | | - devs_max = BTRFS_MAX_DEVS(info); |
|---|
| 4920 | + ctl->max_stripe_size = SZ_256M; |
|---|
| 4921 | + ctl->max_chunk_size = ctl->max_stripe_size; |
|---|
| 4673 | 4922 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
|---|
| 4674 | | - max_stripe_size = SZ_32M; |
|---|
| 4675 | | - max_chunk_size = 2 * max_stripe_size; |
|---|
| 4676 | | - if (!devs_max) |
|---|
| 4677 | | - devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; |
|---|
| 4923 | + ctl->max_stripe_size = SZ_32M; |
|---|
| 4924 | + ctl->max_chunk_size = 2 * ctl->max_stripe_size; |
|---|
| 4925 | + ctl->devs_max = min_t(int, ctl->devs_max, |
|---|
| 4926 | + BTRFS_MAX_DEVS_SYS_CHUNK); |
|---|
| 4678 | 4927 | } else { |
|---|
| 4679 | | - btrfs_err(info, "invalid chunk type 0x%llx requested", |
|---|
| 4680 | | - type); |
|---|
| 4681 | | - BUG_ON(1); |
|---|
| 4928 | + BUG(); |
|---|
| 4682 | 4929 | } |
|---|
| 4683 | 4930 | |
|---|
| 4684 | | - /* we don't want a chunk larger than 10% of writeable space */ |
|---|
| 4685 | | - max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
|---|
| 4686 | | - max_chunk_size); |
|---|
| 4931 | + /* We don't want a chunk larger than 10% of writable space */ |
|---|
| 4932 | + ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
|---|
| 4933 | + ctl->max_chunk_size); |
|---|
| 4934 | + ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes; |
|---|
| 4935 | +} |
|---|
| 4687 | 4936 | |
|---|
| 4688 | | - devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info), |
|---|
| 4689 | | - GFP_NOFS); |
|---|
| 4690 | | - if (!devices_info) |
|---|
| 4691 | | - return -ENOMEM; |
|---|
| 4937 | +static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices, |
|---|
| 4938 | + struct alloc_chunk_ctl *ctl) |
|---|
| 4939 | +{ |
|---|
| 4940 | + int index = btrfs_bg_flags_to_raid_index(ctl->type); |
|---|
| 4941 | + |
|---|
| 4942 | + ctl->sub_stripes = btrfs_raid_array[index].sub_stripes; |
|---|
| 4943 | + ctl->dev_stripes = btrfs_raid_array[index].dev_stripes; |
|---|
| 4944 | + ctl->devs_max = btrfs_raid_array[index].devs_max; |
|---|
| 4945 | + if (!ctl->devs_max) |
|---|
| 4946 | + ctl->devs_max = BTRFS_MAX_DEVS(fs_devices->fs_info); |
|---|
| 4947 | + ctl->devs_min = btrfs_raid_array[index].devs_min; |
|---|
| 4948 | + ctl->devs_increment = btrfs_raid_array[index].devs_increment; |
|---|
| 4949 | + ctl->ncopies = btrfs_raid_array[index].ncopies; |
|---|
| 4950 | + ctl->nparity = btrfs_raid_array[index].nparity; |
|---|
| 4951 | + ctl->ndevs = 0; |
|---|
| 4952 | + |
|---|
| 4953 | + switch (fs_devices->chunk_alloc_policy) { |
|---|
| 4954 | + case BTRFS_CHUNK_ALLOC_REGULAR: |
|---|
| 4955 | + init_alloc_chunk_ctl_policy_regular(fs_devices, ctl); |
|---|
| 4956 | + break; |
|---|
| 4957 | + default: |
|---|
| 4958 | + BUG(); |
|---|
| 4959 | + } |
|---|
| 4960 | +} |
|---|
| 4961 | + |
|---|
| 4962 | +static int gather_device_info(struct btrfs_fs_devices *fs_devices, |
|---|
| 4963 | + struct alloc_chunk_ctl *ctl, |
|---|
| 4964 | + struct btrfs_device_info *devices_info) |
|---|
| 4965 | +{ |
|---|
| 4966 | + struct btrfs_fs_info *info = fs_devices->fs_info; |
|---|
| 4967 | + struct btrfs_device *device; |
|---|
| 4968 | + u64 total_avail; |
|---|
| 4969 | + u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes; |
|---|
| 4970 | + int ret; |
|---|
| 4971 | + int ndevs = 0; |
|---|
| 4972 | + u64 max_avail; |
|---|
| 4973 | + u64 dev_offset; |
|---|
| 4692 | 4974 | |
|---|
| 4693 | 4975 | /* |
|---|
| 4694 | 4976 | * in the first pass through the devices list, we gather information |
|---|
| 4695 | 4977 | * about the available holes on each device. |
|---|
| 4696 | 4978 | */ |
|---|
| 4697 | | - ndevs = 0; |
|---|
| 4698 | 4979 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
|---|
| 4699 | | - u64 max_avail; |
|---|
| 4700 | | - u64 dev_offset; |
|---|
| 4701 | | - |
|---|
| 4702 | 4980 | if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { |
|---|
| 4703 | 4981 | WARN(1, KERN_ERR |
|---|
| 4704 | 4982 | "BTRFS: read-only device in alloc_list\n"); |
|---|
| .. | .. |
|---|
| 4716 | 4994 | total_avail = 0; |
|---|
| 4717 | 4995 | |
|---|
| 4718 | 4996 | /* If there is no space on this device, skip it. */ |
|---|
| 4719 | | - if (total_avail == 0) |
|---|
| 4997 | + if (total_avail < ctl->dev_extent_min) |
|---|
| 4720 | 4998 | continue; |
|---|
| 4721 | 4999 | |
|---|
| 4722 | | - ret = find_free_dev_extent(trans, device, |
|---|
| 4723 | | - max_stripe_size * dev_stripes, |
|---|
| 4724 | | - &dev_offset, &max_avail); |
|---|
| 5000 | + ret = find_free_dev_extent(device, dev_extent_want, &dev_offset, |
|---|
| 5001 | + &max_avail); |
|---|
| 4725 | 5002 | if (ret && ret != -ENOSPC) |
|---|
| 4726 | | - goto error; |
|---|
| 5003 | + return ret; |
|---|
| 4727 | 5004 | |
|---|
| 4728 | 5005 | if (ret == 0) |
|---|
| 4729 | | - max_avail = max_stripe_size * dev_stripes; |
|---|
| 5006 | + max_avail = dev_extent_want; |
|---|
| 4730 | 5007 | |
|---|
| 4731 | | - if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) { |
|---|
| 5008 | + if (max_avail < ctl->dev_extent_min) { |
|---|
| 4732 | 5009 | if (btrfs_test_opt(info, ENOSPC_DEBUG)) |
|---|
| 4733 | 5010 | btrfs_debug(info, |
|---|
| 4734 | | - "%s: devid %llu has no free space, have=%llu want=%u", |
|---|
| 5011 | + "%s: devid %llu has no free space, have=%llu want=%llu", |
|---|
| 4735 | 5012 | __func__, device->devid, max_avail, |
|---|
| 4736 | | - BTRFS_STRIPE_LEN * dev_stripes); |
|---|
| 5013 | + ctl->dev_extent_min); |
|---|
| 4737 | 5014 | continue; |
|---|
| 4738 | 5015 | } |
|---|
| 4739 | 5016 | |
|---|
| .. | .. |
|---|
| 4748 | 5025 | devices_info[ndevs].dev = device; |
|---|
| 4749 | 5026 | ++ndevs; |
|---|
| 4750 | 5027 | } |
|---|
| 5028 | + ctl->ndevs = ndevs; |
|---|
| 4751 | 5029 | |
|---|
| 4752 | 5030 | /* |
|---|
| 4753 | 5031 | * now sort the devices by hole size / available space |
|---|
| .. | .. |
|---|
| 4755 | 5033 | sort(devices_info, ndevs, sizeof(struct btrfs_device_info), |
|---|
| 4756 | 5034 | btrfs_cmp_device_info, NULL); |
|---|
| 4757 | 5035 | |
|---|
| 4758 | | - /* round down to number of usable stripes */ |
|---|
| 4759 | | - ndevs = round_down(ndevs, devs_increment); |
|---|
| 5036 | + return 0; |
|---|
| 5037 | +} |
|---|
| 4760 | 5038 | |
|---|
| 4761 | | - if (ndevs < devs_min) { |
|---|
| 4762 | | - ret = -ENOSPC; |
|---|
| 4763 | | - if (btrfs_test_opt(info, ENOSPC_DEBUG)) { |
|---|
| 4764 | | - btrfs_debug(info, |
|---|
| 4765 | | - "%s: not enough devices with free space: have=%d minimum required=%d", |
|---|
| 4766 | | - __func__, ndevs, devs_min); |
|---|
| 4767 | | - } |
|---|
| 4768 | | - goto error; |
|---|
| 4769 | | - } |
|---|
| 4770 | | - |
|---|
| 4771 | | - ndevs = min(ndevs, devs_max); |
|---|
| 5039 | +static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl, |
|---|
| 5040 | + struct btrfs_device_info *devices_info) |
|---|
| 5041 | +{ |
|---|
| 5042 | + /* Number of stripes that count for block group size */ |
|---|
| 5043 | + int data_stripes; |
|---|
| 4772 | 5044 | |
|---|
| 4773 | 5045 | /* |
|---|
| 4774 | 5046 | * The primary goal is to maximize the number of stripes, so use as |
|---|
| .. | .. |
|---|
| 4777 | 5049 | * The DUP profile stores more than one stripe per device, the |
|---|
| 4778 | 5050 | * max_avail is the total size so we have to adjust. |
|---|
| 4779 | 5051 | */ |
|---|
| 4780 | | - stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes); |
|---|
| 4781 | | - num_stripes = ndevs * dev_stripes; |
|---|
| 5052 | + ctl->stripe_size = div_u64(devices_info[ctl->ndevs - 1].max_avail, |
|---|
| 5053 | + ctl->dev_stripes); |
|---|
| 5054 | + ctl->num_stripes = ctl->ndevs * ctl->dev_stripes; |
|---|
| 5055 | + |
|---|
| 5056 | + /* This will have to be fixed for RAID1 and RAID10 over more drives */ |
|---|
| 5057 | + data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies; |
|---|
| 4782 | 5058 | |
|---|
| 4783 | 5059 | /* |
|---|
| 4784 | | - * this will have to be fixed for RAID1 and RAID10 over |
|---|
| 4785 | | - * more drives |
|---|
| 5060 | + * Use the number of data stripes to figure out how big this chunk is |
|---|
| 5061 | + * really going to be in terms of logical address space, and compare |
|---|
| 5062 | + * that answer with the max chunk size. If it's higher, we try to |
|---|
| 5063 | + * reduce stripe_size. |
|---|
| 4786 | 5064 | */ |
|---|
| 4787 | | - data_stripes = num_stripes / ncopies; |
|---|
| 4788 | | - |
|---|
| 4789 | | - if (type & BTRFS_BLOCK_GROUP_RAID5) |
|---|
| 4790 | | - data_stripes = num_stripes - 1; |
|---|
| 4791 | | - |
|---|
| 4792 | | - if (type & BTRFS_BLOCK_GROUP_RAID6) |
|---|
| 4793 | | - data_stripes = num_stripes - 2; |
|---|
| 4794 | | - |
|---|
| 4795 | | - /* |
|---|
| 4796 | | - * Use the number of data stripes to figure out how big this chunk |
|---|
| 4797 | | - * is really going to be in terms of logical address space, |
|---|
| 4798 | | - * and compare that answer with the max chunk size. If it's higher, |
|---|
| 4799 | | - * we try to reduce stripe_size. |
|---|
| 4800 | | - */ |
|---|
| 4801 | | - if (stripe_size * data_stripes > max_chunk_size) { |
|---|
| 5065 | + if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) { |
|---|
| 4802 | 5066 | /* |
|---|
| 4803 | 5067 | * Reduce stripe_size, round it up to a 16MB boundary again and |
|---|
| 4804 | 5068 | * then use it, unless it ends up being even bigger than the |
|---|
| 4805 | 5069 | * previous value we had already. |
|---|
| 4806 | 5070 | */ |
|---|
| 4807 | | - stripe_size = min(round_up(div_u64(max_chunk_size, |
|---|
| 4808 | | - data_stripes), SZ_16M), |
|---|
| 4809 | | - stripe_size); |
|---|
| 5071 | + ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size, |
|---|
| 5072 | + data_stripes), SZ_16M), |
|---|
| 5073 | + ctl->stripe_size); |
|---|
| 4810 | 5074 | } |
|---|
| 4811 | 5075 | |
|---|
| 4812 | | - /* align to BTRFS_STRIPE_LEN */ |
|---|
| 4813 | | - stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN); |
|---|
| 5076 | + /* Align to BTRFS_STRIPE_LEN */ |
|---|
| 5077 | + ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN); |
|---|
| 5078 | + ctl->chunk_size = ctl->stripe_size * data_stripes; |
|---|
| 4814 | 5079 | |
|---|
| 4815 | | - map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); |
|---|
| 4816 | | - if (!map) { |
|---|
| 4817 | | - ret = -ENOMEM; |
|---|
| 4818 | | - goto error; |
|---|
| 5080 | + return 0; |
|---|
| 5081 | +} |
|---|
| 5082 | + |
|---|
| 5083 | +static int decide_stripe_size(struct btrfs_fs_devices *fs_devices, |
|---|
| 5084 | + struct alloc_chunk_ctl *ctl, |
|---|
| 5085 | + struct btrfs_device_info *devices_info) |
|---|
| 5086 | +{ |
|---|
| 5087 | + struct btrfs_fs_info *info = fs_devices->fs_info; |
|---|
| 5088 | + |
|---|
| 5089 | + /* |
|---|
| 5090 | + * Round down to number of usable stripes, devs_increment can be any |
|---|
| 5091 | + * number so we can't use round_down() that requires power of 2, while |
|---|
| 5092 | + * rounddown is safe. |
|---|
| 5093 | + */ |
|---|
| 5094 | + ctl->ndevs = rounddown(ctl->ndevs, ctl->devs_increment); |
|---|
| 5095 | + |
|---|
| 5096 | + if (ctl->ndevs < ctl->devs_min) { |
|---|
| 5097 | + if (btrfs_test_opt(info, ENOSPC_DEBUG)) { |
|---|
| 5098 | + btrfs_debug(info, |
|---|
| 5099 | + "%s: not enough devices with free space: have=%d minimum required=%d", |
|---|
| 5100 | + __func__, ctl->ndevs, ctl->devs_min); |
|---|
| 5101 | + } |
|---|
| 5102 | + return -ENOSPC; |
|---|
| 4819 | 5103 | } |
|---|
| 4820 | | - map->num_stripes = num_stripes; |
|---|
| 4821 | 5104 | |
|---|
| 4822 | | - for (i = 0; i < ndevs; ++i) { |
|---|
| 4823 | | - for (j = 0; j < dev_stripes; ++j) { |
|---|
| 4824 | | - int s = i * dev_stripes + j; |
|---|
| 5105 | + ctl->ndevs = min(ctl->ndevs, ctl->devs_max); |
|---|
| 5106 | + |
|---|
| 5107 | + switch (fs_devices->chunk_alloc_policy) { |
|---|
| 5108 | + case BTRFS_CHUNK_ALLOC_REGULAR: |
|---|
| 5109 | + return decide_stripe_size_regular(ctl, devices_info); |
|---|
| 5110 | + default: |
|---|
| 5111 | + BUG(); |
|---|
| 5112 | + } |
|---|
| 5113 | +} |
|---|
| 5114 | + |
|---|
| 5115 | +static int create_chunk(struct btrfs_trans_handle *trans, |
|---|
| 5116 | + struct alloc_chunk_ctl *ctl, |
|---|
| 5117 | + struct btrfs_device_info *devices_info) |
|---|
| 5118 | +{ |
|---|
| 5119 | + struct btrfs_fs_info *info = trans->fs_info; |
|---|
| 5120 | + struct map_lookup *map = NULL; |
|---|
| 5121 | + struct extent_map_tree *em_tree; |
|---|
| 5122 | + struct extent_map *em; |
|---|
| 5123 | + u64 start = ctl->start; |
|---|
| 5124 | + u64 type = ctl->type; |
|---|
| 5125 | + int ret; |
|---|
| 5126 | + int i; |
|---|
| 5127 | + int j; |
|---|
| 5128 | + |
|---|
| 5129 | + map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS); |
|---|
| 5130 | + if (!map) |
|---|
| 5131 | + return -ENOMEM; |
|---|
| 5132 | + map->num_stripes = ctl->num_stripes; |
|---|
| 5133 | + |
|---|
| 5134 | + for (i = 0; i < ctl->ndevs; ++i) { |
|---|
| 5135 | + for (j = 0; j < ctl->dev_stripes; ++j) { |
|---|
| 5136 | + int s = i * ctl->dev_stripes + j; |
|---|
| 4825 | 5137 | map->stripes[s].dev = devices_info[i].dev; |
|---|
| 4826 | 5138 | map->stripes[s].physical = devices_info[i].dev_offset + |
|---|
| 4827 | | - j * stripe_size; |
|---|
| 5139 | + j * ctl->stripe_size; |
|---|
| 4828 | 5140 | } |
|---|
| 4829 | 5141 | } |
|---|
| 4830 | 5142 | map->stripe_len = BTRFS_STRIPE_LEN; |
|---|
| 4831 | 5143 | map->io_align = BTRFS_STRIPE_LEN; |
|---|
| 4832 | 5144 | map->io_width = BTRFS_STRIPE_LEN; |
|---|
| 4833 | 5145 | map->type = type; |
|---|
| 4834 | | - map->sub_stripes = sub_stripes; |
|---|
| 5146 | + map->sub_stripes = ctl->sub_stripes; |
|---|
| 4835 | 5147 | |
|---|
| 4836 | | - num_bytes = stripe_size * data_stripes; |
|---|
| 4837 | | - |
|---|
| 4838 | | - trace_btrfs_chunk_alloc(info, map, start, num_bytes); |
|---|
| 5148 | + trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size); |
|---|
| 4839 | 5149 | |
|---|
| 4840 | 5150 | em = alloc_extent_map(); |
|---|
| 4841 | 5151 | if (!em) { |
|---|
| 4842 | 5152 | kfree(map); |
|---|
| 4843 | | - ret = -ENOMEM; |
|---|
| 4844 | | - goto error; |
|---|
| 5153 | + return -ENOMEM; |
|---|
| 4845 | 5154 | } |
|---|
| 4846 | 5155 | set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); |
|---|
| 4847 | 5156 | em->map_lookup = map; |
|---|
| 4848 | 5157 | em->start = start; |
|---|
| 4849 | | - em->len = num_bytes; |
|---|
| 5158 | + em->len = ctl->chunk_size; |
|---|
| 4850 | 5159 | em->block_start = 0; |
|---|
| 4851 | 5160 | em->block_len = em->len; |
|---|
| 4852 | | - em->orig_block_len = stripe_size; |
|---|
| 5161 | + em->orig_block_len = ctl->stripe_size; |
|---|
| 4853 | 5162 | |
|---|
| 4854 | | - em_tree = &info->mapping_tree.map_tree; |
|---|
| 5163 | + em_tree = &info->mapping_tree; |
|---|
| 4855 | 5164 | write_lock(&em_tree->lock); |
|---|
| 4856 | 5165 | ret = add_extent_mapping(em_tree, em, 0); |
|---|
| 4857 | 5166 | if (ret) { |
|---|
| 4858 | 5167 | write_unlock(&em_tree->lock); |
|---|
| 4859 | 5168 | free_extent_map(em); |
|---|
| 4860 | | - goto error; |
|---|
| 5169 | + return ret; |
|---|
| 4861 | 5170 | } |
|---|
| 4862 | | - |
|---|
| 4863 | | - list_add_tail(&em->list, &trans->transaction->pending_chunks); |
|---|
| 4864 | | - refcount_inc(&em->refs); |
|---|
| 4865 | 5171 | write_unlock(&em_tree->lock); |
|---|
| 4866 | 5172 | |
|---|
| 4867 | | - ret = btrfs_make_block_group(trans, 0, type, start, num_bytes); |
|---|
| 5173 | + ret = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size); |
|---|
| 4868 | 5174 | if (ret) |
|---|
| 4869 | 5175 | goto error_del_extent; |
|---|
| 4870 | 5176 | |
|---|
| 4871 | 5177 | for (i = 0; i < map->num_stripes; i++) { |
|---|
| 4872 | | - num_bytes = map->stripes[i].dev->bytes_used + stripe_size; |
|---|
| 4873 | | - btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); |
|---|
| 4874 | | - map->stripes[i].dev->has_pending_chunks = true; |
|---|
| 5178 | + struct btrfs_device *dev = map->stripes[i].dev; |
|---|
| 5179 | + |
|---|
| 5180 | + btrfs_device_set_bytes_used(dev, |
|---|
| 5181 | + dev->bytes_used + ctl->stripe_size); |
|---|
| 5182 | + if (list_empty(&dev->post_commit_list)) |
|---|
| 5183 | + list_add_tail(&dev->post_commit_list, |
|---|
| 5184 | + &trans->transaction->dev_update_list); |
|---|
| 4875 | 5185 | } |
|---|
| 4876 | 5186 | |
|---|
| 4877 | | - atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space); |
|---|
| 5187 | + atomic64_sub(ctl->stripe_size * map->num_stripes, |
|---|
| 5188 | + &info->free_chunk_space); |
|---|
| 4878 | 5189 | |
|---|
| 4879 | 5190 | free_extent_map(em); |
|---|
| 4880 | 5191 | check_raid56_incompat_flag(info, type); |
|---|
| 5192 | + check_raid1c34_incompat_flag(info, type); |
|---|
| 4881 | 5193 | |
|---|
| 4882 | | - kfree(devices_info); |
|---|
| 4883 | 5194 | return 0; |
|---|
| 4884 | 5195 | |
|---|
| 4885 | 5196 | error_del_extent: |
|---|
| .. | .. |
|---|
| 4891 | 5202 | free_extent_map(em); |
|---|
| 4892 | 5203 | /* One for the tree reference */ |
|---|
| 4893 | 5204 | free_extent_map(em); |
|---|
| 4894 | | - /* One for the pending_chunks list reference */ |
|---|
| 4895 | | - free_extent_map(em); |
|---|
| 4896 | | -error: |
|---|
| 5205 | + |
|---|
| 5206 | + return ret; |
|---|
| 5207 | +} |
|---|
| 5208 | + |
|---|
| 5209 | +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) |
|---|
| 5210 | +{ |
|---|
| 5211 | + struct btrfs_fs_info *info = trans->fs_info; |
|---|
| 5212 | + struct btrfs_fs_devices *fs_devices = info->fs_devices; |
|---|
| 5213 | + struct btrfs_device_info *devices_info = NULL; |
|---|
| 5214 | + struct alloc_chunk_ctl ctl; |
|---|
| 5215 | + int ret; |
|---|
| 5216 | + |
|---|
| 5217 | + lockdep_assert_held(&info->chunk_mutex); |
|---|
| 5218 | + |
|---|
| 5219 | + if (!alloc_profile_is_valid(type, 0)) { |
|---|
| 5220 | + ASSERT(0); |
|---|
| 5221 | + return -EINVAL; |
|---|
| 5222 | + } |
|---|
| 5223 | + |
|---|
| 5224 | + if (list_empty(&fs_devices->alloc_list)) { |
|---|
| 5225 | + if (btrfs_test_opt(info, ENOSPC_DEBUG)) |
|---|
| 5226 | + btrfs_debug(info, "%s: no writable device", __func__); |
|---|
| 5227 | + return -ENOSPC; |
|---|
| 5228 | + } |
|---|
| 5229 | + |
|---|
| 5230 | + if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { |
|---|
| 5231 | + btrfs_err(info, "invalid chunk type 0x%llx requested", type); |
|---|
| 5232 | + ASSERT(0); |
|---|
| 5233 | + return -EINVAL; |
|---|
| 5234 | + } |
|---|
| 5235 | + |
|---|
| 5236 | + ctl.start = find_next_chunk(info); |
|---|
| 5237 | + ctl.type = type; |
|---|
| 5238 | + init_alloc_chunk_ctl(fs_devices, &ctl); |
|---|
| 5239 | + |
|---|
| 5240 | + devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info), |
|---|
| 5241 | + GFP_NOFS); |
|---|
| 5242 | + if (!devices_info) |
|---|
| 5243 | + return -ENOMEM; |
|---|
| 5244 | + |
|---|
| 5245 | + ret = gather_device_info(fs_devices, &ctl, devices_info); |
|---|
| 5246 | + if (ret < 0) |
|---|
| 5247 | + goto out; |
|---|
| 5248 | + |
|---|
| 5249 | + ret = decide_stripe_size(fs_devices, &ctl, devices_info); |
|---|
| 5250 | + if (ret < 0) |
|---|
| 5251 | + goto out; |
|---|
| 5252 | + |
|---|
| 5253 | + ret = create_chunk(trans, &ctl, devices_info); |
|---|
| 5254 | + |
|---|
| 5255 | +out: |
|---|
| 4897 | 5256 | kfree(devices_info); |
|---|
| 4898 | 5257 | return ret; |
|---|
| 4899 | 5258 | } |
|---|
| 4900 | 5259 | |
|---|
| 5260 | +/* |
|---|
| 5261 | + * Chunk allocation falls into two parts. The first part does work |
|---|
| 5262 | + * that makes the new allocated chunk usable, but does not do any operation |
|---|
| 5263 | + * that modifies the chunk tree. The second part does the work that |
|---|
| 5264 | + * requires modifying the chunk tree. This division is important for the |
|---|
| 5265 | + * bootstrap process of adding storage to a seed btrfs. |
|---|
| 5266 | + */ |
|---|
| 4901 | 5267 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, |
|---|
| 4902 | 5268 | u64 chunk_offset, u64 chunk_size) |
|---|
| 4903 | 5269 | { |
|---|
| .. | .. |
|---|
| 4916 | 5282 | int i = 0; |
|---|
| 4917 | 5283 | int ret = 0; |
|---|
| 4918 | 5284 | |
|---|
| 4919 | | - em = get_chunk_map(fs_info, chunk_offset, chunk_size); |
|---|
| 5285 | + em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size); |
|---|
| 4920 | 5286 | if (IS_ERR(em)) |
|---|
| 4921 | 5287 | return PTR_ERR(em); |
|---|
| 4922 | 5288 | |
|---|
| .. | .. |
|---|
| 4996 | 5362 | return ret; |
|---|
| 4997 | 5363 | } |
|---|
| 4998 | 5364 | |
|---|
| 4999 | | -/* |
|---|
| 5000 | | - * Chunk allocation falls into two parts. The first part does works |
|---|
| 5001 | | - * that make the new allocated chunk useable, but not do any operation |
|---|
| 5002 | | - * that modifies the chunk tree. The second part does the works that |
|---|
| 5003 | | - * require modifying the chunk tree. This division is important for the |
|---|
| 5004 | | - * bootstrap process of adding storage to a seed btrfs. |
|---|
| 5005 | | - */ |
|---|
| 5006 | | -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) |
|---|
| 5365 | +static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) |
|---|
| 5007 | 5366 | { |
|---|
| 5008 | | - u64 chunk_offset; |
|---|
| 5009 | | - |
|---|
| 5010 | | - lockdep_assert_held(&trans->fs_info->chunk_mutex); |
|---|
| 5011 | | - chunk_offset = find_next_chunk(trans->fs_info); |
|---|
| 5012 | | - return __btrfs_alloc_chunk(trans, chunk_offset, type); |
|---|
| 5013 | | -} |
|---|
| 5014 | | - |
|---|
| 5015 | | -static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, |
|---|
| 5016 | | - struct btrfs_fs_info *fs_info) |
|---|
| 5017 | | -{ |
|---|
| 5018 | | - u64 chunk_offset; |
|---|
| 5019 | | - u64 sys_chunk_offset; |
|---|
| 5367 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 5020 | 5368 | u64 alloc_profile; |
|---|
| 5021 | 5369 | int ret; |
|---|
| 5022 | 5370 | |
|---|
| 5023 | | - chunk_offset = find_next_chunk(fs_info); |
|---|
| 5024 | 5371 | alloc_profile = btrfs_metadata_alloc_profile(fs_info); |
|---|
| 5025 | | - ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile); |
|---|
| 5372 | + ret = btrfs_alloc_chunk(trans, alloc_profile); |
|---|
| 5026 | 5373 | if (ret) |
|---|
| 5027 | 5374 | return ret; |
|---|
| 5028 | 5375 | |
|---|
| 5029 | | - sys_chunk_offset = find_next_chunk(fs_info); |
|---|
| 5030 | 5376 | alloc_profile = btrfs_system_alloc_profile(fs_info); |
|---|
| 5031 | | - ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile); |
|---|
| 5377 | + ret = btrfs_alloc_chunk(trans, alloc_profile); |
|---|
| 5032 | 5378 | return ret; |
|---|
| 5033 | 5379 | } |
|---|
| 5034 | 5380 | |
|---|
| 5035 | 5381 | static inline int btrfs_chunk_max_errors(struct map_lookup *map) |
|---|
| 5036 | 5382 | { |
|---|
| 5037 | | - int max_errors; |
|---|
| 5383 | + const int index = btrfs_bg_flags_to_raid_index(map->type); |
|---|
| 5038 | 5384 | |
|---|
| 5039 | | - if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
|---|
| 5040 | | - BTRFS_BLOCK_GROUP_RAID10 | |
|---|
| 5041 | | - BTRFS_BLOCK_GROUP_RAID5)) { |
|---|
| 5042 | | - max_errors = 1; |
|---|
| 5043 | | - } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { |
|---|
| 5044 | | - max_errors = 2; |
|---|
| 5045 | | - } else { |
|---|
| 5046 | | - max_errors = 0; |
|---|
| 5047 | | - } |
|---|
| 5048 | | - |
|---|
| 5049 | | - return max_errors; |
|---|
| 5385 | + return btrfs_raid_array[index].tolerated_failures; |
|---|
| 5050 | 5386 | } |
|---|
| 5051 | 5387 | |
|---|
| 5052 | 5388 | int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) |
|---|
| .. | .. |
|---|
| 5057 | 5393 | int miss_ndevs = 0; |
|---|
| 5058 | 5394 | int i; |
|---|
| 5059 | 5395 | |
|---|
| 5060 | | - em = get_chunk_map(fs_info, chunk_offset, 1); |
|---|
| 5396 | + em = btrfs_get_chunk_map(fs_info, chunk_offset, 1); |
|---|
| 5061 | 5397 | if (IS_ERR(em)) |
|---|
| 5062 | 5398 | return 1; |
|---|
| 5063 | 5399 | |
|---|
| .. | .. |
|---|
| 5087 | 5423 | return readonly; |
|---|
| 5088 | 5424 | } |
|---|
| 5089 | 5425 | |
|---|
| 5090 | | -void btrfs_mapping_init(struct btrfs_mapping_tree *tree) |
|---|
| 5091 | | -{ |
|---|
| 5092 | | - extent_map_tree_init(&tree->map_tree); |
|---|
| 5093 | | -} |
|---|
| 5094 | | - |
|---|
| 5095 | | -void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) |
|---|
| 5426 | +void btrfs_mapping_tree_free(struct extent_map_tree *tree) |
|---|
| 5096 | 5427 | { |
|---|
| 5097 | 5428 | struct extent_map *em; |
|---|
| 5098 | 5429 | |
|---|
| 5099 | 5430 | while (1) { |
|---|
| 5100 | | - write_lock(&tree->map_tree.lock); |
|---|
| 5101 | | - em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); |
|---|
| 5431 | + write_lock(&tree->lock); |
|---|
| 5432 | + em = lookup_extent_mapping(tree, 0, (u64)-1); |
|---|
| 5102 | 5433 | if (em) |
|---|
| 5103 | | - remove_extent_mapping(&tree->map_tree, em); |
|---|
| 5104 | | - write_unlock(&tree->map_tree.lock); |
|---|
| 5434 | + remove_extent_mapping(tree, em); |
|---|
| 5435 | + write_unlock(&tree->lock); |
|---|
| 5105 | 5436 | if (!em) |
|---|
| 5106 | 5437 | break; |
|---|
| 5107 | 5438 | /* once for us */ |
|---|
| .. | .. |
|---|
| 5117 | 5448 | struct map_lookup *map; |
|---|
| 5118 | 5449 | int ret; |
|---|
| 5119 | 5450 | |
|---|
| 5120 | | - em = get_chunk_map(fs_info, logical, len); |
|---|
| 5451 | + em = btrfs_get_chunk_map(fs_info, logical, len); |
|---|
| 5121 | 5452 | if (IS_ERR(em)) |
|---|
| 5122 | 5453 | /* |
|---|
| 5123 | 5454 | * We could return errors for these cases, but that could get |
|---|
| .. | .. |
|---|
| 5128 | 5459 | return 1; |
|---|
| 5129 | 5460 | |
|---|
| 5130 | 5461 | map = em->map_lookup; |
|---|
| 5131 | | - if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) |
|---|
| 5462 | + if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK)) |
|---|
| 5132 | 5463 | ret = map->num_stripes; |
|---|
| 5133 | 5464 | else if (map->type & BTRFS_BLOCK_GROUP_RAID10) |
|---|
| 5134 | 5465 | ret = map->sub_stripes; |
|---|
| .. | .. |
|---|
| 5147 | 5478 | ret = 1; |
|---|
| 5148 | 5479 | free_extent_map(em); |
|---|
| 5149 | 5480 | |
|---|
| 5150 | | - btrfs_dev_replace_read_lock(&fs_info->dev_replace); |
|---|
| 5481 | + down_read(&fs_info->dev_replace.rwsem); |
|---|
| 5151 | 5482 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) && |
|---|
| 5152 | 5483 | fs_info->dev_replace.tgtdev) |
|---|
| 5153 | 5484 | ret++; |
|---|
| 5154 | | - btrfs_dev_replace_read_unlock(&fs_info->dev_replace); |
|---|
| 5485 | + up_read(&fs_info->dev_replace.rwsem); |
|---|
| 5155 | 5486 | |
|---|
| 5156 | 5487 | return ret; |
|---|
| 5157 | 5488 | } |
|---|
| .. | .. |
|---|
| 5163 | 5494 | struct map_lookup *map; |
|---|
| 5164 | 5495 | unsigned long len = fs_info->sectorsize; |
|---|
| 5165 | 5496 | |
|---|
| 5166 | | - em = get_chunk_map(fs_info, logical, len); |
|---|
| 5497 | + em = btrfs_get_chunk_map(fs_info, logical, len); |
|---|
| 5167 | 5498 | |
|---|
| 5168 | 5499 | if (!WARN_ON(IS_ERR(em))) { |
|---|
| 5169 | 5500 | map = em->map_lookup; |
|---|
| .. | .. |
|---|
| 5180 | 5511 | struct map_lookup *map; |
|---|
| 5181 | 5512 | int ret = 0; |
|---|
| 5182 | 5513 | |
|---|
| 5183 | | - em = get_chunk_map(fs_info, logical, len); |
|---|
| 5514 | + em = btrfs_get_chunk_map(fs_info, logical, len); |
|---|
| 5184 | 5515 | |
|---|
| 5185 | 5516 | if(!WARN_ON(IS_ERR(em))) { |
|---|
| 5186 | 5517 | map = em->map_lookup; |
|---|
| .. | .. |
|---|
| 5202 | 5533 | struct btrfs_device *srcdev; |
|---|
| 5203 | 5534 | |
|---|
| 5204 | 5535 | ASSERT((map->type & |
|---|
| 5205 | | - (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))); |
|---|
| 5536 | + (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10))); |
|---|
| 5206 | 5537 | |
|---|
| 5207 | 5538 | if (map->type & BTRFS_BLOCK_GROUP_RAID10) |
|---|
| 5208 | 5539 | num_stripes = map->sub_stripes; |
|---|
| .. | .. |
|---|
| 5240 | 5571 | return preferred_mirror; |
|---|
| 5241 | 5572 | } |
|---|
| 5242 | 5573 | |
|---|
| 5243 | | -static inline int parity_smaller(u64 a, u64 b) |
|---|
| 5244 | | -{ |
|---|
| 5245 | | - return a > b; |
|---|
| 5246 | | -} |
|---|
| 5247 | | - |
|---|
| 5248 | 5574 | /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ |
|---|
| 5249 | 5575 | static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) |
|---|
| 5250 | 5576 | { |
|---|
| 5251 | | - struct btrfs_bio_stripe s; |
|---|
| 5252 | 5577 | int i; |
|---|
| 5253 | | - u64 l; |
|---|
| 5254 | 5578 | int again = 1; |
|---|
| 5255 | 5579 | |
|---|
| 5256 | 5580 | while (again) { |
|---|
| 5257 | 5581 | again = 0; |
|---|
| 5258 | 5582 | for (i = 0; i < num_stripes - 1; i++) { |
|---|
| 5259 | | - if (parity_smaller(bbio->raid_map[i], |
|---|
| 5260 | | - bbio->raid_map[i+1])) { |
|---|
| 5261 | | - s = bbio->stripes[i]; |
|---|
| 5262 | | - l = bbio->raid_map[i]; |
|---|
| 5263 | | - bbio->stripes[i] = bbio->stripes[i+1]; |
|---|
| 5264 | | - bbio->raid_map[i] = bbio->raid_map[i+1]; |
|---|
| 5265 | | - bbio->stripes[i+1] = s; |
|---|
| 5266 | | - bbio->raid_map[i+1] = l; |
|---|
| 5267 | | - |
|---|
| 5583 | + /* Swap if parity is on a smaller index */ |
|---|
| 5584 | + if (bbio->raid_map[i] > bbio->raid_map[i + 1]) { |
|---|
| 5585 | + swap(bbio->stripes[i], bbio->stripes[i + 1]); |
|---|
| 5586 | + swap(bbio->raid_map[i], bbio->raid_map[i + 1]); |
|---|
| 5268 | 5587 | again = 1; |
|---|
| 5269 | 5588 | } |
|---|
| 5270 | 5589 | } |
|---|
| .. | .. |
|---|
| 5290 | 5609 | atomic_set(&bbio->error, 0); |
|---|
| 5291 | 5610 | refcount_set(&bbio->refs, 1); |
|---|
| 5292 | 5611 | |
|---|
| 5612 | + bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes); |
|---|
| 5613 | + bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes); |
|---|
| 5614 | + |
|---|
| 5293 | 5615 | return bbio; |
|---|
| 5294 | 5616 | } |
|---|
| 5295 | 5617 | |
|---|
| .. | .. |
|---|
| 5313 | 5635 | * replace. |
|---|
| 5314 | 5636 | */ |
|---|
| 5315 | 5637 | static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, |
|---|
| 5316 | | - u64 logical, u64 length, |
|---|
| 5638 | + u64 logical, u64 *length_ret, |
|---|
| 5317 | 5639 | struct btrfs_bio **bbio_ret) |
|---|
| 5318 | 5640 | { |
|---|
| 5319 | 5641 | struct extent_map *em; |
|---|
| 5320 | 5642 | struct map_lookup *map; |
|---|
| 5321 | 5643 | struct btrfs_bio *bbio; |
|---|
| 5644 | + u64 length = *length_ret; |
|---|
| 5322 | 5645 | u64 offset; |
|---|
| 5323 | 5646 | u64 stripe_nr; |
|---|
| 5324 | 5647 | u64 stripe_nr_end; |
|---|
| .. | .. |
|---|
| 5339 | 5662 | /* discard always return a bbio */ |
|---|
| 5340 | 5663 | ASSERT(bbio_ret); |
|---|
| 5341 | 5664 | |
|---|
| 5342 | | - em = get_chunk_map(fs_info, logical, length); |
|---|
| 5665 | + em = btrfs_get_chunk_map(fs_info, logical, length); |
|---|
| 5343 | 5666 | if (IS_ERR(em)) |
|---|
| 5344 | 5667 | return PTR_ERR(em); |
|---|
| 5345 | 5668 | |
|---|
| .. | .. |
|---|
| 5351 | 5674 | } |
|---|
| 5352 | 5675 | |
|---|
| 5353 | 5676 | offset = logical - em->start; |
|---|
| 5354 | | - length = min_t(u64, em->len - offset, length); |
|---|
| 5677 | + length = min_t(u64, em->start + em->len - logical, length); |
|---|
| 5678 | + *length_ret = length; |
|---|
| 5355 | 5679 | |
|---|
| 5356 | 5680 | stripe_len = map->stripe_len; |
|---|
| 5357 | 5681 | /* |
|---|
| .. | .. |
|---|
| 5391 | 5715 | &remaining_stripes); |
|---|
| 5392 | 5716 | div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); |
|---|
| 5393 | 5717 | last_stripe *= sub_stripes; |
|---|
| 5394 | | - } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
|---|
| 5718 | + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | |
|---|
| 5395 | 5719 | BTRFS_BLOCK_GROUP_DUP)) { |
|---|
| 5396 | 5720 | num_stripes = map->num_stripes; |
|---|
| 5397 | 5721 | } else { |
|---|
| .. | .. |
|---|
| 5635 | 5959 | return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS); |
|---|
| 5636 | 5960 | } |
|---|
| 5637 | 5961 | |
|---|
| 5962 | +/* |
|---|
| 5963 | + * btrfs_get_io_geometry - calculates the geometry of a particular (address, len) |
|---|
| 5964 | + * tuple. This information is used to calculate how big a |
|---|
| 5965 | + * particular bio can get before it straddles a stripe. |
|---|
| 5966 | + * |
|---|
| 5967 | + * @fs_info - the filesystem |
|---|
| 5968 | + * @logical - address that we want to figure out the geometry of |
|---|
| 5969 | + * @len - the length of IO we are going to perform, starting at @logical |
|---|
| 5970 | + * @op - type of operation - write or read |
|---|
| 5971 | + * @io_geom - pointer used to return values |
|---|
| 5972 | + * |
|---|
| 5973 | + * Returns < 0 in case a chunk for the given logical address cannot be found, |
|---|
| 5974 | + * usually shouldn't happen unless @logical is corrupted, 0 otherwise. |
|---|
| 5975 | + */ |
|---|
| 5976 | +int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, |
|---|
| 5977 | + u64 logical, u64 len, struct btrfs_io_geometry *io_geom) |
|---|
| 5978 | +{ |
|---|
| 5979 | + struct extent_map *em; |
|---|
| 5980 | + struct map_lookup *map; |
|---|
| 5981 | + u64 offset; |
|---|
| 5982 | + u64 stripe_offset; |
|---|
| 5983 | + u64 stripe_nr; |
|---|
| 5984 | + u64 stripe_len; |
|---|
| 5985 | + u64 raid56_full_stripe_start = (u64)-1; |
|---|
| 5986 | + int data_stripes; |
|---|
| 5987 | + int ret = 0; |
|---|
| 5988 | + |
|---|
| 5989 | + ASSERT(op != BTRFS_MAP_DISCARD); |
|---|
| 5990 | + |
|---|
| 5991 | + em = btrfs_get_chunk_map(fs_info, logical, len); |
|---|
| 5992 | + if (IS_ERR(em)) |
|---|
| 5993 | + return PTR_ERR(em); |
|---|
| 5994 | + |
|---|
| 5995 | + map = em->map_lookup; |
|---|
| 5996 | + /* Offset of this logical address in the chunk */ |
|---|
| 5997 | + offset = logical - em->start; |
|---|
| 5998 | + /* Len of a stripe in a chunk */ |
|---|
| 5999 | + stripe_len = map->stripe_len; |
|---|
| 6000 | + /* Stripe where this block falls in */ |
|---|
| 6001 | + stripe_nr = div64_u64(offset, stripe_len); |
|---|
| 6002 | + /* Offset of stripe in the chunk */ |
|---|
| 6003 | + stripe_offset = stripe_nr * stripe_len; |
|---|
| 6004 | + if (offset < stripe_offset) { |
|---|
| 6005 | + btrfs_crit(fs_info, |
|---|
| 6006 | +"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu", |
|---|
| 6007 | + stripe_offset, offset, em->start, logical, stripe_len); |
|---|
| 6008 | + ret = -EINVAL; |
|---|
| 6009 | + goto out; |
|---|
| 6010 | + } |
|---|
| 6011 | + |
|---|
| 6012 | + /* stripe_offset is the offset of this block in its stripe */ |
|---|
| 6013 | + stripe_offset = offset - stripe_offset; |
|---|
| 6014 | + data_stripes = nr_data_stripes(map); |
|---|
| 6015 | + |
|---|
| 6016 | + if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { |
|---|
| 6017 | + u64 max_len = stripe_len - stripe_offset; |
|---|
| 6018 | + |
|---|
| 6019 | + /* |
|---|
| 6020 | + * In case of raid56, we need to know the stripe aligned start |
|---|
| 6021 | + */ |
|---|
| 6022 | + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
|---|
| 6023 | + unsigned long full_stripe_len = stripe_len * data_stripes; |
|---|
| 6024 | + raid56_full_stripe_start = offset; |
|---|
| 6025 | + |
|---|
| 6026 | + /* |
|---|
| 6027 | + * Allow a write of a full stripe, but make sure we |
|---|
| 6028 | + * don't allow straddling of stripes |
|---|
| 6029 | + */ |
|---|
| 6030 | + raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, |
|---|
| 6031 | + full_stripe_len); |
|---|
| 6032 | + raid56_full_stripe_start *= full_stripe_len; |
|---|
| 6033 | + |
|---|
| 6034 | + /* |
|---|
| 6035 | + * For writes to RAID[56], allow a full stripeset across |
|---|
| 6036 | + * all disks. For other RAID types and for RAID[56] |
|---|
| 6037 | + * reads, just allow a single stripe (on a single disk). |
|---|
| 6038 | + */ |
|---|
| 6039 | + if (op == BTRFS_MAP_WRITE) { |
|---|
| 6040 | + max_len = stripe_len * data_stripes - |
|---|
| 6041 | + (offset - raid56_full_stripe_start); |
|---|
| 6042 | + } |
|---|
| 6043 | + } |
|---|
| 6044 | + len = min_t(u64, em->len - offset, max_len); |
|---|
| 6045 | + } else { |
|---|
| 6046 | + len = em->len - offset; |
|---|
| 6047 | + } |
|---|
| 6048 | + |
|---|
| 6049 | + io_geom->len = len; |
|---|
| 6050 | + io_geom->offset = offset; |
|---|
| 6051 | + io_geom->stripe_len = stripe_len; |
|---|
| 6052 | + io_geom->stripe_nr = stripe_nr; |
|---|
| 6053 | + io_geom->stripe_offset = stripe_offset; |
|---|
| 6054 | + io_geom->raid56_stripe_offset = raid56_full_stripe_start; |
|---|
| 6055 | + |
|---|
| 6056 | +out: |
|---|
| 6057 | + /* once for us */ |
|---|
| 6058 | + free_extent_map(em); |
|---|
| 6059 | + return ret; |
|---|
| 6060 | +} |
|---|
| 6061 | + |
|---|
| 5638 | 6062 | static int __btrfs_map_block(struct btrfs_fs_info *fs_info, |
|---|
| 5639 | 6063 | enum btrfs_map_op op, |
|---|
| 5640 | 6064 | u64 logical, u64 *length, |
|---|
| .. | .. |
|---|
| 5643 | 6067 | { |
|---|
| 5644 | 6068 | struct extent_map *em; |
|---|
| 5645 | 6069 | struct map_lookup *map; |
|---|
| 5646 | | - u64 offset; |
|---|
| 5647 | 6070 | u64 stripe_offset; |
|---|
| 5648 | 6071 | u64 stripe_nr; |
|---|
| 5649 | 6072 | u64 stripe_len; |
|---|
| 5650 | 6073 | u32 stripe_index; |
|---|
| 6074 | + int data_stripes; |
|---|
| 5651 | 6075 | int i; |
|---|
| 5652 | 6076 | int ret = 0; |
|---|
| 5653 | 6077 | int num_stripes; |
|---|
| .. | .. |
|---|
| 5660 | 6084 | int patch_the_first_stripe_for_dev_replace = 0; |
|---|
| 5661 | 6085 | u64 physical_to_patch_in_first_stripe = 0; |
|---|
| 5662 | 6086 | u64 raid56_full_stripe_start = (u64)-1; |
|---|
| 6087 | + struct btrfs_io_geometry geom; |
|---|
| 5663 | 6088 | |
|---|
| 5664 | | - if (op == BTRFS_MAP_DISCARD) |
|---|
| 5665 | | - return __btrfs_map_block_for_discard(fs_info, logical, |
|---|
| 5666 | | - *length, bbio_ret); |
|---|
| 6089 | + ASSERT(bbio_ret); |
|---|
| 6090 | + ASSERT(op != BTRFS_MAP_DISCARD); |
|---|
| 5667 | 6091 | |
|---|
| 5668 | | - em = get_chunk_map(fs_info, logical, *length); |
|---|
| 5669 | | - if (IS_ERR(em)) |
|---|
| 5670 | | - return PTR_ERR(em); |
|---|
| 6092 | + ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom); |
|---|
| 6093 | + if (ret < 0) |
|---|
| 6094 | + return ret; |
|---|
| 5671 | 6095 | |
|---|
| 6096 | + em = btrfs_get_chunk_map(fs_info, logical, *length); |
|---|
| 6097 | + ASSERT(!IS_ERR(em)); |
|---|
| 5672 | 6098 | map = em->map_lookup; |
|---|
| 5673 | | - offset = logical - em->start; |
|---|
| 5674 | 6099 | |
|---|
| 5675 | | - stripe_len = map->stripe_len; |
|---|
| 5676 | | - stripe_nr = offset; |
|---|
| 5677 | | - /* |
|---|
| 5678 | | - * stripe_nr counts the total number of stripes we have to stride |
|---|
| 5679 | | - * to get to this block |
|---|
| 5680 | | - */ |
|---|
| 5681 | | - stripe_nr = div64_u64(stripe_nr, stripe_len); |
|---|
| 6100 | + *length = geom.len; |
|---|
| 6101 | + stripe_len = geom.stripe_len; |
|---|
| 6102 | + stripe_nr = geom.stripe_nr; |
|---|
| 6103 | + stripe_offset = geom.stripe_offset; |
|---|
| 6104 | + raid56_full_stripe_start = geom.raid56_stripe_offset; |
|---|
| 6105 | + data_stripes = nr_data_stripes(map); |
|---|
| 5682 | 6106 | |
|---|
| 5683 | | - stripe_offset = stripe_nr * stripe_len; |
|---|
| 5684 | | - if (offset < stripe_offset) { |
|---|
| 5685 | | - btrfs_crit(fs_info, |
|---|
| 5686 | | - "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu", |
|---|
| 5687 | | - stripe_offset, offset, em->start, logical, |
|---|
| 5688 | | - stripe_len); |
|---|
| 5689 | | - free_extent_map(em); |
|---|
| 5690 | | - return -EINVAL; |
|---|
| 5691 | | - } |
|---|
| 5692 | | - |
|---|
| 5693 | | - /* stripe_offset is the offset of this block in its stripe*/ |
|---|
| 5694 | | - stripe_offset = offset - stripe_offset; |
|---|
| 5695 | | - |
|---|
| 5696 | | - /* if we're here for raid56, we need to know the stripe aligned start */ |
|---|
| 5697 | | - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
|---|
| 5698 | | - unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); |
|---|
| 5699 | | - raid56_full_stripe_start = offset; |
|---|
| 5700 | | - |
|---|
| 5701 | | - /* allow a write of a full stripe, but make sure we don't |
|---|
| 5702 | | - * allow straddling of stripes |
|---|
| 5703 | | - */ |
|---|
| 5704 | | - raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, |
|---|
| 5705 | | - full_stripe_len); |
|---|
| 5706 | | - raid56_full_stripe_start *= full_stripe_len; |
|---|
| 5707 | | - } |
|---|
| 5708 | | - |
|---|
| 5709 | | - if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { |
|---|
| 5710 | | - u64 max_len; |
|---|
| 5711 | | - /* For writes to RAID[56], allow a full stripeset across all disks. |
|---|
| 5712 | | - For other RAID types and for RAID[56] reads, just allow a single |
|---|
| 5713 | | - stripe (on a single disk). */ |
|---|
| 5714 | | - if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && |
|---|
| 5715 | | - (op == BTRFS_MAP_WRITE)) { |
|---|
| 5716 | | - max_len = stripe_len * nr_data_stripes(map) - |
|---|
| 5717 | | - (offset - raid56_full_stripe_start); |
|---|
| 5718 | | - } else { |
|---|
| 5719 | | - /* we limit the length of each bio to what fits in a stripe */ |
|---|
| 5720 | | - max_len = stripe_len - stripe_offset; |
|---|
| 5721 | | - } |
|---|
| 5722 | | - *length = min_t(u64, em->len - offset, max_len); |
|---|
| 5723 | | - } else { |
|---|
| 5724 | | - *length = em->len - offset; |
|---|
| 5725 | | - } |
|---|
| 5726 | | - |
|---|
| 5727 | | - /* This is for when we're called from btrfs_merge_bio_hook() and all |
|---|
| 5728 | | - it cares about is the length */ |
|---|
| 5729 | | - if (!bbio_ret) |
|---|
| 5730 | | - goto out; |
|---|
| 5731 | | - |
|---|
| 5732 | | - btrfs_dev_replace_read_lock(dev_replace); |
|---|
| 6107 | + down_read(&dev_replace->rwsem); |
|---|
| 5733 | 6108 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); |
|---|
| 6109 | + /* |
|---|
| 6110 | + * Hold the semaphore for read during the whole operation, write is |
|---|
| 6111 | + * requested at commit time but must wait. |
|---|
| 6112 | + */ |
|---|
| 5734 | 6113 | if (!dev_replace_is_ongoing) |
|---|
| 5735 | | - btrfs_dev_replace_read_unlock(dev_replace); |
|---|
| 5736 | | - else |
|---|
| 5737 | | - btrfs_dev_replace_set_lock_blocking(dev_replace); |
|---|
| 6114 | + up_read(&dev_replace->rwsem); |
|---|
| 5738 | 6115 | |
|---|
| 5739 | 6116 | if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && |
|---|
| 5740 | 6117 | !need_full_stripe(op) && dev_replace->tgtdev != NULL) { |
|---|
| .. | .. |
|---|
| 5757 | 6134 | &stripe_index); |
|---|
| 5758 | 6135 | if (!need_full_stripe(op)) |
|---|
| 5759 | 6136 | mirror_num = 1; |
|---|
| 5760 | | - } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
|---|
| 6137 | + } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) { |
|---|
| 5761 | 6138 | if (need_full_stripe(op)) |
|---|
| 5762 | 6139 | num_stripes = map->num_stripes; |
|---|
| 5763 | 6140 | else if (mirror_num) |
|---|
| .. | .. |
|---|
| 5799 | 6176 | if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { |
|---|
| 5800 | 6177 | /* push stripe_nr back to the start of the full stripe */ |
|---|
| 5801 | 6178 | stripe_nr = div64_u64(raid56_full_stripe_start, |
|---|
| 5802 | | - stripe_len * nr_data_stripes(map)); |
|---|
| 6179 | + stripe_len * data_stripes); |
|---|
| 5803 | 6180 | |
|---|
| 5804 | 6181 | /* RAID[56] write or recovery. Return all stripes */ |
|---|
| 5805 | 6182 | num_stripes = map->num_stripes; |
|---|
| .. | .. |
|---|
| 5815 | 6192 | * Mirror #3 is RAID6 Q block. |
|---|
| 5816 | 6193 | */ |
|---|
| 5817 | 6194 | stripe_nr = div_u64_rem(stripe_nr, |
|---|
| 5818 | | - nr_data_stripes(map), &stripe_index); |
|---|
| 6195 | + data_stripes, &stripe_index); |
|---|
| 5819 | 6196 | if (mirror_num > 1) |
|---|
| 5820 | | - stripe_index = nr_data_stripes(map) + |
|---|
| 5821 | | - mirror_num - 2; |
|---|
| 6197 | + stripe_index = data_stripes + mirror_num - 2; |
|---|
| 5822 | 6198 | |
|---|
| 5823 | 6199 | /* We distribute the parity blocks across stripes */ |
|---|
| 5824 | 6200 | div_u64_rem(stripe_nr + stripe_index, map->num_stripes, |
|---|
| .. | .. |
|---|
| 5858 | 6234 | ret = -ENOMEM; |
|---|
| 5859 | 6235 | goto out; |
|---|
| 5860 | 6236 | } |
|---|
| 5861 | | - if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) |
|---|
| 5862 | | - bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); |
|---|
| 6237 | + |
|---|
| 6238 | + for (i = 0; i < num_stripes; i++) { |
|---|
| 6239 | + bbio->stripes[i].physical = map->stripes[stripe_index].physical + |
|---|
| 6240 | + stripe_offset + stripe_nr * map->stripe_len; |
|---|
| 6241 | + bbio->stripes[i].dev = map->stripes[stripe_index].dev; |
|---|
| 6242 | + stripe_index++; |
|---|
| 6243 | + } |
|---|
| 5863 | 6244 | |
|---|
| 5864 | 6245 | /* build raid_map */ |
|---|
| 5865 | 6246 | if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && |
|---|
| .. | .. |
|---|
| 5867 | 6248 | u64 tmp; |
|---|
| 5868 | 6249 | unsigned rot; |
|---|
| 5869 | 6250 | |
|---|
| 5870 | | - bbio->raid_map = (u64 *)((void *)bbio->stripes + |
|---|
| 5871 | | - sizeof(struct btrfs_bio_stripe) * |
|---|
| 5872 | | - num_alloc_stripes + |
|---|
| 5873 | | - sizeof(int) * tgtdev_indexes); |
|---|
| 5874 | | - |
|---|
| 5875 | 6251 | /* Work out the disk rotation on this stripe-set */ |
|---|
| 5876 | 6252 | div_u64_rem(stripe_nr, num_stripes, &rot); |
|---|
| 5877 | 6253 | |
|---|
| 5878 | 6254 | /* Fill in the logical address of each stripe */ |
|---|
| 5879 | | - tmp = stripe_nr * nr_data_stripes(map); |
|---|
| 5880 | | - for (i = 0; i < nr_data_stripes(map); i++) |
|---|
| 6255 | + tmp = stripe_nr * data_stripes; |
|---|
| 6256 | + for (i = 0; i < data_stripes; i++) |
|---|
| 5881 | 6257 | bbio->raid_map[(i+rot) % num_stripes] = |
|---|
| 5882 | 6258 | em->start + (tmp + i) * map->stripe_len; |
|---|
| 5883 | 6259 | |
|---|
| .. | .. |
|---|
| 5885 | 6261 | if (map->type & BTRFS_BLOCK_GROUP_RAID6) |
|---|
| 5886 | 6262 | bbio->raid_map[(i+rot+1) % num_stripes] = |
|---|
| 5887 | 6263 | RAID6_Q_STRIPE; |
|---|
| 5888 | | - } |
|---|
| 5889 | 6264 | |
|---|
| 5890 | | - |
|---|
| 5891 | | - for (i = 0; i < num_stripes; i++) { |
|---|
| 5892 | | - bbio->stripes[i].physical = |
|---|
| 5893 | | - map->stripes[stripe_index].physical + |
|---|
| 5894 | | - stripe_offset + |
|---|
| 5895 | | - stripe_nr * map->stripe_len; |
|---|
| 5896 | | - bbio->stripes[i].dev = |
|---|
| 5897 | | - map->stripes[stripe_index].dev; |
|---|
| 5898 | | - stripe_index++; |
|---|
| 6265 | + sort_parity_stripes(bbio, num_stripes); |
|---|
| 5899 | 6266 | } |
|---|
| 5900 | 6267 | |
|---|
| 5901 | 6268 | if (need_full_stripe(op)) |
|---|
| 5902 | 6269 | max_errors = btrfs_chunk_max_errors(map); |
|---|
| 5903 | | - |
|---|
| 5904 | | - if (bbio->raid_map) |
|---|
| 5905 | | - sort_parity_stripes(bbio, num_stripes); |
|---|
| 5906 | 6270 | |
|---|
| 5907 | 6271 | if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && |
|---|
| 5908 | 6272 | need_full_stripe(op)) { |
|---|
| .. | .. |
|---|
| 5929 | 6293 | } |
|---|
| 5930 | 6294 | out: |
|---|
| 5931 | 6295 | if (dev_replace_is_ongoing) { |
|---|
| 5932 | | - btrfs_dev_replace_clear_lock_blocking(dev_replace); |
|---|
| 5933 | | - btrfs_dev_replace_read_unlock(dev_replace); |
|---|
| 6296 | + lockdep_assert_held(&dev_replace->rwsem); |
|---|
| 6297 | + /* Unlock and let waiting writers proceed */ |
|---|
| 6298 | + up_read(&dev_replace->rwsem); |
|---|
| 5934 | 6299 | } |
|---|
| 5935 | 6300 | free_extent_map(em); |
|---|
| 5936 | 6301 | return ret; |
|---|
| .. | .. |
|---|
| 5940 | 6305 | u64 logical, u64 *length, |
|---|
| 5941 | 6306 | struct btrfs_bio **bbio_ret, int mirror_num) |
|---|
| 5942 | 6307 | { |
|---|
| 6308 | + if (op == BTRFS_MAP_DISCARD) |
|---|
| 6309 | + return __btrfs_map_block_for_discard(fs_info, logical, |
|---|
| 6310 | + length, bbio_ret); |
|---|
| 6311 | + |
|---|
| 5943 | 6312 | return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, |
|---|
| 5944 | 6313 | mirror_num, 0); |
|---|
| 5945 | 6314 | } |
|---|
| .. | .. |
|---|
| 5950 | 6319 | struct btrfs_bio **bbio_ret) |
|---|
| 5951 | 6320 | { |
|---|
| 5952 | 6321 | return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1); |
|---|
| 5953 | | -} |
|---|
| 5954 | | - |
|---|
| 5955 | | -int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, |
|---|
| 5956 | | - u64 physical, u64 **logical, int *naddrs, int *stripe_len) |
|---|
| 5957 | | -{ |
|---|
| 5958 | | - struct extent_map *em; |
|---|
| 5959 | | - struct map_lookup *map; |
|---|
| 5960 | | - u64 *buf; |
|---|
| 5961 | | - u64 bytenr; |
|---|
| 5962 | | - u64 length; |
|---|
| 5963 | | - u64 stripe_nr; |
|---|
| 5964 | | - u64 rmap_len; |
|---|
| 5965 | | - int i, j, nr = 0; |
|---|
| 5966 | | - |
|---|
| 5967 | | - em = get_chunk_map(fs_info, chunk_start, 1); |
|---|
| 5968 | | - if (IS_ERR(em)) |
|---|
| 5969 | | - return -EIO; |
|---|
| 5970 | | - |
|---|
| 5971 | | - map = em->map_lookup; |
|---|
| 5972 | | - length = em->len; |
|---|
| 5973 | | - rmap_len = map->stripe_len; |
|---|
| 5974 | | - |
|---|
| 5975 | | - if (map->type & BTRFS_BLOCK_GROUP_RAID10) |
|---|
| 5976 | | - length = div_u64(length, map->num_stripes / map->sub_stripes); |
|---|
| 5977 | | - else if (map->type & BTRFS_BLOCK_GROUP_RAID0) |
|---|
| 5978 | | - length = div_u64(length, map->num_stripes); |
|---|
| 5979 | | - else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
|---|
| 5980 | | - length = div_u64(length, nr_data_stripes(map)); |
|---|
| 5981 | | - rmap_len = map->stripe_len * nr_data_stripes(map); |
|---|
| 5982 | | - } |
|---|
| 5983 | | - |
|---|
| 5984 | | - buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); |
|---|
| 5985 | | - BUG_ON(!buf); /* -ENOMEM */ |
|---|
| 5986 | | - |
|---|
| 5987 | | - for (i = 0; i < map->num_stripes; i++) { |
|---|
| 5988 | | - if (map->stripes[i].physical > physical || |
|---|
| 5989 | | - map->stripes[i].physical + length <= physical) |
|---|
| 5990 | | - continue; |
|---|
| 5991 | | - |
|---|
| 5992 | | - stripe_nr = physical - map->stripes[i].physical; |
|---|
| 5993 | | - stripe_nr = div64_u64(stripe_nr, map->stripe_len); |
|---|
| 5994 | | - |
|---|
| 5995 | | - if (map->type & BTRFS_BLOCK_GROUP_RAID10) { |
|---|
| 5996 | | - stripe_nr = stripe_nr * map->num_stripes + i; |
|---|
| 5997 | | - stripe_nr = div_u64(stripe_nr, map->sub_stripes); |
|---|
| 5998 | | - } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
|---|
| 5999 | | - stripe_nr = stripe_nr * map->num_stripes + i; |
|---|
| 6000 | | - } /* else if RAID[56], multiply by nr_data_stripes(). |
|---|
| 6001 | | - * Alternatively, just use rmap_len below instead of |
|---|
| 6002 | | - * map->stripe_len */ |
|---|
| 6003 | | - |
|---|
| 6004 | | - bytenr = chunk_start + stripe_nr * rmap_len; |
|---|
| 6005 | | - WARN_ON(nr >= map->num_stripes); |
|---|
| 6006 | | - for (j = 0; j < nr; j++) { |
|---|
| 6007 | | - if (buf[j] == bytenr) |
|---|
| 6008 | | - break; |
|---|
| 6009 | | - } |
|---|
| 6010 | | - if (j == nr) { |
|---|
| 6011 | | - WARN_ON(nr >= map->num_stripes); |
|---|
| 6012 | | - buf[nr++] = bytenr; |
|---|
| 6013 | | - } |
|---|
| 6014 | | - } |
|---|
| 6015 | | - |
|---|
| 6016 | | - *logical = buf; |
|---|
| 6017 | | - *naddrs = nr; |
|---|
| 6018 | | - *stripe_len = rmap_len; |
|---|
| 6019 | | - |
|---|
| 6020 | | - free_extent_map(em); |
|---|
| 6021 | | - return 0; |
|---|
| 6022 | 6322 | } |
|---|
| 6023 | 6323 | |
|---|
| 6024 | 6324 | static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio) |
|---|
| .. | .. |
|---|
| 6039 | 6339 | atomic_inc(&bbio->error); |
|---|
| 6040 | 6340 | if (bio->bi_status == BLK_STS_IOERR || |
|---|
| 6041 | 6341 | bio->bi_status == BLK_STS_TARGET) { |
|---|
| 6042 | | - unsigned int stripe_index = |
|---|
| 6043 | | - btrfs_io_bio(bio)->stripe_index; |
|---|
| 6044 | | - struct btrfs_device *dev; |
|---|
| 6342 | + struct btrfs_device *dev = btrfs_io_bio(bio)->device; |
|---|
| 6045 | 6343 | |
|---|
| 6046 | | - BUG_ON(stripe_index >= bbio->num_stripes); |
|---|
| 6047 | | - dev = bbio->stripes[stripe_index].dev; |
|---|
| 6048 | | - if (dev->bdev) { |
|---|
| 6049 | | - if (bio_op(bio) == REQ_OP_WRITE) |
|---|
| 6050 | | - btrfs_dev_stat_inc_and_print(dev, |
|---|
| 6344 | + ASSERT(dev->bdev); |
|---|
| 6345 | + if (bio_op(bio) == REQ_OP_WRITE) |
|---|
| 6346 | + btrfs_dev_stat_inc_and_print(dev, |
|---|
| 6051 | 6347 | BTRFS_DEV_STAT_WRITE_ERRS); |
|---|
| 6052 | | - else if (!(bio->bi_opf & REQ_RAHEAD)) |
|---|
| 6053 | | - btrfs_dev_stat_inc_and_print(dev, |
|---|
| 6348 | + else if (!(bio->bi_opf & REQ_RAHEAD)) |
|---|
| 6349 | + btrfs_dev_stat_inc_and_print(dev, |
|---|
| 6054 | 6350 | BTRFS_DEV_STAT_READ_ERRS); |
|---|
| 6055 | | - if (bio->bi_opf & REQ_PREFLUSH) |
|---|
| 6056 | | - btrfs_dev_stat_inc_and_print(dev, |
|---|
| 6351 | + if (bio->bi_opf & REQ_PREFLUSH) |
|---|
| 6352 | + btrfs_dev_stat_inc_and_print(dev, |
|---|
| 6057 | 6353 | BTRFS_DEV_STAT_FLUSH_ERRS); |
|---|
| 6058 | | - } |
|---|
| 6059 | 6354 | } |
|---|
| 6060 | 6355 | } |
|---|
| 6061 | 6356 | |
|---|
| .. | .. |
|---|
| 6090 | 6385 | } |
|---|
| 6091 | 6386 | } |
|---|
| 6092 | 6387 | |
|---|
| 6093 | | -/* |
|---|
| 6094 | | - * see run_scheduled_bios for a description of why bios are collected for |
|---|
| 6095 | | - * async submit. |
|---|
| 6096 | | - * |
|---|
| 6097 | | - * This will add one bio to the pending list for a device and make sure |
|---|
| 6098 | | - * the work struct is scheduled. |
|---|
| 6099 | | - */ |
|---|
| 6100 | | -static noinline void btrfs_schedule_bio(struct btrfs_device *device, |
|---|
| 6101 | | - struct bio *bio) |
|---|
| 6102 | | -{ |
|---|
| 6103 | | - struct btrfs_fs_info *fs_info = device->fs_info; |
|---|
| 6104 | | - int should_queue = 1; |
|---|
| 6105 | | - struct btrfs_pending_bios *pending_bios; |
|---|
| 6106 | | - |
|---|
| 6107 | | - /* don't bother with additional async steps for reads, right now */ |
|---|
| 6108 | | - if (bio_op(bio) == REQ_OP_READ) { |
|---|
| 6109 | | - btrfsic_submit_bio(bio); |
|---|
| 6110 | | - return; |
|---|
| 6111 | | - } |
|---|
| 6112 | | - |
|---|
| 6113 | | - WARN_ON(bio->bi_next); |
|---|
| 6114 | | - bio->bi_next = NULL; |
|---|
| 6115 | | - |
|---|
| 6116 | | - spin_lock(&device->io_lock); |
|---|
| 6117 | | - if (op_is_sync(bio->bi_opf)) |
|---|
| 6118 | | - pending_bios = &device->pending_sync_bios; |
|---|
| 6119 | | - else |
|---|
| 6120 | | - pending_bios = &device->pending_bios; |
|---|
| 6121 | | - |
|---|
| 6122 | | - if (pending_bios->tail) |
|---|
| 6123 | | - pending_bios->tail->bi_next = bio; |
|---|
| 6124 | | - |
|---|
| 6125 | | - pending_bios->tail = bio; |
|---|
| 6126 | | - if (!pending_bios->head) |
|---|
| 6127 | | - pending_bios->head = bio; |
|---|
| 6128 | | - if (device->running_pending) |
|---|
| 6129 | | - should_queue = 0; |
|---|
| 6130 | | - |
|---|
| 6131 | | - spin_unlock(&device->io_lock); |
|---|
| 6132 | | - |
|---|
| 6133 | | - if (should_queue) |
|---|
| 6134 | | - btrfs_queue_work(fs_info->submit_workers, &device->work); |
|---|
| 6135 | | -} |
|---|
| 6136 | | - |
|---|
| 6137 | 6388 | static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, |
|---|
| 6138 | | - u64 physical, int dev_nr, int async) |
|---|
| 6389 | + u64 physical, struct btrfs_device *dev) |
|---|
| 6139 | 6390 | { |
|---|
| 6140 | | - struct btrfs_device *dev = bbio->stripes[dev_nr].dev; |
|---|
| 6141 | 6391 | struct btrfs_fs_info *fs_info = bbio->fs_info; |
|---|
| 6142 | 6392 | |
|---|
| 6143 | 6393 | bio->bi_private = bbio; |
|---|
| 6144 | | - btrfs_io_bio(bio)->stripe_index = dev_nr; |
|---|
| 6394 | + btrfs_io_bio(bio)->device = dev; |
|---|
| 6145 | 6395 | bio->bi_end_io = btrfs_end_bio; |
|---|
| 6146 | 6396 | bio->bi_iter.bi_sector = physical >> 9; |
|---|
| 6147 | 6397 | btrfs_debug_in_rcu(fs_info, |
|---|
| 6148 | 6398 | "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", |
|---|
| 6149 | 6399 | bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector, |
|---|
| 6150 | | - (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid, |
|---|
| 6151 | | - bio->bi_iter.bi_size); |
|---|
| 6400 | + (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name), |
|---|
| 6401 | + dev->devid, bio->bi_iter.bi_size); |
|---|
| 6152 | 6402 | bio_set_dev(bio, dev->bdev); |
|---|
| 6153 | 6403 | |
|---|
| 6154 | 6404 | btrfs_bio_counter_inc_noblocked(fs_info); |
|---|
| 6155 | 6405 | |
|---|
| 6156 | | - if (async) |
|---|
| 6157 | | - btrfs_schedule_bio(dev, bio); |
|---|
| 6158 | | - else |
|---|
| 6159 | | - btrfsic_submit_bio(bio); |
|---|
| 6406 | + btrfsic_submit_bio(bio); |
|---|
| 6160 | 6407 | } |
|---|
| 6161 | 6408 | |
|---|
| 6162 | 6409 | static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) |
|---|
| .. | .. |
|---|
| 6177 | 6424 | } |
|---|
| 6178 | 6425 | |
|---|
| 6179 | 6426 | blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, |
|---|
| 6180 | | - int mirror_num, int async_submit) |
|---|
| 6427 | + int mirror_num) |
|---|
| 6181 | 6428 | { |
|---|
| 6182 | 6429 | struct btrfs_device *dev; |
|---|
| 6183 | 6430 | struct bio *first_bio = bio; |
|---|
| .. | .. |
|---|
| 6245 | 6492 | else |
|---|
| 6246 | 6493 | bio = first_bio; |
|---|
| 6247 | 6494 | |
|---|
| 6248 | | - submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, |
|---|
| 6249 | | - dev_nr, async_submit); |
|---|
| 6495 | + submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev); |
|---|
| 6250 | 6496 | } |
|---|
| 6251 | 6497 | btrfs_bio_counter_dec(fs_info); |
|---|
| 6252 | 6498 | return BLK_STS_OK; |
|---|
| .. | .. |
|---|
| 6262 | 6508 | * If @seed is true, traverse through the seed devices. |
|---|
| 6263 | 6509 | */ |
|---|
| 6264 | 6510 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, |
|---|
| 6265 | | - u64 devid, u8 *uuid, u8 *fsid, |
|---|
| 6266 | | - bool seed) |
|---|
| 6511 | + u64 devid, u8 *uuid, u8 *fsid, |
|---|
| 6512 | + bool seed) |
|---|
| 6267 | 6513 | { |
|---|
| 6268 | 6514 | struct btrfs_device *device; |
|---|
| 6515 | + struct btrfs_fs_devices *seed_devs; |
|---|
| 6269 | 6516 | |
|---|
| 6270 | | - while (fs_devices) { |
|---|
| 6517 | + if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { |
|---|
| 6518 | + list_for_each_entry(device, &fs_devices->devices, dev_list) { |
|---|
| 6519 | + if (device->devid == devid && |
|---|
| 6520 | + (!uuid || memcmp(device->uuid, uuid, |
|---|
| 6521 | + BTRFS_UUID_SIZE) == 0)) |
|---|
| 6522 | + return device; |
|---|
| 6523 | + } |
|---|
| 6524 | + } |
|---|
| 6525 | + |
|---|
| 6526 | + list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { |
|---|
| 6271 | 6527 | if (!fsid || |
|---|
| 6272 | | - !memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE)) { |
|---|
| 6273 | | - list_for_each_entry(device, &fs_devices->devices, |
|---|
| 6528 | + !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { |
|---|
| 6529 | + list_for_each_entry(device, &seed_devs->devices, |
|---|
| 6274 | 6530 | dev_list) { |
|---|
| 6275 | 6531 | if (device->devid == devid && |
|---|
| 6276 | 6532 | (!uuid || memcmp(device->uuid, uuid, |
|---|
| .. | .. |
|---|
| 6278 | 6534 | return device; |
|---|
| 6279 | 6535 | } |
|---|
| 6280 | 6536 | } |
|---|
| 6281 | | - if (seed) |
|---|
| 6282 | | - fs_devices = fs_devices->seed; |
|---|
| 6283 | | - else |
|---|
| 6284 | | - return NULL; |
|---|
| 6285 | 6537 | } |
|---|
| 6538 | + |
|---|
| 6286 | 6539 | return NULL; |
|---|
| 6287 | 6540 | } |
|---|
| 6288 | 6541 | |
|---|
| .. | .. |
|---|
| 6337 | 6590 | if (WARN_ON(!devid && !fs_info)) |
|---|
| 6338 | 6591 | return ERR_PTR(-EINVAL); |
|---|
| 6339 | 6592 | |
|---|
| 6340 | | - dev = __alloc_device(); |
|---|
| 6593 | + dev = __alloc_device(fs_info); |
|---|
| 6341 | 6594 | if (IS_ERR(dev)) |
|---|
| 6342 | 6595 | return dev; |
|---|
| 6343 | 6596 | |
|---|
| .. | .. |
|---|
| 6359 | 6612 | else |
|---|
| 6360 | 6613 | generate_random_uuid(dev->uuid); |
|---|
| 6361 | 6614 | |
|---|
| 6362 | | - btrfs_init_work(&dev->work, btrfs_submit_helper, |
|---|
| 6363 | | - pending_bios_fn, NULL, NULL); |
|---|
| 6364 | | - |
|---|
| 6365 | 6615 | return dev; |
|---|
| 6366 | 6616 | } |
|---|
| 6367 | 6617 | |
|---|
| .. | .. |
|---|
| 6376 | 6626 | devid, uuid); |
|---|
| 6377 | 6627 | } |
|---|
| 6378 | 6628 | |
|---|
| 6379 | | -static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, |
|---|
| 6380 | | - struct extent_buffer *leaf, |
|---|
| 6629 | +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) |
|---|
| 6630 | +{ |
|---|
| 6631 | + int index = btrfs_bg_flags_to_raid_index(type); |
|---|
| 6632 | + int ncopies = btrfs_raid_array[index].ncopies; |
|---|
| 6633 | + const int nparity = btrfs_raid_array[index].nparity; |
|---|
| 6634 | + int data_stripes; |
|---|
| 6635 | + |
|---|
| 6636 | + if (nparity) |
|---|
| 6637 | + data_stripes = num_stripes - nparity; |
|---|
| 6638 | + else |
|---|
| 6639 | + data_stripes = num_stripes / ncopies; |
|---|
| 6640 | + |
|---|
| 6641 | + return div_u64(chunk_len, data_stripes); |
|---|
| 6642 | +} |
|---|
| 6643 | + |
|---|
| 6644 | +static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, |
|---|
| 6381 | 6645 | struct btrfs_chunk *chunk) |
|---|
| 6382 | 6646 | { |
|---|
| 6383 | | - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
|---|
| 6647 | + struct btrfs_fs_info *fs_info = leaf->fs_info; |
|---|
| 6648 | + struct extent_map_tree *map_tree = &fs_info->mapping_tree; |
|---|
| 6384 | 6649 | struct map_lookup *map; |
|---|
| 6385 | 6650 | struct extent_map *em; |
|---|
| 6386 | 6651 | u64 logical; |
|---|
| .. | .. |
|---|
| 6400 | 6665 | * as chunk item in tree block is already verified by tree-checker. |
|---|
| 6401 | 6666 | */ |
|---|
| 6402 | 6667 | if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { |
|---|
| 6403 | | - ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); |
|---|
| 6668 | + ret = btrfs_check_chunk_valid(leaf, chunk, logical); |
|---|
| 6404 | 6669 | if (ret) |
|---|
| 6405 | 6670 | return ret; |
|---|
| 6406 | 6671 | } |
|---|
| 6407 | 6672 | |
|---|
| 6408 | | - read_lock(&map_tree->map_tree.lock); |
|---|
| 6409 | | - em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); |
|---|
| 6410 | | - read_unlock(&map_tree->map_tree.lock); |
|---|
| 6673 | + read_lock(&map_tree->lock); |
|---|
| 6674 | + em = lookup_extent_mapping(map_tree, logical, 1); |
|---|
| 6675 | + read_unlock(&map_tree->lock); |
|---|
| 6411 | 6676 | |
|---|
| 6412 | 6677 | /* already mapped? */ |
|---|
| 6413 | 6678 | if (em && em->start <= logical && em->start + em->len > logical) { |
|---|
| .. | .. |
|---|
| 6441 | 6706 | map->type = btrfs_chunk_type(leaf, chunk); |
|---|
| 6442 | 6707 | map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); |
|---|
| 6443 | 6708 | map->verified_stripes = 0; |
|---|
| 6709 | + em->orig_block_len = calc_stripe_length(map->type, em->len, |
|---|
| 6710 | + map->num_stripes); |
|---|
| 6444 | 6711 | for (i = 0; i < num_stripes; i++) { |
|---|
| 6445 | 6712 | map->stripes[i].physical = |
|---|
| 6446 | 6713 | btrfs_stripe_offset_nr(leaf, chunk, i); |
|---|
| .. | .. |
|---|
| 6449 | 6716 | btrfs_stripe_dev_uuid_nr(chunk, i), |
|---|
| 6450 | 6717 | BTRFS_UUID_SIZE); |
|---|
| 6451 | 6718 | map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, |
|---|
| 6452 | | - devid, uuid, NULL, true); |
|---|
| 6719 | + devid, uuid, NULL, true); |
|---|
| 6453 | 6720 | if (!map->stripes[i].dev && |
|---|
| 6454 | 6721 | !btrfs_test_opt(fs_info, DEGRADED)) { |
|---|
| 6455 | 6722 | free_extent_map(em); |
|---|
| .. | .. |
|---|
| 6474 | 6741 | |
|---|
| 6475 | 6742 | } |
|---|
| 6476 | 6743 | |
|---|
| 6477 | | - write_lock(&map_tree->map_tree.lock); |
|---|
| 6478 | | - ret = add_extent_mapping(&map_tree->map_tree, em, 0); |
|---|
| 6479 | | - write_unlock(&map_tree->map_tree.lock); |
|---|
| 6744 | + write_lock(&map_tree->lock); |
|---|
| 6745 | + ret = add_extent_mapping(map_tree, em, 0); |
|---|
| 6746 | + write_unlock(&map_tree->lock); |
|---|
| 6480 | 6747 | if (ret < 0) { |
|---|
| 6481 | 6748 | btrfs_err(fs_info, |
|---|
| 6482 | 6749 | "failed to add chunk map, start=%llu len=%llu: %d", |
|---|
| .. | .. |
|---|
| 6519 | 6786 | lockdep_assert_held(&uuid_mutex); |
|---|
| 6520 | 6787 | ASSERT(fsid); |
|---|
| 6521 | 6788 | |
|---|
| 6522 | | - fs_devices = fs_info->fs_devices->seed; |
|---|
| 6523 | | - while (fs_devices) { |
|---|
| 6789 | + /* This will match only for multi-device seed fs */ |
|---|
| 6790 | + list_for_each_entry(fs_devices, &fs_info->fs_devices->seed_list, seed_list) |
|---|
| 6524 | 6791 | if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE)) |
|---|
| 6525 | 6792 | return fs_devices; |
|---|
| 6526 | 6793 | |
|---|
| 6527 | | - fs_devices = fs_devices->seed; |
|---|
| 6528 | | - } |
|---|
| 6529 | 6794 | |
|---|
| 6530 | | - fs_devices = find_fsid(fsid); |
|---|
| 6795 | + fs_devices = find_fsid(fsid, NULL); |
|---|
| 6531 | 6796 | if (!fs_devices) { |
|---|
| 6532 | 6797 | if (!btrfs_test_opt(fs_info, DEGRADED)) |
|---|
| 6533 | 6798 | return ERR_PTR(-ENOENT); |
|---|
| 6534 | 6799 | |
|---|
| 6535 | | - fs_devices = alloc_fs_devices(fsid); |
|---|
| 6800 | + fs_devices = alloc_fs_devices(fsid, NULL); |
|---|
| 6536 | 6801 | if (IS_ERR(fs_devices)) |
|---|
| 6537 | 6802 | return fs_devices; |
|---|
| 6538 | 6803 | |
|---|
| 6539 | | - fs_devices->seeding = 1; |
|---|
| 6804 | + fs_devices->seeding = true; |
|---|
| 6540 | 6805 | fs_devices->opened = 1; |
|---|
| 6541 | 6806 | return fs_devices; |
|---|
| 6542 | 6807 | } |
|---|
| 6543 | 6808 | |
|---|
| 6809 | + /* |
|---|
| 6810 | + * Upon first call for a seed fs fsid, just create a private copy of the |
|---|
| 6811 | + * respective fs_devices and anchor it at fs_info->fs_devices->seed_list |
|---|
| 6812 | + */ |
|---|
| 6544 | 6813 | fs_devices = clone_fs_devices(fs_devices); |
|---|
| 6545 | 6814 | if (IS_ERR(fs_devices)) |
|---|
| 6546 | 6815 | return fs_devices; |
|---|
| .. | .. |
|---|
| 6548 | 6817 | ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder); |
|---|
| 6549 | 6818 | if (ret) { |
|---|
| 6550 | 6819 | free_fs_devices(fs_devices); |
|---|
| 6551 | | - fs_devices = ERR_PTR(ret); |
|---|
| 6552 | | - goto out; |
|---|
| 6820 | + return ERR_PTR(ret); |
|---|
| 6553 | 6821 | } |
|---|
| 6554 | 6822 | |
|---|
| 6555 | 6823 | if (!fs_devices->seeding) { |
|---|
| 6556 | 6824 | close_fs_devices(fs_devices); |
|---|
| 6557 | 6825 | free_fs_devices(fs_devices); |
|---|
| 6558 | | - fs_devices = ERR_PTR(-EINVAL); |
|---|
| 6559 | | - goto out; |
|---|
| 6826 | + return ERR_PTR(-EINVAL); |
|---|
| 6560 | 6827 | } |
|---|
| 6561 | 6828 | |
|---|
| 6562 | | - fs_devices->seed = fs_info->fs_devices->seed; |
|---|
| 6563 | | - fs_info->fs_devices->seed = fs_devices; |
|---|
| 6564 | | -out: |
|---|
| 6829 | + list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list); |
|---|
| 6830 | + |
|---|
| 6565 | 6831 | return fs_devices; |
|---|
| 6566 | 6832 | } |
|---|
| 6567 | 6833 | |
|---|
| 6568 | | -static int read_one_dev(struct btrfs_fs_info *fs_info, |
|---|
| 6569 | | - struct extent_buffer *leaf, |
|---|
| 6834 | +static int read_one_dev(struct extent_buffer *leaf, |
|---|
| 6570 | 6835 | struct btrfs_dev_item *dev_item) |
|---|
| 6571 | 6836 | { |
|---|
| 6837 | + struct btrfs_fs_info *fs_info = leaf->fs_info; |
|---|
| 6572 | 6838 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 6573 | 6839 | struct btrfs_device *device; |
|---|
| 6574 | 6840 | u64 devid; |
|---|
| .. | .. |
|---|
| 6582 | 6848 | read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), |
|---|
| 6583 | 6849 | BTRFS_FSID_SIZE); |
|---|
| 6584 | 6850 | |
|---|
| 6585 | | - if (memcmp(fs_uuid, fs_info->fsid, BTRFS_FSID_SIZE)) { |
|---|
| 6851 | + if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) { |
|---|
| 6586 | 6852 | fs_devices = open_seed_devices(fs_info, fs_uuid); |
|---|
| 6587 | 6853 | if (IS_ERR(fs_devices)) |
|---|
| 6588 | 6854 | return PTR_ERR(fs_devices); |
|---|
| .. | .. |
|---|
| 6725 | 6991 | sb_array_offset += len; |
|---|
| 6726 | 6992 | cur_offset += len; |
|---|
| 6727 | 6993 | |
|---|
| 6728 | | - if (key.type == BTRFS_CHUNK_ITEM_KEY) { |
|---|
| 6729 | | - chunk = (struct btrfs_chunk *)sb_array_offset; |
|---|
| 6730 | | - /* |
|---|
| 6731 | | - * At least one btrfs_chunk with one stripe must be |
|---|
| 6732 | | - * present, exact stripe count check comes afterwards |
|---|
| 6733 | | - */ |
|---|
| 6734 | | - len = btrfs_chunk_item_size(1); |
|---|
| 6735 | | - if (cur_offset + len > array_size) |
|---|
| 6736 | | - goto out_short_read; |
|---|
| 6737 | | - |
|---|
| 6738 | | - num_stripes = btrfs_chunk_num_stripes(sb, chunk); |
|---|
| 6739 | | - if (!num_stripes) { |
|---|
| 6740 | | - btrfs_err(fs_info, |
|---|
| 6741 | | - "invalid number of stripes %u in sys_array at offset %u", |
|---|
| 6742 | | - num_stripes, cur_offset); |
|---|
| 6743 | | - ret = -EIO; |
|---|
| 6744 | | - break; |
|---|
| 6745 | | - } |
|---|
| 6746 | | - |
|---|
| 6747 | | - type = btrfs_chunk_type(sb, chunk); |
|---|
| 6748 | | - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { |
|---|
| 6749 | | - btrfs_err(fs_info, |
|---|
| 6750 | | - "invalid chunk type %llu in sys_array at offset %u", |
|---|
| 6751 | | - type, cur_offset); |
|---|
| 6752 | | - ret = -EIO; |
|---|
| 6753 | | - break; |
|---|
| 6754 | | - } |
|---|
| 6755 | | - |
|---|
| 6756 | | - len = btrfs_chunk_item_size(num_stripes); |
|---|
| 6757 | | - if (cur_offset + len > array_size) |
|---|
| 6758 | | - goto out_short_read; |
|---|
| 6759 | | - |
|---|
| 6760 | | - ret = read_one_chunk(fs_info, &key, sb, chunk); |
|---|
| 6761 | | - if (ret) |
|---|
| 6762 | | - break; |
|---|
| 6763 | | - } else { |
|---|
| 6994 | + if (key.type != BTRFS_CHUNK_ITEM_KEY) { |
|---|
| 6764 | 6995 | btrfs_err(fs_info, |
|---|
| 6765 | 6996 | "unexpected item type %u in sys_array at offset %u", |
|---|
| 6766 | 6997 | (u32)key.type, cur_offset); |
|---|
| 6767 | 6998 | ret = -EIO; |
|---|
| 6768 | 6999 | break; |
|---|
| 6769 | 7000 | } |
|---|
| 7001 | + |
|---|
| 7002 | + chunk = (struct btrfs_chunk *)sb_array_offset; |
|---|
| 7003 | + /* |
|---|
| 7004 | + * At least one btrfs_chunk with one stripe must be present, |
|---|
| 7005 | + * exact stripe count check comes afterwards |
|---|
| 7006 | + */ |
|---|
| 7007 | + len = btrfs_chunk_item_size(1); |
|---|
| 7008 | + if (cur_offset + len > array_size) |
|---|
| 7009 | + goto out_short_read; |
|---|
| 7010 | + |
|---|
| 7011 | + num_stripes = btrfs_chunk_num_stripes(sb, chunk); |
|---|
| 7012 | + if (!num_stripes) { |
|---|
| 7013 | + btrfs_err(fs_info, |
|---|
| 7014 | + "invalid number of stripes %u in sys_array at offset %u", |
|---|
| 7015 | + num_stripes, cur_offset); |
|---|
| 7016 | + ret = -EIO; |
|---|
| 7017 | + break; |
|---|
| 7018 | + } |
|---|
| 7019 | + |
|---|
| 7020 | + type = btrfs_chunk_type(sb, chunk); |
|---|
| 7021 | + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { |
|---|
| 7022 | + btrfs_err(fs_info, |
|---|
| 7023 | + "invalid chunk type %llu in sys_array at offset %u", |
|---|
| 7024 | + type, cur_offset); |
|---|
| 7025 | + ret = -EIO; |
|---|
| 7026 | + break; |
|---|
| 7027 | + } |
|---|
| 7028 | + |
|---|
| 7029 | + len = btrfs_chunk_item_size(num_stripes); |
|---|
| 7030 | + if (cur_offset + len > array_size) |
|---|
| 7031 | + goto out_short_read; |
|---|
| 7032 | + |
|---|
| 7033 | + ret = read_one_chunk(&key, sb, chunk); |
|---|
| 7034 | + if (ret) |
|---|
| 7035 | + break; |
|---|
| 7036 | + |
|---|
| 6770 | 7037 | array_ptr += len; |
|---|
| 6771 | 7038 | sb_array_offset += len; |
|---|
| 6772 | 7039 | cur_offset += len; |
|---|
| .. | .. |
|---|
| 6794 | 7061 | bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, |
|---|
| 6795 | 7062 | struct btrfs_device *failing_dev) |
|---|
| 6796 | 7063 | { |
|---|
| 6797 | | - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
|---|
| 7064 | + struct extent_map_tree *map_tree = &fs_info->mapping_tree; |
|---|
| 6798 | 7065 | struct extent_map *em; |
|---|
| 6799 | 7066 | u64 next_start = 0; |
|---|
| 6800 | 7067 | bool ret = true; |
|---|
| 6801 | 7068 | |
|---|
| 6802 | | - read_lock(&map_tree->map_tree.lock); |
|---|
| 6803 | | - em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1); |
|---|
| 6804 | | - read_unlock(&map_tree->map_tree.lock); |
|---|
| 7069 | + read_lock(&map_tree->lock); |
|---|
| 7070 | + em = lookup_extent_mapping(map_tree, 0, (u64)-1); |
|---|
| 7071 | + read_unlock(&map_tree->lock); |
|---|
| 6805 | 7072 | /* No chunk at all? Return false anyway */ |
|---|
| 6806 | 7073 | if (!em) { |
|---|
| 6807 | 7074 | ret = false; |
|---|
| .. | .. |
|---|
| 6830 | 7097 | if (missing > max_tolerated) { |
|---|
| 6831 | 7098 | if (!failing_dev) |
|---|
| 6832 | 7099 | btrfs_warn(fs_info, |
|---|
| 6833 | | - "chunk %llu missing %d devices, max tolerance is %d for writeable mount", |
|---|
| 7100 | + "chunk %llu missing %d devices, max tolerance is %d for writable mount", |
|---|
| 6834 | 7101 | em->start, missing, max_tolerated); |
|---|
| 6835 | 7102 | free_extent_map(em); |
|---|
| 6836 | 7103 | ret = false; |
|---|
| .. | .. |
|---|
| 6839 | 7106 | next_start = extent_map_end(em); |
|---|
| 6840 | 7107 | free_extent_map(em); |
|---|
| 6841 | 7108 | |
|---|
| 6842 | | - read_lock(&map_tree->map_tree.lock); |
|---|
| 6843 | | - em = lookup_extent_mapping(&map_tree->map_tree, next_start, |
|---|
| 7109 | + read_lock(&map_tree->lock); |
|---|
| 7110 | + em = lookup_extent_mapping(map_tree, next_start, |
|---|
| 6844 | 7111 | (u64)(-1) - next_start); |
|---|
| 6845 | | - read_unlock(&map_tree->map_tree.lock); |
|---|
| 7112 | + read_unlock(&map_tree->lock); |
|---|
| 6846 | 7113 | } |
|---|
| 6847 | 7114 | out: |
|---|
| 6848 | 7115 | return ret; |
|---|
| 7116 | +} |
|---|
| 7117 | + |
|---|
| 7118 | +static void readahead_tree_node_children(struct extent_buffer *node) |
|---|
| 7119 | +{ |
|---|
| 7120 | + int i; |
|---|
| 7121 | + const int nr_items = btrfs_header_nritems(node); |
|---|
| 7122 | + |
|---|
| 7123 | + for (i = 0; i < nr_items; i++) { |
|---|
| 7124 | + u64 start; |
|---|
| 7125 | + |
|---|
| 7126 | + start = btrfs_node_blockptr(node, i); |
|---|
| 7127 | + readahead_tree_block(node->fs_info, start); |
|---|
| 7128 | + } |
|---|
| 6849 | 7129 | } |
|---|
| 6850 | 7130 | |
|---|
| 6851 | 7131 | int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) |
|---|
| .. | .. |
|---|
| 6858 | 7138 | int ret; |
|---|
| 6859 | 7139 | int slot; |
|---|
| 6860 | 7140 | u64 total_dev = 0; |
|---|
| 7141 | + u64 last_ra_node = 0; |
|---|
| 6861 | 7142 | |
|---|
| 6862 | 7143 | path = btrfs_alloc_path(); |
|---|
| 6863 | 7144 | if (!path) |
|---|
| .. | .. |
|---|
| 6868 | 7149 | * otherwise we don't need it. |
|---|
| 6869 | 7150 | */ |
|---|
| 6870 | 7151 | mutex_lock(&uuid_mutex); |
|---|
| 6871 | | - mutex_lock(&fs_info->chunk_mutex); |
|---|
| 6872 | 7152 | |
|---|
| 6873 | 7153 | /* |
|---|
| 6874 | 7154 | * It is possible for mount and umount to race in such a way that |
|---|
| .. | .. |
|---|
| 6891 | 7171 | if (ret < 0) |
|---|
| 6892 | 7172 | goto error; |
|---|
| 6893 | 7173 | while (1) { |
|---|
| 7174 | + struct extent_buffer *node; |
|---|
| 7175 | + |
|---|
| 6894 | 7176 | leaf = path->nodes[0]; |
|---|
| 6895 | 7177 | slot = path->slots[0]; |
|---|
| 6896 | 7178 | if (slot >= btrfs_header_nritems(leaf)) { |
|---|
| .. | .. |
|---|
| 6901 | 7183 | goto error; |
|---|
| 6902 | 7184 | break; |
|---|
| 6903 | 7185 | } |
|---|
| 7186 | + /* |
|---|
| 7187 | + * The nodes on level 1 are not locked but we don't need to do |
|---|
| 7188 | + * that during mount time as nothing else can access the tree |
|---|
| 7189 | + */ |
|---|
| 7190 | + node = path->nodes[1]; |
|---|
| 7191 | + if (node) { |
|---|
| 7192 | + if (last_ra_node != node->start) { |
|---|
| 7193 | + readahead_tree_node_children(node); |
|---|
| 7194 | + last_ra_node = node->start; |
|---|
| 7195 | + } |
|---|
| 7196 | + } |
|---|
| 6904 | 7197 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
|---|
| 6905 | 7198 | if (found_key.type == BTRFS_DEV_ITEM_KEY) { |
|---|
| 6906 | 7199 | struct btrfs_dev_item *dev_item; |
|---|
| 6907 | 7200 | dev_item = btrfs_item_ptr(leaf, slot, |
|---|
| 6908 | 7201 | struct btrfs_dev_item); |
|---|
| 6909 | | - ret = read_one_dev(fs_info, leaf, dev_item); |
|---|
| 7202 | + ret = read_one_dev(leaf, dev_item); |
|---|
| 6910 | 7203 | if (ret) |
|---|
| 6911 | 7204 | goto error; |
|---|
| 6912 | 7205 | total_dev++; |
|---|
| 6913 | 7206 | } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { |
|---|
| 6914 | 7207 | struct btrfs_chunk *chunk; |
|---|
| 6915 | 7208 | chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); |
|---|
| 6916 | | - ret = read_one_chunk(fs_info, &found_key, leaf, chunk); |
|---|
| 7209 | + mutex_lock(&fs_info->chunk_mutex); |
|---|
| 7210 | + ret = read_one_chunk(&found_key, leaf, chunk); |
|---|
| 7211 | + mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 6917 | 7212 | if (ret) |
|---|
| 6918 | 7213 | goto error; |
|---|
| 6919 | 7214 | } |
|---|
| .. | .. |
|---|
| 6925 | 7220 | * do another round of validation checks. |
|---|
| 6926 | 7221 | */ |
|---|
| 6927 | 7222 | if (total_dev != fs_info->fs_devices->total_devices) { |
|---|
| 6928 | | - btrfs_err(fs_info, |
|---|
| 6929 | | - "super_num_devices %llu mismatch with num_devices %llu found here", |
|---|
| 7223 | + btrfs_warn(fs_info, |
|---|
| 7224 | +"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit", |
|---|
| 6930 | 7225 | btrfs_super_num_devices(fs_info->super_copy), |
|---|
| 6931 | 7226 | total_dev); |
|---|
| 6932 | | - ret = -EINVAL; |
|---|
| 6933 | | - goto error; |
|---|
| 7227 | + fs_info->fs_devices->total_devices = total_dev; |
|---|
| 7228 | + btrfs_set_super_num_devices(fs_info->super_copy, total_dev); |
|---|
| 6934 | 7229 | } |
|---|
| 6935 | 7230 | if (btrfs_super_total_bytes(fs_info->super_copy) < |
|---|
| 6936 | 7231 | fs_info->fs_devices->total_rw_bytes) { |
|---|
| .. | .. |
|---|
| 6943 | 7238 | } |
|---|
| 6944 | 7239 | ret = 0; |
|---|
| 6945 | 7240 | error: |
|---|
| 6946 | | - mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 6947 | 7241 | mutex_unlock(&uuid_mutex); |
|---|
| 6948 | 7242 | |
|---|
| 6949 | 7243 | btrfs_free_path(path); |
|---|
| .. | .. |
|---|
| 6952 | 7246 | |
|---|
| 6953 | 7247 | void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) |
|---|
| 6954 | 7248 | { |
|---|
| 6955 | | - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 7249 | + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; |
|---|
| 6956 | 7250 | struct btrfs_device *device; |
|---|
| 6957 | 7251 | |
|---|
| 6958 | | - while (fs_devices) { |
|---|
| 6959 | | - mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 6960 | | - list_for_each_entry(device, &fs_devices->devices, dev_list) |
|---|
| 6961 | | - device->fs_info = fs_info; |
|---|
| 6962 | | - mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 7252 | + fs_devices->fs_info = fs_info; |
|---|
| 6963 | 7253 | |
|---|
| 6964 | | - fs_devices = fs_devices->seed; |
|---|
| 7254 | + mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 7255 | + list_for_each_entry(device, &fs_devices->devices, dev_list) |
|---|
| 7256 | + device->fs_info = fs_info; |
|---|
| 7257 | + |
|---|
| 7258 | + list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { |
|---|
| 7259 | + list_for_each_entry(device, &seed_devs->devices, dev_list) |
|---|
| 7260 | + device->fs_info = fs_info; |
|---|
| 7261 | + |
|---|
| 7262 | + seed_devs->fs_info = fs_info; |
|---|
| 6965 | 7263 | } |
|---|
| 7264 | + mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 6966 | 7265 | } |
|---|
| 6967 | 7266 | |
|---|
| 6968 | | -static void __btrfs_reset_dev_stats(struct btrfs_device *dev) |
|---|
| 7267 | +static u64 btrfs_dev_stats_value(const struct extent_buffer *eb, |
|---|
| 7268 | + const struct btrfs_dev_stats_item *ptr, |
|---|
| 7269 | + int index) |
|---|
| 6969 | 7270 | { |
|---|
| 6970 | | - int i; |
|---|
| 7271 | + u64 val; |
|---|
| 6971 | 7272 | |
|---|
| 6972 | | - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) |
|---|
| 6973 | | - btrfs_dev_stat_reset(dev, i); |
|---|
| 7273 | + read_extent_buffer(eb, &val, |
|---|
| 7274 | + offsetof(struct btrfs_dev_stats_item, values) + |
|---|
| 7275 | + ((unsigned long)ptr) + (index * sizeof(u64)), |
|---|
| 7276 | + sizeof(val)); |
|---|
| 7277 | + return val; |
|---|
| 7278 | +} |
|---|
| 7279 | + |
|---|
| 7280 | +static void btrfs_set_dev_stats_value(struct extent_buffer *eb, |
|---|
| 7281 | + struct btrfs_dev_stats_item *ptr, |
|---|
| 7282 | + int index, u64 val) |
|---|
| 7283 | +{ |
|---|
| 7284 | + write_extent_buffer(eb, &val, |
|---|
| 7285 | + offsetof(struct btrfs_dev_stats_item, values) + |
|---|
| 7286 | + ((unsigned long)ptr) + (index * sizeof(u64)), |
|---|
| 7287 | + sizeof(val)); |
|---|
| 7288 | +} |
|---|
| 7289 | + |
|---|
| 7290 | +static int btrfs_device_init_dev_stats(struct btrfs_device *device, |
|---|
| 7291 | + struct btrfs_path *path) |
|---|
| 7292 | +{ |
|---|
| 7293 | + struct btrfs_dev_stats_item *ptr; |
|---|
| 7294 | + struct extent_buffer *eb; |
|---|
| 7295 | + struct btrfs_key key; |
|---|
| 7296 | + int item_size; |
|---|
| 7297 | + int i, ret, slot; |
|---|
| 7298 | + |
|---|
| 7299 | + key.objectid = BTRFS_DEV_STATS_OBJECTID; |
|---|
| 7300 | + key.type = BTRFS_PERSISTENT_ITEM_KEY; |
|---|
| 7301 | + key.offset = device->devid; |
|---|
| 7302 | + ret = btrfs_search_slot(NULL, device->fs_info->dev_root, &key, path, 0, 0); |
|---|
| 7303 | + if (ret) { |
|---|
| 7304 | + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) |
|---|
| 7305 | + btrfs_dev_stat_set(device, i, 0); |
|---|
| 7306 | + device->dev_stats_valid = 1; |
|---|
| 7307 | + btrfs_release_path(path); |
|---|
| 7308 | + return ret < 0 ? ret : 0; |
|---|
| 7309 | + } |
|---|
| 7310 | + slot = path->slots[0]; |
|---|
| 7311 | + eb = path->nodes[0]; |
|---|
| 7312 | + item_size = btrfs_item_size_nr(eb, slot); |
|---|
| 7313 | + |
|---|
| 7314 | + ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_stats_item); |
|---|
| 7315 | + |
|---|
| 7316 | + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { |
|---|
| 7317 | + if (item_size >= (1 + i) * sizeof(__le64)) |
|---|
| 7318 | + btrfs_dev_stat_set(device, i, |
|---|
| 7319 | + btrfs_dev_stats_value(eb, ptr, i)); |
|---|
| 7320 | + else |
|---|
| 7321 | + btrfs_dev_stat_set(device, i, 0); |
|---|
| 7322 | + } |
|---|
| 7323 | + |
|---|
| 7324 | + device->dev_stats_valid = 1; |
|---|
| 7325 | + btrfs_dev_stat_print_on_load(device); |
|---|
| 7326 | + btrfs_release_path(path); |
|---|
| 7327 | + |
|---|
| 7328 | + return 0; |
|---|
| 6974 | 7329 | } |
|---|
| 6975 | 7330 | |
|---|
| 6976 | 7331 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) |
|---|
| 6977 | 7332 | { |
|---|
| 6978 | | - struct btrfs_key key; |
|---|
| 6979 | | - struct btrfs_key found_key; |
|---|
| 6980 | | - struct btrfs_root *dev_root = fs_info->dev_root; |
|---|
| 6981 | | - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 6982 | | - struct extent_buffer *eb; |
|---|
| 6983 | | - int slot; |
|---|
| 6984 | | - int ret = 0; |
|---|
| 7333 | + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; |
|---|
| 6985 | 7334 | struct btrfs_device *device; |
|---|
| 6986 | 7335 | struct btrfs_path *path = NULL; |
|---|
| 6987 | | - int i; |
|---|
| 7336 | + int ret = 0; |
|---|
| 6988 | 7337 | |
|---|
| 6989 | 7338 | path = btrfs_alloc_path(); |
|---|
| 6990 | | - if (!path) { |
|---|
| 6991 | | - ret = -ENOMEM; |
|---|
| 6992 | | - goto out; |
|---|
| 6993 | | - } |
|---|
| 7339 | + if (!path) |
|---|
| 7340 | + return -ENOMEM; |
|---|
| 6994 | 7341 | |
|---|
| 6995 | 7342 | mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 6996 | 7343 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
|---|
| 6997 | | - int item_size; |
|---|
| 6998 | | - struct btrfs_dev_stats_item *ptr; |
|---|
| 6999 | | - |
|---|
| 7000 | | - key.objectid = BTRFS_DEV_STATS_OBJECTID; |
|---|
| 7001 | | - key.type = BTRFS_PERSISTENT_ITEM_KEY; |
|---|
| 7002 | | - key.offset = device->devid; |
|---|
| 7003 | | - ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); |
|---|
| 7004 | | - if (ret) { |
|---|
| 7005 | | - __btrfs_reset_dev_stats(device); |
|---|
| 7006 | | - device->dev_stats_valid = 1; |
|---|
| 7007 | | - btrfs_release_path(path); |
|---|
| 7008 | | - continue; |
|---|
| 7009 | | - } |
|---|
| 7010 | | - slot = path->slots[0]; |
|---|
| 7011 | | - eb = path->nodes[0]; |
|---|
| 7012 | | - btrfs_item_key_to_cpu(eb, &found_key, slot); |
|---|
| 7013 | | - item_size = btrfs_item_size_nr(eb, slot); |
|---|
| 7014 | | - |
|---|
| 7015 | | - ptr = btrfs_item_ptr(eb, slot, |
|---|
| 7016 | | - struct btrfs_dev_stats_item); |
|---|
| 7017 | | - |
|---|
| 7018 | | - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { |
|---|
| 7019 | | - if (item_size >= (1 + i) * sizeof(__le64)) |
|---|
| 7020 | | - btrfs_dev_stat_set(device, i, |
|---|
| 7021 | | - btrfs_dev_stats_value(eb, ptr, i)); |
|---|
| 7022 | | - else |
|---|
| 7023 | | - btrfs_dev_stat_reset(device, i); |
|---|
| 7024 | | - } |
|---|
| 7025 | | - |
|---|
| 7026 | | - device->dev_stats_valid = 1; |
|---|
| 7027 | | - btrfs_dev_stat_print_on_load(device); |
|---|
| 7028 | | - btrfs_release_path(path); |
|---|
| 7344 | + ret = btrfs_device_init_dev_stats(device, path); |
|---|
| 7345 | + if (ret) |
|---|
| 7346 | + goto out; |
|---|
| 7029 | 7347 | } |
|---|
| 7348 | + list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { |
|---|
| 7349 | + list_for_each_entry(device, &seed_devs->devices, dev_list) { |
|---|
| 7350 | + ret = btrfs_device_init_dev_stats(device, path); |
|---|
| 7351 | + if (ret) |
|---|
| 7352 | + goto out; |
|---|
| 7353 | + } |
|---|
| 7354 | + } |
|---|
| 7355 | +out: |
|---|
| 7030 | 7356 | mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 7031 | 7357 | |
|---|
| 7032 | | -out: |
|---|
| 7033 | 7358 | btrfs_free_path(path); |
|---|
| 7034 | | - return ret < 0 ? ret : 0; |
|---|
| 7359 | + return ret; |
|---|
| 7035 | 7360 | } |
|---|
| 7036 | 7361 | |
|---|
| 7037 | 7362 | static int update_dev_stat_item(struct btrfs_trans_handle *trans, |
|---|
| .. | .. |
|---|
| 7102 | 7427 | /* |
|---|
| 7103 | 7428 | * called from commit_transaction. Writes all changed device stats to disk. |
|---|
| 7104 | 7429 | */ |
|---|
| 7105 | | -int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
|---|
| 7106 | | - struct btrfs_fs_info *fs_info) |
|---|
| 7430 | +int btrfs_run_dev_stats(struct btrfs_trans_handle *trans) |
|---|
| 7107 | 7431 | { |
|---|
| 7432 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 7108 | 7433 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 7109 | 7434 | struct btrfs_device *device; |
|---|
| 7110 | 7435 | int stats_cnt; |
|---|
| .. | .. |
|---|
| 7187 | 7512 | int i; |
|---|
| 7188 | 7513 | |
|---|
| 7189 | 7514 | mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 7190 | | - dev = btrfs_find_device(fs_info->fs_devices, stats->devid, |
|---|
| 7191 | | - NULL, NULL, true); |
|---|
| 7515 | + dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL, |
|---|
| 7516 | + true); |
|---|
| 7192 | 7517 | mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 7193 | 7518 | |
|---|
| 7194 | 7519 | if (!dev) { |
|---|
| .. | .. |
|---|
| 7203 | 7528 | stats->values[i] = |
|---|
| 7204 | 7529 | btrfs_dev_stat_read_and_reset(dev, i); |
|---|
| 7205 | 7530 | else |
|---|
| 7206 | | - btrfs_dev_stat_reset(dev, i); |
|---|
| 7531 | + btrfs_dev_stat_set(dev, i, 0); |
|---|
| 7207 | 7532 | } |
|---|
| 7208 | 7533 | btrfs_info(fs_info, "device stats zeroed by %s (%d)", |
|---|
| 7209 | 7534 | current->comm, task_pid_nr(current)); |
|---|
| .. | .. |
|---|
| 7217 | 7542 | return 0; |
|---|
| 7218 | 7543 | } |
|---|
| 7219 | 7544 | |
|---|
| 7220 | | -void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path) |
|---|
| 7221 | | -{ |
|---|
| 7222 | | - struct buffer_head *bh; |
|---|
| 7223 | | - struct btrfs_super_block *disk_super; |
|---|
| 7224 | | - int copy_num; |
|---|
| 7225 | | - |
|---|
| 7226 | | - if (!bdev) |
|---|
| 7227 | | - return; |
|---|
| 7228 | | - |
|---|
| 7229 | | - for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; |
|---|
| 7230 | | - copy_num++) { |
|---|
| 7231 | | - |
|---|
| 7232 | | - if (btrfs_read_dev_one_super(bdev, copy_num, &bh)) |
|---|
| 7233 | | - continue; |
|---|
| 7234 | | - |
|---|
| 7235 | | - disk_super = (struct btrfs_super_block *)bh->b_data; |
|---|
| 7236 | | - |
|---|
| 7237 | | - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); |
|---|
| 7238 | | - set_buffer_dirty(bh); |
|---|
| 7239 | | - sync_dirty_buffer(bh); |
|---|
| 7240 | | - brelse(bh); |
|---|
| 7241 | | - } |
|---|
| 7242 | | - |
|---|
| 7243 | | - /* Notify udev that device has changed */ |
|---|
| 7244 | | - btrfs_kobject_uevent(bdev, KOBJ_CHANGE); |
|---|
| 7245 | | - |
|---|
| 7246 | | - /* Update ctime/mtime for device path for libblkid */ |
|---|
| 7247 | | - update_dev_time(device_path); |
|---|
| 7248 | | -} |
|---|
| 7249 | | - |
|---|
| 7250 | 7545 | /* |
|---|
| 7251 | | - * Update the size of all devices, which is used for writing out the |
|---|
| 7252 | | - * super blocks. |
|---|
| 7546 | + * Update the size and bytes used for each device where it changed. This is |
|---|
| 7547 | + * delayed since we would otherwise get errors while writing out the |
|---|
| 7548 | + * superblocks. |
|---|
| 7549 | + * |
|---|
| 7550 | + * Must be invoked during transaction commit. |
|---|
| 7253 | 7551 | */ |
|---|
| 7254 | | -void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) |
|---|
| 7552 | +void btrfs_commit_device_sizes(struct btrfs_transaction *trans) |
|---|
| 7255 | 7553 | { |
|---|
| 7256 | | - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 7257 | 7554 | struct btrfs_device *curr, *next; |
|---|
| 7258 | 7555 | |
|---|
| 7259 | | - if (list_empty(&fs_devices->resized_devices)) |
|---|
| 7556 | + ASSERT(trans->state == TRANS_STATE_COMMIT_DOING); |
|---|
| 7557 | + |
|---|
| 7558 | + if (list_empty(&trans->dev_update_list)) |
|---|
| 7260 | 7559 | return; |
|---|
| 7261 | 7560 | |
|---|
| 7262 | | - mutex_lock(&fs_devices->device_list_mutex); |
|---|
| 7263 | | - mutex_lock(&fs_info->chunk_mutex); |
|---|
| 7264 | | - list_for_each_entry_safe(curr, next, &fs_devices->resized_devices, |
|---|
| 7265 | | - resized_list) { |
|---|
| 7266 | | - list_del_init(&curr->resized_list); |
|---|
| 7561 | + /* |
|---|
| 7562 | + * We don't need the device_list_mutex here. This list is owned by the |
|---|
| 7563 | + * transaction and the transaction must complete before the device is |
|---|
| 7564 | + * released. |
|---|
| 7565 | + */ |
|---|
| 7566 | + mutex_lock(&trans->fs_info->chunk_mutex); |
|---|
| 7567 | + list_for_each_entry_safe(curr, next, &trans->dev_update_list, |
|---|
| 7568 | + post_commit_list) { |
|---|
| 7569 | + list_del_init(&curr->post_commit_list); |
|---|
| 7267 | 7570 | curr->commit_total_bytes = curr->disk_total_bytes; |
|---|
| 7571 | + curr->commit_bytes_used = curr->bytes_used; |
|---|
| 7268 | 7572 | } |
|---|
| 7269 | | - mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 7270 | | - mutex_unlock(&fs_devices->device_list_mutex); |
|---|
| 7271 | | -} |
|---|
| 7272 | | - |
|---|
| 7273 | | -/* Must be invoked during the transaction commit */ |
|---|
| 7274 | | -void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans) |
|---|
| 7275 | | -{ |
|---|
| 7276 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
|---|
| 7277 | | - struct extent_map *em; |
|---|
| 7278 | | - struct map_lookup *map; |
|---|
| 7279 | | - struct btrfs_device *dev; |
|---|
| 7280 | | - int i; |
|---|
| 7281 | | - |
|---|
| 7282 | | - if (list_empty(&trans->pending_chunks)) |
|---|
| 7283 | | - return; |
|---|
| 7284 | | - |
|---|
| 7285 | | - /* In order to kick the device replace finish process */ |
|---|
| 7286 | | - mutex_lock(&fs_info->chunk_mutex); |
|---|
| 7287 | | - list_for_each_entry(em, &trans->pending_chunks, list) { |
|---|
| 7288 | | - map = em->map_lookup; |
|---|
| 7289 | | - |
|---|
| 7290 | | - for (i = 0; i < map->num_stripes; i++) { |
|---|
| 7291 | | - dev = map->stripes[i].dev; |
|---|
| 7292 | | - dev->commit_bytes_used = dev->bytes_used; |
|---|
| 7293 | | - dev->has_pending_chunks = false; |
|---|
| 7294 | | - } |
|---|
| 7295 | | - } |
|---|
| 7296 | | - mutex_unlock(&fs_info->chunk_mutex); |
|---|
| 7297 | | -} |
|---|
| 7298 | | - |
|---|
| 7299 | | -void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info) |
|---|
| 7300 | | -{ |
|---|
| 7301 | | - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 7302 | | - while (fs_devices) { |
|---|
| 7303 | | - fs_devices->fs_info = fs_info; |
|---|
| 7304 | | - fs_devices = fs_devices->seed; |
|---|
| 7305 | | - } |
|---|
| 7306 | | -} |
|---|
| 7307 | | - |
|---|
| 7308 | | -void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) |
|---|
| 7309 | | -{ |
|---|
| 7310 | | - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
|---|
| 7311 | | - while (fs_devices) { |
|---|
| 7312 | | - fs_devices->fs_info = NULL; |
|---|
| 7313 | | - fs_devices = fs_devices->seed; |
|---|
| 7314 | | - } |
|---|
| 7573 | + mutex_unlock(&trans->fs_info->chunk_mutex); |
|---|
| 7315 | 7574 | } |
|---|
| 7316 | 7575 | |
|---|
| 7317 | 7576 | /* |
|---|
| .. | .. |
|---|
| 7319 | 7578 | */ |
|---|
| 7320 | 7579 | int btrfs_bg_type_to_factor(u64 flags) |
|---|
| 7321 | 7580 | { |
|---|
| 7322 | | - if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
|---|
| 7323 | | - BTRFS_BLOCK_GROUP_RAID10)) |
|---|
| 7324 | | - return 2; |
|---|
| 7325 | | - return 1; |
|---|
| 7581 | + const int index = btrfs_bg_flags_to_raid_index(flags); |
|---|
| 7582 | + |
|---|
| 7583 | + return btrfs_raid_array[index].ncopies; |
|---|
| 7326 | 7584 | } |
|---|
| 7327 | 7585 | |
|---|
| 7328 | 7586 | |
|---|
| 7329 | | -static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) |
|---|
| 7330 | | -{ |
|---|
| 7331 | | - int index = btrfs_bg_flags_to_raid_index(type); |
|---|
| 7332 | | - int ncopies = btrfs_raid_array[index].ncopies; |
|---|
| 7333 | | - int data_stripes; |
|---|
| 7334 | | - |
|---|
| 7335 | | - switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { |
|---|
| 7336 | | - case BTRFS_BLOCK_GROUP_RAID5: |
|---|
| 7337 | | - data_stripes = num_stripes - 1; |
|---|
| 7338 | | - break; |
|---|
| 7339 | | - case BTRFS_BLOCK_GROUP_RAID6: |
|---|
| 7340 | | - data_stripes = num_stripes - 2; |
|---|
| 7341 | | - break; |
|---|
| 7342 | | - default: |
|---|
| 7343 | | - data_stripes = num_stripes / ncopies; |
|---|
| 7344 | | - break; |
|---|
| 7345 | | - } |
|---|
| 7346 | | - return div_u64(chunk_len, data_stripes); |
|---|
| 7347 | | -} |
|---|
| 7348 | 7587 | |
|---|
| 7349 | 7588 | static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, |
|---|
| 7350 | 7589 | u64 chunk_offset, u64 devid, |
|---|
| 7351 | 7590 | u64 physical_offset, u64 physical_len) |
|---|
| 7352 | 7591 | { |
|---|
| 7353 | | - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; |
|---|
| 7592 | + struct extent_map_tree *em_tree = &fs_info->mapping_tree; |
|---|
| 7354 | 7593 | struct extent_map *em; |
|---|
| 7355 | 7594 | struct map_lookup *map; |
|---|
| 7356 | 7595 | struct btrfs_device *dev; |
|---|
| .. | .. |
|---|
| 7414 | 7653 | |
|---|
| 7415 | 7654 | /* It's possible this device is a dummy for seed device */ |
|---|
| 7416 | 7655 | if (dev->disk_total_bytes == 0) { |
|---|
| 7417 | | - dev = btrfs_find_device(fs_info->fs_devices->seed, devid, |
|---|
| 7418 | | - NULL, NULL, false); |
|---|
| 7656 | + struct btrfs_fs_devices *devs; |
|---|
| 7657 | + |
|---|
| 7658 | + devs = list_first_entry(&fs_info->fs_devices->seed_list, |
|---|
| 7659 | + struct btrfs_fs_devices, seed_list); |
|---|
| 7660 | + dev = btrfs_find_device(devs, devid, NULL, NULL, false); |
|---|
| 7419 | 7661 | if (!dev) { |
|---|
| 7420 | 7662 | btrfs_err(fs_info, "failed to find seed devid %llu", |
|---|
| 7421 | 7663 | devid); |
|---|
| .. | .. |
|---|
| 7439 | 7681 | |
|---|
| 7440 | 7682 | static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) |
|---|
| 7441 | 7683 | { |
|---|
| 7442 | | - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; |
|---|
| 7684 | + struct extent_map_tree *em_tree = &fs_info->mapping_tree; |
|---|
| 7443 | 7685 | struct extent_map *em; |
|---|
| 7444 | 7686 | struct rb_node *node; |
|---|
| 7445 | 7687 | int ret = 0; |
|---|
| 7446 | 7688 | |
|---|
| 7447 | 7689 | read_lock(&em_tree->lock); |
|---|
| 7448 | | - for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { |
|---|
| 7690 | + for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) { |
|---|
| 7449 | 7691 | em = rb_entry(node, struct extent_map, rb_node); |
|---|
| 7450 | 7692 | if (em->map_lookup->num_stripes != |
|---|
| 7451 | 7693 | em->map_lookup->verified_stripes) { |
|---|
| .. | .. |
|---|
| 7551 | 7793 | btrfs_free_path(path); |
|---|
| 7552 | 7794 | return ret; |
|---|
| 7553 | 7795 | } |
|---|
| 7796 | + |
|---|
| 7797 | +/* |
|---|
| 7798 | + * Check whether the given block group or device is pinned by any inode being |
|---|
| 7799 | + * used as a swapfile. |
|---|
| 7800 | + */ |
|---|
| 7801 | +bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr) |
|---|
| 7802 | +{ |
|---|
| 7803 | + struct btrfs_swapfile_pin *sp; |
|---|
| 7804 | + struct rb_node *node; |
|---|
| 7805 | + |
|---|
| 7806 | + spin_lock(&fs_info->swapfile_pins_lock); |
|---|
| 7807 | + node = fs_info->swapfile_pins.rb_node; |
|---|
| 7808 | + while (node) { |
|---|
| 7809 | + sp = rb_entry(node, struct btrfs_swapfile_pin, node); |
|---|
| 7810 | + if (ptr < sp->ptr) |
|---|
| 7811 | + node = node->rb_left; |
|---|
| 7812 | + else if (ptr > sp->ptr) |
|---|
| 7813 | + node = node->rb_right; |
|---|
| 7814 | + else |
|---|
| 7815 | + break; |
|---|
| 7816 | + } |
|---|
| 7817 | + spin_unlock(&fs_info->swapfile_pins_lock); |
|---|
| 7818 | + return node != NULL; |
|---|
| 7819 | +} |
|---|