From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/drivers/block/rbd.c | 637 +++++++++++++++++++++++++++++++++------------------------ 1 files changed, 372 insertions(+), 265 deletions(-) diff --git a/kernel/drivers/block/rbd.c b/kernel/drivers/block/rbd.c index 340b1df..b0f7930 100644 --- a/kernel/drivers/block/rbd.c +++ b/kernel/drivers/block/rbd.c @@ -632,9 +632,8 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -1047,15 +1046,24 @@ RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } +static void rbd_image_header_cleanup(struct rbd_image_header *header) +{ + kfree(header->object_prefix); + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + + memset(header, 0, sizeof(*header)); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. */ -static int rbd_header_from_disk(struct rbd_device *rbd_dev, - struct rbd_image_header_ondisk *ondisk) +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + bool first_time) { - struct rbd_image_header *header = &rbd_dev->header; - bool first_time = header->object_prefix == NULL; struct ceph_snap_context *snapc; char *object_prefix = NULL; char *snap_names = NULL; @@ -1122,11 +1130,6 @@ if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - rbd_init_layout(rbd_dev); - } else { - ceph_put_snap_context(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); } /* The remaining fields always get updated (when we refresh) */ @@ -1397,14 +1400,30 @@ /* * Must be called after rbd_obj_calc_img_extents(). */ -static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req) +static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req) { - if (!obj_req->num_img_extents || - (rbd_obj_is_entire(obj_req) && - !obj_req->img_request->snapc->num_snaps)) - return false; + rbd_assert(obj_req->img_request->snapc); - return true; + if (obj_req->img_request->op_type == OBJ_OP_DISCARD) { + dout("%s %p objno %llu discard\n", __func__, obj_req, + obj_req->ex.oe_objno); + return; + } + + if (!obj_req->num_img_extents) { + dout("%s %p objno %llu not overlapping\n", __func__, obj_req, + obj_req->ex.oe_objno); + return; + } + + if (rbd_obj_is_entire(obj_req) && + !obj_req->img_request->snapc->num_snaps) { + dout("%s %p objno %llu entire\n", __func__, obj_req, + obj_req->ex.oe_objno); + return; + } + + obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; } static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req) @@ -1505,6 +1524,7 @@ static struct ceph_osd_request * rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops) { + rbd_assert(obj_req->img_request->snapc); return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc, num_ops); } @@ -1641,15 +1661,18 @@ mutex_init(&img_request->state_mutex); } +/* + * Only snap_id is captured here, for reads. For writes, snapshot + * context is captured in rbd_img_object_requests() after exclusive + * lock is ensured to be held. + */ static void rbd_img_capture_header(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; lockdep_assert_held(&rbd_dev->header_rwsem); - if (rbd_img_is_write(img_req)) - img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); - else + if (!rbd_img_is_write(img_req)) img_req->snap_id = rbd_dev->spec->snap_id; if (rbd_dev_parent_get(rbd_dev)) @@ -2296,9 +2319,6 @@ if (ret) return ret; - if (rbd_obj_copyup_enabled(obj_req)) - obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; - obj_req->write_state = RBD_OBJ_WRITE_START; return 0; } @@ -2404,8 +2424,6 @@ if (ret) return ret; - if (rbd_obj_copyup_enabled(obj_req)) - obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; if (!obj_req->num_img_extents) { obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; if (rbd_obj_is_entire(obj_req)) @@ -3351,6 +3369,7 @@ case RBD_OBJ_WRITE_START: rbd_assert(!*result); + rbd_obj_set_copyup_enabled(obj_req); if (rbd_obj_write_is_noop(obj_req)) return true; @@ -3537,9 +3556,19 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req) { + struct rbd_device *rbd_dev = img_req->rbd_dev; struct rbd_obj_request *obj_req; rbd_assert(!img_req->pending.result && !img_req->pending.num_pending); + rbd_assert(!need_exclusive_lock(img_req) || + __rbd_is_lock_owner(rbd_dev)); + + if (rbd_img_is_write(img_req)) { + rbd_assert(!img_req->snapc); + down_read(&rbd_dev->header_rwsem); + img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + } for_each_obj_request(img_req, obj_req) { int result = 0; @@ -3557,7 +3586,6 @@ static bool rbd_img_advance(struct rbd_img_request *img_req, int *result) { - struct rbd_device *rbd_dev = img_req->rbd_dev; int ret; again: @@ -3577,9 +3605,6 @@ case RBD_IMG_EXCLUSIVE_LOCK: if (*result) return true; - - rbd_assert(!need_exclusive_lock(img_req) || - __rbd_is_lock_owner(rbd_dev)); rbd_img_object_requests(img_req); if (!img_req->pending.num_pending) { @@ -3718,7 +3743,7 @@ ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie, RBD_LOCK_TAG, "", 0); - if (ret) + if (ret && ret != -EEXIST) return ret; __rbd_lock(rbd_dev, cookie); @@ -3892,10 +3917,26 @@ list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); } -static int get_lock_owner_info(struct rbd_device *rbd_dev, - struct ceph_locker **lockers, u32 *num_lockers) +static bool locker_equal(const struct ceph_locker *lhs, + const struct ceph_locker *rhs) +{ + return lhs->id.name.type == rhs->id.name.type && + lhs->id.name.num == rhs->id.name.num && + !strcmp(lhs->id.cookie, rhs->id.cookie) && + ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr); +} + +static void free_locker(struct ceph_locker *locker) +{ + if (locker) + ceph_free_lockers(locker, 1); +} + +static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_locker *lockers; + u32 num_lockers; u8 lock_type; char *lock_tag; int ret; @@ -3904,39 +3945,45 @@ ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, - &lock_type, &lock_tag, lockers, num_lockers); - if (ret) - return ret; + &lock_type, &lock_tag, &lockers, &num_lockers); + if (ret) { + rbd_warn(rbd_dev, "failed to get header lockers: %d", ret); + return ERR_PTR(ret); + } - if (*num_lockers == 0) { + if (num_lockers == 0) { dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev); + lockers = NULL; goto out; } if (strcmp(lock_tag, RBD_LOCK_TAG)) { rbd_warn(rbd_dev, "locked by external mechanism, tag %s", lock_tag); - ret = -EBUSY; - goto out; + goto err_busy; } if (lock_type == CEPH_CLS_LOCK_SHARED) { rbd_warn(rbd_dev, "shared lock type detected"); - ret = -EBUSY; - goto out; + goto err_busy; } - if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX, + WARN_ON(num_lockers != 1); + if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX, strlen(RBD_LOCK_COOKIE_PREFIX))) { rbd_warn(rbd_dev, "locked by external mechanism, cookie %s", - (*lockers)[0].id.cookie); - ret = -EBUSY; - goto out; + lockers[0].id.cookie); + goto err_busy; } out: kfree(lock_tag); - return ret; + return lockers; + +err_busy: + kfree(lock_tag); + ceph_free_lockers(lockers, num_lockers); + return ERR_PTR(-EBUSY); } static int find_watcher(struct rbd_device *rbd_dev, @@ -3952,13 +3999,19 @@ ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, &watchers, &num_watchers); - if (ret) + if (ret) { + rbd_warn(rbd_dev, "failed to get watchers: %d", ret); return ret; + } sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); for (i = 0; i < num_watchers; i++) { - if (!memcmp(&watchers[i].addr, &locker->info.addr, - sizeof(locker->info.addr)) && + /* + * Ignore addr->type while comparing. This mimics + * entity_addr_t::get_legacy_str() + strcmp(). + */ + if (ceph_addr_equal_no_type(&watchers[i].addr, + &locker->info.addr) && watchers[i].cookie == cookie) { struct rbd_client_id cid = { .gid = le64_to_cpu(watchers[i].name.num), @@ -3986,57 +4039,82 @@ static int rbd_try_lock(struct rbd_device *rbd_dev) { struct ceph_client *client = rbd_dev->rbd_client->client; - struct ceph_locker *lockers; - u32 num_lockers; + struct ceph_locker *locker, *refreshed_locker; int ret; for (;;) { + locker = refreshed_locker = NULL; + ret = rbd_lock(rbd_dev); - if (ret != -EBUSY) - return ret; + if (!ret) + goto out; + if (ret != -EBUSY) { + rbd_warn(rbd_dev, "failed to lock header: %d", ret); + goto out; + } /* determine if the current lock holder is still alive */ - ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers); - if (ret) - return ret; - - if (num_lockers == 0) + locker = get_lock_owner_info(rbd_dev); + if (IS_ERR(locker)) { + ret = PTR_ERR(locker); + locker = NULL; + goto out; + } + if (!locker) goto again; - ret = find_watcher(rbd_dev, lockers); + ret = find_watcher(rbd_dev, locker); if (ret) goto out; /* request lock or error */ + refreshed_locker = get_lock_owner_info(rbd_dev); + if (IS_ERR(refreshed_locker)) { + ret = PTR_ERR(refreshed_locker); + refreshed_locker = NULL; + goto out; + } + if (!refreshed_locker || + !locker_equal(locker, refreshed_locker)) + goto again; + rbd_warn(rbd_dev, "breaking header lock owned by %s%llu", - ENTITY_NAME(lockers[0].id.name)); + ENTITY_NAME(locker->id.name)); ret = ceph_monc_blocklist_add(&client->monc, - &lockers[0].info.addr); + &locker->info.addr); if (ret) { - rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d", - ENTITY_NAME(lockers[0].id.name), ret); + rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d", + ENTITY_NAME(locker->id.name), ret); goto out; } ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, - lockers[0].id.cookie, - &lockers[0].id.name); - if (ret && ret != -ENOENT) + locker->id.cookie, &locker->id.name); + if (ret && ret != -ENOENT) { + rbd_warn(rbd_dev, "failed to break header lock: %d", + ret); goto out; + } again: - ceph_free_lockers(lockers, num_lockers); + free_locker(refreshed_locker); + free_locker(locker); } out: - ceph_free_lockers(lockers, num_lockers); + free_locker(refreshed_locker); + free_locker(locker); return ret; } static int rbd_post_acquire_action(struct rbd_device *rbd_dev) { int ret; + + ret = rbd_dev_refresh(rbd_dev); + if (ret) + return ret; if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) { ret = rbd_object_map_open(rbd_dev); @@ -4076,11 +4154,8 @@ ret = rbd_try_lock(rbd_dev); if (ret < 0) { - rbd_warn(rbd_dev, "failed to lock header: %d", ret); - if (ret == -EBLOCKLISTED) - goto out; - - ret = 1; /* request lock anyway */ + rbd_warn(rbd_dev, "failed to acquire lock: %d", ret); + goto out; } if (ret > 0) { up_write(&rbd_dev->lock_rwsem); @@ -4844,7 +4919,9 @@ * return, the rbd_dev->header field will contain up-to-date * information about the image. */ -static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -4892,7 +4969,7 @@ snap_count = le32_to_cpu(ondisk->snap_count); } while (snap_count != want_count); - ret = rbd_header_from_disk(rbd_dev, ondisk); + ret = rbd_header_from_disk(header, ondisk, first_time); out: kfree(ondisk); @@ -4915,39 +4992,6 @@ set_capacity(rbd_dev->disk, size); revalidate_disk_size(rbd_dev->disk, true); } -} - -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. - */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; } static const struct blk_mq_ops rbd_mq_ops = { @@ -5369,8 +5413,7 @@ module_put(THIS_MODULE); } -static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, - struct rbd_spec *spec) +static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec) { struct rbd_device *rbd_dev; @@ -5415,9 +5458,6 @@ rbd_dev->dev.parent = &rbd_root_dev; device_initialize(&rbd_dev->dev); - rbd_dev->rbd_client = rbdc; - rbd_dev->spec = spec; - return rbd_dev; } @@ -5430,11 +5470,9 @@ { struct rbd_device *rbd_dev; - rbd_dev = __rbd_dev_create(rbdc, spec); + rbd_dev = __rbd_dev_create(spec); if (!rbd_dev) return NULL; - - rbd_dev->opts = opts; /* get an id and fill in device name */ rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0, @@ -5451,6 +5489,10 @@ /* we have a ref from do_rbd_add() */ __module_get(THIS_MODULE); + + rbd_dev->rbd_client = rbdc; + rbd_dev->spec = spec; + rbd_dev->opts = opts; dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id); return rbd_dev; @@ -5506,17 +5548,12 @@ return 0; } -static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.obj_order, - &rbd_dev->header.image_size); -} - -static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) +static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, + char **pobject_prefix) { size_t size; void *reply_buf; + char *object_prefix; int ret; void *p; @@ -5534,16 +5571,16 @@ goto out; p = reply_buf; - rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + ret, NULL, GFP_NOIO); + object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, + GFP_NOIO); + if (IS_ERR(object_prefix)) { + ret = PTR_ERR(object_prefix); + goto out; + } ret = 0; - if (IS_ERR(rbd_dev->header.object_prefix)) { - ret = PTR_ERR(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } else { - dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); - } + *pobject_prefix = object_prefix; + dout(" object_prefix = %s\n", object_prefix); out: kfree(reply_buf); @@ -5594,13 +5631,6 @@ return 0; } -static int rbd_dev_v2_features(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - rbd_is_ro(rbd_dev), - &rbd_dev->header.features); -} - /* * These are generic image flags, but since they are used only for * object map, store them in rbd_dev->object_map_flags. @@ -5636,6 +5666,14 @@ bool has_overlap; u64 overlap; }; + +static void rbd_parent_info_cleanup(struct parent_image_info *pii) +{ + kfree(pii->pool_ns); + kfree(pii->image_id); + + memset(pii, 0, sizeof(*pii)); +} /* * The caller is responsible for @pii. @@ -5706,6 +5744,9 @@ if (pii->has_overlap) ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, + pii->has_overlap, pii->overlap); return 0; e_inval: @@ -5744,14 +5785,17 @@ pii->has_overlap = true; ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, + pii->has_overlap, pii->overlap); return 0; e_inval: return -EINVAL; } -static int get_parent_info(struct rbd_device *rbd_dev, - struct parent_image_info *pii) +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev, + struct parent_image_info *pii) { struct page *req_page, *reply_page; void *p; @@ -5779,7 +5823,7 @@ return ret; } -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +static int rbd_dev_setup_parent(struct rbd_device *rbd_dev) { struct rbd_spec *parent_spec; struct parent_image_info pii = { 0 }; @@ -5789,37 +5833,12 @@ if (!parent_spec) return -ENOMEM; - ret = get_parent_info(rbd_dev, &pii); + ret = rbd_dev_v2_parent_info(rbd_dev, &pii); if (ret) goto out_err; - dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", - __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id, - pii.has_overlap, pii.overlap); - - if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) { - /* - * Either the parent never existed, or we have - * record of it but the image got flattened so it no - * longer has a parent. When the parent of a - * layered image disappears we immediately set the - * overlap to 0. The effect of this is that all new - * requests will be treated as if the image had no - * parent. - * - * If !pii.has_overlap, the parent image spec is not - * applicable. It's there to avoid duplication in each - * snapshot record. - */ - if (rbd_dev->parent_overlap) { - rbd_dev->parent_overlap = 0; - rbd_dev_parent_put(rbd_dev); - pr_info("%s: clone image has been flattened\n", - rbd_dev->disk->disk_name); - } - + if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) goto out; /* No parent? No problem. */ - } /* The ceph file layout needs to fit pool id in 32 bits */ @@ -5831,58 +5850,46 @@ } /* - * The parent won't change (except when the clone is - * flattened, already handled that). So we only need to - * record the parent spec we have not already done so. + * The parent won't change except when the clone is flattened, + * so we only need to record the parent image spec once. */ - if (!rbd_dev->parent_spec) { - parent_spec->pool_id = pii.pool_id; - if (pii.pool_ns && *pii.pool_ns) { - parent_spec->pool_ns = pii.pool_ns; - pii.pool_ns = NULL; - } - parent_spec->image_id = pii.image_id; - pii.image_id = NULL; - parent_spec->snap_id = pii.snap_id; - - rbd_dev->parent_spec = parent_spec; - parent_spec = NULL; /* rbd_dev now owns this */ + parent_spec->pool_id = pii.pool_id; + if (pii.pool_ns && *pii.pool_ns) { + parent_spec->pool_ns = pii.pool_ns; + pii.pool_ns = NULL; } + parent_spec->image_id = pii.image_id; + pii.image_id = NULL; + parent_spec->snap_id = pii.snap_id; + + rbd_assert(!rbd_dev->parent_spec); + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ /* - * We always update the parent overlap. If it's zero we issue - * a warning, as we will proceed as if there was no parent. + * Record the parent overlap. If it's zero, issue a warning as + * we will proceed as if there is no parent. */ - if (!pii.overlap) { - if (parent_spec) { - /* refresh, careful to warn just once */ - if (rbd_dev->parent_overlap) - rbd_warn(rbd_dev, - "clone now standalone (overlap became 0)"); - } else { - /* initial probe */ - rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); - } - } + if (!pii.overlap) + rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); rbd_dev->parent_overlap = pii.overlap; out: ret = 0; out_err: - kfree(pii.pool_ns); - kfree(pii.image_id); + rbd_parent_info_cleanup(&pii); rbd_spec_put(parent_spec); return ret; } -static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, + u64 *stripe_unit, u64 *stripe_count) { struct { __le64 stripe_unit; __le64 stripe_count; } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); - void *p; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5894,27 +5901,33 @@ if (ret < size) return -ERANGE; - p = &striping_info_buf; - rbd_dev->header.stripe_unit = ceph_decode_64(&p); - rbd_dev->header.stripe_count = ceph_decode_64(&p); + *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); + *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); + dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, + *stripe_count); + return 0; } -static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) { - __le64 data_pool_id; + __le64 data_pool_buf; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_data_pool", - NULL, 0, &data_pool_id, sizeof(data_pool_id)); + NULL, 0, &data_pool_buf, + sizeof(data_pool_buf)); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; - if (ret < sizeof(data_pool_id)) + if (ret < sizeof(data_pool_buf)) return -EBADMSG; - rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); - WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + *data_pool_id = le64_to_cpu(data_pool_buf); + dout(" data_pool_id = %lld\n", *data_pool_id); + WARN_ON(*data_pool_id == CEPH_NOPOOL); + return 0; } @@ -6106,7 +6119,8 @@ return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, + struct ceph_snap_context **psnapc) { size_t size; int ret; @@ -6167,9 +6181,7 @@ for (i = 0; i < snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); - ceph_put_snap_context(rbd_dev->header.snapc); - rbd_dev->header.snapc = snapc; - + *psnapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", (unsigned long long)seq, (unsigned int)snap_count); out: @@ -6218,38 +6230,42 @@ return snap_name; } -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { - bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - ret = rbd_dev_v2_image_size(rbd_dev); + ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, + first_time ? &header->obj_order : NULL, + &header->image_size); if (ret) return ret; if (first_time) { - ret = rbd_dev_v2_header_onetime(rbd_dev); + ret = rbd_dev_v2_header_onetime(rbd_dev, header); if (ret) return ret; } - ret = rbd_dev_v2_snap_context(rbd_dev); - if (ret && first_time) { - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } + ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); + if (ret) + return ret; - return ret; + return 0; } -static int rbd_dev_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(!header->object_prefix && !header->snapc); if (rbd_dev->image_format == 1) - return rbd_dev_v1_header_info(rbd_dev); + return rbd_dev_v1_header_info(rbd_dev, header, first_time); - return rbd_dev_v2_header_info(rbd_dev); + return rbd_dev_v2_header_info(rbd_dev, header, first_time); } /* @@ -6632,12 +6648,11 @@ cancel_delayed_work_sync(&rbd_dev->lock_dwork); if (!ret) ret = -ETIMEDOUT; - } - if (ret) { - rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret); - return ret; + rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret); } + if (ret) + return ret; /* * The lock may have been released by now, unless automatic lock @@ -6737,60 +6752,49 @@ */ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - struct rbd_image_header *header; - rbd_dev_parent_put(rbd_dev); rbd_object_map_free(rbd_dev); rbd_dev_mapping_clear(rbd_dev); /* Free dynamic fields from the header, then zero it out */ - header = &rbd_dev->header; - ceph_put_snap_context(header->snapc); - kfree(header->snap_sizes); - kfree(header->snap_names); - kfree(header->object_prefix); - memset(header, 0, sizeof (*header)); + rbd_image_header_cleanup(&rbd_dev->header); } -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header) { int ret; - ret = rbd_dev_v2_object_prefix(rbd_dev); + ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); if (ret) - goto out_err; + return ret; /* * Get the and check features for the image. Currently the * features are assumed to never change. */ - ret = rbd_dev_v2_features(rbd_dev); + ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, + rbd_is_ro(rbd_dev), &header->features); if (ret) - goto out_err; + return ret; /* If the image supports fancy striping, get its parameters */ - if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { - ret = rbd_dev_v2_striping_info(rbd_dev); - if (ret < 0) - goto out_err; - } - - if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { - ret = rbd_dev_v2_data_pool(rbd_dev); + if (header->features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, + &header->stripe_count); if (ret) - goto out_err; + return ret; } - rbd_init_layout(rbd_dev); - return 0; + if (header->features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); + if (ret) + return ret; + } -out_err: - rbd_dev->header.features = 0; - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - return ret; + return 0; } /* @@ -6812,7 +6816,7 @@ goto out_err; } - parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec); + parent = __rbd_dev_create(rbd_dev->parent_spec); if (!parent) { ret = -ENOMEM; goto out_err; @@ -6822,8 +6826,8 @@ * Images related by parent/child relationships always share * rbd_client and spec/parent_spec, so bump their refcounts. */ - __rbd_get_client(rbd_dev->rbd_client); - rbd_spec_get(rbd_dev->parent_spec); + parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client); + parent->spec = rbd_spec_get(rbd_dev->parent_spec); __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags); @@ -6985,12 +6989,14 @@ if (!depth) down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); if (ret) { if (ret == -ENOENT && !need_watch) rbd_print_dne(rbd_dev, false); goto err_out_probe; } + + rbd_init_layout(rbd_dev); /* * If this image is the one being mapped, we have pool name and @@ -7020,7 +7026,7 @@ } if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { - ret = rbd_dev_v2_parent_info(rbd_dev); + ret = rbd_dev_setup_parent(rbd_dev); if (ret) goto err_out_probe; } @@ -7046,6 +7052,107 @@ return ret; } +static void rbd_dev_update_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ + + if (rbd_dev->header.image_size != header->image_size) { + rbd_dev->header.image_size = header->image_size; + + if (!rbd_is_snap(rbd_dev)) { + rbd_dev->mapping.size = header->image_size; + rbd_dev_update_size(rbd_dev); + } + } + + ceph_put_snap_context(rbd_dev->header.snapc); + rbd_dev->header.snapc = header->snapc; + header->snapc = NULL; + + if (rbd_dev->image_format == 1) { + kfree(rbd_dev->header.snap_names); + rbd_dev->header.snap_names = header->snap_names; + header->snap_names = NULL; + + kfree(rbd_dev->header.snap_sizes); + rbd_dev->header.snap_sizes = header->snap_sizes; + header->snap_sizes = NULL; + } +} + +static void rbd_dev_update_parent(struct rbd_device *rbd_dev, + struct parent_image_info *pii) +{ + if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) { + /* + * Either the parent never existed, or we have + * record of it but the image got flattened so it no + * longer has a parent. When the parent of a + * layered image disappears we immediately set the + * overlap to 0. The effect of this is that all new + * requests will be treated as if the image had no + * parent. + * + * If !pii.has_overlap, the parent image spec is not + * applicable. It's there to avoid duplication in each + * snapshot record. + */ + if (rbd_dev->parent_overlap) { + rbd_dev->parent_overlap = 0; + rbd_dev_parent_put(rbd_dev); + pr_info("%s: clone has been flattened\n", + rbd_dev->disk->disk_name); + } + } else { + rbd_assert(rbd_dev->parent_spec); + + /* + * Update the parent overlap. If it became zero, issue + * a warning as we will proceed as if there is no parent. + */ + if (!pii->overlap && rbd_dev->parent_overlap) + rbd_warn(rbd_dev, + "clone has become standalone (overlap 0)"); + rbd_dev->parent_overlap = pii->overlap; + } +} + +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + struct rbd_image_header header = { 0 }; + struct parent_image_info pii = { 0 }; + int ret; + + dout("%s rbd_dev %p\n", __func__, rbd_dev); + + ret = rbd_dev_header_info(rbd_dev, &header, false); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev, &pii); + if (ret) + goto out; + } + + down_write(&rbd_dev->header_rwsem); + rbd_dev_update_header(rbd_dev, &header); + if (rbd_dev->parent) + rbd_dev_update_parent(rbd_dev, &pii); + up_write(&rbd_dev->header_rwsem); + +out: + rbd_parent_info_cleanup(&pii); + rbd_image_header_cleanup(&header); + return ret; +} + static ssize_t do_rbd_add(struct bus_type *bus, const char *buf, size_t count) -- Gitblit v1.6.2