hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/block/rbd.c
....@@ -632,9 +632,8 @@
632632 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
633633
634634 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
635
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
636
-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
637
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
635
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
636
+ struct rbd_image_header *header);
638637 static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
639638 u64 snap_id);
640639 static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
....@@ -1047,15 +1046,24 @@
10471046 RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
10481047 }
10491048
1049
+static void rbd_image_header_cleanup(struct rbd_image_header *header)
1050
+{
1051
+ kfree(header->object_prefix);
1052
+ ceph_put_snap_context(header->snapc);
1053
+ kfree(header->snap_sizes);
1054
+ kfree(header->snap_names);
1055
+
1056
+ memset(header, 0, sizeof(*header));
1057
+}
1058
+
10501059 /*
10511060 * Fill an rbd image header with information from the given format 1
10521061 * on-disk header.
10531062 */
1054
-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
1055
- struct rbd_image_header_ondisk *ondisk)
1063
+static int rbd_header_from_disk(struct rbd_image_header *header,
1064
+ struct rbd_image_header_ondisk *ondisk,
1065
+ bool first_time)
10561066 {
1057
- struct rbd_image_header *header = &rbd_dev->header;
1058
- bool first_time = header->object_prefix == NULL;
10591067 struct ceph_snap_context *snapc;
10601068 char *object_prefix = NULL;
10611069 char *snap_names = NULL;
....@@ -1122,11 +1130,6 @@
11221130 if (first_time) {
11231131 header->object_prefix = object_prefix;
11241132 header->obj_order = ondisk->options.order;
1125
- rbd_init_layout(rbd_dev);
1126
- } else {
1127
- ceph_put_snap_context(header->snapc);
1128
- kfree(header->snap_names);
1129
- kfree(header->snap_sizes);
11301133 }
11311134
11321135 /* The remaining fields always get updated (when we refresh) */
....@@ -1397,14 +1400,30 @@
13971400 /*
13981401 * Must be called after rbd_obj_calc_img_extents().
13991402 */
1400
-static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
1403
+static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
14011404 {
1402
- if (!obj_req->num_img_extents ||
1403
- (rbd_obj_is_entire(obj_req) &&
1404
- !obj_req->img_request->snapc->num_snaps))
1405
- return false;
1405
+ rbd_assert(obj_req->img_request->snapc);
14061406
1407
- return true;
1407
+ if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
1408
+ dout("%s %p objno %llu discard\n", __func__, obj_req,
1409
+ obj_req->ex.oe_objno);
1410
+ return;
1411
+ }
1412
+
1413
+ if (!obj_req->num_img_extents) {
1414
+ dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
1415
+ obj_req->ex.oe_objno);
1416
+ return;
1417
+ }
1418
+
1419
+ if (rbd_obj_is_entire(obj_req) &&
1420
+ !obj_req->img_request->snapc->num_snaps) {
1421
+ dout("%s %p objno %llu entire\n", __func__, obj_req,
1422
+ obj_req->ex.oe_objno);
1423
+ return;
1424
+ }
1425
+
1426
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
14081427 }
14091428
14101429 static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
....@@ -1505,6 +1524,7 @@
15051524 static struct ceph_osd_request *
15061525 rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
15071526 {
1527
+ rbd_assert(obj_req->img_request->snapc);
15081528 return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
15091529 num_ops);
15101530 }
....@@ -1641,15 +1661,18 @@
16411661 mutex_init(&img_request->state_mutex);
16421662 }
16431663
1664
+/*
1665
+ * Only snap_id is captured here, for reads. For writes, snapshot
1666
+ * context is captured in rbd_img_object_requests() after exclusive
1667
+ * lock is ensured to be held.
1668
+ */
16441669 static void rbd_img_capture_header(struct rbd_img_request *img_req)
16451670 {
16461671 struct rbd_device *rbd_dev = img_req->rbd_dev;
16471672
16481673 lockdep_assert_held(&rbd_dev->header_rwsem);
16491674
1650
- if (rbd_img_is_write(img_req))
1651
- img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
1652
- else
1675
+ if (!rbd_img_is_write(img_req))
16531676 img_req->snap_id = rbd_dev->spec->snap_id;
16541677
16551678 if (rbd_dev_parent_get(rbd_dev))
....@@ -2296,9 +2319,6 @@
22962319 if (ret)
22972320 return ret;
22982321
2299
- if (rbd_obj_copyup_enabled(obj_req))
2300
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
2301
-
23022322 obj_req->write_state = RBD_OBJ_WRITE_START;
23032323 return 0;
23042324 }
....@@ -2404,8 +2424,6 @@
24042424 if (ret)
24052425 return ret;
24062426
2407
- if (rbd_obj_copyup_enabled(obj_req))
2408
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
24092427 if (!obj_req->num_img_extents) {
24102428 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
24112429 if (rbd_obj_is_entire(obj_req))
....@@ -3351,6 +3369,7 @@
33513369 case RBD_OBJ_WRITE_START:
33523370 rbd_assert(!*result);
33533371
3372
+ rbd_obj_set_copyup_enabled(obj_req);
33543373 if (rbd_obj_write_is_noop(obj_req))
33553374 return true;
33563375
....@@ -3537,9 +3556,19 @@
35373556
35383557 static void rbd_img_object_requests(struct rbd_img_request *img_req)
35393558 {
3559
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
35403560 struct rbd_obj_request *obj_req;
35413561
35423562 rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
3563
+ rbd_assert(!need_exclusive_lock(img_req) ||
3564
+ __rbd_is_lock_owner(rbd_dev));
3565
+
3566
+ if (rbd_img_is_write(img_req)) {
3567
+ rbd_assert(!img_req->snapc);
3568
+ down_read(&rbd_dev->header_rwsem);
3569
+ img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
3570
+ up_read(&rbd_dev->header_rwsem);
3571
+ }
35433572
35443573 for_each_obj_request(img_req, obj_req) {
35453574 int result = 0;
....@@ -3557,7 +3586,6 @@
35573586
35583587 static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
35593588 {
3560
- struct rbd_device *rbd_dev = img_req->rbd_dev;
35613589 int ret;
35623590
35633591 again:
....@@ -3577,9 +3605,6 @@
35773605 case RBD_IMG_EXCLUSIVE_LOCK:
35783606 if (*result)
35793607 return true;
3580
-
3581
- rbd_assert(!need_exclusive_lock(img_req) ||
3582
- __rbd_is_lock_owner(rbd_dev));
35833608
35843609 rbd_img_object_requests(img_req);
35853610 if (!img_req->pending.num_pending) {
....@@ -3718,7 +3743,7 @@
37183743 ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
37193744 RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
37203745 RBD_LOCK_TAG, "", 0);
3721
- if (ret)
3746
+ if (ret && ret != -EEXIST)
37223747 return ret;
37233748
37243749 __rbd_lock(rbd_dev, cookie);
....@@ -3892,10 +3917,26 @@
38923917 list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
38933918 }
38943919
3895
-static int get_lock_owner_info(struct rbd_device *rbd_dev,
3896
- struct ceph_locker **lockers, u32 *num_lockers)
3920
+static bool locker_equal(const struct ceph_locker *lhs,
3921
+ const struct ceph_locker *rhs)
3922
+{
3923
+ return lhs->id.name.type == rhs->id.name.type &&
3924
+ lhs->id.name.num == rhs->id.name.num &&
3925
+ !strcmp(lhs->id.cookie, rhs->id.cookie) &&
3926
+ ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
3927
+}
3928
+
3929
+static void free_locker(struct ceph_locker *locker)
3930
+{
3931
+ if (locker)
3932
+ ceph_free_lockers(locker, 1);
3933
+}
3934
+
3935
+static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
38973936 {
38983937 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3938
+ struct ceph_locker *lockers;
3939
+ u32 num_lockers;
38993940 u8 lock_type;
39003941 char *lock_tag;
39013942 int ret;
....@@ -3904,39 +3945,45 @@
39043945
39053946 ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
39063947 &rbd_dev->header_oloc, RBD_LOCK_NAME,
3907
- &lock_type, &lock_tag, lockers, num_lockers);
3908
- if (ret)
3909
- return ret;
3948
+ &lock_type, &lock_tag, &lockers, &num_lockers);
3949
+ if (ret) {
3950
+ rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
3951
+ return ERR_PTR(ret);
3952
+ }
39103953
3911
- if (*num_lockers == 0) {
3954
+ if (num_lockers == 0) {
39123955 dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
3956
+ lockers = NULL;
39133957 goto out;
39143958 }
39153959
39163960 if (strcmp(lock_tag, RBD_LOCK_TAG)) {
39173961 rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
39183962 lock_tag);
3919
- ret = -EBUSY;
3920
- goto out;
3963
+ goto err_busy;
39213964 }
39223965
39233966 if (lock_type == CEPH_CLS_LOCK_SHARED) {
39243967 rbd_warn(rbd_dev, "shared lock type detected");
3925
- ret = -EBUSY;
3926
- goto out;
3968
+ goto err_busy;
39273969 }
39283970
3929
- if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
3971
+ WARN_ON(num_lockers != 1);
3972
+ if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
39303973 strlen(RBD_LOCK_COOKIE_PREFIX))) {
39313974 rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
3932
- (*lockers)[0].id.cookie);
3933
- ret = -EBUSY;
3934
- goto out;
3975
+ lockers[0].id.cookie);
3976
+ goto err_busy;
39353977 }
39363978
39373979 out:
39383980 kfree(lock_tag);
3939
- return ret;
3981
+ return lockers;
3982
+
3983
+err_busy:
3984
+ kfree(lock_tag);
3985
+ ceph_free_lockers(lockers, num_lockers);
3986
+ return ERR_PTR(-EBUSY);
39403987 }
39413988
39423989 static int find_watcher(struct rbd_device *rbd_dev,
....@@ -3952,13 +3999,19 @@
39523999 ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
39534000 &rbd_dev->header_oloc, &watchers,
39544001 &num_watchers);
3955
- if (ret)
4002
+ if (ret) {
4003
+ rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
39564004 return ret;
4005
+ }
39574006
39584007 sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
39594008 for (i = 0; i < num_watchers; i++) {
3960
- if (!memcmp(&watchers[i].addr, &locker->info.addr,
3961
- sizeof(locker->info.addr)) &&
4009
+ /*
4010
+ * Ignore addr->type while comparing. This mimics
4011
+ * entity_addr_t::get_legacy_str() + strcmp().
4012
+ */
4013
+ if (ceph_addr_equal_no_type(&watchers[i].addr,
4014
+ &locker->info.addr) &&
39624015 watchers[i].cookie == cookie) {
39634016 struct rbd_client_id cid = {
39644017 .gid = le64_to_cpu(watchers[i].name.num),
....@@ -3986,57 +4039,82 @@
39864039 static int rbd_try_lock(struct rbd_device *rbd_dev)
39874040 {
39884041 struct ceph_client *client = rbd_dev->rbd_client->client;
3989
- struct ceph_locker *lockers;
3990
- u32 num_lockers;
4042
+ struct ceph_locker *locker, *refreshed_locker;
39914043 int ret;
39924044
39934045 for (;;) {
4046
+ locker = refreshed_locker = NULL;
4047
+
39944048 ret = rbd_lock(rbd_dev);
3995
- if (ret != -EBUSY)
3996
- return ret;
4049
+ if (!ret)
4050
+ goto out;
4051
+ if (ret != -EBUSY) {
4052
+ rbd_warn(rbd_dev, "failed to lock header: %d", ret);
4053
+ goto out;
4054
+ }
39974055
39984056 /* determine if the current lock holder is still alive */
3999
- ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
4000
- if (ret)
4001
- return ret;
4002
-
4003
- if (num_lockers == 0)
4057
+ locker = get_lock_owner_info(rbd_dev);
4058
+ if (IS_ERR(locker)) {
4059
+ ret = PTR_ERR(locker);
4060
+ locker = NULL;
4061
+ goto out;
4062
+ }
4063
+ if (!locker)
40044064 goto again;
40054065
4006
- ret = find_watcher(rbd_dev, lockers);
4066
+ ret = find_watcher(rbd_dev, locker);
40074067 if (ret)
40084068 goto out; /* request lock or error */
40094069
4070
+ refreshed_locker = get_lock_owner_info(rbd_dev);
4071
+ if (IS_ERR(refreshed_locker)) {
4072
+ ret = PTR_ERR(refreshed_locker);
4073
+ refreshed_locker = NULL;
4074
+ goto out;
4075
+ }
4076
+ if (!refreshed_locker ||
4077
+ !locker_equal(locker, refreshed_locker))
4078
+ goto again;
4079
+
40104080 rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
4011
- ENTITY_NAME(lockers[0].id.name));
4081
+ ENTITY_NAME(locker->id.name));
40124082
40134083 ret = ceph_monc_blocklist_add(&client->monc,
4014
- &lockers[0].info.addr);
4084
+ &locker->info.addr);
40154085 if (ret) {
4016
- rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
4017
- ENTITY_NAME(lockers[0].id.name), ret);
4086
+ rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
4087
+ ENTITY_NAME(locker->id.name), ret);
40184088 goto out;
40194089 }
40204090
40214091 ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
40224092 &rbd_dev->header_oloc, RBD_LOCK_NAME,
4023
- lockers[0].id.cookie,
4024
- &lockers[0].id.name);
4025
- if (ret && ret != -ENOENT)
4093
+ locker->id.cookie, &locker->id.name);
4094
+ if (ret && ret != -ENOENT) {
4095
+ rbd_warn(rbd_dev, "failed to break header lock: %d",
4096
+ ret);
40264097 goto out;
4098
+ }
40274099
40284100 again:
4029
- ceph_free_lockers(lockers, num_lockers);
4101
+ free_locker(refreshed_locker);
4102
+ free_locker(locker);
40304103 }
40314104
40324105 out:
4033
- ceph_free_lockers(lockers, num_lockers);
4106
+ free_locker(refreshed_locker);
4107
+ free_locker(locker);
40344108 return ret;
40354109 }
40364110
40374111 static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
40384112 {
40394113 int ret;
4114
+
4115
+ ret = rbd_dev_refresh(rbd_dev);
4116
+ if (ret)
4117
+ return ret;
40404118
40414119 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
40424120 ret = rbd_object_map_open(rbd_dev);
....@@ -4076,11 +4154,8 @@
40764154
40774155 ret = rbd_try_lock(rbd_dev);
40784156 if (ret < 0) {
4079
- rbd_warn(rbd_dev, "failed to lock header: %d", ret);
4080
- if (ret == -EBLOCKLISTED)
4081
- goto out;
4082
-
4083
- ret = 1; /* request lock anyway */
4157
+ rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
4158
+ goto out;
40844159 }
40854160 if (ret > 0) {
40864161 up_write(&rbd_dev->lock_rwsem);
....@@ -4844,7 +4919,9 @@
48444919 * return, the rbd_dev->header field will contain up-to-date
48454920 * information about the image.
48464921 */
4847
-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
4922
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
4923
+ struct rbd_image_header *header,
4924
+ bool first_time)
48484925 {
48494926 struct rbd_image_header_ondisk *ondisk = NULL;
48504927 u32 snap_count = 0;
....@@ -4892,7 +4969,7 @@
48924969 snap_count = le32_to_cpu(ondisk->snap_count);
48934970 } while (snap_count != want_count);
48944971
4895
- ret = rbd_header_from_disk(rbd_dev, ondisk);
4972
+ ret = rbd_header_from_disk(header, ondisk, first_time);
48964973 out:
48974974 kfree(ondisk);
48984975
....@@ -4915,39 +4992,6 @@
49154992 set_capacity(rbd_dev->disk, size);
49164993 revalidate_disk_size(rbd_dev->disk, true);
49174994 }
4918
-}
4919
-
4920
-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
4921
-{
4922
- u64 mapping_size;
4923
- int ret;
4924
-
4925
- down_write(&rbd_dev->header_rwsem);
4926
- mapping_size = rbd_dev->mapping.size;
4927
-
4928
- ret = rbd_dev_header_info(rbd_dev);
4929
- if (ret)
4930
- goto out;
4931
-
4932
- /*
4933
- * If there is a parent, see if it has disappeared due to the
4934
- * mapped image getting flattened.
4935
- */
4936
- if (rbd_dev->parent) {
4937
- ret = rbd_dev_v2_parent_info(rbd_dev);
4938
- if (ret)
4939
- goto out;
4940
- }
4941
-
4942
- rbd_assert(!rbd_is_snap(rbd_dev));
4943
- rbd_dev->mapping.size = rbd_dev->header.image_size;
4944
-
4945
-out:
4946
- up_write(&rbd_dev->header_rwsem);
4947
- if (!ret && mapping_size != rbd_dev->mapping.size)
4948
- rbd_dev_update_size(rbd_dev);
4949
-
4950
- return ret;
49514995 }
49524996
49534997 static const struct blk_mq_ops rbd_mq_ops = {
....@@ -5369,8 +5413,7 @@
53695413 module_put(THIS_MODULE);
53705414 }
53715415
5372
-static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
5373
- struct rbd_spec *spec)
5416
+static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
53745417 {
53755418 struct rbd_device *rbd_dev;
53765419
....@@ -5415,9 +5458,6 @@
54155458 rbd_dev->dev.parent = &rbd_root_dev;
54165459 device_initialize(&rbd_dev->dev);
54175460
5418
- rbd_dev->rbd_client = rbdc;
5419
- rbd_dev->spec = spec;
5420
-
54215461 return rbd_dev;
54225462 }
54235463
....@@ -5430,11 +5470,9 @@
54305470 {
54315471 struct rbd_device *rbd_dev;
54325472
5433
- rbd_dev = __rbd_dev_create(rbdc, spec);
5473
+ rbd_dev = __rbd_dev_create(spec);
54345474 if (!rbd_dev)
54355475 return NULL;
5436
-
5437
- rbd_dev->opts = opts;
54385476
54395477 /* get an id and fill in device name */
54405478 rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
....@@ -5451,6 +5489,10 @@
54515489
54525490 /* we have a ref from do_rbd_add() */
54535491 __module_get(THIS_MODULE);
5492
+
5493
+ rbd_dev->rbd_client = rbdc;
5494
+ rbd_dev->spec = spec;
5495
+ rbd_dev->opts = opts;
54545496
54555497 dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
54565498 return rbd_dev;
....@@ -5506,17 +5548,12 @@
55065548 return 0;
55075549 }
55085550
5509
-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
5510
-{
5511
- return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
5512
- &rbd_dev->header.obj_order,
5513
- &rbd_dev->header.image_size);
5514
-}
5515
-
5516
-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
5551
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
5552
+ char **pobject_prefix)
55175553 {
55185554 size_t size;
55195555 void *reply_buf;
5556
+ char *object_prefix;
55205557 int ret;
55215558 void *p;
55225559
....@@ -5534,16 +5571,16 @@
55345571 goto out;
55355572
55365573 p = reply_buf;
5537
- rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
5538
- p + ret, NULL, GFP_NOIO);
5574
+ object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
5575
+ GFP_NOIO);
5576
+ if (IS_ERR(object_prefix)) {
5577
+ ret = PTR_ERR(object_prefix);
5578
+ goto out;
5579
+ }
55395580 ret = 0;
55405581
5541
- if (IS_ERR(rbd_dev->header.object_prefix)) {
5542
- ret = PTR_ERR(rbd_dev->header.object_prefix);
5543
- rbd_dev->header.object_prefix = NULL;
5544
- } else {
5545
- dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
5546
- }
5582
+ *pobject_prefix = object_prefix;
5583
+ dout(" object_prefix = %s\n", object_prefix);
55475584 out:
55485585 kfree(reply_buf);
55495586
....@@ -5594,13 +5631,6 @@
55945631 return 0;
55955632 }
55965633
5597
-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
5598
-{
5599
- return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
5600
- rbd_is_ro(rbd_dev),
5601
- &rbd_dev->header.features);
5602
-}
5603
-
56045634 /*
56055635 * These are generic image flags, but since they are used only for
56065636 * object map, store them in rbd_dev->object_map_flags.
....@@ -5636,6 +5666,14 @@
56365666 bool has_overlap;
56375667 u64 overlap;
56385668 };
5669
+
5670
+static void rbd_parent_info_cleanup(struct parent_image_info *pii)
5671
+{
5672
+ kfree(pii->pool_ns);
5673
+ kfree(pii->image_id);
5674
+
5675
+ memset(pii, 0, sizeof(*pii));
5676
+}
56395677
56405678 /*
56415679 * The caller is responsible for @pii.
....@@ -5706,6 +5744,9 @@
57065744 if (pii->has_overlap)
57075745 ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
57085746
5747
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
5748
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
5749
+ pii->has_overlap, pii->overlap);
57095750 return 0;
57105751
57115752 e_inval:
....@@ -5744,14 +5785,17 @@
57445785 pii->has_overlap = true;
57455786 ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
57465787
5788
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
5789
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
5790
+ pii->has_overlap, pii->overlap);
57475791 return 0;
57485792
57495793 e_inval:
57505794 return -EINVAL;
57515795 }
57525796
5753
-static int get_parent_info(struct rbd_device *rbd_dev,
5754
- struct parent_image_info *pii)
5797
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
5798
+ struct parent_image_info *pii)
57555799 {
57565800 struct page *req_page, *reply_page;
57575801 void *p;
....@@ -5779,7 +5823,7 @@
57795823 return ret;
57805824 }
57815825
5782
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
5826
+static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
57835827 {
57845828 struct rbd_spec *parent_spec;
57855829 struct parent_image_info pii = { 0 };
....@@ -5789,37 +5833,12 @@
57895833 if (!parent_spec)
57905834 return -ENOMEM;
57915835
5792
- ret = get_parent_info(rbd_dev, &pii);
5836
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
57935837 if (ret)
57945838 goto out_err;
57955839
5796
- dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
5797
- __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
5798
- pii.has_overlap, pii.overlap);
5799
-
5800
- if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
5801
- /*
5802
- * Either the parent never existed, or we have
5803
- * record of it but the image got flattened so it no
5804
- * longer has a parent. When the parent of a
5805
- * layered image disappears we immediately set the
5806
- * overlap to 0. The effect of this is that all new
5807
- * requests will be treated as if the image had no
5808
- * parent.
5809
- *
5810
- * If !pii.has_overlap, the parent image spec is not
5811
- * applicable. It's there to avoid duplication in each
5812
- * snapshot record.
5813
- */
5814
- if (rbd_dev->parent_overlap) {
5815
- rbd_dev->parent_overlap = 0;
5816
- rbd_dev_parent_put(rbd_dev);
5817
- pr_info("%s: clone image has been flattened\n",
5818
- rbd_dev->disk->disk_name);
5819
- }
5820
-
5840
+ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
58215841 goto out; /* No parent? No problem. */
5822
- }
58235842
58245843 /* The ceph file layout needs to fit pool id in 32 bits */
58255844
....@@ -5831,58 +5850,46 @@
58315850 }
58325851
58335852 /*
5834
- * The parent won't change (except when the clone is
5835
- * flattened, already handled that). So we only need to
5836
- * record the parent spec we have not already done so.
5853
+ * The parent won't change except when the clone is flattened,
5854
+ * so we only need to record the parent image spec once.
58375855 */
5838
- if (!rbd_dev->parent_spec) {
5839
- parent_spec->pool_id = pii.pool_id;
5840
- if (pii.pool_ns && *pii.pool_ns) {
5841
- parent_spec->pool_ns = pii.pool_ns;
5842
- pii.pool_ns = NULL;
5843
- }
5844
- parent_spec->image_id = pii.image_id;
5845
- pii.image_id = NULL;
5846
- parent_spec->snap_id = pii.snap_id;
5847
-
5848
- rbd_dev->parent_spec = parent_spec;
5849
- parent_spec = NULL; /* rbd_dev now owns this */
5856
+ parent_spec->pool_id = pii.pool_id;
5857
+ if (pii.pool_ns && *pii.pool_ns) {
5858
+ parent_spec->pool_ns = pii.pool_ns;
5859
+ pii.pool_ns = NULL;
58505860 }
5861
+ parent_spec->image_id = pii.image_id;
5862
+ pii.image_id = NULL;
5863
+ parent_spec->snap_id = pii.snap_id;
5864
+
5865
+ rbd_assert(!rbd_dev->parent_spec);
5866
+ rbd_dev->parent_spec = parent_spec;
5867
+ parent_spec = NULL; /* rbd_dev now owns this */
58515868
58525869 /*
5853
- * We always update the parent overlap. If it's zero we issue
5854
- * a warning, as we will proceed as if there was no parent.
5870
+ * Record the parent overlap. If it's zero, issue a warning as
5871
+ * we will proceed as if there is no parent.
58555872 */
5856
- if (!pii.overlap) {
5857
- if (parent_spec) {
5858
- /* refresh, careful to warn just once */
5859
- if (rbd_dev->parent_overlap)
5860
- rbd_warn(rbd_dev,
5861
- "clone now standalone (overlap became 0)");
5862
- } else {
5863
- /* initial probe */
5864
- rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
5865
- }
5866
- }
5873
+ if (!pii.overlap)
5874
+ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
58675875 rbd_dev->parent_overlap = pii.overlap;
58685876
58695877 out:
58705878 ret = 0;
58715879 out_err:
5872
- kfree(pii.pool_ns);
5873
- kfree(pii.image_id);
5880
+ rbd_parent_info_cleanup(&pii);
58745881 rbd_spec_put(parent_spec);
58755882 return ret;
58765883 }
58775884
5878
-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
5885
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
5886
+ u64 *stripe_unit, u64 *stripe_count)
58795887 {
58805888 struct {
58815889 __le64 stripe_unit;
58825890 __le64 stripe_count;
58835891 } __attribute__ ((packed)) striping_info_buf = { 0 };
58845892 size_t size = sizeof (striping_info_buf);
5885
- void *p;
58865893 int ret;
58875894
58885895 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
....@@ -5894,27 +5901,33 @@
58945901 if (ret < size)
58955902 return -ERANGE;
58965903
5897
- p = &striping_info_buf;
5898
- rbd_dev->header.stripe_unit = ceph_decode_64(&p);
5899
- rbd_dev->header.stripe_count = ceph_decode_64(&p);
5904
+ *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
5905
+ *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
5906
+ dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
5907
+ *stripe_count);
5908
+
59005909 return 0;
59015910 }
59025911
5903
-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
5912
+static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
59045913 {
5905
- __le64 data_pool_id;
5914
+ __le64 data_pool_buf;
59065915 int ret;
59075916
59085917 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
59095918 &rbd_dev->header_oloc, "get_data_pool",
5910
- NULL, 0, &data_pool_id, sizeof(data_pool_id));
5919
+ NULL, 0, &data_pool_buf,
5920
+ sizeof(data_pool_buf));
5921
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
59115922 if (ret < 0)
59125923 return ret;
5913
- if (ret < sizeof(data_pool_id))
5924
+ if (ret < sizeof(data_pool_buf))
59145925 return -EBADMSG;
59155926
5916
- rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
5917
- WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
5927
+ *data_pool_id = le64_to_cpu(data_pool_buf);
5928
+ dout(" data_pool_id = %lld\n", *data_pool_id);
5929
+ WARN_ON(*data_pool_id == CEPH_NOPOOL);
5930
+
59185931 return 0;
59195932 }
59205933
....@@ -6106,7 +6119,8 @@
61066119 return ret;
61076120 }
61086121
6109
-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
6122
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
6123
+ struct ceph_snap_context **psnapc)
61106124 {
61116125 size_t size;
61126126 int ret;
....@@ -6167,9 +6181,7 @@
61676181 for (i = 0; i < snap_count; i++)
61686182 snapc->snaps[i] = ceph_decode_64(&p);
61696183
6170
- ceph_put_snap_context(rbd_dev->header.snapc);
6171
- rbd_dev->header.snapc = snapc;
6172
-
6184
+ *psnapc = snapc;
61736185 dout(" snap context seq = %llu, snap_count = %u\n",
61746186 (unsigned long long)seq, (unsigned int)snap_count);
61756187 out:
....@@ -6218,38 +6230,42 @@
62186230 return snap_name;
62196231 }
62206232
6221
-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
6233
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
6234
+ struct rbd_image_header *header,
6235
+ bool first_time)
62226236 {
6223
- bool first_time = rbd_dev->header.object_prefix == NULL;
62246237 int ret;
62256238
6226
- ret = rbd_dev_v2_image_size(rbd_dev);
6239
+ ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
6240
+ first_time ? &header->obj_order : NULL,
6241
+ &header->image_size);
62276242 if (ret)
62286243 return ret;
62296244
62306245 if (first_time) {
6231
- ret = rbd_dev_v2_header_onetime(rbd_dev);
6246
+ ret = rbd_dev_v2_header_onetime(rbd_dev, header);
62326247 if (ret)
62336248 return ret;
62346249 }
62356250
6236
- ret = rbd_dev_v2_snap_context(rbd_dev);
6237
- if (ret && first_time) {
6238
- kfree(rbd_dev->header.object_prefix);
6239
- rbd_dev->header.object_prefix = NULL;
6240
- }
6251
+ ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
6252
+ if (ret)
6253
+ return ret;
62416254
6242
- return ret;
6255
+ return 0;
62436256 }
62446257
6245
-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
6258
+static int rbd_dev_header_info(struct rbd_device *rbd_dev,
6259
+ struct rbd_image_header *header,
6260
+ bool first_time)
62466261 {
62476262 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
6263
+ rbd_assert(!header->object_prefix && !header->snapc);
62486264
62496265 if (rbd_dev->image_format == 1)
6250
- return rbd_dev_v1_header_info(rbd_dev);
6266
+ return rbd_dev_v1_header_info(rbd_dev, header, first_time);
62516267
6252
- return rbd_dev_v2_header_info(rbd_dev);
6268
+ return rbd_dev_v2_header_info(rbd_dev, header, first_time);
62536269 }
62546270
62556271 /*
....@@ -6632,12 +6648,11 @@
66326648 cancel_delayed_work_sync(&rbd_dev->lock_dwork);
66336649 if (!ret)
66346650 ret = -ETIMEDOUT;
6635
- }
66366651
6637
- if (ret) {
6638
- rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
6639
- return ret;
6652
+ rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
66406653 }
6654
+ if (ret)
6655
+ return ret;
66416656
66426657 /*
66436658 * The lock may have been released by now, unless automatic lock
....@@ -6737,60 +6752,49 @@
67376752 */
67386753 static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
67396754 {
6740
- struct rbd_image_header *header;
6741
-
67426755 rbd_dev_parent_put(rbd_dev);
67436756 rbd_object_map_free(rbd_dev);
67446757 rbd_dev_mapping_clear(rbd_dev);
67456758
67466759 /* Free dynamic fields from the header, then zero it out */
67476760
6748
- header = &rbd_dev->header;
6749
- ceph_put_snap_context(header->snapc);
6750
- kfree(header->snap_sizes);
6751
- kfree(header->snap_names);
6752
- kfree(header->object_prefix);
6753
- memset(header, 0, sizeof (*header));
6761
+ rbd_image_header_cleanup(&rbd_dev->header);
67546762 }
67556763
6756
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
6764
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
6765
+ struct rbd_image_header *header)
67576766 {
67586767 int ret;
67596768
6760
- ret = rbd_dev_v2_object_prefix(rbd_dev);
6769
+ ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
67616770 if (ret)
6762
- goto out_err;
6771
+ return ret;
67636772
67646773 /*
67656774 * Get the and check features for the image. Currently the
67666775 * features are assumed to never change.
67676776 */
6768
- ret = rbd_dev_v2_features(rbd_dev);
6777
+ ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
6778
+ rbd_is_ro(rbd_dev), &header->features);
67696779 if (ret)
6770
- goto out_err;
6780
+ return ret;
67716781
67726782 /* If the image supports fancy striping, get its parameters */
67736783
6774
- if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
6775
- ret = rbd_dev_v2_striping_info(rbd_dev);
6776
- if (ret < 0)
6777
- goto out_err;
6778
- }
6779
-
6780
- if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
6781
- ret = rbd_dev_v2_data_pool(rbd_dev);
6784
+ if (header->features & RBD_FEATURE_STRIPINGV2) {
6785
+ ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
6786
+ &header->stripe_count);
67826787 if (ret)
6783
- goto out_err;
6788
+ return ret;
67846789 }
67856790
6786
- rbd_init_layout(rbd_dev);
6787
- return 0;
6791
+ if (header->features & RBD_FEATURE_DATA_POOL) {
6792
+ ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
6793
+ if (ret)
6794
+ return ret;
6795
+ }
67886796
6789
-out_err:
6790
- rbd_dev->header.features = 0;
6791
- kfree(rbd_dev->header.object_prefix);
6792
- rbd_dev->header.object_prefix = NULL;
6793
- return ret;
6797
+ return 0;
67946798 }
67956799
67966800 /*
....@@ -6812,7 +6816,7 @@
68126816 goto out_err;
68136817 }
68146818
6815
- parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
6819
+ parent = __rbd_dev_create(rbd_dev->parent_spec);
68166820 if (!parent) {
68176821 ret = -ENOMEM;
68186822 goto out_err;
....@@ -6822,8 +6826,8 @@
68226826 * Images related by parent/child relationships always share
68236827 * rbd_client and spec/parent_spec, so bump their refcounts.
68246828 */
6825
- __rbd_get_client(rbd_dev->rbd_client);
6826
- rbd_spec_get(rbd_dev->parent_spec);
6829
+ parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
6830
+ parent->spec = rbd_spec_get(rbd_dev->parent_spec);
68276831
68286832 __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
68296833
....@@ -6985,12 +6989,14 @@
69856989 if (!depth)
69866990 down_write(&rbd_dev->header_rwsem);
69876991
6988
- ret = rbd_dev_header_info(rbd_dev);
6992
+ ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
69896993 if (ret) {
69906994 if (ret == -ENOENT && !need_watch)
69916995 rbd_print_dne(rbd_dev, false);
69926996 goto err_out_probe;
69936997 }
6998
+
6999
+ rbd_init_layout(rbd_dev);
69947000
69957001 /*
69967002 * If this image is the one being mapped, we have pool name and
....@@ -7020,7 +7026,7 @@
70207026 }
70217027
70227028 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
7023
- ret = rbd_dev_v2_parent_info(rbd_dev);
7029
+ ret = rbd_dev_setup_parent(rbd_dev);
70247030 if (ret)
70257031 goto err_out_probe;
70267032 }
....@@ -7046,6 +7052,107 @@
70467052 return ret;
70477053 }
70487054
7055
+static void rbd_dev_update_header(struct rbd_device *rbd_dev,
7056
+ struct rbd_image_header *header)
7057
+{
7058
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
7059
+ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
7060
+
7061
+ if (rbd_dev->header.image_size != header->image_size) {
7062
+ rbd_dev->header.image_size = header->image_size;
7063
+
7064
+ if (!rbd_is_snap(rbd_dev)) {
7065
+ rbd_dev->mapping.size = header->image_size;
7066
+ rbd_dev_update_size(rbd_dev);
7067
+ }
7068
+ }
7069
+
7070
+ ceph_put_snap_context(rbd_dev->header.snapc);
7071
+ rbd_dev->header.snapc = header->snapc;
7072
+ header->snapc = NULL;
7073
+
7074
+ if (rbd_dev->image_format == 1) {
7075
+ kfree(rbd_dev->header.snap_names);
7076
+ rbd_dev->header.snap_names = header->snap_names;
7077
+ header->snap_names = NULL;
7078
+
7079
+ kfree(rbd_dev->header.snap_sizes);
7080
+ rbd_dev->header.snap_sizes = header->snap_sizes;
7081
+ header->snap_sizes = NULL;
7082
+ }
7083
+}
7084
+
7085
+static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
7086
+ struct parent_image_info *pii)
7087
+{
7088
+ if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
7089
+ /*
7090
+ * Either the parent never existed, or we have
7091
+ * record of it but the image got flattened so it no
7092
+ * longer has a parent. When the parent of a
7093
+ * layered image disappears we immediately set the
7094
+ * overlap to 0. The effect of this is that all new
7095
+ * requests will be treated as if the image had no
7096
+ * parent.
7097
+ *
7098
+ * If !pii.has_overlap, the parent image spec is not
7099
+ * applicable. It's there to avoid duplication in each
7100
+ * snapshot record.
7101
+ */
7102
+ if (rbd_dev->parent_overlap) {
7103
+ rbd_dev->parent_overlap = 0;
7104
+ rbd_dev_parent_put(rbd_dev);
7105
+ pr_info("%s: clone has been flattened\n",
7106
+ rbd_dev->disk->disk_name);
7107
+ }
7108
+ } else {
7109
+ rbd_assert(rbd_dev->parent_spec);
7110
+
7111
+ /*
7112
+ * Update the parent overlap. If it became zero, issue
7113
+ * a warning as we will proceed as if there is no parent.
7114
+ */
7115
+ if (!pii->overlap && rbd_dev->parent_overlap)
7116
+ rbd_warn(rbd_dev,
7117
+ "clone has become standalone (overlap 0)");
7118
+ rbd_dev->parent_overlap = pii->overlap;
7119
+ }
7120
+}
7121
+
7122
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
7123
+{
7124
+ struct rbd_image_header header = { 0 };
7125
+ struct parent_image_info pii = { 0 };
7126
+ int ret;
7127
+
7128
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
7129
+
7130
+ ret = rbd_dev_header_info(rbd_dev, &header, false);
7131
+ if (ret)
7132
+ goto out;
7133
+
7134
+ /*
7135
+ * If there is a parent, see if it has disappeared due to the
7136
+ * mapped image getting flattened.
7137
+ */
7138
+ if (rbd_dev->parent) {
7139
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
7140
+ if (ret)
7141
+ goto out;
7142
+ }
7143
+
7144
+ down_write(&rbd_dev->header_rwsem);
7145
+ rbd_dev_update_header(rbd_dev, &header);
7146
+ if (rbd_dev->parent)
7147
+ rbd_dev_update_parent(rbd_dev, &pii);
7148
+ up_write(&rbd_dev->header_rwsem);
7149
+
7150
+out:
7151
+ rbd_parent_info_cleanup(&pii);
7152
+ rbd_image_header_cleanup(&header);
7153
+ return ret;
7154
+}
7155
+
70497156 static ssize_t do_rbd_add(struct bus_type *bus,
70507157 const char *buf,
70517158 size_t count)