Skip to content

Commit e62e26d

Browse files
committed
Merge tag 'ceph-for-6.5-rc4' of https://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "A patch to reduce the potential for erroneous RBD exclusive lock
  blocklisting (fencing) with a couple of prerequisites and a fixup to
  prevent metrics from being sent to the MDS even just once after that
  has been disabled by the user. All marked for stable"

* tag 'ceph-for-6.5-rc4' of https://github.com/ceph/ceph-client:
  rbd: retrieve and check lock owner twice before blocklisting
  rbd: harden get_lock_owner_info() a bit
  rbd: make get_lock_owner_info() return a single locker or NULL
  ceph: never send metrics if disable_send_metrics is set
2 parents 28d79b7 + 5881590 commit e62e26d

File tree

3 files changed

+88
-39
lines changed

3 files changed

+88
-39
lines changed

drivers/block/rbd.c

Lines changed: 86 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3849,51 +3849,82 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
38493849
list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
38503850
}
38513851

3852-
static int get_lock_owner_info(struct rbd_device *rbd_dev,
3853-
struct ceph_locker **lockers, u32 *num_lockers)
3852+
static bool locker_equal(const struct ceph_locker *lhs,
3853+
const struct ceph_locker *rhs)
3854+
{
3855+
return lhs->id.name.type == rhs->id.name.type &&
3856+
lhs->id.name.num == rhs->id.name.num &&
3857+
!strcmp(lhs->id.cookie, rhs->id.cookie) &&
3858+
ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
3859+
}
3860+
3861+
static void free_locker(struct ceph_locker *locker)
3862+
{
3863+
if (locker)
3864+
ceph_free_lockers(locker, 1);
3865+
}
3866+
3867+
static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
38543868
{
38553869
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3870+
struct ceph_locker *lockers;
3871+
u32 num_lockers;
38563872
u8 lock_type;
38573873
char *lock_tag;
3874+
u64 handle;
38583875
int ret;
38593876

3860-
dout("%s rbd_dev %p\n", __func__, rbd_dev);
3861-
38623877
ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
38633878
&rbd_dev->header_oloc, RBD_LOCK_NAME,
3864-
&lock_type, &lock_tag, lockers, num_lockers);
3865-
if (ret)
3866-
return ret;
3879+
&lock_type, &lock_tag, &lockers, &num_lockers);
3880+
if (ret) {
3881+
rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
3882+
return ERR_PTR(ret);
3883+
}
38673884

3868-
if (*num_lockers == 0) {
3885+
if (num_lockers == 0) {
38693886
dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
3887+
lockers = NULL;
38703888
goto out;
38713889
}
38723890

38733891
if (strcmp(lock_tag, RBD_LOCK_TAG)) {
38743892
rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
38753893
lock_tag);
3876-
ret = -EBUSY;
3877-
goto out;
3894+
goto err_busy;
38783895
}
38793896

3880-
if (lock_type == CEPH_CLS_LOCK_SHARED) {
3881-
rbd_warn(rbd_dev, "shared lock type detected");
3882-
ret = -EBUSY;
3883-
goto out;
3897+
if (lock_type != CEPH_CLS_LOCK_EXCLUSIVE) {
3898+
rbd_warn(rbd_dev, "incompatible lock type detected");
3899+
goto err_busy;
38843900
}
38853901

3886-
if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
3887-
strlen(RBD_LOCK_COOKIE_PREFIX))) {
3902+
WARN_ON(num_lockers != 1);
3903+
ret = sscanf(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu",
3904+
&handle);
3905+
if (ret != 1) {
38883906
rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
3889-
(*lockers)[0].id.cookie);
3890-
ret = -EBUSY;
3891-
goto out;
3907+
lockers[0].id.cookie);
3908+
goto err_busy;
38923909
}
3910+
if (ceph_addr_is_blank(&lockers[0].info.addr)) {
3911+
rbd_warn(rbd_dev, "locker has a blank address");
3912+
goto err_busy;
3913+
}
3914+
3915+
dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n",
3916+
__func__, rbd_dev, ENTITY_NAME(lockers[0].id.name),
3917+
&lockers[0].info.addr.in_addr,
3918+
le32_to_cpu(lockers[0].info.addr.nonce), handle);
38933919

38943920
out:
38953921
kfree(lock_tag);
3896-
return ret;
3922+
return lockers;
3923+
3924+
err_busy:
3925+
kfree(lock_tag);
3926+
ceph_free_lockers(lockers, num_lockers);
3927+
return ERR_PTR(-EBUSY);
38973928
}
38983929

38993930
static int find_watcher(struct rbd_device *rbd_dev,
@@ -3947,51 +3978,68 @@ static int find_watcher(struct rbd_device *rbd_dev,
39473978
static int rbd_try_lock(struct rbd_device *rbd_dev)
39483979
{
39493980
struct ceph_client *client = rbd_dev->rbd_client->client;
3950-
struct ceph_locker *lockers;
3951-
u32 num_lockers;
3981+
struct ceph_locker *locker, *refreshed_locker;
39523982
int ret;
39533983

39543984
for (;;) {
3985+
locker = refreshed_locker = NULL;
3986+
39553987
ret = rbd_lock(rbd_dev);
39563988
if (ret != -EBUSY)
3957-
return ret;
3989+
goto out;
39583990

39593991
/* determine if the current lock holder is still alive */
3960-
ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
3961-
if (ret)
3962-
return ret;
3963-
3964-
if (num_lockers == 0)
3992+
locker = get_lock_owner_info(rbd_dev);
3993+
if (IS_ERR(locker)) {
3994+
ret = PTR_ERR(locker);
3995+
locker = NULL;
3996+
goto out;
3997+
}
3998+
if (!locker)
39653999
goto again;
39664000

3967-
ret = find_watcher(rbd_dev, lockers);
4001+
ret = find_watcher(rbd_dev, locker);
39684002
if (ret)
39694003
goto out; /* request lock or error */
39704004

4005+
refreshed_locker = get_lock_owner_info(rbd_dev);
4006+
if (IS_ERR(refreshed_locker)) {
4007+
ret = PTR_ERR(refreshed_locker);
4008+
refreshed_locker = NULL;
4009+
goto out;
4010+
}
4011+
if (!refreshed_locker ||
4012+
!locker_equal(locker, refreshed_locker))
4013+
goto again;
4014+
39714015
rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
3972-
ENTITY_NAME(lockers[0].id.name));
4016+
ENTITY_NAME(locker->id.name));
39734017

39744018
ret = ceph_monc_blocklist_add(&client->monc,
3975-
&lockers[0].info.addr);
4019+
&locker->info.addr);
39764020
if (ret) {
3977-
rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
3978-
ENTITY_NAME(lockers[0].id.name), ret);
4021+
rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
4022+
ENTITY_NAME(locker->id.name), ret);
39794023
goto out;
39804024
}
39814025

39824026
ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
39834027
&rbd_dev->header_oloc, RBD_LOCK_NAME,
3984-
lockers[0].id.cookie,
3985-
&lockers[0].id.name);
3986-
if (ret && ret != -ENOENT)
4028+
locker->id.cookie, &locker->id.name);
4029+
if (ret && ret != -ENOENT) {
4030+
rbd_warn(rbd_dev, "failed to break header lock: %d",
4031+
ret);
39874032
goto out;
4033+
}
39884034

39894035
again:
3990-
ceph_free_lockers(lockers, num_lockers);
4036+
free_locker(refreshed_locker);
4037+
free_locker(locker);
39914038
}
39924039

39934040
out:
3994-
ceph_free_lockers(lockers, num_lockers);
4041+
free_locker(refreshed_locker);
4042+
free_locker(locker);
39954043
return ret;
39964044
}
39974045

fs/ceph/metric.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ static void metric_delayed_work(struct work_struct *work)
216216
struct ceph_mds_client *mdsc =
217217
container_of(m, struct ceph_mds_client, metric);
218218

219-
if (mdsc->stopping)
219+
if (mdsc->stopping || disable_send_metrics)
220220
return;
221221

222222
if (!m->session || !check_session_state(m->session)) {

net/ceph/messenger.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
11231123
return true;
11241124
}
11251125
}
1126+
EXPORT_SYMBOL(ceph_addr_is_blank);
11261127

11271128
int ceph_addr_port(const struct ceph_entity_addr *addr)
11281129
{

0 commit comments

Comments
 (0)