Skip to content

Commit 8adc8b3

Browse files
committed
libceph: fix pg_temp mapping update
The incremental map updates have a record for each pg_temp mapping that is to be added/updated (len > 0) or removed (len == 0). The old code was written as if the updates were a complete enumeration; that was just wrong. Update the code to remove 0-length entries and drop the rbtree traversal. This avoids misdirected (and hung) requests that manifest as server errors like: [WRN] client4104 10.0.1.219:0/275025290 misdirected client4104.1:129 0.1 to osd0 not [1,0] in e11/11 Signed-off-by: Sage Weil <[email protected]>
1 parent 782e182 commit 8adc8b3

File tree

1 file changed

+24
-26
lines changed

1 file changed

+24
-26
lines changed

net/ceph/osdmap.c

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
339339
struct ceph_pg_mapping *pg = NULL;
340340
int c;
341341

342+
dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
342343
while (*p) {
343344
parent = *p;
344345
pg = rb_entry(parent, struct ceph_pg_mapping, node);
@@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
366367
while (n) {
367368
pg = rb_entry(n, struct ceph_pg_mapping, node);
368369
c = pgid_cmp(pgid, pg->pgid);
369-
if (c < 0)
370+
if (c < 0) {
370371
n = n->rb_left;
371-
else if (c > 0)
372+
} else if (c > 0) {
372373
n = n->rb_right;
373-
else
374+
} else {
375+
dout("__lookup_pg_mapping %llx got %p\n",
376+
*(u64 *)&pgid, pg);
374377
return pg;
378+
}
375379
}
376380
return NULL;
377381
}
378382

383+
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
384+
{
385+
struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
386+
387+
if (pg) {
388+
dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
389+
rb_erase(&pg->node, root);
390+
kfree(pg);
391+
return 0;
392+
}
393+
dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
394+
return -ENOENT;
395+
}
396+
379397
/*
380398
* rbtree of pg pool info
381399
*/
@@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
711729
void *start = *p;
712730
int err = -EINVAL;
713731
u16 version;
714-
struct rb_node *rbp;
715732

716733
ceph_decode_16_safe(p, end, version, bad);
717734
if (version > CEPH_OSDMAP_INC_VERSION) {
@@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
861878
}
862879

863880
/* new_pg_temp */
864-
rbp = rb_first(&map->pg_temp);
865881
ceph_decode_32_safe(p, end, len, bad);
866882
while (len--) {
867883
struct ceph_pg_mapping *pg;
@@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
872888
ceph_decode_copy(p, &pgid, sizeof(pgid));
873889
pglen = ceph_decode_32(p);
874890

875-
/* remove any? */
876-
while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
877-
node)->pgid, pgid) <= 0) {
878-
struct ceph_pg_mapping *cur =
879-
rb_entry(rbp, struct ceph_pg_mapping, node);
880-
881-
rbp = rb_next(rbp);
882-
dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
883-
rb_erase(&cur->node, &map->pg_temp);
884-
kfree(cur);
885-
}
886-
887891
if (pglen) {
888892
/* insert */
889893
ceph_decode_need(p, end, pglen*sizeof(u32), bad);
@@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
903907
}
904908
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
905909
pglen);
910+
} else {
911+
/* remove */
912+
__remove_pg_mapping(&map->pg_temp, pgid);
906913
}
907914
}
908-
while (rbp) {
909-
struct ceph_pg_mapping *cur =
910-
rb_entry(rbp, struct ceph_pg_mapping, node);
911-
912-
rbp = rb_next(rbp);
913-
dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
914-
rb_erase(&cur->node, &map->pg_temp);
915-
kfree(cur);
916-
}
917915

918916
/* ignore the rest */
919917
*p = end;

0 commit comments

Comments
 (0)