Skip to content

Commit afd7562

Browse files
GuoqingJiangshligit
authored andcommitted
md-cluster/raid10: resize all the bitmaps before start reshape
To support adding a disk in grow mode, we need to resize the bitmaps of all nodes before reshaping, so that every node has the same view of the bitmap of the clustered raid. After the master node has resized its bitmap, it broadcasts a message to the other (slave) nodes and then checks whether all bitmaps have the same size by comparing their page counts. Reshaping can continue only after all nodes have updated their bitmaps to the same size (verified via the page counts); otherwise the bitmap size is reverted to its previous value. The resize_bitmaps interface and the BITMAP_RESIZE message are introduced in md-cluster.c for this purpose. Reviewed-by: NeilBrown <[email protected]> Signed-off-by: Guoqing Jiang <[email protected]> Signed-off-by: Shaohua Li <[email protected]>
1 parent 9e753ba commit afd7562

File tree

3 files changed

+120
-3
lines changed

3 files changed

+120
-3
lines changed

drivers/md/md-cluster.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ enum msg_type {
105105
RE_ADD,
106106
BITMAP_NEEDS_SYNC,
107107
CHANGE_CAPACITY,
108+
BITMAP_RESIZE,
108109
};
109110

110111
struct cluster_msg {
@@ -612,6 +613,11 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
612613
case BITMAP_NEEDS_SYNC:
613614
__recover_slot(mddev, le32_to_cpu(msg->slot));
614615
break;
616+
case BITMAP_RESIZE:
617+
if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
618+
ret = md_bitmap_resize(mddev->bitmap,
619+
le64_to_cpu(msg->high), 0, 0);
620+
break;
615621
default:
616622
ret = -1;
617623
pr_warn("%s:%d Received unknown message from %d\n",
@@ -1102,6 +1108,80 @@ static void metadata_update_cancel(struct mddev *mddev)
11021108
unlock_comm(cinfo);
11031109
}
11041110

1111+
/*
 * Broadcast a BITMAP_RESIZE message carrying @size in the message's
 * "high" field. Returns whatever sendmsg() returns; a non-zero result
 * is logged before being handed back to the caller.
 */
static int update_bitmap_size(struct mddev *mddev, sector_t size)
{
	struct cluster_msg resize_msg = {
		.type = cpu_to_le32(BITMAP_RESIZE),
		.high = cpu_to_le64(size),
	};
	int err = sendmsg(mddev->cluster_info, &resize_msg, 0);

	if (!err)
		return 0;

	pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n",
	       __func__, __LINE__, err);
	return err;
}
1125+
1126+
/*
 * resize_bitmaps() - make every node's bitmap match the local page count.
 * @mddev:   the clustered array being reshaped
 * @newsize: size broadcast to the other nodes via BITMAP_RESIZE
 * @oldsize: size re-broadcast if any slot cannot be brought in line
 *
 * Sends BITMAP_RESIZE with @newsize, then walks every slot other than
 * our own and compares its bitmap page count with the local one.
 *
 * Returns 0 when all slots agree, the sendmsg error if the initial
 * broadcast fails, or -1 (after re-broadcasting @oldsize) otherwise.
 */
static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
{
	struct bitmap_counts *counts;
	char str[64];
	struct dlm_lock_resource *bm_lockres;
	struct bitmap *bitmap;
	unsigned long my_pages = mddev->bitmap->counts.pages;
	int i, rv;

	/*
	 * We need to ensure all the nodes can grow to a larger
	 * bitmap size before make the reshaping.
	 */
	rv = update_bitmap_size(mddev, newsize);
	if (rv)
		return rv;

	for (i = 0; i < mddev->bitmap_info.nodes; i++) {
		if (i == md_cluster_ops->slot_number(mddev))
			continue;

		bitmap = get_bitmap_from_slot(mddev, i);
		if (IS_ERR(bitmap)) {
			pr_err("can't get bitmap from slot %d\n", i);
			/*
			 * 'bitmap' holds an error code here, not an object:
			 * it must not reach md_bitmap_free().
			 */
			goto out_revert;
		}
		counts = &bitmap->counts;

		/*
		 * If we can hold the bitmap lock of one node then
		 * the slot is not occupied, update the pages.
		 */
		snprintf(str, sizeof(str), "bitmap%04d", i);
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres) {
			pr_err("Cannot initialize %s lock\n", str);
			goto out_free;
		}
		/* NOQUEUE: fail immediately rather than wait for the lock. */
		bm_lockres->flags |= DLM_LKF_NOQUEUE;
		rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (!rv)
			counts->pages = my_pages;
		lockres_free(bm_lockres);

		/*
		 * Let's revert the bitmap size if one node
		 * can't resize bitmap
		 */
		if (my_pages != counts->pages)
			goto out_free;

		/*
		 * Done with this slot's bitmap. The error path already
		 * releases it with md_bitmap_free(), so the success path
		 * must do the same or one bitmap leaks per remote slot.
		 */
		md_bitmap_free(bitmap);
	}

	return 0;

out_free:
	md_bitmap_free(bitmap);
out_revert:
	update_bitmap_size(mddev, oldsize);
	return -1;
}
1184+
11051185
/*
11061186
* return 0 if all the bitmaps have the same sync_size
11071187
*/
@@ -1492,6 +1572,7 @@ static struct md_cluster_operations cluster_ops = {
14921572
.remove_disk = remove_disk,
14931573
.load_bitmaps = load_bitmaps,
14941574
.gather_bitmaps = gather_bitmaps,
1575+
.resize_bitmaps = resize_bitmaps,
14951576
.lock_all_bitmaps = lock_all_bitmaps,
14961577
.unlock_all_bitmaps = unlock_all_bitmaps,
14971578
.update_size = update_size,

drivers/md/md-cluster.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct md_cluster_operations {
2626
int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
2727
void (*load_bitmaps)(struct mddev *mddev, int total_slots);
2828
int (*gather_bitmaps)(struct md_rdev *rdev);
29+
int (*resize_bitmaps)(struct mddev *mddev, sector_t newsize, sector_t oldsize);
2930
int (*lock_all_bitmaps)(struct mddev *mddev);
3031
void (*unlock_all_bitmaps)(struct mddev *mddev);
3132
void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);

drivers/md/raid10.c

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <linux/seq_file.h>
2626
#include <linux/ratelimit.h>
2727
#include <linux/kthread.h>
28+
#include <linux/raid/md_p.h>
2829
#include <trace/events/block.h>
2930
#include "md.h"
3031
#include "raid10.h"
@@ -4288,12 +4289,46 @@ static int raid10_start_reshape(struct mddev *mddev)
42884289
spin_unlock_irq(&conf->device_lock);
42894290

42904291
if (mddev->delta_disks && mddev->bitmap) {
4291-
ret = md_bitmap_resize(mddev->bitmap,
4292-
raid10_size(mddev, 0, conf->geo.raid_disks),
4293-
0, 0);
4292+
struct mdp_superblock_1 *sb = NULL;
4293+
sector_t oldsize, newsize;
4294+
4295+
oldsize = raid10_size(mddev, 0, 0);
4296+
newsize = raid10_size(mddev, 0, conf->geo.raid_disks);
4297+
4298+
if (!mddev_is_clustered(mddev)) {
4299+
ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
4300+
if (ret)
4301+
goto abort;
4302+
else
4303+
goto out;
4304+
}
4305+
4306+
rdev_for_each(rdev, mddev) {
4307+
if (rdev->raid_disk > -1 &&
4308+
!test_bit(Faulty, &rdev->flags))
4309+
sb = page_address(rdev->sb_page);
4310+
}
4311+
4312+
/*
4313+
* some node is already performing reshape, and no need to
4314+
* call md_bitmap_resize again since it should be called when
4315+
* receiving BITMAP_RESIZE msg
4316+
*/
4317+
if ((sb && (le32_to_cpu(sb->feature_map) &
4318+
MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
4319+
goto out;
4320+
4321+
ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
42944322
if (ret)
42954323
goto abort;
4324+
4325+
ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
4326+
if (ret) {
4327+
md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
4328+
goto abort;
4329+
}
42964330
}
4331+
out:
42974332
if (mddev->delta_disks > 0) {
42984333
rdev_for_each(rdev, mddev)
42994334
if (rdev->raid_disk < 0 &&

0 commit comments

Comments
 (0)